misc-bdauvergne/indexable-pickle/ram_pickle.py

66 lines
1.8 KiB
Python

import numbers
import pickle
import struct
class RamPickleWrite(object):
    """Serialise a sequence as independently pickled batches plus an index.

    File layout, starting at the stream position current when ``pickle()``
    is called:

    1. a ``struct`` ``'L'`` header holding the absolute offset of the index;
    2. one pickle per batch of ``batch_size`` consecutive items;
    3. a pickled list with the absolute offset of every batch.

    NOTE: the header uses the native ``'L'`` format, whose size and byte
    order depend on the platform, so a file is only readable on platforms
    that agree with the one that produced it.
    """

    def __init__(self, sequence, batch_size=100):
        """Remember the data to write.

        :param sequence: object supporting ``sequence[a:b]`` slicing; an
            empty (falsy) slice marks the end of the data.
        :param batch_size: number of items stored per pickled batch.  A
            reader must use the same value to address items correctly.
        :raises ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        self.sequence = sequence
        self.batch_size = batch_size

    def pickle(self, fd):
        """Write the batches and their index to the seekable binary ``fd``.

        On return the stream is positioned just after the pickled index,
        so the caller can keep appending without clobbering the batches.

        :param fd: file-like object opened for binary writing that supports
            ``tell()`` and ``seek()``.
        """
        header_offset = fd.tell()
        # Placeholder: the real index offset is only known once every batch
        # has been written, so it is patched in at the end.
        fd.write(struct.pack('L', 0))

        index = []
        start = 0
        while True:
            batch = self.sequence[start:start + self.batch_size]
            if not batch:
                break
            index.append(fd.tell())
            pickle.dump(batch, fd)
            start += self.batch_size

        index_offset = fd.tell()
        pickle.dump(index, fd)
        end_offset = fd.tell()

        # Patch the header, then return to the end of the data: leaving the
        # stream right after the header would make any later write
        # overwrite the first batch.
        fd.seek(header_offset)
        fd.write(struct.pack('L', index_offset))
        fd.seek(end_offset)
class RamPickleRead(object):
    """Read-only, list-like view over a batched pickle file.

    The expected layout is a ``struct`` ``'L'`` header holding the absolute
    offset of a pickled list of batch offsets, each of which points to a
    pickled batch of up to ``batch_size`` items.  Only the index is loaded
    up front; batches are unpickled on first access and then kept in
    ``self.batches`` (the cache is never evicted).

    Supports ``reader[i]`` (including negative ``i``) and ``reader[a:b:c]``.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in
    the data; only open files that come from a trusted source.
    """

    def __init__(self, fd, batch_size=100):
        """Load the batch index from ``fd``.

        :param fd: seekable binary file-like object positioned on the
            header.
        :param batch_size: number of items per batch; must match the value
            the file was written with.
        :raises ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        self.fd = fd
        self.batch_size = batch_size
        header = fd.read(struct.calcsize('L'))
        index_offset, = struct.unpack('L', header)
        fd.seek(index_offset)
        self.index = pickle.load(fd)
        self.batches = {}
        # Total item count, computed lazily because it requires loading the
        # last batch.
        self._length = None

    def load_batch(self, index):
        """Return the batch holding item ``index``, or ``None`` if no such
        batch exists.

        ``index`` is an absolute, non-negative item position.  Negative
        positions are rejected here: floor division would turn them into a
        negative page number that silently addresses ``self.index`` from
        the end.
        """
        page = index // self.batch_size
        if page < 0 or page >= len(self.index):
            return None
        if page not in self.batches:
            self.fd.seek(self.index[page])
            self.batches[page] = pickle.load(self.fd)
        return self.batches[page]

    def _total_length(self):
        """Return the number of stored items, assuming every batch except
        the last one holds exactly ``batch_size`` items."""
        if self._length is None:
            pages = len(self.index)
            if pages == 0:
                self._length = 0
            else:
                last_start = (pages - 1) * self.batch_size
                self._length = last_start + len(self.load_batch(last_start))
        return self._length

    def _item(self, position):
        """Return the item at the non-negative ``position`` or raise
        ``IndexError``."""
        batch = self.load_batch(position)
        if not batch:
            raise IndexError(position)
        offset = position % self.batch_size
        if offset >= len(batch):
            raise IndexError(position)
        return batch[offset]

    def __getitem__(self, index):
        """Return one item for an integer, or a list of items for a slice.

        :raises IndexError: if an integer index is out of range.
        :raises TypeError: if ``index`` is neither an integer nor a slice.
        """
        if isinstance(index, slice):
            # A step of 0 has always been treated as 1 by this class; keep
            # that leniency instead of raising like ``list`` does.
            normalized = slice(index.start, index.stop, index.step or 1)
            items = []
            for position in range(*normalized.indices(self._total_length())):
                try:
                    items.append(self._item(position))
                except IndexError:
                    # Can only happen if a batch is shorter than expected;
                    # return what was read so far, as before.
                    break
            return items
        if isinstance(index, numbers.Integral):
            position = index
            if position < 0:
                position += self._total_length()
                if position < 0:
                    raise IndexError(index)
            return self._item(position)
        raise TypeError(index)