Development of an internal social media platform with personalised dashboards for students
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fsIndex.py 8.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. ##############################################################################
  2. #
  3. # Copyright (c) 2001, 2002 Zope Foundation and Contributors.
  4. # All Rights Reserved.
  5. #
  6. # This software is subject to the provisions of the Zope Public License,
  7. # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
  8. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
  9. # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  10. # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
  11. # FOR A PARTICULAR PURPOSE.
  12. #
  13. ##############################################################################
  14. """Implement an OID to File-position (long integer) mapping."""
  15. # To save space, we do two things:
  16. #
  17. # 1. We split the keys (OIDS) into 6-byte prefixes and 2-byte suffixes.
  18. # We use the prefixes as keys in a mapping from prefix to mappings
  19. # of suffix to data:
  20. #
  21. # data is {prefix -> {suffix -> data}}
  22. #
  23. # 2. We limit the data size to 48 bits. This should allow databases
  24. # as large as 256 terabytes.
  25. #
  26. # Most of the space is consumed by items in the mappings from 2-byte
  27. # suffix to 6-byte data. This should reduce the overall memory usage to
  28. # 8-16 bytes per OID.
  29. #
  30. # Because
  31. # - the mapping from suffix to data contains at most 65535 entries,
  32. # - this is an in-memory data structure
  33. # - new keys are inserted sequentially,
  34. # we use a BTree bucket instead of a full BTree to store the results.
  35. #
  36. # We use p64 to convert integers to 8-byte strings and lop off the two
  37. # high-order bytes when saving. On loading data, we add the leading
  38. # bytes back before using u64 to convert the data back to (long)
  39. # integers.
  40. import struct
  41. from BTrees.fsBTree import fsBucket
  42. from BTrees.OOBTree import OOBTree
  43. import six
  44. from ZODB._compat import INT_TYPES
  45. from ZODB._compat import Pickler
  46. from ZODB._compat import Unpickler
  47. from ZODB._compat import _protocol
  48. # convert between numbers and six-byte strings
  49. def num2str(n):
  50. return struct.pack(">Q", n)[2:]
  51. def str2num(s):
  52. return struct.unpack(">Q", b"\000\000" + s)[0]
  53. def prefix_plus_one(s):
  54. num = str2num(s)
  55. return num2str(num + 1)
  56. def prefix_minus_one(s):
  57. num = str2num(s)
  58. return num2str(num - 1)
  59. def ensure_bytes(s):
  60. # on Python 3 we might pickle bytes and unpickle unicode strings
  61. return s.encode('ascii') if not isinstance(s, bytes) else s
  62. class fsIndex(object):
  63. def __init__(self, data=None):
  64. self._data = OOBTree()
  65. if data:
  66. self.update(data)
  67. def __getstate__(self):
  68. return dict(
  69. state_version = 1,
  70. _data = [(k, v.toString())
  71. for (k, v) in six.iteritems(self._data)
  72. ]
  73. )
  74. def __setstate__(self, state):
  75. version = state.pop('state_version', 0)
  76. getattr(self, '_setstate_%s' % version)(state)
  77. def _setstate_0(self, state):
  78. self.__dict__.clear()
  79. self.__dict__.update(state)
  80. self._data = OOBTree([
  81. (ensure_bytes(k), v)
  82. for (k, v) in self._data.items()
  83. ])
  84. def _setstate_1(self, state):
  85. self._data = OOBTree([
  86. (ensure_bytes(k), fsBucket().fromString(ensure_bytes(v)))
  87. for (k, v) in state['_data']
  88. ])
  89. def __getitem__(self, key):
  90. assert isinstance(key, bytes)
  91. return str2num(self._data[key[:6]][key[6:]])
  92. def save(self, pos, fname):
  93. with open(fname, 'wb') as f:
  94. pickler = Pickler(f, _protocol)
  95. pickler.fast = True
  96. pickler.dump(pos)
  97. for k, v in six.iteritems(self._data):
  98. pickler.dump((k, v.toString()))
  99. pickler.dump(None)
  100. @classmethod
  101. def load(class_, fname):
  102. with open(fname, 'rb') as f:
  103. unpickler = Unpickler(f)
  104. pos = unpickler.load()
  105. if not isinstance(pos, INT_TYPES):
  106. # NB: this might contain OIDs that got unpickled
  107. # into Unicode strings on Python 3; hope the caller
  108. # will pipe the result to fsIndex().update() to normalize
  109. # the keys
  110. return pos # Old format
  111. index = class_()
  112. data = index._data
  113. while 1:
  114. v = unpickler.load()
  115. if not v:
  116. break
  117. k, v = v
  118. data[ensure_bytes(k)] = fsBucket().fromString(ensure_bytes(v))
  119. return dict(pos=pos, index=index)
  120. def get(self, key, default=None):
  121. assert isinstance(key, bytes)
  122. tree = self._data.get(key[:6], default)
  123. if tree is default:
  124. return default
  125. v = tree.get(key[6:], default)
  126. if v is default:
  127. return default
  128. return str2num(v)
  129. def __setitem__(self, key, value):
  130. assert isinstance(key, bytes)
  131. value = num2str(value)
  132. treekey = key[:6]
  133. tree = self._data.get(treekey)
  134. if tree is None:
  135. tree = fsBucket()
  136. self._data[treekey] = tree
  137. tree[key[6:]] = value
  138. def __delitem__(self, key):
  139. assert isinstance(key, bytes)
  140. treekey = key[:6]
  141. tree = self._data.get(treekey)
  142. if tree is None:
  143. raise KeyError(key)
  144. del tree[key[6:]]
  145. if not tree:
  146. del self._data[treekey]
  147. def __len__(self):
  148. r = 0
  149. for tree in six.itervalues(self._data):
  150. r += len(tree)
  151. return r
  152. def update(self, mapping):
  153. for k, v in mapping.items():
  154. self[ensure_bytes(k)] = v
  155. def has_key(self, key):
  156. v = self.get(key, self)
  157. return v is not self
  158. def __contains__(self, key):
  159. assert isinstance(key, bytes)
  160. tree = self._data.get(key[:6])
  161. if tree is None:
  162. return False
  163. v = tree.get(key[6:], None)
  164. if v is None:
  165. return False
  166. return True
  167. def clear(self):
  168. self._data.clear()
  169. def __iter__(self):
  170. for prefix, tree in six.iteritems(self._data):
  171. for suffix in tree:
  172. yield prefix + suffix
  173. iterkeys = __iter__
  174. def keys(self):
  175. return list(self.iterkeys())
  176. def iteritems(self):
  177. for prefix, tree in six.iteritems(self._data):
  178. for suffix, value in six.iteritems(tree):
  179. yield (prefix + suffix, str2num(value))
  180. def items(self):
  181. return list(self.iteritems())
  182. def itervalues(self):
  183. for tree in six.itervalues(self._data):
  184. for value in six.itervalues(tree):
  185. yield str2num(value)
  186. def values(self):
  187. return list(self.itervalues())
  188. # Comment below applies for the following minKey and maxKey methods
  189. #
  190. # Obscure: what if `tree` is actually empty? We're relying here on
  191. # that this class doesn't implement __delitem__: once a key gets
  192. # into an fsIndex, the only way it can go away is by invoking
  193. # clear(). Therefore nothing in _data.values() is ever empty.
  194. #
  195. # Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are
  196. # very efficient.
  197. def minKey(self, key=None):
  198. if key is None:
  199. smallest_prefix = self._data.minKey()
  200. else:
  201. smallest_prefix = self._data.minKey(key[:6])
  202. tree = self._data[smallest_prefix]
  203. assert tree
  204. if key is None:
  205. smallest_suffix = tree.minKey()
  206. else:
  207. try:
  208. smallest_suffix = tree.minKey(key[6:])
  209. except ValueError: # 'empty tree' (no suffix >= arg)
  210. next_prefix = prefix_plus_one(smallest_prefix)
  211. smallest_prefix = self._data.minKey(next_prefix)
  212. tree = self._data[smallest_prefix]
  213. assert tree
  214. smallest_suffix = tree.minKey()
  215. return smallest_prefix + smallest_suffix
  216. def maxKey(self, key=None):
  217. if key is None:
  218. biggest_prefix = self._data.maxKey()
  219. else:
  220. biggest_prefix = self._data.maxKey(key[:6])
  221. tree = self._data[biggest_prefix]
  222. assert tree
  223. if key is None:
  224. biggest_suffix = tree.maxKey()
  225. else:
  226. try:
  227. biggest_suffix = tree.maxKey(key[6:])
  228. except ValueError: # 'empty tree' (no suffix <= arg)
  229. next_prefix = prefix_minus_one(biggest_prefix)
  230. biggest_prefix = self._data.maxKey(next_prefix)
  231. tree = self._data[biggest_prefix]
  232. assert tree
  233. biggest_suffix = tree.maxKey()
  234. return biggest_prefix + biggest_suffix