You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parser.py 10KB


  1. import string, re, sys, datetime
  2. from .core import TomlError
  3. from .utils import rfc3339_re, parse_rfc3339_re
  4. if sys.version_info[0] == 2:
  5. _chr = unichr
  6. else:
  7. _chr = chr
  8. def load(fin, translate=lambda t, x, v: v, object_pairs_hook=dict):
  9. return loads(fin.read(), translate=translate, object_pairs_hook=object_pairs_hook, filename=getattr(fin, 'name', repr(fin)))
  10. def loads(s, filename='<string>', translate=lambda t, x, v: v, object_pairs_hook=dict):
  11. if isinstance(s, bytes):
  12. s = s.decode('utf-8')
  13. s = s.replace('\r\n', '\n')
  14. root = object_pairs_hook()
  15. tables = object_pairs_hook()
  16. scope = root
  17. src = _Source(s, filename=filename)
  18. ast = _p_toml(src, object_pairs_hook=object_pairs_hook)
  19. def error(msg):
  20. raise TomlError(msg, pos[0], pos[1], filename)
  21. def process_value(v, object_pairs_hook):
  22. kind, text, value, pos = v
  23. if kind == 'str' and value.startswith('\n'):
  24. value = value[1:]
  25. if kind == 'array':
  26. if value and any(k != value[0][0] for k, t, v, p in value[1:]):
  27. error('array-type-mismatch')
  28. value = [process_value(item, object_pairs_hook=object_pairs_hook) for item in value]
  29. elif kind == 'table':
  30. value = object_pairs_hook([(k, process_value(value[k], object_pairs_hook=object_pairs_hook)) for k in value])
  31. return translate(kind, text, value)
  32. for kind, value, pos in ast:
  33. if kind == 'kv':
  34. k, v = value
  35. if k in scope:
  36. error('duplicate_keys. Key "{0}" was used more than once.'.format(k))
  37. scope[k] = process_value(v, object_pairs_hook=object_pairs_hook)
  38. else:
  39. is_table_array = (kind == 'table_array')
  40. cur = tables
  41. for name in value[:-1]:
  42. if isinstance(cur.get(name), list):
  43. d, cur = cur[name][-1]
  44. else:
  45. d, cur = cur.setdefault(name, (None, object_pairs_hook()))
  46. scope = object_pairs_hook()
  47. name = value[-1]
  48. if name not in cur:
  49. if is_table_array:
  50. cur[name] = [(scope, object_pairs_hook())]
  51. else:
  52. cur[name] = (scope, object_pairs_hook())
  53. elif isinstance(cur[name], list):
  54. if not is_table_array:
  55. error('table_type_mismatch')
  56. cur[name].append((scope, object_pairs_hook()))
  57. else:
  58. if is_table_array:
  59. error('table_type_mismatch')
  60. old_scope, next_table = cur[name]
  61. if old_scope is not None:
  62. error('duplicate_tables')
  63. cur[name] = (scope, next_table)
  64. def merge_tables(scope, tables):
  65. if scope is None:
  66. scope = object_pairs_hook()
  67. for k in tables:
  68. if k in scope:
  69. error('key_table_conflict')
  70. v = tables[k]
  71. if isinstance(v, list):
  72. scope[k] = [merge_tables(sc, tbl) for sc, tbl in v]
  73. else:
  74. scope[k] = merge_tables(v[0], v[1])
  75. return scope
  76. return merge_tables(root, tables)
  77. class _Source:
  78. def __init__(self, s, filename=None):
  79. self.s = s
  80. self._pos = (1, 1)
  81. self._last = None
  82. self._filename = filename
  83. self.backtrack_stack = []
  84. def last(self):
  85. return self._last
  86. def pos(self):
  87. return self._pos
  88. def fail(self):
  89. return self._expect(None)
  90. def consume_dot(self):
  91. if self.s:
  92. self._last = self.s[0]
  93. self.s = self[1:]
  94. self._advance(self._last)
  95. return self._last
  96. return None
  97. def expect_dot(self):
  98. return self._expect(self.consume_dot())
  99. def consume_eof(self):
  100. if not self.s:
  101. self._last = ''
  102. return True
  103. return False
  104. def expect_eof(self):
  105. return self._expect(self.consume_eof())
  106. def consume(self, s):
  107. if self.s.startswith(s):
  108. self.s = self.s[len(s):]
  109. self._last = s
  110. self._advance(s)
  111. return True
  112. return False
  113. def expect(self, s):
  114. return self._expect(self.consume(s))
  115. def consume_re(self, re):
  116. m = re.match(self.s)
  117. if m:
  118. self.s = self.s[len(m.group(0)):]
  119. self._last = m
  120. self._advance(m.group(0))
  121. return m
  122. return None
  123. def expect_re(self, re):
  124. return self._expect(self.consume_re(re))
  125. def __enter__(self):
  126. self.backtrack_stack.append((self.s, self._pos))
  127. def __exit__(self, type, value, traceback):
  128. if type is None:
  129. self.backtrack_stack.pop()
  130. else:
  131. self.s, self._pos = self.backtrack_stack.pop()
  132. return type == TomlError
  133. def commit(self):
  134. self.backtrack_stack[-1] = (self.s, self._pos)
  135. def _expect(self, r):
  136. if not r:
  137. raise TomlError('msg', self._pos[0], self._pos[1], self._filename)
  138. return r
  139. def _advance(self, s):
  140. suffix_pos = s.rfind('\n')
  141. if suffix_pos == -1:
  142. self._pos = (self._pos[0], self._pos[1] + len(s))
  143. else:
  144. self._pos = (self._pos[0] + s.count('\n'), len(s) - suffix_pos)
  145. _ews_re = re.compile(r'(?:[ \t]|#[^\n]*\n|#[^\n]*\Z|\n)*')
  146. def _p_ews(s):
  147. s.expect_re(_ews_re)
  148. _ws_re = re.compile(r'[ \t]*')
  149. def _p_ws(s):
  150. s.expect_re(_ws_re)
  151. _escapes = { 'b': '\b', 'n': '\n', 'r': '\r', 't': '\t', '"': '"',
  152. '\\': '\\', 'f': '\f' }
  153. _basicstr_re = re.compile(r'[^"\\\000-\037]*')
  154. _short_uni_re = re.compile(r'u([0-9a-fA-F]{4})')
  155. _long_uni_re = re.compile(r'U([0-9a-fA-F]{8})')
  156. _escapes_re = re.compile(r'[btnfr\"\\]')
  157. _newline_esc_re = re.compile('\n[ \t\n]*')
  158. def _p_basicstr_content(s, content=_basicstr_re):
  159. res = []
  160. while True:
  161. res.append(s.expect_re(content).group(0))
  162. if not s.consume('\\'):
  163. break
  164. if s.consume_re(_newline_esc_re):
  165. pass
  166. elif s.consume_re(_short_uni_re) or s.consume_re(_long_uni_re):
  167. v = int(s.last().group(1), 16)
  168. if 0xd800 <= v < 0xe000:
  169. s.fail()
  170. res.append(_chr(v))
  171. else:
  172. s.expect_re(_escapes_re)
  173. res.append(_escapes[s.last().group(0)])
  174. return ''.join(res)
  175. _key_re = re.compile(r'[0-9a-zA-Z-_]+')
  176. def _p_key(s):
  177. with s:
  178. s.expect('"')
  179. r = _p_basicstr_content(s, _basicstr_re)
  180. s.expect('"')
  181. return r
  182. if s.consume('\''):
  183. if s.consume('\'\''):
  184. r = s.expect_re(_litstr_ml_re).group(0)
  185. s.expect('\'\'\'')
  186. else:
  187. r = s.expect_re(_litstr_re).group(0)
  188. s.expect('\'')
  189. return r
  190. return s.expect_re(_key_re).group(0)
  191. _float_re = re.compile(r'[+-]?(?:0|[1-9](?:_?\d)*)(?:\.\d(?:_?\d)*)?(?:[eE][+-]?(?:\d(?:_?\d)*))?')
  192. _basicstr_ml_re = re.compile(r'(?:""?(?!")|[^"\\\000-\011\013-\037])*')
  193. _litstr_re = re.compile(r"[^'\000\010\012-\037]*")
  194. _litstr_ml_re = re.compile(r"(?:(?:|'|'')(?:[^'\000-\010\013-\037]))*")
  195. def _p_value(s, object_pairs_hook):
  196. pos = s.pos()
  197. if s.consume('true'):
  198. return 'bool', s.last(), True, pos
  199. if s.consume('false'):
  200. return 'bool', s.last(), False, pos
  201. if s.consume('"'):
  202. if s.consume('""'):
  203. r = _p_basicstr_content(s, _basicstr_ml_re)
  204. s.expect('"""')
  205. else:
  206. r = _p_basicstr_content(s, _basicstr_re)
  207. s.expect('"')
  208. return 'str', r, r, pos
  209. if s.consume('\''):
  210. if s.consume('\'\''):
  211. r = s.expect_re(_litstr_ml_re).group(0)
  212. s.expect('\'\'\'')
  213. else:
  214. r = s.expect_re(_litstr_re).group(0)
  215. s.expect('\'')
  216. return 'str', r, r, pos
  217. if s.consume_re(rfc3339_re):
  218. m = s.last()
  219. return 'datetime', m.group(0), parse_rfc3339_re(m), pos
  220. if s.consume_re(_float_re):
  221. m = s.last().group(0)
  222. r = m.replace('_','')
  223. if '.' in m or 'e' in m or 'E' in m:
  224. return 'float', m, float(r), pos
  225. else:
  226. return 'int', m, int(r, 10), pos
  227. if s.consume('['):
  228. items = []
  229. with s:
  230. while True:
  231. _p_ews(s)
  232. items.append(_p_value(s, object_pairs_hook=object_pairs_hook))
  233. s.commit()
  234. _p_ews(s)
  235. s.expect(',')
  236. s.commit()
  237. _p_ews(s)
  238. s.expect(']')
  239. return 'array', None, items, pos
  240. if s.consume('{'):
  241. _p_ws(s)
  242. items = object_pairs_hook()
  243. if not s.consume('}'):
  244. k = _p_key(s)
  245. _p_ws(s)
  246. s.expect('=')
  247. _p_ws(s)
  248. items[k] = _p_value(s, object_pairs_hook=object_pairs_hook)
  249. _p_ws(s)
  250. while s.consume(','):
  251. _p_ws(s)
  252. k = _p_key(s)
  253. _p_ws(s)
  254. s.expect('=')
  255. _p_ws(s)
  256. items[k] = _p_value(s, object_pairs_hook=object_pairs_hook)
  257. _p_ws(s)
  258. s.expect('}')
  259. return 'table', None, items, pos
  260. s.fail()
  261. def _p_stmt(s, object_pairs_hook):
  262. pos = s.pos()
  263. if s.consume( '['):
  264. is_array = s.consume('[')
  265. _p_ws(s)
  266. keys = [_p_key(s)]
  267. _p_ws(s)
  268. while s.consume('.'):
  269. _p_ws(s)
  270. keys.append(_p_key(s))
  271. _p_ws(s)
  272. s.expect(']')
  273. if is_array:
  274. s.expect(']')
  275. return 'table_array' if is_array else 'table', keys, pos
  276. key = _p_key(s)
  277. _p_ws(s)
  278. s.expect('=')
  279. _p_ws(s)
  280. value = _p_value(s, object_pairs_hook=object_pairs_hook)
  281. return 'kv', (key, value), pos
  282. _stmtsep_re = re.compile(r'(?:[ \t]*(?:#[^\n]*)?\n)+[ \t]*')
  283. def _p_toml(s, object_pairs_hook):
  284. stmts = []
  285. _p_ews(s)
  286. with s:
  287. stmts.append(_p_stmt(s, object_pairs_hook=object_pairs_hook))
  288. while True:
  289. s.commit()
  290. s.expect_re(_stmtsep_re)
  291. stmts.append(_p_stmt(s, object_pairs_hook=object_pairs_hook))
  292. _p_ews(s)
  293. s.expect_eof()
  294. return stmts