You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

cparser.py 41KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963
  1. from . import model
  2. from .commontypes import COMMON_TYPES, resolve_common_type
  3. from .error import FFIError, CDefError
  4. try:
  5. from . import _pycparser as pycparser
  6. except ImportError:
  7. import pycparser
  8. import weakref, re, sys
  9. try:
  10. if sys.version_info < (3,):
  11. import thread as _thread
  12. else:
  13. import _thread
  14. lock = _thread.allocate_lock()
  15. except ImportError:
  16. lock = None
  17. def _workaround_for_static_import_finders():
  18. # Issue #392: packaging tools like cx_Freeze can not find these
  19. # because pycparser uses exec dynamic import. This is an obscure
  20. # workaround. This function is never called.
  21. import pycparser.yacctab
  22. import pycparser.lextab
  23. CDEF_SOURCE_STRING = "<cdef source string>"
  24. _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
  25. re.DOTALL | re.MULTILINE)
  26. _r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
  27. r"\b((?:[^\n\\]|\\.)*?)$",
  28. re.DOTALL | re.MULTILINE)
  29. _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
  30. _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
  31. _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
  32. _r_words = re.compile(r"\w+|\S")
  33. _parser_cache = None
  34. _r_int_literal = re.compile(r"-?0?x?[0-9a-f]+[lu]*$", re.IGNORECASE)
  35. _r_stdcall1 = re.compile(r"\b(__stdcall|WINAPI)\b")
  36. _r_stdcall2 = re.compile(r"[(]\s*(__stdcall|WINAPI)\b")
  37. _r_cdecl = re.compile(r"\b__cdecl\b")
  38. _r_extern_python = re.compile(r'\bextern\s*"'
  39. r'(Python|Python\s*\+\s*C|C\s*\+\s*Python)"\s*.')
  40. _r_star_const_space = re.compile( # matches "* const "
  41. r"[*]\s*((const|volatile|restrict)\b\s*)+")
  42. _r_int_dotdotdot = re.compile(r"(\b(int|long|short|signed|unsigned|char)\s*)+"
  43. r"\.\.\.")
  44. _r_float_dotdotdot = re.compile(r"\b(double|float)\s*\.\.\.")
  45. def _get_parser():
  46. global _parser_cache
  47. if _parser_cache is None:
  48. _parser_cache = pycparser.CParser()
  49. return _parser_cache
  50. def _workaround_for_old_pycparser(csource):
  51. # Workaround for a pycparser issue (fixed between pycparser 2.10 and
  52. # 2.14): "char*const***" gives us a wrong syntax tree, the same as
  53. # for "char***(*const)". This means we can't tell the difference
  54. # afterwards. But "char(*const(***))" gives us the right syntax
  55. # tree. The issue only occurs if there are several stars in
  56. # sequence with no parenthesis inbetween, just possibly qualifiers.
  57. # Attempt to fix it by adding some parentheses in the source: each
  58. # time we see "* const" or "* const *", we add an opening
  59. # parenthesis before each star---the hard part is figuring out where
  60. # to close them.
  61. parts = []
  62. while True:
  63. match = _r_star_const_space.search(csource)
  64. if not match:
  65. break
  66. #print repr(''.join(parts)+csource), '=>',
  67. parts.append(csource[:match.start()])
  68. parts.append('('); closing = ')'
  69. parts.append(match.group()) # e.g. "* const "
  70. endpos = match.end()
  71. if csource.startswith('*', endpos):
  72. parts.append('('); closing += ')'
  73. level = 0
  74. i = endpos
  75. while i < len(csource):
  76. c = csource[i]
  77. if c == '(':
  78. level += 1
  79. elif c == ')':
  80. if level == 0:
  81. break
  82. level -= 1
  83. elif c in ',;=':
  84. if level == 0:
  85. break
  86. i += 1
  87. csource = csource[endpos:i] + closing + csource[i:]
  88. #print repr(''.join(parts)+csource)
  89. parts.append(csource)
  90. return ''.join(parts)
  91. def _preprocess_extern_python(csource):
  92. # input: `extern "Python" int foo(int);` or
  93. # `extern "Python" { int foo(int); }`
  94. # output:
  95. # void __cffi_extern_python_start;
  96. # int foo(int);
  97. # void __cffi_extern_python_stop;
  98. #
  99. # input: `extern "Python+C" int foo(int);`
  100. # output:
  101. # void __cffi_extern_python_plus_c_start;
  102. # int foo(int);
  103. # void __cffi_extern_python_stop;
  104. parts = []
  105. while True:
  106. match = _r_extern_python.search(csource)
  107. if not match:
  108. break
  109. endpos = match.end() - 1
  110. #print
  111. #print ''.join(parts)+csource
  112. #print '=>'
  113. parts.append(csource[:match.start()])
  114. if 'C' in match.group(1):
  115. parts.append('void __cffi_extern_python_plus_c_start; ')
  116. else:
  117. parts.append('void __cffi_extern_python_start; ')
  118. if csource[endpos] == '{':
  119. # grouping variant
  120. closing = csource.find('}', endpos)
  121. if closing < 0:
  122. raise CDefError("'extern \"Python\" {': no '}' found")
  123. if csource.find('{', endpos + 1, closing) >= 0:
  124. raise NotImplementedError("cannot use { } inside a block "
  125. "'extern \"Python\" { ... }'")
  126. parts.append(csource[endpos+1:closing])
  127. csource = csource[closing+1:]
  128. else:
  129. # non-grouping variant
  130. semicolon = csource.find(';', endpos)
  131. if semicolon < 0:
  132. raise CDefError("'extern \"Python\": no ';' found")
  133. parts.append(csource[endpos:semicolon+1])
  134. csource = csource[semicolon+1:]
  135. parts.append(' void __cffi_extern_python_stop;')
  136. #print ''.join(parts)+csource
  137. #print
  138. parts.append(csource)
  139. return ''.join(parts)
  140. def _warn_for_string_literal(csource):
  141. if '"' not in csource:
  142. return
  143. for line in csource.splitlines():
  144. if '"' in line and not line.lstrip().startswith('#'):
  145. import warnings
  146. warnings.warn("String literal found in cdef() or type source. "
  147. "String literals are ignored here, but you should "
  148. "remove them anyway because some character sequences "
  149. "confuse pre-parsing.")
  150. break
  151. def _warn_for_non_extern_non_static_global_variable(decl):
  152. if not decl.storage:
  153. import warnings
  154. warnings.warn("Global variable '%s' in cdef(): for consistency "
  155. "with C it should have a storage class specifier "
  156. "(usually 'extern')" % (decl.name,))
  157. def _preprocess(csource):
  158. # Remove comments. NOTE: this only work because the cdef() section
  159. # should not contain any string literal!
  160. csource = _r_comment.sub(' ', csource)
  161. # Remove the "#define FOO x" lines
  162. macros = {}
  163. for match in _r_define.finditer(csource):
  164. macroname, macrovalue = match.groups()
  165. macrovalue = macrovalue.replace('\\\n', '').strip()
  166. macros[macroname] = macrovalue
  167. csource = _r_define.sub('', csource)
  168. #
  169. if pycparser.__version__ < '2.14':
  170. csource = _workaround_for_old_pycparser(csource)
  171. #
  172. # BIG HACK: replace WINAPI or __stdcall with "volatile const".
  173. # It doesn't make sense for the return type of a function to be
  174. # "volatile volatile const", so we abuse it to detect __stdcall...
  175. # Hack number 2 is that "int(volatile *fptr)();" is not valid C
  176. # syntax, so we place the "volatile" before the opening parenthesis.
  177. csource = _r_stdcall2.sub(' volatile volatile const(', csource)
  178. csource = _r_stdcall1.sub(' volatile volatile const ', csource)
  179. csource = _r_cdecl.sub(' ', csource)
  180. #
  181. # Replace `extern "Python"` with start/end markers
  182. csource = _preprocess_extern_python(csource)
  183. #
  184. # Now there should not be any string literal left; warn if we get one
  185. _warn_for_string_literal(csource)
  186. #
  187. # Replace "[...]" with "[__dotdotdotarray__]"
  188. csource = _r_partial_array.sub('[__dotdotdotarray__]', csource)
  189. #
  190. # Replace "...}" with "__dotdotdotNUM__}". This construction should
  191. # occur only at the end of enums; at the end of structs we have "...;}"
  192. # and at the end of vararg functions "...);". Also replace "=...[,}]"
  193. # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when
  194. # giving an unknown value.
  195. matches = list(_r_partial_enum.finditer(csource))
  196. for number, match in enumerate(reversed(matches)):
  197. p = match.start()
  198. if csource[p] == '=':
  199. p2 = csource.find('...', p, match.end())
  200. assert p2 > p
  201. csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number,
  202. csource[p2+3:])
  203. else:
  204. assert csource[p:p+3] == '...'
  205. csource = '%s __dotdotdot%d__ %s' % (csource[:p], number,
  206. csource[p+3:])
  207. # Replace "int ..." or "unsigned long int..." with "__dotdotdotint__"
  208. csource = _r_int_dotdotdot.sub(' __dotdotdotint__ ', csource)
  209. # Replace "float ..." or "double..." with "__dotdotdotfloat__"
  210. csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource)
  211. # Replace all remaining "..." with the same name, "__dotdotdot__",
  212. # which is declared with a typedef for the purpose of C parsing.
  213. return csource.replace('...', ' __dotdotdot__ '), macros
  214. def _common_type_names(csource):
  215. # Look in the source for what looks like usages of types from the
  216. # list of common types. A "usage" is approximated here as the
  217. # appearance of the word, minus a "definition" of the type, which
  218. # is the last word in a "typedef" statement. Approximative only
  219. # but should be fine for all the common types.
  220. look_for_words = set(COMMON_TYPES)
  221. look_for_words.add(';')
  222. look_for_words.add(',')
  223. look_for_words.add('(')
  224. look_for_words.add(')')
  225. look_for_words.add('typedef')
  226. words_used = set()
  227. is_typedef = False
  228. paren = 0
  229. previous_word = ''
  230. for word in _r_words.findall(csource):
  231. if word in look_for_words:
  232. if word == ';':
  233. if is_typedef:
  234. words_used.discard(previous_word)
  235. look_for_words.discard(previous_word)
  236. is_typedef = False
  237. elif word == 'typedef':
  238. is_typedef = True
  239. paren = 0
  240. elif word == '(':
  241. paren += 1
  242. elif word == ')':
  243. paren -= 1
  244. elif word == ',':
  245. if is_typedef and paren == 0:
  246. words_used.discard(previous_word)
  247. look_for_words.discard(previous_word)
  248. else: # word in COMMON_TYPES
  249. words_used.add(word)
  250. previous_word = word
  251. return words_used
  252. class Parser(object):
  253. def __init__(self):
  254. self._declarations = {}
  255. self._included_declarations = set()
  256. self._anonymous_counter = 0
  257. self._structnode2type = weakref.WeakKeyDictionary()
  258. self._options = {}
  259. self._int_constants = {}
  260. self._recomplete = []
  261. self._uses_new_feature = None
  262. def _parse(self, csource):
  263. csource, macros = _preprocess(csource)
  264. # XXX: for more efficiency we would need to poke into the
  265. # internals of CParser... the following registers the
  266. # typedefs, because their presence or absence influences the
  267. # parsing itself (but what they are typedef'ed to plays no role)
  268. ctn = _common_type_names(csource)
  269. typenames = []
  270. for name in sorted(self._declarations):
  271. if name.startswith('typedef '):
  272. name = name[8:]
  273. typenames.append(name)
  274. ctn.discard(name)
  275. typenames += sorted(ctn)
  276. #
  277. csourcelines = []
  278. csourcelines.append('# 1 "<cdef automatic initialization code>"')
  279. for typename in typenames:
  280. csourcelines.append('typedef int %s;' % typename)
  281. csourcelines.append('typedef int __dotdotdotint__, __dotdotdotfloat__,'
  282. ' __dotdotdot__;')
  283. # this forces pycparser to consider the following in the file
  284. # called <cdef source string> from line 1
  285. csourcelines.append('# 1 "%s"' % (CDEF_SOURCE_STRING,))
  286. csourcelines.append(csource)
  287. fullcsource = '\n'.join(csourcelines)
  288. if lock is not None:
  289. lock.acquire() # pycparser is not thread-safe...
  290. try:
  291. ast = _get_parser().parse(fullcsource)
  292. except pycparser.c_parser.ParseError as e:
  293. self.convert_pycparser_error(e, csource)
  294. finally:
  295. if lock is not None:
  296. lock.release()
  297. # csource will be used to find buggy source text
  298. return ast, macros, csource
  299. def _convert_pycparser_error(self, e, csource):
  300. # xxx look for "<cdef source string>:NUM:" at the start of str(e)
  301. # and interpret that as a line number. This will not work if
  302. # the user gives explicit ``# NUM "FILE"`` directives.
  303. line = None
  304. msg = str(e)
  305. match = re.match(r"%s:(\d+):" % (CDEF_SOURCE_STRING,), msg)
  306. if match:
  307. linenum = int(match.group(1), 10)
  308. csourcelines = csource.splitlines()
  309. if 1 <= linenum <= len(csourcelines):
  310. line = csourcelines[linenum-1]
  311. return line
  312. def convert_pycparser_error(self, e, csource):
  313. line = self._convert_pycparser_error(e, csource)
  314. msg = str(e)
  315. if line:
  316. msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
  317. else:
  318. msg = 'parse error\n%s' % (msg,)
  319. raise CDefError(msg)
  320. def parse(self, csource, override=False, packed=False, pack=None,
  321. dllexport=False):
  322. if packed:
  323. if packed != True:
  324. raise ValueError("'packed' should be False or True; use "
  325. "'pack' to give another value")
  326. if pack:
  327. raise ValueError("cannot give both 'pack' and 'packed'")
  328. pack = 1
  329. elif pack:
  330. if pack & (pack - 1):
  331. raise ValueError("'pack' must be a power of two, not %r" %
  332. (pack,))
  333. else:
  334. pack = 0
  335. prev_options = self._options
  336. try:
  337. self._options = {'override': override,
  338. 'packed': pack,
  339. 'dllexport': dllexport}
  340. self._internal_parse(csource)
  341. finally:
  342. self._options = prev_options
  343. def _internal_parse(self, csource):
  344. ast, macros, csource = self._parse(csource)
  345. # add the macros
  346. self._process_macros(macros)
  347. # find the first "__dotdotdot__" and use that as a separator
  348. # between the repeated typedefs and the real csource
  349. iterator = iter(ast.ext)
  350. for decl in iterator:
  351. if decl.name == '__dotdotdot__':
  352. break
  353. else:
  354. assert 0
  355. current_decl = None
  356. #
  357. try:
  358. self._inside_extern_python = '__cffi_extern_python_stop'
  359. for decl in iterator:
  360. current_decl = decl
  361. if isinstance(decl, pycparser.c_ast.Decl):
  362. self._parse_decl(decl)
  363. elif isinstance(decl, pycparser.c_ast.Typedef):
  364. if not decl.name:
  365. raise CDefError("typedef does not declare any name",
  366. decl)
  367. quals = 0
  368. if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) and
  369. decl.type.type.names[-1].startswith('__dotdotdot')):
  370. realtype = self._get_unknown_type(decl)
  371. elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
  372. isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
  373. isinstance(decl.type.type.type,
  374. pycparser.c_ast.IdentifierType) and
  375. decl.type.type.type.names[-1].startswith('__dotdotdot')):
  376. realtype = self._get_unknown_ptr_type(decl)
  377. else:
  378. realtype, quals = self._get_type_and_quals(
  379. decl.type, name=decl.name, partial_length_ok=True)
  380. self._declare('typedef ' + decl.name, realtype, quals=quals)
  381. elif decl.__class__.__name__ == 'Pragma':
  382. pass # skip pragma, only in pycparser 2.15
  383. else:
  384. raise CDefError("unexpected <%s>: this construct is valid "
  385. "C but not valid in cdef()" %
  386. decl.__class__.__name__, decl)
  387. except CDefError as e:
  388. if len(e.args) == 1:
  389. e.args = e.args + (current_decl,)
  390. raise
  391. except FFIError as e:
  392. msg = self._convert_pycparser_error(e, csource)
  393. if msg:
  394. e.args = (e.args[0] + "\n *** Err: %s" % msg,)
  395. raise
  396. def _add_constants(self, key, val):
  397. if key in self._int_constants:
  398. if self._int_constants[key] == val:
  399. return # ignore identical double declarations
  400. raise FFIError(
  401. "multiple declarations of constant: %s" % (key,))
  402. self._int_constants[key] = val
  403. def _add_integer_constant(self, name, int_str):
  404. int_str = int_str.lower().rstrip("ul")
  405. neg = int_str.startswith('-')
  406. if neg:
  407. int_str = int_str[1:]
  408. # "010" is not valid oct in py3
  409. if (int_str.startswith("0") and int_str != '0'
  410. and not int_str.startswith("0x")):
  411. int_str = "0o" + int_str[1:]
  412. pyvalue = int(int_str, 0)
  413. if neg:
  414. pyvalue = -pyvalue
  415. self._add_constants(name, pyvalue)
  416. self._declare('macro ' + name, pyvalue)
  417. def _process_macros(self, macros):
  418. for key, value in macros.items():
  419. value = value.strip()
  420. if _r_int_literal.match(value):
  421. self._add_integer_constant(key, value)
  422. elif value == '...':
  423. self._declare('macro ' + key, value)
  424. else:
  425. raise CDefError(
  426. 'only supports one of the following syntax:\n'
  427. ' #define %s ... (literally dot-dot-dot)\n'
  428. ' #define %s NUMBER (with NUMBER an integer'
  429. ' constant, decimal/hex/octal)\n'
  430. 'got:\n'
  431. ' #define %s %s'
  432. % (key, key, key, value))
  433. def _declare_function(self, tp, quals, decl):
  434. tp = self._get_type_pointer(tp, quals)
  435. if self._options.get('dllexport'):
  436. tag = 'dllexport_python '
  437. elif self._inside_extern_python == '__cffi_extern_python_start':
  438. tag = 'extern_python '
  439. elif self._inside_extern_python == '__cffi_extern_python_plus_c_start':
  440. tag = 'extern_python_plus_c '
  441. else:
  442. tag = 'function '
  443. self._declare(tag + decl.name, tp)
  444. def _parse_decl(self, decl):
  445. node = decl.type
  446. if isinstance(node, pycparser.c_ast.FuncDecl):
  447. tp, quals = self._get_type_and_quals(node, name=decl.name)
  448. assert isinstance(tp, model.RawFunctionType)
  449. self._declare_function(tp, quals, decl)
  450. else:
  451. if isinstance(node, pycparser.c_ast.Struct):
  452. self._get_struct_union_enum_type('struct', node)
  453. elif isinstance(node, pycparser.c_ast.Union):
  454. self._get_struct_union_enum_type('union', node)
  455. elif isinstance(node, pycparser.c_ast.Enum):
  456. self._get_struct_union_enum_type('enum', node)
  457. elif not decl.name:
  458. raise CDefError("construct does not declare any variable",
  459. decl)
  460. #
  461. if decl.name:
  462. tp, quals = self._get_type_and_quals(node,
  463. partial_length_ok=True)
  464. if tp.is_raw_function:
  465. self._declare_function(tp, quals, decl)
  466. elif (tp.is_integer_type() and
  467. hasattr(decl, 'init') and
  468. hasattr(decl.init, 'value') and
  469. _r_int_literal.match(decl.init.value)):
  470. self._add_integer_constant(decl.name, decl.init.value)
  471. elif (tp.is_integer_type() and
  472. isinstance(decl.init, pycparser.c_ast.UnaryOp) and
  473. decl.init.op == '-' and
  474. hasattr(decl.init.expr, 'value') and
  475. _r_int_literal.match(decl.init.expr.value)):
  476. self._add_integer_constant(decl.name,
  477. '-' + decl.init.expr.value)
  478. elif (tp is model.void_type and
  479. decl.name.startswith('__cffi_extern_python_')):
  480. # hack: `extern "Python"` in the C source is replaced
  481. # with "void __cffi_extern_python_start;" and
  482. # "void __cffi_extern_python_stop;"
  483. self._inside_extern_python = decl.name
  484. else:
  485. if self._inside_extern_python !='__cffi_extern_python_stop':
  486. raise CDefError(
  487. "cannot declare constants or "
  488. "variables with 'extern \"Python\"'")
  489. if (quals & model.Q_CONST) and not tp.is_array_type:
  490. self._declare('constant ' + decl.name, tp, quals=quals)
  491. else:
  492. _warn_for_non_extern_non_static_global_variable(decl)
  493. self._declare('variable ' + decl.name, tp, quals=quals)
  494. def parse_type(self, cdecl):
  495. return self.parse_type_and_quals(cdecl)[0]
  496. def parse_type_and_quals(self, cdecl):
  497. ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2]
  498. assert not macros
  499. exprnode = ast.ext[-1].type.args.params[0]
  500. if isinstance(exprnode, pycparser.c_ast.ID):
  501. raise CDefError("unknown identifier '%s'" % (exprnode.name,))
  502. return self._get_type_and_quals(exprnode.type)
  503. def _declare(self, name, obj, included=False, quals=0):
  504. if name in self._declarations:
  505. prevobj, prevquals = self._declarations[name]
  506. if prevobj is obj and prevquals == quals:
  507. return
  508. if not self._options.get('override'):
  509. raise FFIError(
  510. "multiple declarations of %s (for interactive usage, "
  511. "try cdef(xx, override=True))" % (name,))
  512. assert '__dotdotdot__' not in name.split()
  513. self._declarations[name] = (obj, quals)
  514. if included:
  515. self._included_declarations.add(obj)
  516. def _extract_quals(self, type):
  517. quals = 0
  518. if isinstance(type, (pycparser.c_ast.TypeDecl,
  519. pycparser.c_ast.PtrDecl)):
  520. if 'const' in type.quals:
  521. quals |= model.Q_CONST
  522. if 'volatile' in type.quals:
  523. quals |= model.Q_VOLATILE
  524. if 'restrict' in type.quals:
  525. quals |= model.Q_RESTRICT
  526. return quals
  527. def _get_type_pointer(self, type, quals, declname=None):
  528. if isinstance(type, model.RawFunctionType):
  529. return type.as_function_pointer()
  530. if (isinstance(type, model.StructOrUnionOrEnum) and
  531. type.name.startswith('$') and type.name[1:].isdigit() and
  532. type.forcename is None and declname is not None):
  533. return model.NamedPointerType(type, declname, quals)
  534. return model.PointerType(type, quals)
  535. def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False):
  536. # first, dereference typedefs, if we have it already parsed, we're good
  537. if (isinstance(typenode, pycparser.c_ast.TypeDecl) and
  538. isinstance(typenode.type, pycparser.c_ast.IdentifierType) and
  539. len(typenode.type.names) == 1 and
  540. ('typedef ' + typenode.type.names[0]) in self._declarations):
  541. tp, quals = self._declarations['typedef ' + typenode.type.names[0]]
  542. quals |= self._extract_quals(typenode)
  543. return tp, quals
  544. #
  545. if isinstance(typenode, pycparser.c_ast.ArrayDecl):
  546. # array type
  547. if typenode.dim is None:
  548. length = None
  549. else:
  550. length = self._parse_constant(
  551. typenode.dim, partial_length_ok=partial_length_ok)
  552. tp, quals = self._get_type_and_quals(typenode.type,
  553. partial_length_ok=partial_length_ok)
  554. return model.ArrayType(tp, length), quals
  555. #
  556. if isinstance(typenode, pycparser.c_ast.PtrDecl):
  557. # pointer type
  558. itemtype, itemquals = self._get_type_and_quals(typenode.type)
  559. tp = self._get_type_pointer(itemtype, itemquals, declname=name)
  560. quals = self._extract_quals(typenode)
  561. return tp, quals
  562. #
  563. if isinstance(typenode, pycparser.c_ast.TypeDecl):
  564. quals = self._extract_quals(typenode)
  565. type = typenode.type
  566. if isinstance(type, pycparser.c_ast.IdentifierType):
  567. # assume a primitive type. get it from .names, but reduce
  568. # synonyms to a single chosen combination
  569. names = list(type.names)
  570. if names != ['signed', 'char']: # keep this unmodified
  571. prefixes = {}
  572. while names:
  573. name = names[0]
  574. if name in ('short', 'long', 'signed', 'unsigned'):
  575. prefixes[name] = prefixes.get(name, 0) + 1
  576. del names[0]
  577. else:
  578. break
  579. # ignore the 'signed' prefix below, and reorder the others
  580. newnames = []
  581. for prefix in ('unsigned', 'short', 'long'):
  582. for i in range(prefixes.get(prefix, 0)):
  583. newnames.append(prefix)
  584. if not names:
  585. names = ['int'] # implicitly
  586. if names == ['int']: # but kill it if 'short' or 'long'
  587. if 'short' in prefixes or 'long' in prefixes:
  588. names = []
  589. names = newnames + names
  590. ident = ' '.join(names)
  591. if ident == 'void':
  592. return model.void_type, quals
  593. if ident == '__dotdotdot__':
  594. raise FFIError(':%d: bad usage of "..."' %
  595. typenode.coord.line)
  596. tp0, quals0 = resolve_common_type(self, ident)
  597. return tp0, (quals | quals0)
  598. #
  599. if isinstance(type, pycparser.c_ast.Struct):
  600. # 'struct foobar'
  601. tp = self._get_struct_union_enum_type('struct', type, name)
  602. return tp, quals
  603. #
  604. if isinstance(type, pycparser.c_ast.Union):
  605. # 'union foobar'
  606. tp = self._get_struct_union_enum_type('union', type, name)
  607. return tp, quals
  608. #
  609. if isinstance(type, pycparser.c_ast.Enum):
  610. # 'enum foobar'
  611. tp = self._get_struct_union_enum_type('enum', type, name)
  612. return tp, quals
  613. #
  614. if isinstance(typenode, pycparser.c_ast.FuncDecl):
  615. # a function type
  616. return self._parse_function_type(typenode, name), 0
  617. #
  618. # nested anonymous structs or unions end up here
  619. if isinstance(typenode, pycparser.c_ast.Struct):
  620. return self._get_struct_union_enum_type('struct', typenode, name,
  621. nested=True), 0
  622. if isinstance(typenode, pycparser.c_ast.Union):
  623. return self._get_struct_union_enum_type('union', typenode, name,
  624. nested=True), 0
  625. #
  626. raise FFIError(":%d: bad or unsupported type declaration" %
  627. typenode.coord.line)
  628. def _parse_function_type(self, typenode, funcname=None):
  629. params = list(getattr(typenode.args, 'params', []))
  630. for i, arg in enumerate(params):
  631. if not hasattr(arg, 'type'):
  632. raise CDefError("%s arg %d: unknown type '%s'"
  633. " (if you meant to use the old C syntax of giving"
  634. " untyped arguments, it is not supported)"
  635. % (funcname or 'in expression', i + 1,
  636. getattr(arg, 'name', '?')))
  637. ellipsis = (
  638. len(params) > 0 and
  639. isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and
  640. isinstance(params[-1].type.type,
  641. pycparser.c_ast.IdentifierType) and
  642. params[-1].type.type.names == ['__dotdotdot__'])
  643. if ellipsis:
  644. params.pop()
  645. if not params:
  646. raise CDefError(
  647. "%s: a function with only '(...)' as argument"
  648. " is not correct C" % (funcname or 'in expression'))
  649. args = [self._as_func_arg(*self._get_type_and_quals(argdeclnode.type))
  650. for argdeclnode in params]
  651. if not ellipsis and args == [model.void_type]:
  652. args = []
  653. result, quals = self._get_type_and_quals(typenode.type)
  654. # the 'quals' on the result type are ignored. HACK: we absure them
  655. # to detect __stdcall functions: we textually replace "__stdcall"
  656. # with "volatile volatile const" above.
  657. abi = None
  658. if hasattr(typenode.type, 'quals'): # else, probable syntax error anyway
  659. if typenode.type.quals[-3:] == ['volatile', 'volatile', 'const']:
  660. abi = '__stdcall'
  661. return model.RawFunctionType(tuple(args), result, ellipsis, abi)
  662. def _as_func_arg(self, type, quals):
  663. if isinstance(type, model.ArrayType):
  664. return model.PointerType(type.item, quals)
  665. elif isinstance(type, model.RawFunctionType):
  666. return type.as_function_pointer()
  667. else:
  668. return type
  669. def _get_struct_union_enum_type(self, kind, type, name=None, nested=False):
  670. # First, a level of caching on the exact 'type' node of the AST.
  671. # This is obscure, but needed because pycparser "unrolls" declarations
  672. # such as "typedef struct { } foo_t, *foo_p" and we end up with
  673. # an AST that is not a tree, but a DAG, with the "type" node of the
  674. # two branches foo_t and foo_p of the trees being the same node.
  675. # It's a bit silly but detecting "DAG-ness" in the AST tree seems
  676. # to be the only way to distinguish this case from two independent
  677. # structs. See test_struct_with_two_usages.
  678. try:
  679. return self._structnode2type[type]
  680. except KeyError:
  681. pass
  682. #
  683. # Note that this must handle parsing "struct foo" any number of
  684. # times and always return the same StructType object. Additionally,
  685. # one of these times (not necessarily the first), the fields of
  686. # the struct can be specified with "struct foo { ...fields... }".
  687. # If no name is given, then we have to create a new anonymous struct
  688. # with no caching; in this case, the fields are either specified
  689. # right now or never.
  690. #
  691. force_name = name
  692. name = type.name
  693. #
  694. # get the type or create it if needed
  695. if name is None:
  696. # 'force_name' is used to guess a more readable name for
  697. # anonymous structs, for the common case "typedef struct { } foo".
  698. if force_name is not None:
  699. explicit_name = '$%s' % force_name
  700. else:
  701. self._anonymous_counter += 1
  702. explicit_name = '$%d' % self._anonymous_counter
  703. tp = None
  704. else:
  705. explicit_name = name
  706. key = '%s %s' % (kind, name)
  707. tp, _ = self._declarations.get(key, (None, None))
  708. #
  709. if tp is None:
  710. if kind == 'struct':
  711. tp = model.StructType(explicit_name, None, None, None)
  712. elif kind == 'union':
  713. tp = model.UnionType(explicit_name, None, None, None)
  714. elif kind == 'enum':
  715. if explicit_name == '__dotdotdot__':
  716. raise CDefError("Enums cannot be declared with ...")
  717. tp = self._build_enum_type(explicit_name, type.values)
  718. else:
  719. raise AssertionError("kind = %r" % (kind,))
  720. if name is not None:
  721. self._declare(key, tp)
  722. else:
  723. if kind == 'enum' and type.values is not None:
  724. raise NotImplementedError(
  725. "enum %s: the '{}' declaration should appear on the first "
  726. "time the enum is mentioned, not later" % explicit_name)
  727. if not tp.forcename:
  728. tp.force_the_name(force_name)
  729. if tp.forcename and '$' in tp.name:
  730. self._declare('anonymous %s' % tp.forcename, tp)
  731. #
  732. self._structnode2type[type] = tp
  733. #
  734. # enums: done here
  735. if kind == 'enum':
  736. return tp
  737. #
  738. # is there a 'type.decls'? If yes, then this is the place in the
  739. # C sources that declare the fields. If no, then just return the
  740. # existing type, possibly still incomplete.
  741. if type.decls is None:
  742. return tp
  743. #
  744. if tp.fldnames is not None:
  745. raise CDefError("duplicate declaration of struct %s" % name)
  746. fldnames = []
  747. fldtypes = []
  748. fldbitsize = []
  749. fldquals = []
  750. for decl in type.decls:
  751. if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and
  752. ''.join(decl.type.names) == '__dotdotdot__'):
  753. # XXX pycparser is inconsistent: 'names' should be a list
  754. # of strings, but is sometimes just one string. Use
  755. # str.join() as a way to cope with both.
  756. self._make_partial(tp, nested)
  757. continue
  758. if decl.bitsize is None:
  759. bitsize = -1
  760. else:
  761. bitsize = self._parse_constant(decl.bitsize)
  762. self._partial_length = False
  763. type, fqual = self._get_type_and_quals(decl.type,
  764. partial_length_ok=True)
  765. if self._partial_length:
  766. self._make_partial(tp, nested)
  767. if isinstance(type, model.StructType) and type.partial:
  768. self._make_partial(tp, nested)
  769. fldnames.append(decl.name or '')
  770. fldtypes.append(type)
  771. fldbitsize.append(bitsize)
  772. fldquals.append(fqual)
  773. tp.fldnames = tuple(fldnames)
  774. tp.fldtypes = tuple(fldtypes)
  775. tp.fldbitsize = tuple(fldbitsize)
  776. tp.fldquals = tuple(fldquals)
  777. if fldbitsize != [-1] * len(fldbitsize):
  778. if isinstance(tp, model.StructType) and tp.partial:
  779. raise NotImplementedError("%s: using both bitfields and '...;'"
  780. % (tp,))
  781. tp.packed = self._options.get('packed')
  782. if tp.completed: # must be re-completed: it is not opaque any more
  783. tp.completed = 0
  784. self._recomplete.append(tp)
  785. return tp
  def _make_partial(self, tp, nested):
      """Flag the struct or union 'tp' as partial.

      Raises CDefError if 'tp' is not a struct or union, and
      NotImplementedError if it has no C name and is not nested.
      """
      is_struct_or_union = isinstance(tp, model.StructOrUnion)
      if not is_struct_or_union:
          raise CDefError("%s cannot be partial" % (tp,))
      if not (tp.has_c_name() or nested):
          raise NotImplementedError("%s is partial but has no C name" % (tp,))
      tp.partial = True
  def _parse_constant(self, exprnode, partial_length_ok=False):
      """Evaluate the expression node 'exprnode' and return its value.

      Supported: integer constants (decimal, octal, 0x hexadecimal, 0b
      binary, with optional u/U/l/L suffixes), single-character constants
      (including one-character backslash escapes), unary '+' and '-',
      names present in self._int_constants, and the binary operators
      + - * / % << >> & | ^ applied to such expressions.

      For the '__dotdotdotarray__' placeholder, returns the string '...'
      and sets self._partial_length when 'partial_length_ok' is true;
      otherwise raises FFIError.

      Raises CDefError for a malformed constant and FFIError for any
      unsupported expression.
      """
      # for now, limited to expressions that are an immediate number
      # or positive/negative number
      if isinstance(exprnode, pycparser.c_ast.Constant):
          s = exprnode.value
          if '0' <= s[0] <= '9':
              s = s.rstrip('uUlL')
              try:
                  if s.startswith('0'):
                      return int(s, 8)
                  else:
                      return int(s, 10)
              except ValueError:
                  prefix = s[0:2].lower()
                  try:
                      if prefix == '0x':
                          return int(s, 16)
                      if prefix == '0b':
                          return int(s, 2)
                  except ValueError:
                      # malformed hex/binary literal: reported just below
                      # as a CDefError, like any other invalid constant
                      pass
              raise CDefError("invalid constant %r" % (s,))
          elif s[0] == "'" and s[-1] == "'" and (
                  len(s) == 3 or (len(s) == 4 and s[1] == "\\")):
              if len(s) == 4:
                  # backslash escape: decode it instead of returning the
                  # code of the character that follows the backslash
                  escaped = s[2]
                  if '0' <= escaped <= '7':
                      return int(escaped, 8)      # e.g. '\0'
                  simple_escapes = {'a': 7, 'b': 8, 'f': 12, 'n': 10,
                                    'r': 13, 't': 9, 'v': 11}
                  if escaped in simple_escapes:
                      return simple_escapes[escaped]
                  # '\\', '\'', '\"', '\?' stand for the character itself
              return ord(s[-2])
          else:
              raise CDefError("invalid constant %r" % (s,))
      #
      if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
              exprnode.op == '+'):
          return self._parse_constant(exprnode.expr)
      #
      if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
              exprnode.op == '-'):
          return -self._parse_constant(exprnode.expr)
      # load previously defined int constant
      if (isinstance(exprnode, pycparser.c_ast.ID) and
              exprnode.name in self._int_constants):
          return self._int_constants[exprnode.name]
      #
      if (isinstance(exprnode, pycparser.c_ast.ID) and
              exprnode.name == '__dotdotdotarray__'):
          if partial_length_ok:
              self._partial_length = True
              return '...'
          raise FFIError(":%d: unsupported '[...]' here, cannot derive "
                         "the actual array length in this context"
                         % exprnode.coord.line)
      #
      if isinstance(exprnode, pycparser.c_ast.BinaryOp):
          left = self._parse_constant(exprnode.left)
          right = self._parse_constant(exprnode.right)
          if exprnode.op == '+':
              return left + right
          elif exprnode.op == '-':
              return left - right
          elif exprnode.op == '*':
              return left * right
          elif exprnode.op == '/':
              return self._c_div(left, right)
          elif exprnode.op == '%':
              # remainder consistent with the division done by _c_div()
              return left - self._c_div(left, right) * right
          elif exprnode.op == '<<':
              return left << right
          elif exprnode.op == '>>':
              return left >> right
          elif exprnode.op == '&':
              return left & right
          elif exprnode.op == '|':
              return left | right
          elif exprnode.op == '^':
              return left ^ right
          # any other operator falls through to the error below
      #
      raise FFIError(":%d: unsupported expression: expected a "
                     "simple numeric constant" % exprnode.coord.line)
  864. def _c_div(self, a, b):
  865. result = a // b
  866. if ((a < 0) ^ (b < 0)) and (a % b) != 0:
  867. result += 1
  868. return result
  def _build_enum_type(self, explicit_name, decls):
      """Build a model.EnumType named 'explicit_name'.

      'decls' is the enumerator list node, or None for an opaque enum (no
      enumerators).  Each enumerator is also registered through
      self._add_constants().  An enumerator whose name matches
      _r_enum_dotdotdot is not recorded; it marks the enum as partial.
      """
      if decls is None:
          # opaque enum
          return model.EnumType(explicit_name, (), ())
      is_partial = False
      names = []
      values = []
      next_value = 0
      for item in decls.enumerators:
          if _r_enum_dotdotdot.match(item.name):
              is_partial = True
              continue
          if item.value is not None:
              # explicit "NAME = expr": restart the numbering from it
              next_value = self._parse_constant(item.value)
          names.append(item.name)
          values.append(next_value)
          self._add_constants(item.name, next_value)
          next_value += 1
      tp = model.EnumType(explicit_name, tuple(names), tuple(values))
      tp.partial = is_partial
      return tp
  def include(self, other):
      """Copy the type declarations and integer constants of 'other'.

      Only declarations whose key starts with 'struct', 'union', 'enum',
      'anonymous' or 'typedef' are re-declared here (with included=True);
      keys starting with 'anonymous $enum_$' are skipped.  Every entry of
      other._int_constants is passed to self._add_constants().
      """
      included_kinds = ('struct', 'union', 'enum', 'anonymous', 'typedef')
      for decl_name, (tp, quals) in other._declarations.items():
          if decl_name.startswith('anonymous $enum_$'):
              continue    # fix for test_anonymous_enum_include
          decl_kind = decl_name.partition(' ')[0]
          if decl_kind not in included_kinds:
              continue
          self._declare(decl_name, tp, included=True, quals=quals)
      for const_name, const_value in other._int_constants.items():
          self._add_constants(const_name, const_value)
  def _get_unknown_type(self, decl):
      """Return the model type for a typedef declared with '...'.

      The placeholder name found in decl.type.type.names selects between
      an unknown type, an unknown integer type and an unknown float type.
      The integer and float forms also record themselves in
      self._uses_new_feature if nothing is recorded there yet.  Raises
      FFIError for any other usage.
      """
      def note_new_feature(template):
          # only the first new feature met is remembered
          if self._uses_new_feature is None:
              self._uses_new_feature = template % decl.name

      placeholder = decl.type.type.names
      if placeholder == ['__dotdotdot__']:
          return model.unknown_type(decl.name)
      elif placeholder == ['__dotdotdotint__']:
          note_new_feature("'typedef int... %s'")
          return model.UnknownIntegerType(decl.name)
      elif placeholder == ['__dotdotdotfloat__']:
          # note: not for 'long double' so far
          note_new_feature("'typedef float... %s'")
          return model.UnknownFloatType(decl.name)
      raise FFIError(':%d: unsupported usage of "..." in typedef'
                     % decl.coord.line)
  def _get_unknown_ptr_type(self, decl):
      """Return the unknown pointer type for a '...' pointer typedef.

      Raises FFIError unless the names found at decl.type.type.type are
      exactly ['__dotdotdot__'].
      """
      pointee_names = decl.type.type.type.names
      if pointee_names != ['__dotdotdot__']:
          raise FFIError(':%d: unsupported usage of "..." in typedef'
                         % decl.coord.line)
      return model.unknown_ptr_type(decl.name)