#!/usr/bin/env python
# encoding: utf-8
# Thomas Nagy, 2006-2018 (ita)

"""
C/C++ preprocessor for finding dependencies

Reasons for using the Waf preprocessor by default

#. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
#. Not all compilers provide .d files for obtaining the dependencies (portability)
#. A naive file scanner will not catch constructs such as "#include foo()"
#. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)

Regarding the speed concerns:

* the preprocessing is performed only when files must be compiled
* the macros are evaluated only for #if/#elif/#include
* system headers are not scanned by default

Now if you do not want the Waf preprocessor, the tool *gccdeps* uses the .d files produced
during the compilation to track the dependencies (useful when used with the boost libraries).
It only works with gcc >= 4.4 though.

A dumb preprocessor is also available in the tool *c_dumbpreproc*
"""
# TODO: more varargs, pragma once

import re, string, traceback
from waflib import Logs, Utils, Errors

class PreprocError(Errors.WafError):
	pass

FILE_CACHE_SIZE = 100000
LINE_CACHE_SIZE = 100000

POPFILE = '-'
"Constant representing a special token used in :py:meth:`waflib.Tools.c_preproc.c_parser.start` iteration to switch to a header read previously"

recursion_limit = 150
"Limit on the amount of files to read in the dependency scanner"

go_absolute = False
"Set to True to track headers on files in /usr/include, else absolute paths are ignored (but it becomes very slow)"

standard_includes = ['/usr/local/include', '/usr/include']
if Utils.is_win32:
	standard_includes = []

use_trigraphs = 0
"""Apply trigraph rules (False by default)"""

# obsolete, do not use
strict_quotes = 0

g_optrans = {
'not':'!',
'not_eq':'!',
'and':'&&',
'and_eq':'&=',
'or':'||',
'or_eq':'|=',
'xor':'^',
'xor_eq':'^=',
'bitand':'&',
'bitor':'|',
'compl':'~',
}
"""Operators such as and/or/xor for c++. Set an empty dict to disable."""

# ignore #warning and #error
re_lines = re.compile(
	'^[ \t]*(?:#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
	re.IGNORECASE | re.MULTILINE)
"""Match preprocessor directive lines"""

re_mac = re.compile(r"^[a-zA-Z_]\w*")
"""Match macro names"""

re_fun = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*[(]')
"""Match macro functions"""

re_pragma_once = re.compile(r'^\s*once\s*', re.IGNORECASE)
"""Match #pragma once statements"""

re_nl = re.compile('\\\\\r*\n', re.MULTILINE)
"""Match backslash line continuations"""

re_cpp = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE)
"""Filter C/C++ comments"""

trig_def = [('??'+a, b) for a, b in zip("=-/!'()<>", r'#~\|^[]{}')]
"""Trigraph definitions"""

chr_esc = {'0':0, 'a':7, 'b':8, 't':9, 'n':10, 'f':11, 'v':12, 'r':13, '\\':92, "'":39}
"""Escape characters"""

NUM = 'i'
"""Number token"""

OP = 'O'
"""Operator token"""

IDENT = 'T'
"""Identifier token"""

STR = 's'
"""String token"""

CHAR = 'c'
"""Character token"""

tok_types = [NUM, STR, IDENT, OP]
"""Token types"""

exp_types = [
	r"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
	r'L?"([^"\\]|\\.)*"',
	r'[a-zA-Z_]\w*',
	r'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
]
"""Expression types"""

re_clexer = re.compile('|'.join(["(?P<%s>%s)" % (name, part) for name, part in zip(tok_types, exp_types)]), re.M)
"""Match expressions into tokens"""

accepted = 'a'
"""Parser state is *accepted*"""

ignored = 'i'
"""Parser state is *ignored*, for example preprocessor lines in an #if 0 block"""

undefined = 'u'
"""Parser state is *undefined* at the moment"""

skipped = 's'
"""Parser state is *skipped*, for example preprocessor lines in a #elif 0 block"""

def repl(m):
	"""Replace function used with :py:attr:`waflib.Tools.c_preproc.re_cpp`"""
	s = m.group()
	if s[0] == '/':
		return ' '
	return s

prec = {}
"""
Operator precedence rules required for parsing expressions of the form::

	#if 1 && 2 != 0
"""

ops = ['* / %', '+ -', '<< >>', '< <= >= >', '== !=', '& | ^', '&& ||', ',']
for x, syms in enumerate(ops):
	for u in syms.split():
		prec[u] = x

def reduce_nums(val_1, val_2, val_op):
	"""
	Apply arithmetic rules to compute a result

	:param val_1: input parameter
	:type val_1: int or string
	:param val_2: input parameter
	:type val_2: int or string
	:param val_op: C operator in *+*, */*, *-*, etc
	:type val_op: string
	:rtype: int
	"""
	#print val_1, val_2, val_op

	# now perform the operation, make certain a and b are numeric
	try:
		a = 0 + val_1
	except TypeError:
		a = int(val_1)
	try:
		b = 0 + val_2
	except TypeError:
		b = int(val_2)

	d = val_op
	if d == '%':
		c = a % b
	elif d == '+':
		c = a + b
	elif d == '-':
		c = a - b
	elif d == '*':
		c = a * b
	elif d == '/':
		c = a / b
	elif d == '^':
		c = a ^ b
	elif d == '==':
		c = int(a == b)
	elif d == '|' or d == 'bitor':
		c = a | b
	elif d == '||' or d == 'or':
		c = int(a or b)
	elif d == '&' or d == 'bitand':
		c = a & b
	elif d == '&&' or d == 'and':
		c = int(a and b)
	elif d == '!=' or d == 'not_eq':
		c = int(a != b)
	elif d == '^' or d == 'xor':
		c = int(a ^ b)
	elif d == '<=':
		c = int(a <= b)
	elif d == '<':
		c = int(a < b)
	elif d == '>':
		c = int(a > b)
	elif d == '>=':
		c = int(a >= b)
	elif d == '<<':
		c = a << b
	elif d == '>>':
		c = a >> b
	else:
		c = 0
	return c
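
# Illustrative examples (not from the original source): reduce_nums accepts
# ints or numeric strings, and logical/relational operators fold to int 0/1:
#   reduce_nums('2', 3, '*')   # -> 6
#   reduce_nums(1, 0, '&&')    # -> 0
#   reduce_nums(7, 2, '>>')    # -> 1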

def get_num(lst):
	"""
	Try to obtain a number from a list of tokens. The token types are defined in :py:attr:`waflib.Tools.c_preproc.tok_types`.

	:param lst: list of preprocessor tokens
	:type lst: list of tuple (tokentype, value)
	:return: a pair containing the number and the rest of the list
	:rtype: tuple(value, list)
	"""
	if not lst:
		raise PreprocError('empty list for get_num')
	(p, v) = lst[0]
	if p == OP:
		if v == '(':
			count_par = 1
			i = 1
			while i < len(lst):
				(p, v) = lst[i]
				if p == OP:
					if v == ')':
						count_par -= 1
						if count_par == 0:
							break
					elif v == '(':
						count_par += 1
				i += 1
			else:
				raise PreprocError('rparen expected %r' % lst)

			(num, _) = get_term(lst[1:i])
			return (num, lst[i+1:])

		elif v == '+':
			return get_num(lst[1:])
		elif v == '-':
			num, lst = get_num(lst[1:])
			return (reduce_nums('-1', num, '*'), lst)
		elif v == '!':
			num, lst = get_num(lst[1:])
			return (int(not int(num)), lst)
		elif v == '~':
			num, lst = get_num(lst[1:])
			return (~ int(num), lst)
		else:
			raise PreprocError('Invalid op token %r for get_num' % lst)
	elif p == NUM:
		return v, lst[1:]
	elif p == IDENT:
		# all macros should have been replaced, remaining identifiers eval to 0
		return 0, lst[1:]
	else:
		raise PreprocError('Invalid token %r for get_num' % lst)

def get_term(lst):
	"""
	Evaluate an expression recursively, for example::

		1+1+1 -> 2+1 -> 3

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:return: the value and the remaining tokens
	:rtype: value, list
	"""
	if not lst:
		raise PreprocError('empty list for get_term')
	num, lst = get_num(lst)
	if not lst:
		return (num, [])
	(p, v) = lst[0]
	if p == OP:
		if v == ',':
			# skip
			return get_term(lst[1:])
		elif v == '?':
			count_par = 0
			i = 1
			while i < len(lst):
				(p, v) = lst[i]
				if p == OP:
					if v == ')':
						count_par -= 1
					elif v == '(':
						count_par += 1
					elif v == ':':
						if count_par == 0:
							break
				i += 1
			else:
				raise PreprocError('rparen expected %r' % lst)

			if int(num):
				return get_term(lst[1:i])
			else:
				return get_term(lst[i+1:])
		else:
			num2, lst = get_num(lst[1:])
			if not lst:
				# no more tokens to process
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)

			# operator precedence
			p2, v2 = lst[0]
			if p2 != OP:
				raise PreprocError('op expected %r' % lst)

			if prec[v2] >= prec[v]:
				num2 = reduce_nums(num, num2, v)
				return get_term([(NUM, num2)] + lst)
			else:
				num3, lst = get_num(lst[1:])
				num3 = reduce_nums(num2, num3, v2)
				return get_term([(NUM, num), (p, v), (NUM, num3)] + lst)
	raise PreprocError('cannot reduce %r' % lst)

def reduce_eval(lst):
	"""
	Take a list of tokens and output true or false for #if/#elif conditions.

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:return: a token
	:rtype: tuple(NUM, int)
	"""
	num, lst = get_term(lst)
	return (NUM, num)
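
# Worked example (illustrative): get_term folds the token list using the
# precedence table above, so "1+1+1" reduces as 1+1+1 -> 2+1 -> 3:
#   reduce_eval(tokenize('1+1+1'))        # -> (NUM, 3)
#   reduce_eval(tokenize('1 && 2 != 0'))  # -> (NUM, 1), != binds tighter than &&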

def stringize(lst):
	"""
	Merge a list of tokens into a string

	:param lst: a list of tokens
	:type lst: list of tuple(token, value)
	:rtype: string
	"""
	lst = [str(v2) for (p2, v2) in lst]
	return "".join(lst)
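
# Example (illustrative): the '#' stringize operator in reduce_tokens
# builds its string argument with this helper:
#   stringize([(NUM, '1'), (OP, '+'), (NUM, '2')])  # -> '1+2'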

def paste_tokens(t1, t2):
	"""
	Token pasting works between identifiers, particular operators, and identifiers and numbers::

		a ## b  ->  ab
		> ## =  ->  >=
		a ## 2  ->  a2

	:param t1: token
	:type t1: tuple(type, value)
	:param t2: token
	:type t2: tuple(type, value)
	"""
	p1 = None
	if t1[0] == OP and t2[0] == OP:
		p1 = OP
	elif t1[0] == IDENT and (t2[0] == IDENT or t2[0] == NUM):
		p1 = IDENT
	elif t1[0] == NUM and t2[0] == NUM:
		p1 = NUM
	if not p1:
		raise PreprocError('tokens do not make a valid paste %r and %r' % (t1, t2))
	return (p1, t1[1] + t2[1])
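
# Examples (illustrative) of the pairings accepted above:
#   paste_tokens((IDENT, 'a'), (NUM, '2'))  # -> (IDENT, 'a2')
#   paste_tokens((OP, '>'), (OP, '='))      # -> (OP, '>=')
#   paste_tokens((STR, 'x'), (NUM, '1'))    # raises PreprocError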

def reduce_tokens(lst, defs, ban=[]):
	"""
	Replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:param ban: macros that cannot be substituted (recursion is not allowed)
	:type ban: list of string
	:return: None (lst is modified in place)
	"""
	i = 0
	while i < len(lst):
		(p, v) = lst[i]

		if p == IDENT and v == "defined":
			del lst[i]
			if i < len(lst):
				(p2, v2) = lst[i]
				if p2 == IDENT:
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				elif p2 == OP and v2 == '(':
					del lst[i]
					(p2, v2) = lst[i]
					del lst[i] # remove the ident, and change the ) for the value
					if v2 in defs:
						lst[i] = (NUM, 1)
					else:
						lst[i] = (NUM, 0)
				else:
					raise PreprocError('Invalid define expression %r' % lst)

		elif p == IDENT and v in defs:

			if isinstance(defs[v], str):
				a, b = extract_macro(defs[v])
				defs[v] = b
			macro_def = defs[v]
			to_add = macro_def[1]

			if isinstance(macro_def[0], list):
				# macro without arguments
				del lst[i]
				accu = to_add[:]
				reduce_tokens(accu, defs, ban+[v])
				for tmp in accu:
					lst.insert(i, tmp)
					i += 1
			else:
				# collect the arguments for the funcall
				args = []
				del lst[i]

				if i >= len(lst):
					raise PreprocError('expected ( after %r (got nothing)' % v)

				(p2, v2) = lst[i]
				if p2 != OP or v2 != '(':
					raise PreprocError('expected ( after %r' % v)

				del lst[i]

				one_param = []
				count_paren = 0
				while i < len(lst):
					p2, v2 = lst[i]

					del lst[i]
					if p2 == OP and count_paren == 0:
						if v2 == '(':
							one_param.append((p2, v2))
							count_paren += 1
						elif v2 == ')':
							if one_param:
								args.append(one_param)
							break
						elif v2 == ',':
							if not one_param:
								raise PreprocError('empty param in funcall %r' % v)
							args.append(one_param)
							one_param = []
						else:
							one_param.append((p2, v2))
					else:
						one_param.append((p2, v2))
						if v2 == '(':
							count_paren += 1
						elif v2 == ')':
							count_paren -= 1
				else:
					raise PreprocError('malformed macro')

				# substitute the arguments within the define expression
				accu = []
				arg_table = macro_def[0]
				j = 0
				while j < len(to_add):
					(p2, v2) = to_add[j]

					if p2 == OP and v2 == '#':
						# stringize is for arguments only
						if j+1 < len(to_add) and to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
							toks = args[arg_table[to_add[j+1][1]]]
							accu.append((STR, stringize(toks)))
							j += 1
						else:
							accu.append((p2, v2))
					elif p2 == OP and v2 == '##':
						# token pasting, how can man invent such a complicated system?
						if accu and j+1 < len(to_add):
							# we have at least two tokens
							t1 = accu[-1]

							if to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
								toks = args[arg_table[to_add[j+1][1]]]

								if toks:
									accu[-1] = paste_tokens(t1, toks[0]) #(IDENT, accu[-1][1] + toks[0][1])
									accu.extend(toks[1:])
								else:
									# error, case "a##"
									accu.append((p2, v2))
									accu.extend(toks)
							elif to_add[j+1][0] == IDENT and to_add[j+1][1] == '__VA_ARGS__':
								# first collect the tokens
								va_toks = []
								st = len(macro_def[0])
								pt = len(args)
								for x in args[pt-st+1:]:
									va_toks.extend(x)
									va_toks.append((OP, ','))
								if va_toks:
									va_toks.pop() # extra comma
								if len(accu)>1:
									(p3, v3) = accu[-1]
									(p4, v4) = accu[-2]
									if v3 == '##':
										# remove the token paste
										accu.pop()
										if v4 == ',' and pt < st:
											# remove the comma
											accu.pop()
								accu += va_toks
							else:
								accu[-1] = paste_tokens(t1, to_add[j+1])

							j += 1
						else:
							# Invalid paste, case "##a" or "b##"
							accu.append((p2, v2))

					elif p2 == IDENT and v2 in arg_table:
						toks = args[arg_table[v2]]
						reduce_tokens(toks, defs, ban+[v])
						accu.extend(toks)
					else:
						accu.append((p2, v2))

					j += 1

				reduce_tokens(accu, defs, ban+[v])

				for x in range(len(accu)-1, -1, -1):
					lst.insert(i, accu[x])

		i += 1
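
# Sketch (macro names assumed): raw "#define" lines stored in defs are lazily
# converted through extract_macro, then substituted in place:
#   lst = tokenize('A + 1')
#   reduce_tokens(lst, {'A': 'A 2'}, [])
#   # lst is now [(NUM, '2'), (OP, '+'), (NUM, '1')]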

def eval_macro(lst, defs):
	"""
	Reduce the tokens by :py:func:`waflib.Tools.c_preproc.reduce_tokens` and try to return a 0/1 result by :py:func:`waflib.Tools.c_preproc.reduce_eval`.

	:param lst: list of tokens
	:type lst: list of tuple(token, value)
	:param defs: macro definitions
	:type defs: dict
	:rtype: bool
	"""
	reduce_tokens(lst, defs, [])
	if not lst:
		raise PreprocError('missing tokens to evaluate')

	if lst:
		p, v = lst[0]
		if p == IDENT and v not in defs:
			raise PreprocError('missing macro %r' % lst)

	p, v = reduce_eval(lst)
	return int(v) != 0
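
# Usage sketch for #if evaluation (macro names assumed):
#   eval_macro(tokenize('defined(FOO) && FOO > 1'), {'FOO': 'FOO 2'})  # -> True
#   eval_macro(tokenize('BAR'), {})  # raises PreprocError, BAR is undefined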

def extract_macro(txt):
	"""
	Process a macro definition of the form::

		#define f(x, y) x * y

	into a function or a simple macro without arguments

	:param txt: expression to extract a macro definition from
	:type txt: string
	:return: a tuple containing the name, the list of arguments and the replacement
	:rtype: tuple(string, [list, list])
	"""
	t = tokenize(txt)
	if re_fun.search(txt):
		p, name = t[0]

		p, v = t[1]
		if p != OP:
			raise PreprocError('expected (')

		i = 1
		pindex = 0
		params = {}
		prev = '('

		while 1:
			i += 1
			p, v = t[i]

			if prev == '(':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('unexpected token (3)')
			elif prev == IDENT:
				if p == OP and v == ',':
					prev = v
				elif p == OP and v == ')':
					break
				else:
					raise PreprocError('comma or ... expected')
			elif prev == ',':
				if p == IDENT:
					params[v] = pindex
					pindex += 1
					prev = p
				elif p == OP and v == '...':
					raise PreprocError('not implemented (1)')
				else:
					raise PreprocError('comma or ... expected (2)')
			elif prev == '...':
				raise PreprocError('not implemented (2)')
			else:
				raise PreprocError('unexpected else')

		#~ print (name, [params, t[i+1:]])
		return (name, [params, t[i+1:]])
	else:
		(p, v) = t[0]
		if len(t) > 1:
			return (v, [[], t[1:]])
		else:
			# empty define, assign an empty token
			return (v, [[], [('T','')]])
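
# Examples (illustrative; input is the text after the "define" keyword):
#   extract_macro('f(x, y) x * y')
#   # -> ('f', [{'x': 0, 'y': 1}, [(IDENT, 'x'), (OP, '*'), (IDENT, 'y')]])
#   extract_macro('FOO 1')  # -> ('FOO', [[], [(NUM, '1')]])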

re_include = re.compile(r'^\s*(<(?:.*)>|"(?:.*)")')
def extract_include(txt, defs):
	"""
	Process a line in the form::

		#include foo

	:param txt: include line to process
	:type txt: string
	:param defs: macro definitions
	:type defs: dict
	:return: the file name
	:rtype: string
	"""
	m = re_include.search(txt)
	if m:
		txt = m.group(1)
		return txt[0], txt[1:-1]

	# perform preprocessing and look at the result, it must match an include
	toks = tokenize(txt)
	reduce_tokens(toks, defs, ['waf_include'])

	if not toks:
		raise PreprocError('could not parse include %r' % txt)

	if len(toks) == 1:
		if toks[0][0] == STR:
			return '"', toks[0][1]
	else:
		if toks[0][1] == '<' and toks[-1][1] == '>':
			ret = '<', stringize(toks).lstrip('<').rstrip('>')
			return ret

	raise PreprocError('could not parse include %r' % txt)
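
# Examples (illustrative; txt is the text following "#include"):
#   extract_include('<math.h>', {})    # -> ('<', 'math.h')
#   extract_include('"config.h"', {})  # -> ('"', 'config.h')
# Computed includes such as "#include HEADER" go through reduce_tokens first.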

def parse_char(txt):
	"""
	Parse a c character

	:param txt: character to parse
	:type txt: string
	:return: the parsed character code
	:rtype: int
	"""
	if not txt:
		raise PreprocError('attempted to parse a null char')
	if txt[0] != '\\':
		return ord(txt)
	c = txt[1]
	if c == 'x':
		if len(txt) == 4 and txt[3] in string.hexdigits:
			return int(txt[2:], 16)
		return int(txt[2:], 16)
	elif c.isdigit():
		if c == '0' and len(txt) == 2:
			return 0
		for i in 3, 2, 1:
			if len(txt) > i and txt[1:1+i].isdigit():
				return (1+i, int(txt[1:1+i], 8))
	else:
		try:
			return chr_esc[c]
		except KeyError:
			raise PreprocError('could not parse char literal %r' % txt)
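
# Examples (illustrative) of the escape handling above:
#   parse_char('A')      # -> 65
#   parse_char('\\n')    # -> 10, via chr_esc
#   parse_char('\\x41')  # -> 65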

def tokenize(s):
	"""
	Convert a string into a list of tokens (shlex.split does not apply to c/c++/d)

	:param s: input to tokenize
	:type s: string
	:return: a list of tokens
	:rtype: list of tuple(token, value)
	"""
	return tokenize_private(s)[:] # force a copy of the results

def tokenize_private(s):
	ret = []
	for match in re_clexer.finditer(s):
		m = match.group
		for name in tok_types:
			v = m(name)
			if v:
				if name == IDENT:
					if v in g_optrans:
						name = OP
					elif v.lower() == "true":
						v = 1
						name = NUM
					elif v.lower() == "false":
						v = 0
						name = NUM
				elif name == NUM:
					if m('oct'):
						v = int(v, 8)
					elif m('hex'):
						v = int(m('hex'), 16)
					elif m('n0'):
						v = m('n0')
					else:
						v = m('char')
						if v:
							v = parse_char(v)
						else:
							v = m('n2') or m('n4')
				elif name == OP:
					if v == '%:':
						v = '#'
					elif v == '%:%:':
						v = '##'
				elif name == STR:
					# remove the quotes around the string
					v = v[1:-1]
				ret.append((name, v))
				break
	return ret
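
# Example (illustrative): tokens are (type, value) pairs; hex/octal/char
# literals are evaluated and c++ alternative operators go through g_optrans:
#   tokenize('A + 0x1F')  # -> [(IDENT, 'A'), (OP, '+'), (NUM, 31)]
#   tokenize('x and y')   # -> [(IDENT, 'x'), (OP, 'and'), (IDENT, 'y')]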

def format_defines(lst):
	ret = []
	for y in lst:
		if y:
			pos = y.find('=')
			if pos == -1:
				# "-DFOO" should give "#define FOO 1"
				ret.append(y)
			elif pos > 0:
				# all others are assumed to be -DX=Y
				ret.append('%s %s' % (y[:pos], y[pos+1:]))
			else:
				raise ValueError('Invalid define expression %r' % y)
	return ret
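
# Example (illustrative), mirroring command-line defines:
#   format_defines(['FOO', 'BAR=1'])  # -> ['FOO', 'BAR 1']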

class c_parser(object):
	"""
	Used by :py:func:`waflib.Tools.c_preproc.scan` to parse c/h files. Note that by default,
	only project headers are parsed.
	"""
	def __init__(self, nodepaths=None, defines=None):
		self.lines = []
		"""list of lines read"""

		if defines is None:
			self.defs = {}
		else:
			self.defs = dict(defines) # make a copy
		self.state = []

		self.count_files = 0
		self.currentnode_stack = []

		self.nodepaths = nodepaths or []
		"""Include paths"""

		self.nodes = []
		"""List of :py:class:`waflib.Node.Node` found so far"""

		self.names = []
		"""List of file names that could not be matched by any file"""

		self.curfile = ''
		"""Current file"""

		self.ban_includes = set()
		"""Includes that must not be read (#pragma once)"""

		self.listed = set()
		"""Include nodes/names already listed to avoid duplicates in self.nodes/self.names"""

	def cached_find_resource(self, node, filename):
		"""
		Find a file from the input directory

		:param node: directory
		:type node: :py:class:`waflib.Node.Node`
		:param filename: header to find
		:type filename: string
		:return: the node if found, or None
		:rtype: :py:class:`waflib.Node.Node`
		"""
		try:
			cache = node.ctx.preproc_cache_node
		except AttributeError:
			cache = node.ctx.preproc_cache_node = Utils.lru_cache(FILE_CACHE_SIZE)

		key = (node, filename)
		try:
			return cache[key]
		except KeyError:
			ret = node.find_resource(filename)
			if ret:
				if getattr(ret, 'children', None):
					ret = None
				elif ret.is_child_of(node.ctx.bldnode):
					tmp = node.ctx.srcnode.search_node(ret.path_from(node.ctx.bldnode))
					if tmp and getattr(tmp, 'children', None):
						ret = None
			cache[key] = ret
			return ret

	def tryfind(self, filename, kind='"', env=None):
		"""
		Try to obtain a node from the filename based on the include paths. Will add
		the node found to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes` or the file name to
		:py:attr:`waflib.Tools.c_preproc.c_parser.names` if no corresponding file is found. Called by
		:py:attr:`waflib.Tools.c_preproc.c_parser.start`.

		:param filename: header to find
		:type filename: string
		:return: the node if found
		:rtype: :py:class:`waflib.Node.Node`
		"""
		if filename.endswith('.moc'):
			# we could let the qt4 module use a subclass, but then the function "scan" below must be duplicated
			# in the qt4 and in the qt5 classes. So we have two lines here and it is sufficient.
			self.names.append(filename)
			return None

		self.curfile = filename

		found = None
		if kind == '"':
			if env.MSVC_VERSION:
				for n in reversed(self.currentnode_stack):
					found = self.cached_find_resource(n, filename)
					if found:
						break
			else:
				found = self.cached_find_resource(self.currentnode_stack[-1], filename)

		if not found:
			for n in self.nodepaths:
				found = self.cached_find_resource(n, filename)
				if found:
					break

		listed = self.listed
		if found and not found in self.ban_includes:
			if found not in listed:
				listed.add(found)
				self.nodes.append(found)
			self.addlines(found)
		else:
			if filename not in listed:
				listed.add(filename)
				self.names.append(filename)
		return found

	def filter_comments(self, node):
		"""
		Filter the comments from a c/h file, and return the preprocessor lines.
		The regexps :py:attr:`waflib.Tools.c_preproc.re_cpp`, :py:attr:`waflib.Tools.c_preproc.re_nl` and :py:attr:`waflib.Tools.c_preproc.re_lines` are used internally.

		:return: the preprocessor directives as a list of (keyword, line)
		:rtype: a list of string pairs
		"""
		# return a list of tuples : keyword, line
		code = node.read()
		if use_trigraphs:
			for (a, b) in trig_def:
				code = b.join(code.split(a)) # replace the trigraph a by its equivalent b
		code = re_nl.sub('', code)
		code = re_cpp.sub(repl, code)
		return re_lines.findall(code)

	def parse_lines(self, node):
		try:
			cache = node.ctx.preproc_cache_lines
		except AttributeError:
			cache = node.ctx.preproc_cache_lines = Utils.lru_cache(LINE_CACHE_SIZE)
		try:
			return cache[node]
		except KeyError:
			cache[node] = lines = self.filter_comments(node)
			lines.append((POPFILE, ''))
			lines.reverse()
			return lines

	def addlines(self, node):
		"""
		Add the lines from a header in the list of preprocessor lines to parse

		:param node: header
		:type node: :py:class:`waflib.Node.Node`
		"""
		self.currentnode_stack.append(node.parent)

		self.count_files += 1
		if self.count_files > recursion_limit:
			# issue #812
			raise PreprocError('recursion limit exceeded')

		if Logs.verbose:
			Logs.debug('preproc: reading file %r', node)
		try:
			lines = self.parse_lines(node)
		except EnvironmentError:
			raise PreprocError('could not read the file %r' % node)
		except Exception:
			if Logs.verbose > 0:
				Logs.error('parsing %r failed %s', node, traceback.format_exc())
		else:
			self.lines.extend(lines)

	def start(self, node, env):
		"""
		Preprocess a source file to obtain the dependencies, which are accumulated to :py:attr:`waflib.Tools.c_preproc.c_parser.nodes`
		and :py:attr:`waflib.Tools.c_preproc.c_parser.names`.

		:param node: source file
		:type node: :py:class:`waflib.Node.Node`
		:param env: config set containing additional defines to take into account
		:type env: :py:class:`waflib.ConfigSet.ConfigSet`
		"""
		Logs.debug('preproc: scanning %s (in %s)', node.name, node.parent.name)

		self.current_file = node
		self.addlines(node)

		# macros may be defined on the command-line, so they must be parsed as if they were part of the file
		if env.DEFINES:
			lst = format_defines(env.DEFINES)
			lst.reverse()
			self.lines.extend([('define', x) for x in lst])

		while self.lines:
			(token, line) = self.lines.pop()
			if token == POPFILE:
				self.count_files -= 1
				self.currentnode_stack.pop()
				continue

			try:
				state = self.state

				# make certain we define the state if we are about to enter in an if block
				if token[:2] == 'if':
					state.append(undefined)
				elif token == 'endif':
					state.pop()

				# skip lines when in a dead 'if' branch, wait for the endif
				if token[0] != 'e':
					if skipped in self.state or ignored in self.state:
						continue

				if token == 'if':
					ret = eval_macro(tokenize(line), self.defs)
					if ret:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifdef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = accepted
					else:
						state[-1] = ignored
				elif token == 'ifndef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						state[-1] = ignored
					else:
						state[-1] = accepted
				elif token == 'include' or token == 'import':
					(kind, inc) = extract_include(line, self.defs)
					self.current_file = self.tryfind(inc, kind, env)
					if token == 'import':
						self.ban_includes.add(self.current_file)
				elif token == 'elif':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						if eval_macro(tokenize(line), self.defs):
							state[-1] = accepted
				elif token == 'else':
					if state[-1] == accepted:
						state[-1] = skipped
					elif state[-1] == ignored:
						state[-1] = accepted
				elif token == 'define':
					try:
						self.defs[self.define_name(line)] = line
					except AttributeError:
						raise PreprocError('Invalid define line %r' % line)
				elif token == 'undef':
					m = re_mac.match(line)
					if m and m.group() in self.defs:
						self.defs.__delitem__(m.group())
						#print "undef %s" % name
				elif token == 'pragma':
					if re_pragma_once.match(line.lower()):
						self.ban_includes.add(self.current_file)
			except Exception as e:
				if Logs.verbose:
					Logs.debug('preproc: line parsing failed (%s): %s %s', e, line, traceback.format_exc())

	def define_name(self, line):
		"""
		:param line: define line
		:type line: string
		:rtype: string
		:return: the define name
		"""
		return re_mac.match(line).group()

def scan(task):
	"""
	Get the dependencies using a c/c++ preprocessor; this is required for finding dependencies of the kind::

		#include some_macro()

	This function is bound as a task method on :py:class:`waflib.Tools.c.c` and :py:class:`waflib.Tools.cxx.cxx` for example
	"""
	try:
		incn = task.generator.includes_nodes
	except AttributeError:
		raise Errors.WafError('%r is missing a feature such as "c", "cxx" or "includes": ' % task.generator)

	if go_absolute:
		nodepaths = incn + [task.generator.bld.root.find_dir(x) for x in standard_includes]
	else:
		nodepaths = [x for x in incn if x.is_child_of(x.ctx.srcnode) or x.is_child_of(x.ctx.bldnode)]

	tmp = c_parser(nodepaths)
	tmp.start(task.inputs[0], task.env)
	return (tmp.nodes, tmp.names)
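
# Usage sketch (assumes a configured waf build context "bld"; paths are
# illustrative) for driving the parser outside of a scanner task:
#   parser = c_parser(nodepaths=[bld.path.find_dir('include')])
#   parser.start(bld.path.find_resource('src/main.c'), bld.env)
#   parser.nodes  # headers resolved to waflib.Node.Node objects
#   parser.names  # header names that could not be resolved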