You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.js 18KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752
  /*
   * Author: Alex Kocharin <alex@kocharin.ru>
   * GIT: https://github.com/rlidwka/jju
   * License: WTFPL, grab your copy here: http://www.wtfpl.net/txt/copying/
   */

  // RTFM: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
  var Uni = require('./unicode')
  /**
   * Tell whether `x` is a hexadecimal digit: 0-9, A-F or a-f.
   *
   * The check is a plain string comparison, so `undefined` (which is what
   * indexing past the end of the input yields) is reported as `false`.
   *
   * @param {string} x - single character to test
   * @returns {boolean}
   */
  function isHexDigit(x) {
    if (x >= '0' && x <= '9') return true
    if (x >= 'A' && x <= 'F') return true
    return x >= 'a' && x <= 'f'
  }
  /**
   * Tell whether `x` is an octal digit (0-7).
   *
   * `undefined` compares false on both sides, so it yields `false`.
   *
   * @param {string} x - single character to test
   * @returns {boolean}
   */
  function isOctDigit(x) {
    return '0' <= x && x <= '7'
  }
  /**
   * Tell whether `x` is a decimal digit (0-9).
   *
   * `undefined` compares false on both sides, so it yields `false`.
   *
   * @param {string} x - single character to test
   * @returns {boolean}
   */
  function isDecDigit(x) {
    return '0' <= x && x <= '9'
  }
  // Single-character escapes: maps the character that follows a backslash
  // inside a string literal to the character it stands for.
  var unescapeMap = {
    '\'': '\'',
    '"' : '"',
    '\\': '\\',
    'b' : '\b',
    'f' : '\f',
    'n' : '\n',
    'r' : '\r',
    't' : '\t',
    'v' : '\v',
    '/' : '/',
  }
  /**
   * Build a three-line error message: the message with a 1-based
   * "row:column" suffix, an excerpt of the offending source line, and a
   * line holding a `^` marker under the offending character.
   *
   * @param {string} input - full text being parsed
   * @param {string} msg - description of the problem
   * @param {number} position - 0-based index of the offending character
   * @param {number} lineno - 0-based line number of `position`
   * @param {number} column - 0-based column of `position` within its line
   * @param {boolean} json5 - selects the JSON5 or the JSON line-terminator test
   * @returns {string} the formatted message
   */
  function formatError(input, msg, position, lineno, column, json5) {
    var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
    var header = msg + ' at ' + (lineno + 1) + ':' + (column + 1)
    var srcline = ''
    var underline = ''

    // Index just before the first character to print: the start of the
    // line, but no more than 70 characters before the wrong one.
    var cursor = Math.max(position - column - 1, position - 70)

    while (true) {
      var chr = input[++cursor]

      // `>=` rather than `===`: a start index already past the end must stop
      // here instead of appending the text "undefined" to the excerpt.
      if (cursor >= input.length || isLineTerminator(chr)) {
        if (position >= cursor) {
          // ending line error, so show it after the last char
          underline += '^'
        }
        break
      }

      srcline += chr

      if (position === cursor) {
        underline += '^'
      } else if (position > cursor) {
        // keep tabs so the marker lines up with the excerpt
        underline += chr === '\t' ? '\t' : ' '
      }

      // output no more than 78 characters on the string
      if (srcline.length > 78) break
    }

    return header + '\n' + srcline + '\n' + underline
  }
  /**
   * Parse `input` as JSON or JSON5 and return the resulting value.
   *
   * JSON5 extensions are enabled unless `options.mode === 'json'` or
   * `options.legacy` is truthy.
   *
   * Options read by this function:
   *   mode, legacy    - select strict JSON parsing
   *   reviver         - function(key, value) called for every parsed member
   *   reserved_keys   - 'replace' stores keys that clash with Object.prototype
   *                     members, 'throw' rejects them, anything else skips them
   *   null_prototype  - build objects with Object.create(null)
   *   _tokenize       - internal callback receiving every token
   *
   * @param {string} input - text to parse
   * @param {Object} options - parser options (must be an object)
   * @returns {*} the parsed value
   * @throws {SyntaxError} carrying 1-based `row` and `column` properties
   */
  function parse(input, options) {
    // parse as a standard JSON mode
    var json5 = !(options.mode === 'json' || options.legacy)
    var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
    var isWhiteSpace = json5 ? Uni.isWhiteSpace : Uni.isWhiteSpaceJSON

    var length = input.length
    var lineno = 0
    var linestart = 0
    var position = 0
    var stack = []

    // no-op token hooks, replaced below when a tokenizer callback is given
    var tokenStart = function() {}
    var tokenEnd = function(v) { return v }

    /* options._tokenize({
         raw: '...',
         type: 'whitespace'|'comment'|'key'|'literal'|'separator'|'newline',
         value: 'number'|'string'|'whatever',
         stack: [...],
       })
    */
    if (options._tokenize) {
      var tokenStartPos = null

      tokenStart = function() {
        if (tokenStartPos !== null) throw Error('internal error, token overlap')
        tokenStartPos = position
      }

      tokenEnd = function(v, type) {
        // zero-length tokens are not reported
        if (tokenStartPos !== position) {
          var hash = {
            raw: input.slice(tokenStartPos, position),
            type: type,
            stack: stack.slice(0),
          }
          if (v !== undefined) hash.value = v
          options._tokenize.call(null, hash)
        }
        tokenStartPos = null
        return v
      }
    }

    // Throw a SyntaxError for the current position. Without `msg` the message
    // names the unexpected character, or reports the end of input.
    function fail(msg) {
      var column = position - linestart

      if (!msg) {
        if (position < length) {
          // JSON.stringify gives a printable form of control characters;
          // the quoting is then switched from double to single quotes
          var token = '\'' +
            JSON
              .stringify(input[position])
              .replace(/^"|"$/g, '')
              .replace(/'/g, "\\'")
              .replace(/\\"/g, '"')
            + '\''
          msg = 'Unexpected token ' + token
        } else {
          msg = 'Unexpected end of input'
        }
      }

      var error = SyntaxError(formatError(input, msg, position, lineno, column, json5))
      error.row = lineno + 1
      error.column = column + 1
      throw error
    }

    // Register a line break; `chr` is the terminator that was just consumed.
    function newline(chr) {
      // account for <cr><lf>
      if (chr === '\r' && input[position] === '\n') position++
      linestart = position
      lineno++
    }

    // Parse any value. Returns undefined (consuming nothing) when the next
    // character cannot start a value.
    function parseGeneric() {
      if (position >= length) return undefined

      tokenStart()
      var chr = input[position++]

      if (chr === '"' || (chr === '\'' && json5)) {
        return tokenEnd(parseString(chr), 'literal')
      }
      if (chr === '{') {
        tokenEnd(undefined, 'separator')
        return parseObject()
      }
      if (chr === '[') {
        tokenEnd(undefined, 'separator')
        return parseArray()
      }
      if (chr === '-'
       || chr === '.'
       || isDecDigit(chr)
          // + number Infinity NaN
       || (json5 && (chr === '+' || chr === 'I' || chr === 'N'))
      ) {
        return tokenEnd(parseNumber(), 'literal')
      }
      if (chr === 'n') {
        parseKeyword('null')
        return tokenEnd(null, 'literal')
      }
      if (chr === 't') {
        parseKeyword('true')
        return tokenEnd(true, 'literal')
      }
      if (chr === 'f') {
        parseKeyword('false')
        return tokenEnd(false, 'literal')
      }

      position--
      return tokenEnd(undefined)
    }

    // Parse an object key. Returns undefined (consuming nothing) when the
    // next character cannot start a key.
    function parseKey() {
      if (position >= length) return undefined

      tokenStart()
      var chr = input[position++]

      if (chr === '"' || (chr === '\'' && json5)) {
        return tokenEnd(parseString(chr), 'key')
      }
      if (chr === '{') {
        tokenEnd(undefined, 'separator')
        return parseObject()
      }
      if (chr === '[') {
        tokenEnd(undefined, 'separator')
        return parseArray()
      }
      if (chr === '.' || isDecDigit(chr)) {
        return tokenEnd(parseNumber(), 'key')
      }
      // Unquoted identifiers (plain or starting with a \uXXXX escape) are a
      // JSON5 extension; the parentheses keep both forms behind the json5 flag.
      if (json5
       && (Uni.isIdentifierStart(chr) || (chr === '\\' && input[position] === 'u'))) {
        // unicode char or a unicode sequence
        var rollback = position - 1
        var result = parseIdentifier()

        if (result === undefined) {
          position = rollback
          return tokenEnd(undefined)
        }
        return tokenEnd(result, 'key')
      }

      position--
      return tokenEnd(undefined)
    }

    // Skip whitespace, line breaks and (in JSON5 mode) comments, reporting
    // each of them to the tokenizer as separate tokens.
    function skipWhiteSpace() {
      tokenStart()
      while (position < length) {
        var chr = input[position++]

        if (isLineTerminator(chr)) {
          // close the whitespace token before the terminator
          position--
          tokenEnd(undefined, 'whitespace')
          tokenStart()
          position++
          newline(chr)
          tokenEnd(undefined, 'newline')
          tokenStart()

        } else if (isWhiteSpace(chr)) {
          // nothing

        } else if (chr === '/'
                && json5
                && (input[position] === '/' || input[position] === '*')
        ) {
          // close the whitespace token before the comment
          position--
          tokenEnd(undefined, 'whitespace')
          tokenStart()
          position++
          skipComment(input[position++] === '*')
          tokenEnd(undefined, 'comment')
          tokenStart()

        } else {
          position--
          break
        }
      }
      return tokenEnd(undefined, 'whitespace')
    }

    // Skip a comment body; `multi` is true for /* */ and false for //.
    function skipComment(multi) {
      while (position < length) {
        var chr = input[position++]

        if (isLineTerminator(chr)) {
          // LineTerminator is an end of singleline comment
          if (!multi) {
            // let parent function deal with newline
            position--
            return
          }
          newline(chr)

        } else if (chr === '*' && multi) {
          // end of multiline comment
          if (input[position] === '/') {
            position++
            return
          }
        }
      }

      if (multi) {
        fail('Unclosed multiline comment')
      }
    }

    // Consume the rest of `keyword`, or fail pointing at its first character.
    function parseKeyword(keyword) {
      // keyword[0] is not checked because it should've checked earlier
      var firstPos = position
      for (var i = 1; i < keyword.length; i++) {
        if (position >= length || keyword[i] !== input[position]) {
          position = firstPos - 1
          fail()
        }
        position++
      }
    }

    // Parse an object; the opening '{' has already been consumed.
    function parseObject() {
      var result = options.null_prototype ? Object.create(null) : {}
      var empty_object = {}
      // True once any "key: value" member was read, even one that was dropped
      // afterwards (reserved key or reviver returning undefined); strict JSON
      // must still reject a trailing comma in that case.
      var has_member = false
      var chr

      while (position < length) {
        skipWhiteSpace()
        var item1 = parseKey()
        skipWhiteSpace()
        tokenStart()
        chr = input[position++]
        tokenEnd(undefined, 'separator')

        if (chr === '}' && item1 === undefined) {
          if (!json5 && has_member) {
            position--
            fail('Trailing comma in object')
          }
          return result
        }

        if (!(chr === ':' && item1 !== undefined)) {
          position--
          fail()
        }

        skipWhiteSpace()
        stack.push(item1)
        var item2 = parseGeneric()
        stack.pop()

        if (item2 === undefined) fail('No value found for key ' + item1)
        if (typeof(item1) !== 'string') {
          if (!json5 || typeof(item1) !== 'number') {
            fail('Wrong key type: ' + item1)
          }
        }
        has_member = true

        // keys that clash with members every plain object already has
        if ((item1 in empty_object || empty_object[item1] != null) && options.reserved_keys !== 'replace') {
          if (options.reserved_keys === 'throw') {
            fail('Reserved key: ' + item1)
          }
          // otherwise silently ignore it

        } else {
          if (typeof(options.reviver) === 'function') {
            item2 = options.reviver.call(null, item1, item2)
          }

          if (item2 !== undefined) {
            // defineProperty creates an own property even for keys such as
            // __proto__ that plain assignment would treat specially
            Object.defineProperty(result, item1, {
              value: item2,
              enumerable: true,
              configurable: true,
              writable: true,
            })
          }
        }

        skipWhiteSpace()
        tokenStart()
        chr = input[position++]
        tokenEnd(undefined, 'separator')

        if (chr === '}') return result
        if (chr !== ',') fail()
      }

      fail()
    }

    // Parse an array; the opening '[' has already been consumed.
    function parseArray() {
      var result = []

      while (position < length) {
        skipWhiteSpace()
        stack.push(result.length)
        var item = parseGeneric()
        stack.pop()
        skipWhiteSpace()
        tokenStart()
        var chr = input[position++]
        tokenEnd(undefined, 'separator')

        if (item !== undefined) {
          if (typeof(options.reviver) === 'function') {
            item = options.reviver.call(null, String(result.length), item)
          }
          if (item === undefined) {
            // the reviver dropped the element: leave a hole in its place
            result.length++
            item = true // hack for check below, not included into result
          } else {
            result.push(item)
          }
        }

        if (chr === ',') {
          if (item === undefined) {
            fail('Elisions are not supported')
          }

        } else if (chr === ']') {
          if (!json5 && item === undefined && result.length) {
            position--
            fail('Trailing comma in array')
          }
          return result

        } else {
          position--
          fail()
        }
      }

      // input ended inside the array
      fail()
    }

    // Parse a numeric literal whose first character was already consumed.
    function parseNumber() {
      // rewind because we don't know first char
      position--

      var start = position
      var chr = input[position++]

      // Convert input[start..position) to a number, failing on malformed
      // literals and, in strict mode, on anything JSON does not allow.
      function toNumber(is_octal) {
        var str = input.slice(start, position)
        var result = is_octal
          // the prefix may be "0o", "0O" or a bare legacy "0"
          ? parseInt(str.replace(/^0o?/i, ''), 8)
          : Number(str)

        if (Number.isNaN(result)) {
          position--
          fail('Bad numeric literal - "' + input.slice(start, position + 1) + '"')
        } else if (!json5 && !str.match(/^-?(0|[1-9][0-9]*)(\.[0-9]+)?(e[+-]?[0-9]+)?$/i)) {
          // additional restrictions imposed by json
          position--
          fail('Non-json numeric literal - "' + input.slice(start, position + 1) + '"')
        }
        return result
      }

      // ex: -5982475.249875e+29384
      //     ^ skipping this
      if (chr === '-' || (chr === '+' && json5)) chr = input[position++]

      if (chr === 'N' && json5) {
        parseKeyword('NaN')
        return NaN
      }

      if (chr === 'I' && json5) {
        parseKeyword('Infinity')
        // returning +inf or -inf
        return toNumber()
      }

      if (chr >= '1' && chr <= '9') {
        // ex: -5982475.249875e+29384
        //      ^^^^^^^ skipping these
        while (position < length && isDecDigit(input[position])) position++
        chr = input[position++]
      }

      // special case for leading zero: 0.123456
      if (chr === '0') {
        chr = input[position++]

        //             new syntax, "0o777"          old syntax, "0777"
        var is_octal = chr === 'o' || chr === 'O' || isOctDigit(chr)
        var is_hex = chr === 'x' || chr === 'X'

        if (json5 && (is_octal || is_hex)) {
          var isDigit = is_hex ? isHexDigit : isOctDigit
          while (position < length && isDigit(input[position])) position++

          // the sign is applied by hand: Number()/parseInt() do not accept
          // a sign in front of a radix prefix
          var sign = 1
          if (input[start] === '-') {
            sign = -1
            start++
          } else if (input[start] === '+') {
            start++
          }

          return sign * toNumber(is_octal)
        }
      }

      if (chr === '.') {
        // ex: -5982475.249875e+29384
        //              ^^^^^^ skipping these
        while (position < length && isDecDigit(input[position])) position++
        chr = input[position++]
      }

      if (chr === 'e' || chr === 'E') {
        chr = input[position++]
        if (chr === '-' || chr === '+') position++
        // ex: -5982475.249875e+29384
        //                       ^^^^^ skipping these
        while (position < length && isDecDigit(input[position])) position++
        chr = input[position++]
      }

      // we have char in the buffer, so count for it
      position--
      return toNumber()
    }

    // Parse an unquoted identifier whose first character was already
    // consumed. Returns undefined when no identifier starts there.
    function parseIdentifier() {
      // rewind because we don't know first char
      position--

      var result = ''

      while (position < length) {
        var chr = input[position++]

        if (chr === '\\'
         && input[position] === 'u'
         && isHexDigit(input[position + 1])
         && isHexDigit(input[position + 2])
         && isHexDigit(input[position + 3])
         && isHexDigit(input[position + 4])
        ) {
          // UnicodeEscapeSequence
          chr = String.fromCharCode(parseInt(input.slice(position + 1, position + 5), 16))
          position += 5
        }

        if (result.length) {
          // identifier started
          if (Uni.isIdentifierPart(chr)) {
            result += chr
          } else {
            position--
            return result
          }

        } else {
          if (Uni.isIdentifierStart(chr)) {
            result += chr
          } else {
            return undefined
          }
        }
      }

      fail()
    }

    // Parse a string literal; the opening quote `endChar` was already consumed.
    function parseString(endChar) {
      // 7.8.4 of ES262 spec
      var result = ''

      while (position < length) {
        var chr = input[position++]

        if (chr === endChar) {
          return result

        } else if (chr === '\\') {
          if (position >= length) fail()
          chr = input[position++]

          // \v and \' are not valid escapes in strict JSON
          if (unescapeMap[chr] && (json5 || (chr !== 'v' && chr !== "'"))) {
            result += unescapeMap[chr]

          } else if (json5 && isLineTerminator(chr)) {
            // line continuation
            newline(chr)

          } else if (chr === 'u' || (chr === 'x' && json5)) {
            // unicode/character escape sequence
            var off = chr === 'u' ? 4 : 2

            // validation for \uXXXX
            for (var i = 0; i < off; i++) {
              if (position >= length) fail()
              if (!isHexDigit(input[position])) fail('Bad escape sequence')
              position++
            }

            result += String.fromCharCode(parseInt(input.slice(position - off, position), 16))

          } else if (json5 && isOctDigit(chr)) {
            // legacy octal escape of one to three digits
            var digits
            if (chr < '4' && isOctDigit(input[position]) && isOctDigit(input[position + 1])) {
              // three-digit octal
              digits = 3
            } else if (isOctDigit(input[position])) {
              // two-digit octal
              digits = 2
            } else {
              digits = 1
            }
            position += digits - 1
            result += String.fromCharCode(parseInt(input.slice(position - digits, position), 8))

          } else if (json5) {
            // \X -> x
            result += chr

          } else {
            position--
            fail()
          }

        } else if (isLineTerminator(chr)) {
          fail()

        } else {
          if (!json5 && chr.charCodeAt(0) < 32) {
            position--
            fail('Unexpected control character')
          }

          // SourceCharacter but not one of " or \ or LineTerminator
          result += chr
        }
      }

      fail()
    }

    skipWhiteSpace()
    var return_value = parseGeneric()

    if (return_value === undefined && position >= length) {
      // nothing but whitespace/comments, or nothing at all
      if (position) {
        fail('No data, only a whitespace')
      } else {
        fail('No data, empty input')
      }
    }

    skipWhiteSpace()

    // anything left after the value is an error
    if (position < length) fail()

    if (typeof(options.reviver) === 'function') {
      return_value = options.reviver.call(null, '', return_value)
    }
    return return_value
  }
  /*
   * parse(text, options)
   * or
   * parse(text, reviver)
   *
   * where:
   * text - string
   * options - object
   * reviver - function
   *
   * Returns the parsed value, or undefined when text is undefined.
   * Throws SyntaxError (with `row` and `column` properties) on bad input.
   * The options object passed by the caller is never modified.
   */
  module.exports.parse = function parseJSON(input, options) {
    // support legacy functions
    if (typeof(options) === 'function') {
      options = {
        reviver: options
      }
    }

    if (input === undefined) {
      // parse(stringify(x)) should be equal x
      // with JSON functions it is not 'cause of undefined
      // so we're fixing it
      return undefined
    }

    // JSON.parse compat
    if (typeof(input) !== 'string') input = String(input)

    // Defaults go into a private copy: writing them into the caller's object
    // would make them stick to later calls that reuse that object.
    var opts = {}
    if (options != null) {
      for (var key in options) opts[key] = options[key]
    }

    if (opts.reserved_keys == null) opts.reserved_keys = 'ignore'
    if (opts.reserved_keys === 'throw' || opts.reserved_keys === 'ignore') {
      if (opts.null_prototype == null) {
        opts.null_prototype = true
      }
    }

    try {
      return parse(input, opts)
    } catch(err) {
      // jju is a recursive parser, so JSON.parse("{{{{{{{") could blow up the stack
      //
      // this catch is used to skip all those internal calls
      if (err instanceof SyntaxError && err.row != null && err.column != null) {
        var old_err = err
        err = SyntaxError(old_err.message)
        err.column = old_err.column
        err.row = old_err.row
      }
      throw err
    }
  }
  /*
   * tokenize(text, options)
   * or
   * tokenize(text, reviver)
   *
   * Parses text and returns the array of tokens reported by the parser;
   * the parsed value itself is stored on the array as `data`.
   * If options._addstack is an array, its items are prepended to the
   * `stack` of every token.
   * The options object passed by the caller is never modified.
   */
  module.exports.tokenize = function tokenizeJSON(input, options) {
    // A bare reviver has to be wrapped here: parse() wraps functions in a
    // fresh object, which would drop a _tokenize hook attached to the function.
    if (typeof(options) === 'function') {
      options = {
        reviver: options
      }
    }

    // private copy, so the internal hook is not left on the caller's object
    var opts = {}
    if (options != null) {
      for (var key in options) opts[key] = options[key]
    }

    var tokens = []

    opts._tokenize = function(smth) {
      if (opts._addstack) smth.stack.unshift.apply(smth.stack, opts._addstack)
      tokens.push(smth)
    }

    tokens.data = module.exports.parse(input, opts)
    return tokens
  }