'use strict'

var assert = require('assert')
var createDebug = require('debug')
var assign = require('../constant/assign.js')
var codes = require('../character/codes.js')
var markdownLineEnding = require('../character/markdown-line-ending.js')
var chunkedPush = require('./chunked-push.js')
var chunkedSplice = require('./chunked-splice.js')
var miniflat = require('./miniflat.js')
var resolveAll = require('./resolve-all.js')
var serializeChunks = require('./serialize-chunks.js')
var shallow = require('./shallow.js')
var sliceChunks = require('./slice-chunks.js')

function _interopDefaultLegacy(e) {
  return e && typeof e === 'object' && 'default' in e ? e : {default: e}
}

var assert__default = /*#__PURE__*/ _interopDefaultLegacy(assert)
var createDebug__default = /*#__PURE__*/ _interopDefaultLegacy(createDebug)

var debug = createDebug__default['default']('micromark')

// Create a tokenizer.
// Tokenizers deal with one type of data (e.g., containers, flow, text).
// The parser is the object dealing with it all.
// `initialize` works like other constructs, except that only its `tokenize`
// function is used, in which case it doesn’t receive an `ok` or `nok`.
// `from` can be given to set the point before the first character, although
// when further lines are indented, they must be set with `defineSkip`.
function createTokenizer(parser, initialize, from) {
  var point = from ? shallow(from) : {line: 1, column: 1, offset: 0}
  var columnStart = {}
  var resolveAllConstructs = []
  var chunks = []
  var stack = []
  var consumed = true

  // Tools used for tokenizing.
  var effects = {
    consume: consume,
    enter: enter,
    exit: exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {interrupt: true}),
    lazy: constructFactory(onsuccessfulcheck, {lazy: true})
  }

  // State and tools for resolving and serializing.
  var context = {
    previous: codes.eof,
    events: [],
    parser: parser,
    sliceStream: sliceStream,
    sliceSerialize: sliceSerialize,
    now: now,
    defineSkip: skip,
    write: write
  }

  // The state function.
  var state = initialize.tokenize.call(context, effects)

  // Track which character we expect to be consumed, to catch bugs.
  var expectedCode

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  // Store where we are in the input stream.
  point._index = 0
  point._bufferIndex = -1

  return context

  function write(slice) {
    chunks = chunkedPush(chunks, slice)

    main()

    // Exit if we’re not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== codes.eof) {
      return []
    }

    addResult(initialize, 0)

    // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }

  //
  // Tools.
  //

  function sliceSerialize(token) {
    return serializeChunks(sliceStream(token))
  }

  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  function now() {
    return shallow(point)
  }

  function skip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
    debug('position: define skip: `%j`', point)
  }

  //
  // State management.
  //

  // Main loop (note that `_index` and `_bufferIndex` in `point` are modified
  // by `consume`).
  // Here is where we walk through the chunks, which either include strings of
  // several characters, or numerical character codes.
  // The reason to do this in a loop instead of a call is so the stack can
  // drain.
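  // An illustrative aside (hypothetical values, not produced in this file):
  // after preprocessing, `chunks` could look like
  // `['# hi', codes.lineFeed, 'there', codes.eof]`. String chunks are walked
  // character by character with `_bufferIndex`, while numerical codes such as
  // line endings and EOF are passed along as-is.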
  function main() {
    var chunkIndex
    var chunk

    while (point._index < chunks.length) {
      chunk = chunks[point._index]

      // If we’re in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }

  // Deal with one code.
  function go(code) {
    assert__default['default'].equal(
      consumed,
      true,
      'expected character to be consumed'
    )
    consumed = undefined
    debug('main: passing `%s` to %s', code, state.name)
    expectedCode = code
    state = state(code)
  }

  // Move a character forward.
  function consume(code) {
    assert__default['default'].equal(
      code,
      expectedCode,
      'expected given code to equal expected code'
    )

    debug('consume: `%s`', code)

    assert__default['default'].equal(
      consumed,
      undefined,
      'expected code to not have been consumed'
    )
    assert__default['default'](
      code === null
        ? !context.events.length ||
            context.events[context.events.length - 1][0] === 'exit'
        : context.events[context.events.length - 1][0] === 'enter',
      'expected last token to be open'
    )

    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      point.offset += code === codes.carriageReturnLineFeed ? 2 : 1
      accountForPotentialSkip()
      debug('position: after eol: `%j`', point)
    } else if (code !== codes.virtualSpace) {
      point.column++
      point.offset++
    }

    // Not in a string chunk.
    if (point._bufferIndex < 0) {
      point._index++
    } else {
      point._bufferIndex++

      // At end of string chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    }

    // Expose the previous character.
    context.previous = code

    // Mark as consumed.
    consumed = true
  }

  // Start a token.
  function enter(type, fields) {
    var token = fields || {}
    token.type = type
    token.start = now()

    assert__default['default'].equal(
      typeof type,
      'string',
      'expected string type'
    )
    assert__default['default'].notEqual(
      type.length,
      0,
      'expected non-empty string'
    )
    debug('enter: `%s`', type)

    context.events.push(['enter', token, context])

    stack.push(token)

    return token
  }

  // Stop a token.
  function exit(type) {
    assert__default['default'].equal(
      typeof type,
      'string',
      'expected string type'
    )
    assert__default['default'].notEqual(
      type.length,
      0,
      'expected non-empty string'
    )
    assert__default['default'].notEqual(
      stack.length,
      0,
      'cannot close w/o open tokens'
    )

    var token = stack.pop()
    token.end = now()

    assert__default['default'].equal(
      type,
      token.type,
      'expected exit token to match current token'
    )

    assert__default['default'](
      !(
        token.start._index === token.end._index &&
        token.start._bufferIndex === token.end._bufferIndex
      ),
      'expected non-empty token (`' + type + '`)'
    )

    debug('exit: `%s`', token.type)

    context.events.push(['exit', token, context])

    return token
  }

  // Use results.
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  // Discard results.
  function onsuccessfulcheck(construct, info) {
    info.restore()
  }

  // Factory to attempt/check/interrupt.
  function constructFactory(onreturn, fields) {
    return hook

    // Handle either an object mapping codes to constructs, a list of
    // constructs, or a single construct.
    function hook(constructs, returnState, bogusState) {
      var listOfConstructs
      var constructIndex
      var currentConstruct
      var info

      return constructs.tokenize || 'length' in constructs
        ? handleListOfConstructs(miniflat(constructs))
        : handleMapOfConstructs
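      // A hypothetical illustration (names not from this file): a map such as
      // `{42: attention, null: [fallback]}` keys constructs by character code
      // and is handled by `handleMapOfConstructs`, whereas a single construct
      // or an array of constructs goes straight to `handleListOfConstructs`.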
      function handleMapOfConstructs(code) {
        if (code in constructs || codes.eof in constructs) {
          return handleListOfConstructs(
            constructs.null
              ? /* c8 ignore next */
                miniflat(constructs[code]).concat(miniflat(constructs.null))
              : constructs[code]
          )(code)
        }

        return bogusState(code)
      }

      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0
        return handleConstruct(list[constructIndex])
      }

      function handleConstruct(construct) {
        return start

        function start(code) {
          // To do: not needed to store if there is no bogus state, probably?
          // Currently doesn’t work because `inspect` in document does a check
          // w/o a bogus, which doesn’t make sense. But it does seem to help
          // perf by not storing.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          if (
            construct.name &&
            context.parser.constructs.disable.null.indexOf(construct.name) > -1
          ) {
            return nok(code)
          }

          return construct.tokenize.call(
            fields ? assign({}, context, fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      function ok(code) {
        assert__default['default'].equal(code, expectedCode, 'expected code')
        consumed = true
        onreturn(currentConstruct, info)
        return returnState
      }

      function nok(code) {
        assert__default['default'].equal(code, expectedCode, 'expected code')
        consumed = true
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }

  function addResult(construct, from) {
    if (construct.resolveAll && resolveAllConstructs.indexOf(construct) < 0) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      chunkedSplice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }

    assert__default['default'](
      construct.partial ||
        !context.events.length ||
        context.events[context.events.length - 1][0] === 'exit',
      'expected last token to end'
    )
  }

  function store() {
    var startPoint = now()
    var startPrevious = context.previous
    var startCurrentConstruct = context.currentConstruct
    var startEventsIndex = context.events.length
    var startStack = Array.from(stack)

    return {restore: restore, from: startEventsIndex}

    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
      debug('position: restore: `%j`', point)
    }
  }

  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}

module.exports = createTokenizer
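
// A minimal usage sketch (illustrative only; `parser` and `initialFlow` are
// hypothetical stand-ins for a parser from `../parse.js` and an initial
// construct, neither defined in this file):
//
//   var tokenizer = createTokenizer(parser, initialFlow)
//   // `write` takes chunks (strings and character codes) ending in
//   // `codes.eof`; once it sees EOF it resolves and returns all events.
//   var events = tokenizer.write(['# hi', codes.eof])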