'use strict'

var assign = require('../constant/assign.js')
var markdownLineEnding = require('../character/markdown-line-ending.js')
var chunkedPush = require('./chunked-push.js')
var chunkedSplice = require('./chunked-splice.js')
var miniflat = require('./miniflat.js')
var resolveAll = require('./resolve-all.js')
var serializeChunks = require('./serialize-chunks.js')
var shallow = require('./shallow.js')
var sliceChunks = require('./slice-chunks.js')

// Create a tokenizer.
// Tokenizers deal with one type of data (e.g., containers, flow, text).
// The parser is the object dealing with it all.
// `initialize` works like other constructs, except that only its `tokenize`
// function is used, in which case it doesn’t receive an `ok` or `nok`.
// `from` can be given to set the point before the first character, although
// when further lines are indented, they must be set with `defineSkip`.
function createTokenizer(parser, initialize, from) {
  var point = from ? shallow(from) : {line: 1, column: 1, offset: 0}
  var columnStart = {}
  var resolveAllConstructs = []
  var chunks = []
  var stack = []
  var effects = {
    consume: consume,
    enter: enter,
    exit: exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {interrupt: true}),
    lazy: constructFactory(onsuccessfulcheck, {lazy: true})
  }

  // State and tools for resolving and serializing.
  var context = {
    previous: null,
    events: [],
    parser: parser,
    sliceStream: sliceStream,
    sliceSerialize: sliceSerialize,
    now: now,
    defineSkip: skip,
    write: write
  }

  // The state function.
  var state = initialize.tokenize.call(context, effects)

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  // Store where we are in the input stream.
  point._index = 0
  point._bufferIndex = -1

  return context

  function write(slice) {
    chunks = chunkedPush(chunks, slice)

    main()

    // Exit if we’re not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== null) {
      return []
    }

    addResult(initialize, 0)

    // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }

  //
  // Tools.
  //

  function sliceSerialize(token) {
    return serializeChunks(sliceStream(token))
  }

  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  function now() {
    return shallow(point)
  }

  function skip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
  }

  //
  // State management.
  //

  // Main loop (note that `_index` and `_bufferIndex` in `point` are modified
  // by `consume`).
  // Here is where we walk through the chunks, which either include strings of
  // several characters, or numerical character codes.
  // The reason to do this in a loop instead of a call is so the stack can
  // drain.
  function main() {
    var chunkIndex
    var chunk

    while (point._index < chunks.length) {
      chunk = chunks[point._index]

      // If we’re in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }

  // Deal with one code.
  function go(code) {
    state = state(code)
  }
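  // An illustration (an assumption about the preprocessed input, not code in
  // this module) of what the loop above walks through: chunks are either
  // strings of several characters or numerical (virtual) character codes,
  // with `null` marking the end of the stream. Input such as `'a\r\nb'`
  // would arrive roughly as:
  //
  //   ['a', -3, 'b', null]
  //
  // where `-3` stands for a CR+LF pair (which is why `consume` below
  // advances `offset` by 2 for it) and `-1` is a virtual space from tab
  // expansion (which is why `consume` leaves `offset` alone for it).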
  // Move a character forward.
  function consume(code) {
    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      point.offset += code === -3 ? 2 : 1
      accountForPotentialSkip()
    } else if (code !== -1) {
      point.column++
      point.offset++
    }

    // Not in a string chunk.
    if (point._bufferIndex < 0) {
      point._index++
    } else {
      point._bufferIndex++

      // At end of string chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    }

    // Expose the previous character.
    context.previous = code
  }

  // Start a token.
  function enter(type, fields) {
    var token = fields || {}
    token.type = type
    token.start = now()

    context.events.push(['enter', token, context])

    stack.push(token)

    return token
  }

  // Stop a token.
  function exit(type) {
    var token = stack.pop()
    token.end = now()

    context.events.push(['exit', token, context])

    return token
  }

  // Use results.
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  // Discard results.
  function onsuccessfulcheck(construct, info) {
    info.restore()
  }

  // Factory to attempt/check/interrupt.
  function constructFactory(onreturn, fields) {
    return hook

    // Handle either an object mapping codes to constructs, a list of
    // constructs, or a single construct.
    function hook(constructs, returnState, bogusState) {
      var listOfConstructs
      var constructIndex
      var currentConstruct
      var info

      return constructs.tokenize || 'length' in constructs
        ? handleListOfConstructs(miniflat(constructs))
        : handleMapOfConstructs

      function handleMapOfConstructs(code) {
        if (code in constructs || null in constructs) {
          return handleListOfConstructs(
            constructs.null
              ? /* c8 ignore next */ miniflat(constructs[code]).concat(
                  miniflat(constructs.null)
                )
              : constructs[code]
          )(code)
        }

        return bogusState(code)
      }

      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0
        return handleConstruct(list[constructIndex])
      }

      function handleConstruct(construct) {
        return start

        function start(code) {
          // To do: no need to store if there is no bogus state, probably?
          // Currently doesn’t work because `inspect` in document does a check
          // w/o a bogus, which doesn’t make sense. But it does seem to help
          // perf by not storing.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          if (
            construct.name &&
            context.parser.constructs.disable.null.indexOf(construct.name) > -1
          ) {
            return nok(code)
          }

          return construct.tokenize.call(
            fields ? assign({}, context, fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      function ok(code) {
        onreturn(currentConstruct, info)
        return returnState
      }

      function nok(code) {
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }
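  // A sketch of how the factory-made hooks are used from a state function
  // elsewhere (`asterisk`, `afterList`, and `notList` are hypothetical names,
  // not defined in this file). A map from character codes to constructs (42
  // is `*`), a list of constructs, or a single construct all work:
  //
  //   return effects.attempt(
  //     {42: asterisk}, // constructs to try when the current code is `*`
  //     afterList, // `returnState`: some construct succeeded (`ok`)
  //     notList // `bogusState`: every candidate failed (`nok`)
  //   )(code)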
  function addResult(construct, from) {
    if (construct.resolveAll && resolveAllConstructs.indexOf(construct) < 0) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      chunkedSplice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }
  }

  function store() {
    var startPoint = now()
    var startPrevious = context.previous
    var startCurrentConstruct = context.currentConstruct
    var startEventsIndex = context.events.length
    var startStack = Array.from(stack)

    return {restore: restore, from: startEventsIndex}

    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
    }
  }

  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}

module.exports = createTokenizer
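// A minimal usage sketch (hypothetical: in micromark this module is normally
// driven by the parser, which supplies real constructs and content types).
// The initializer below wraps the whole stream in one `data` token; the
// parser stub only carries the `constructs.disable.null` list that the
// attempt/check hooks read:
//
//   var tokenizer = createTokenizer(
//     {constructs: {disable: {null: []}}},
//     {
//       tokenize: function (effects) {
//         return open
//         function open(code) {
//           if (code === null) return done(code)
//           effects.enter('data')
//           effects.consume(code)
//           return inside
//         }
//         function inside(code) {
//           if (code === null) {
//             effects.exit('data')
//             return done(code)
//           }
//           effects.consume(code)
//           return inside
//         }
//         function done(code) {
//           effects.consume(code)
//         }
//       }
//     }
//   )
//
//   var events = tokenizer.write(['hi', null]) // `null` ends the stream
//   tokenizer.sliceSerialize(events[0][1]) // => 'hi'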