// While micromark is a lexer/tokenizer, the common case of going from markdown
// to html is currently built in as this module, even though the parts can be
// used separately to build ASTs, CSTs, or many other output formats.
//
// Having an HTML compiler built in is useful because it allows us to check for
// compliancy to CommonMark, the de facto norm of markdown, specified in roughly
// 600 input/output cases.
//
// This module has an interface which accepts lists of events instead of the
// whole at once, however, because markdown can’t be truly streaming, we buffer
// events before processing and outputting the final result.
export default compileHtml
import decodeEntity from 'parse-entities/decode-entity.js'
import codes from '../character/codes.mjs'
import assign from '../constant/assign.mjs'
import constants from '../constant/constants.mjs'
import own from '../constant/has-own-property.mjs'
import types from '../constant/types.mjs'
import combineHtmlExtensions from '../util/combine-html-extensions.mjs'
import chunkedPush from '../util/chunked-push.mjs'
import miniflat from '../util/miniflat.mjs'
import normalizeIdentifier from '../util/normalize-identifier.mjs'
import normalizeUri from '../util/normalize-uri.mjs'
import safeFromInt from '../util/safe-from-int.mjs'
// Map of the characters that carry special meaning in HTML to the name of the
// character reference that replaces them.
// `>` and `"` could often be left alone, but CM escapes them as well.
var characterReferences = {
  '"': 'quot',
  '&': 'amp',
  '<': 'lt',
  '>': 'gt'
}
// These two are allowlists of essentially safe protocols for full URLs in
// respectively the `href` (on `<a>`) and `src` (on `<img>`) attributes.
// They are based on what is allowed on GitHub,
// a ')
}
setData('slurpAllLineEndings')
}
function onexitparagraph() {
if (tightStack[tightStack.length - 1]) {
setData('slurpAllLineEndings', true)
} else {
tag('\n
`.
// This variable holds the default line ending when given (or `undefined`),
// and in the latter case will be updated to the first found line ending if
// there is one (`compile` assigns it when it meets a line ending event).
var lineEndingStyle = settings.defaultLineEnding
// Return the function that handles a slice of events.
return compile
// Compile a slice of events.
// Three things happen here:
// 1. the line ending style is picked up from the first line ending event, if
//    none is known yet;
// 2. every list is handed to `prepareList` when it closes, so it can be marked
//    as loose or not;
// 3. definitions are hoisted in front of everything else, because references
//    may come before the definitions they point to.
// After that each event is dispatched to its enter/exit handler.
// Returns the joined contents of the outermost output buffer.
function compile(events) {
  var hoisted = [] // Definition events, handled first.
  var remainder = [] // Everything that is not part of a definition.
  var openLists = [] // Indices of list enter events that are not closed yet.
  var sliceStart = 0
  var position
  var ordered
  var event
  var kind
  var entering
  var group

  for (position = 0; position < events.length; position++) {
    event = events[position]
    kind = event[1].type
    entering = event[0] === 'enter'

    // Remember the first line ending found in the document.
    if (
      !lineEndingStyle &&
      (kind === types.lineEnding || kind === types.lineEndingBlank)
    ) {
      lineEndingStyle = event[2].sliceSerialize(event[1])
    }

    // Lists: remember where they open, analyse them when they close.
    if (kind === types.listOrdered || kind === types.listUnordered) {
      if (entering) {
        openLists.push(position)
      } else {
        prepareList(events.slice(openLists.pop(), position))
      }
    }

    // Definitions: flush what came before into `remainder` on enter, and the
    // definition itself into `hoisted` on exit.
    if (kind === types.definition) {
      if (entering) {
        remainder = chunkedPush(remainder, events.slice(sliceStart, position))
        sliceStart = position
      } else {
        hoisted = chunkedPush(
          hoisted,
          events.slice(sliceStart, position + 1)
        )
        sliceStart = position + 1
      }
    }
  }

  ordered = chunkedPush(
    chunkedPush(hoisted, remainder),
    events.slice(sliceStart)
  )

  // Handle the start of the document, if defined.
  if (handlers.enter.null) {
    handlers.enter.null.call(context)
  }

  // Dispatch every (reordered) event to its handler, when there is one.
  for (position = 0; position < events.length; position++) {
    event = ordered[position]
    group = handlers[event[0]]

    if (own.call(group, event[1].type)) {
      group[event[1].type].call(
        assign({sliceSerialize: event[2].sliceSerialize}, context),
        event[1]
      )
    }
  }

  // Handle the end of the document, if defined.
  if (handlers.exit.null) {
    handlers.exit.null.call(context)
  }

  return buffers[0].join('')
}
// Decide whether a list is loose and store the outcome as `_loose` on the
// token of the first event in `slice`.
// The first event is skipped and iteration stops before the last one.
// A list turns loose when a blank line ending is entered outside of any nested
// container, unless that blank line directly follows a list item prefix
// (line prefixes in between do not count).
function prepareList(slice) {
  var end = slice.length - 1
  var depth = 0 // Number of nested containers that are currently open.
  var isLoose
  var afterMarker
  var position
  var entry
  var entering

  for (position = 1; position < end; position++) {
    entry = slice[position]
    entering = entry[0] === 'enter'

    if (entry[1]._container) {
      afterMarker = undefined
      depth += entering ? 1 : -1
      continue
    }

    switch (entry[1].type) {
      case types.listItemPrefix:
        if (entry[0] === 'exit') {
          afterMarker = true
        }

        break
      case types.linePrefix:
        // Whitespace does not change whether we are right after a marker.
        break
      case types.lineEndingBlank:
        if (entering && !depth) {
          if (afterMarker) {
            afterMarker = undefined
          } else {
            isLoose = true
          }
        }

        break
      default:
        afterMarker = undefined
    }
  }

  slice[0][1]._loose = isLoose
}
// Store `value` under `key` in the shared key-value store.
// Leaving `value` out stores `undefined`, which is how flags get cleared.
function setData(key, value) {
  data[key] = value
}
// Look up the value stored under `key` in the shared key-value store.
function getData(key) {
  var stored = data[key]
  return stored
}
// Start capturing: open a fresh, empty output buffer on top of the stack, so
// that following output lands there until `resume` is called.
function buffer() {
  var capture = []
  buffers.push(capture)
}
// Stop capturing: remove the topmost output buffer and return what it
// collected as a single string.
function resume() {
  var captured = buffers.pop()
  return captured.join('')
}
// Output (parts of) HTML tags into the current buffer.
// Nothing is written, and no state changes, while tags are disallowed
// (`tags` is falsy); otherwise `lastWasTag` is set.
function tag(value) {
  if (tags) {
    setData('lastWasTag', true)
    buffers[buffers.length - 1].push(value)
  }
}
// Output raw data into the current buffer and clear the `lastWasTag` flag.
function raw(value) {
  var current

  setData('lastWasTag')
  current = buffers[buffers.length - 1]
  current.push(value)
}
// Output an extra line ending: the known line ending style, or a line feed
// when no style is known.
function lineEnding() {
  var eol = lineEndingStyle || '\n'
  raw(eol)
}
// Output an extra line ending, unless the current buffer is empty (or ends in
// an empty chunk) or its last chunk already ends in a line feed or carriage
// return.
function lineEndingIfNeeded() {
  var current = buffers[buffers.length - 1]
  var tail = current[current.length - 1]
  // No (or an empty) last chunk counts as being at the start: EOF.
  var code = tail ? tail.charCodeAt(tail.length - 1) : codes.eof

  if (code !== codes.lf && code !== codes.cr && code !== codes.eof) {
    lineEnding()
  }
}
// Make a value safe for injection in HTML by replacing `"`, `&`, `<`, and `>`
// with character references.
// While the `ignoreEncode` flag is set, the value is returned untouched.
function encode(value) {
  if (getData('ignoreEncode')) {
    return value
  }

  return value.replace(/["&<>]/g, function (character) {
    return '&' + characterReferences[character] + ';'
  })
}
// Make a value safe for injection as a URL.
// The value is first percent-encoded (`normalizeUri`, which skips sequences
// that are already encoded) and then encoded as HTML (`encode`).
// The result is returned when dangerous protocols are allowed, when there is
// no protocol at all, or when the protocol matches `protocol`; otherwise (for
// example for `javascript:`) the empty string is returned.
function url(url, protocol) {
  var safe = encode(normalizeUri(url || ''))
  var colon = safe.indexOf(':')
  // A colon that comes after one of these is not part of a protocol.
  var delimiters = ['/', '?', '#']
  var position
  var at

  // No restrictions, or no colon at all: the URL is relative.
  if (settings.allowDangerousProtocol || colon < 0) {
    return safe
  }

  for (position = 0; position < delimiters.length; position++) {
    at = safe.indexOf(delimiters[position])

    if (at > -1 && colon > at) {
      return safe
    }
  }

  // There is a protocol: it has to be on the allowlist.
  return protocol.test(safe.slice(0, colon)) ? safe : ''
}
//
// Handlers.
//
function onenterlistordered(token) {
tightStack.push(!token._loose)
lineEndingIfNeeded()
tag('')
} else {
onexitlistitem()
}
lineEndingIfNeeded()
tag('
')
}
// Close an unordered list: close the item that is still open, drop the list's
// entry from `tightStack`, and put the closing tag on a line of its own.
function onexitlistunordered() {
  onexitlistitem()
  tightStack.pop()
  lineEnding()
  // The list has to be closed explicitly; an empty string would leave the
  // element open in the output.
  tag('</ul>')
}
// Close a list item.
// When the last output was a tag and line endings are not being slurped, the
// closing tag is put on its own line.
// The `slurpAllLineEndings` flag is cleared afterwards.
function onexitlistitem() {
  if (getData('lastWasTag') && !getData('slurpAllLineEndings')) {
    lineEndingIfNeeded()
  }

  // The item has to be closed explicitly; an empty string would leave the
  // element open in the output.
  tag('</li>')
  setData('slurpAllLineEndings')
}
// Open a block quote.
// A `false` entry is pushed onto `tightStack` for the duration of the quote,
// and the opening tag is put at the start of a line.
function onenterblockquote() {
  tightStack.push(false)
  lineEndingIfNeeded()
  // Emit the actual opening tag; an empty string would produce no element.
  tag('<blockquote>')
}
// Close a block quote: drop its entry from `tightStack`, put the closing tag
// at the start of a line, and clear the `slurpAllLineEndings` flag.
function onexitblockquote() {
  tightStack.pop()
  lineEndingIfNeeded()
  // A single-quoted string cannot span lines; emit the closing tag itself.
  tag('</blockquote>')
  setData('slurpAllLineEndings')
}
// Handler for entering a paragraph.
// When the innermost `tightStack` entry is falsy, this makes sure output is at
// the start of a line, calls `tag`, and sets the `fencedCodeInside` and
// `slurpOneLineEnding` flags.
// In all cases it then increments the `fencesCount` value.
// NOTE(review): `tag('')` emits an empty string, and fenced-code bookkeeping
// (`fencedCodeInside`, `fencesCount`) looks out of place in a paragraph
// handler — this body may be two handlers spliced together; confirm against
// the upstream file before relying on it.
function onenterparagraph() {
if (!tightStack[tightStack.length - 1]) {
lineEndingIfNeeded()
tag('')
setData('fencedCodeInside', true)
setData('slurpOneLineEnding', true)
}
setData('fencesCount', getData('fencesCount') + 1)
}
// Handler for entering indented code.
// As written: makes sure output is at the start of a line, calls `tag`, adds
// another line ending when `fencesCount` is below 2, and clears the
// `flowCodeSeenData`, `fencesCount`, and `slurpOneLineEnding` values.
// NOTE(review): the string passed to `tag` spans two lines, which is not a
// valid single-quoted string literal, and the statements after it read like
// the tail of a flow-code *exit* handler — this body looks damaged; confirm
// against the upstream file.
function onentercodeindented() {
lineEndingIfNeeded()
tag('
')
if (getData('fencesCount') < 2) lineEndingIfNeeded()
setData('flowCodeSeenData')
setData('fencesCount')
setData('slurpOneLineEnding')
}
// Open an image: tags are switched off from here on, and a new entry that is
// marked as an image is pushed onto `mediaStack`.
function onenterimage() {
  var media = {image: true}

  mediaStack.push(media)
  tags = undefined // Disallow tags.
}
// Open a link: push a new, empty entry onto `mediaStack`.
function onenterlink() {
  var media = {}

  mediaStack.push(media)
}
// Store the serialized source of the label text as `labelId` on the current
// media entry.
// Called with the tokenizer context as `this` (for `sliceSerialize`).
function onexitlabeltext(token) {
  var media = mediaStack[mediaStack.length - 1]

  media.labelId = this.sliceSerialize(token)
}
// Stop capturing and store the captured output as `label` on the current
// media entry.
function onexitlabel() {
  var media = mediaStack[mediaStack.length - 1]

  media.label = resume()
}
// Store the serialized source of the reference string as `referenceId` on the
// current media entry.
// Called with the tokenizer context as `this` (for `sliceSerialize`).
function onexitreferencestring(token) {
  var media = mediaStack[mediaStack.length - 1]

  media.referenceId = this.sliceSerialize(token)
}
// Open a resource: start capturing (line endings can occur inside a resource
// and are ignored this way) and give the current media entry an empty
// destination.
function onenterresource() {
  var media

  buffer()
  media = mediaStack[mediaStack.length - 1]
  media.destination = ''
}
// Open a resource destination: start capturing and set the `ignoreEncode`
// flag.
function onenterresourcedestinationstring() {
  buffer()
  // The URL gets percent-encoded first and is encoded by hand afterwards, so
  // the captured text itself must not be encoded.
  setData('ignoreEncode', true)
}
// Close a resource destination: store the captured output as `destination` on
// the current media entry and clear the `ignoreEncode` flag.
function onexitresourcedestinationstring() {
  var media = mediaStack[mediaStack.length - 1]

  media.destination = resume()
  setData('ignoreEncode')
}
// Close a resource title: store the captured output as `title` on the current
// media entry.
function onexitresourcetitlestring() {
  var media = mediaStack[mediaStack.length - 1]

  media.title = resume()
}
function onexitmedia() {
var index = mediaStack.length - 1 // Skip current.
var media = mediaStack[index]
var context =
media.destination === undefined
? definitions[normalizeIdentifier(media.referenceId || media.labelId)]
: media
tags = true
while (index--) {
if (mediaStack[index].image) {
tags = undefined
break
}
}
if (media.image) {
tag('')
} else {
tag('>')
raw(media.label)
tag('')
}
// Close flow code.
function onexitflowcode() {
  // Send an extra line feed if we saw data.
  if (getData('flowCodeSeenData')) lineEndingIfNeeded()
  // A single-quoted string cannot span lines; emit the closing tags of the
  // code block itself.
  tag('</code></pre>')
}
// Open flow HTML: make sure it starts on a line of its own, then do what is
// done for every kind of HTML (`onenterhtml`).
function onenterhtmlflow() {
  lineEndingIfNeeded()
  onenterhtml()
}
// Close HTML: clear the `ignoreEncode` flag, so that output is encoded again.
function onexithtml() {
  setData('ignoreEncode')
}
// Open HTML: the `ignoreEncode` flag is set only when the
// `allowDangerousHtml` setting is on; otherwise nothing changes.
function onenterhtml() {
  if (!settings.allowDangerousHtml) {
    return
  }

  setData('ignoreEncode', true)
}
// Open emphasis.
function onenteremphasis() {
  // Emit the actual opening tag; an empty string would produce no element.
  tag('<em>')
}
// Open strong emphasis.
function onenterstrong() {
  // Emit the actual opening tag; an empty string would produce no element.
  tag('<strong>')
}
// Open inline code: set the `inCodeText` flag and emit the opening tag.
function onentercodetext() {
  setData('inCodeText', true)
  // Emit the actual opening tag; an empty string would produce no element.
  tag('<code>')
}
// Close inline code: clear the `inCodeText` flag and emit the closing tag.
function onexitcodetext() {
  setData('inCodeText')
  // A single-quoted string cannot span lines; emit the closing tag itself.
  tag('</code>')
}
// Close emphasis.
function onexitemphasis() {
  // Emit the actual closing tag; an empty string would leave the element open.
  tag('</em>')
}
// Close strong emphasis.
function onexitstrong() {
  // Emit the actual closing tag; an empty string would leave the element open.
  tag('</strong>')
}
function onexitthematicbreak() {
lineEndingIfNeeded()
tag('