ezqms-1171: drop h4-h6 during import of controlled doc (#6487)

Signed-off-by: Alexey Zinoviev <alexey.zinoviev@xored.com>
This commit is contained in:
Alexey Zinoviev 2024-09-05 17:07:46 +04:00 committed by GitHub
parent 3eb7d6e965
commit b8b196ecd9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,6 @@
import { parseDocument } from 'htmlparser2'
import { AnyNode, Document } from 'domhandler'
import { findAll } from 'domutils'
import { FileSpec, FileSpecType, TocFileSpec } from './types'
import { createMetadataExtractor } from './meta'
@ -62,6 +63,13 @@ class TocContentExtractor implements ContentExtractor {
/**
 * Parses `contents` with `parseDocument` and hands the resulting document to a
 * `TocContentExtractor` built from `spec`.
 *
 * Headers deeper than level 3 are not supported, so before extraction every
 * h4, h5 and h6 element that `findAll` locates starting from the document's
 * child nodes is renamed in place to a paragraph (`p`).
 *
 * @param contents - markup to parse
 * @param spec - file spec passed to the `TocContentExtractor` constructor
 * @param headerRoot - optional node forwarded unchanged to the extractor
 * @returns whatever the extractor produces for the adjusted document
 */
export async function extract (contents: string, spec: FileSpec, headerRoot?: AnyNode): Promise<ExtractedFile> {
  const extractor = new TocContentExtractor(spec)
  const doc = parseDocument(contents)

  // Tag names that are demoted to plain paragraphs.
  const unsupportedHeaders = new Set(['h4', 'h5', 'h6'])
  const deepHeaders = findAll((element) => unsupportedHeaders.has(element.tagName), doc.childNodes)
  for (const header of deepHeaders) {
    header.name = 'p'
  }

  return extractor.extract(doc, headerRoot)
}