diff --git a/format/all/all.go b/format/all/all.go
index 0263dbd3..ba1127fa 100644
--- a/format/all/all.go
+++ b/format/all/all.go
@@ -28,6 +28,7 @@ import (
_ "github.com/wader/fq/format/jpeg"
_ "github.com/wader/fq/format/json"
_ "github.com/wader/fq/format/macho"
+ _ "github.com/wader/fq/format/markdown"
_ "github.com/wader/fq/format/math"
_ "github.com/wader/fq/format/matroska"
_ "github.com/wader/fq/format/mp3"
diff --git a/format/format.go b/format/format.go
index e42a5900..2ac54a72 100644
--- a/format/format.go
+++ b/format/format.go
@@ -86,6 +86,7 @@ const (
JSONL = "jsonl"
MACHO = "macho"
MACHO_FAT = "macho_fat"
+ MARKDOWN = "markdown"
MATROSKA = "matroska"
MP3 = "mp3"
MP3_FRAME = "mp3_frame"
diff --git a/format/markdown/markdown.go b/format/markdown/markdown.go
new file mode 100644
index 00000000..dd921ef0
--- /dev/null
+++ b/format/markdown/markdown.go
@@ -0,0 +1,319 @@
+package markdown
+
+import (
+ "embed"
+ "fmt"
+ "io/ioutil"
+
+ "github.com/gomarkdown/markdown"
+ "github.com/gomarkdown/markdown/ast"
+ "github.com/wader/fq/format"
+ "github.com/wader/fq/pkg/bitio"
+ "github.com/wader/fq/pkg/decode"
+ "github.com/wader/fq/pkg/interp"
+ "github.com/wader/fq/pkg/scalar"
+)
+
+// markdownFS embeds markdown.jq; init registers it with interp.RegisterFS.
+//
+//go:embed markdown.jq
+var markdownFS embed.FS
+
+// init registers the markdown format (its name, description, decode function
+// and the "_todisplay" function name) and then the embedded markdownFS with
+// interp.
+func init() {
+	markdownFormat := decode.Format{
+		Name:        format.MARKDOWN,
+		Description: "Markdown",
+		DecodeFn:    decodeMarkdown,
+		Functions:   []string{"_todisplay"},
+	}
+
+	interp.RegisterFormat(markdownFormat)
+	interp.RegisterFS(markdownFS)
+}
+
+func decodeMarkdown(d *decode.D, _ any) any {
+ b, err := ioutil.ReadAll(bitio.NewIOReader(d.RawLen(d.Len())))
+ if err != nil {
+ panic(err)
+ }
+
+ var s scalar.S
+ s.Actual = node(markdown.Parse(b, nil))
+ d.Value.V = &s
+ d.Value.Range.Len = d.Len()
+
+ return nil
+}
+
+// stringSlice converts every element of ss to a string and returns the
+// results, in order, as a []any. The result is nil when ss is empty.
+func stringSlice[T string | []byte](ss []T) []any {
+	var out []any
+	for i := range ss {
+		out = append(out, string(ss[i]))
+	}
+	return out
+}
+
+// sliceMap applies fn to each element of vs, in order, and returns the
+// results in a newly allocated slice with the same length as vs.
+func sliceMap[F, T any](vs []F, fn func(F) T) []T {
+	out := make([]T, 0, len(vs))
+	for _, v := range vs {
+		out = append(out, fn(v))
+	}
+	return out
+}
+
+// intSlice returns the elements of ss, in order, as a []any. Each element is
+// stored with its original type T. The result is nil when ss is empty.
+func intSlice[T ~int](ss []T) []any {
+	var out []any
+	for i := 0; i < len(ss); i++ {
+		out = append(out, ss[i])
+	}
+	return out
+}
+
+func attr(v map[string]any, attr *ast.Attribute) {
+ if attr == nil {
+ return
+ }
+
+ v["id"] = string(attr.ID)
+
+ var as []any
+ for _, a := range attr.Attrs {
+ as = append(as, string(a))
+ }
+ v["attrs"] = as
+
+ var cs []any
+ for _, a := range attr.Classes {
+ cs = append(cs, string(a))
+ }
+ v["classes"] = cs
+}
+
+// leaf fills v from a leaf node: the leaf's attribute (if any) is added via
+// attr, "literal" is set to the leaf's literal as a string and "type" to typ.
+func leaf(v map[string]any, typ string, l ast.Leaf) {
+	attr(v, l.Attribute)
+
+	v["literal"] = string(l.Literal)
+	v["type"] = typ
+}
+
+// container fills v from a container node: "type" is set to typ, "literal" to
+// the container's literal as a string, "children" to the converted child
+// nodes, and the container's attribute (if any) is added via attr.
+//
+// Children for which node returns nil are left out of "children".
+func container(v map[string]any, typ string, c ast.Container) {
+	v["type"] = typ
+	v["literal"] = string(c.Literal)
+
+	var cs []any
+	for _, child := range c.GetChildren() {
+		// Convert each child exactly once. Calling node again for the append
+		// would redo the whole subtree, doubling the work at every nesting
+		// level.
+		if cv := node(child); cv != nil {
+			cs = append(cs, cv)
+		}
+	}
+	v["children"] = cs
+
+	attr(v, c.Attribute)
+}
+
+// listType returns the names of the list flags set in t, in a fixed order:
+// "ordered", "definition", "term", "contains_block", "beginning_of_list",
+// "end_of_list". The result is nil when none of these flags is set.
+func listType(t ast.ListType) []any {
+	flags := []struct {
+		bit  ast.ListType
+		name string
+	}{
+		{ast.ListTypeOrdered, "ordered"},
+		{ast.ListTypeDefinition, "definition"},
+		{ast.ListTypeTerm, "term"},
+		{ast.ListItemContainsBlock, "contains_block"},
+		{ast.ListItemBeginningOfList, "beginning_of_list"},
+		{ast.ListItemEndOfList, "end_of_list"},
+	}
+
+	var vs []any
+	for _, f := range flags {
+		// The flags are a bit set, so test membership with a bitwise AND;
+		// a modulo comparison against the flag itself can never be true.
+		if t&f.bit == f.bit {
+			vs = append(vs, f.name)
+		}
+	}
+
+	return vs
+}
+
+// node converts a markdown AST node into a plain value.
+//
+// A text node without an attribute becomes its literal as a string, or nil
+// when the literal is empty. Every other known node becomes a map[string]any
+// that carries a "type" key plus the node-specific fields; keys whose value is
+// an empty string are removed from the map before it is returned.
+//
+// It panics with a message naming the node's dynamic type when n is not one of
+// the handled node types.
+func node(n ast.Node) any {
+	v := map[string]any{}
+
+	switch n := n.(type) {
+	case *ast.Text:
+		if n.Leaf.Attribute == nil {
+			if len(n.Leaf.Literal) > 0 {
+				return string(n.Leaf.Literal)
+			}
+			// skip
+			return nil
+		}
+		// Text carrying an attribute can't be a bare string; emit it as a
+		// typed leaf so the literal and the attribute are both kept.
+		leaf(v, "text", n.Leaf)
+	case *ast.Softbreak:
+		leaf(v, "softbreak", n.Leaf)
+	case *ast.Hardbreak:
+		leaf(v, "hardbreak", n.Leaf)
+	case *ast.NonBlockingSpace:
+		leaf(v, "nbsp", n.Leaf)
+	case *ast.Emph:
+		container(v, "em", n.Container)
+	case *ast.Strong:
+		container(v, "strong", n.Container)
+	case *ast.Del:
+		container(v, "del", n.Container)
+	case *ast.BlockQuote:
+		container(v, "blockquote", n.Container)
+	case *ast.Aside:
+		container(v, "aside", n.Container)
+	case *ast.Link:
+		container(v, "link", n.Container)
+		v["destination"] = string(n.Destination)
+		v["title"] = string(n.Title)
+		v["note_id"] = n.NoteID
+		v["deferred_id"] = string(n.DeferredID)
+		v["additional_attributes"] = stringSlice(n.AdditionalAttributes)
+	case *ast.CrossReference:
+		container(v, "cross_reference", n.Container)
+		v["destination"] = string(n.Destination)
+	case *ast.Citation:
+		leaf(v, "citation", n.Leaf)
+		v["destination"] = stringSlice(n.Destination)
+		// Stored under its own key so the node's "type" stays "citation".
+		v["citation_type"] = sliceMap(n.Type, func(t ast.CitationTypes) any {
+			switch t {
+			case ast.CitationTypeNone:
+				return "none"
+			case ast.CitationTypeSuppressed:
+				return "suppressed"
+			case ast.CitationTypeInformative:
+				return "informative"
+			case ast.CitationTypeNormative:
+				return "normative"
+			default:
+				return "unknown"
+			}
+		})
+		v["suffix"] = stringSlice(n.Suffix)
+	case *ast.Image:
+		container(v, "image", n.Container)
+		v["destination"] = string(n.Destination)
+		v["title"] = string(n.Title)
+	case *ast.Code:
+		leaf(v, "code", n.Leaf)
+	case *ast.CodeBlock:
+		leaf(v, "code_block", n.Leaf)
+		v["is_fenced"] = n.IsFenced
+		v["info"] = string(n.Info)
+		if n.FenceChar != 0 {
+			v["fence_char"] = string(n.FenceChar)
+		}
+		v["fence_length"] = n.FenceLength
+		v["fence_offset"] = n.FenceOffset
+	case *ast.Caption:
+		container(v, "caption", n.Container)
+	case *ast.CaptionFigure:
+		container(v, "caption_figure", n.Container)
+		v["heading_id"] = n.HeadingID
+	case *ast.Document:
+		container(v, "document", n.Container)
+	case *ast.Paragraph:
+		container(v, "paragraph", n.Container)
+	case *ast.HTMLSpan:
+		leaf(v, "html_span", n.Leaf)
+	case *ast.HTMLBlock:
+		leaf(v, "html_block", n.Leaf)
+	case *ast.Heading:
+		container(v, "heading", n.Container)
+		v["level"] = n.Level
+		v["heading_id"] = n.HeadingID
+		v["is_titleblock"] = n.IsTitleblock
+		v["is_special"] = n.IsSpecial
+	case *ast.HorizontalRule:
+		leaf(v, "hr", n.Leaf)
+	case *ast.List:
+		container(v, "list", n.Container)
+		v["list_flags"] = listType(n.ListFlags)
+		v["tight"] = n.Tight
+		if n.BulletChar != 0 {
+			v["bullet_char"] = string(n.BulletChar)
+		}
+		if n.Delimiter != 0 {
+			v["delimiter"] = string(n.Delimiter)
+		}
+		v["start"] = n.Start
+		v["ref_link"] = string(n.RefLink)
+		v["is_footnotes_list"] = n.IsFootnotesList
+	case *ast.ListItem:
+		container(v, "list_item", n.Container)
+		v["list_flags"] = listType(n.ListFlags)
+		v["tight"] = n.Tight
+		if n.BulletChar != 0 {
+			v["bullet_char"] = string(n.BulletChar)
+		}
+		if n.Delimiter != 0 {
+			v["delimiter"] = string(n.Delimiter)
+		}
+		v["ref_link"] = string(n.RefLink)
+		v["is_footnotes_list"] = n.IsFootnotesList
+	case *ast.Table:
+		container(v, "table", n.Container)
+	case *ast.TableCell:
+		container(v, "table_cell", n.Container)
+		v["is_header"] = n.IsHeader
+		v["align"] = n.Align.String()
+		v["col_span"] = n.ColSpan
+	case *ast.TableHeader:
+		container(v, "table_header", n.Container)
+	case *ast.TableBody:
+		container(v, "table_body", n.Container)
+	case *ast.TableRow:
+		container(v, "table_row", n.Container)
+	case *ast.TableFooter:
+		container(v, "table_footer", n.Container)
+	case *ast.Math:
+		leaf(v, "math", n.Leaf)
+	case *ast.MathBlock:
+		container(v, "math_block", n.Container)
+	case *ast.DocumentMatter:
+		container(v, "document_matter", n.Container)
+		v["matter"] = func(m ast.DocumentMatters) string {
+			switch m {
+			case ast.DocumentMatterNone:
+				return "none"
+			case ast.DocumentMatterFront:
+				return "front"
+			case ast.DocumentMatterMain:
+				return "main"
+			case ast.DocumentMatterBack:
+				return "back"
+			default:
+				return "unknown"
+			}
+		}(n.Matter)
+	case *ast.Callout:
+		leaf(v, "callout", n.Leaf)
+		v["id"] = string(n.ID)
+	case *ast.Index:
+		leaf(v, "index", n.Leaf)
+		v["primary"] = n.Primary
+		v["item"] = string(n.Item)
+		v["subitem"] = string(n.Subitem)
+		v["id"] = n.ID
+	case *ast.Subscript:
+		leaf(v, "subscript", n.Leaf)
+	case *ast.Superscript:
+		leaf(v, "superscript", n.Leaf)
+	case *ast.Footnotes:
+		container(v, "footnotes", n.Container)
+	default:
+		// Report the dynamic type of the value being converted, not the type
+		// of this function.
+		panic(fmt.Sprintf("unknown node %T", n))
+	}
+
+	// Drop keys with empty string values to keep the output compact.
+	for k, e := range v {
+		if s, ok := e.(string); ok && s == "" {
+			delete(v, k)
+		}
+	}
+
+	return v
+}
diff --git a/format/markdown/testdata/test.fqtest b/format/markdown/testdata/test.fqtest
new file mode 100644
index 00000000..a0152ca1
--- /dev/null
+++ b/format/markdown/testdata/test.fqtest
@@ -0,0 +1,316 @@
+$ fq -d markdown . test.md
+{
+ "children": [
+ {
+ "children": [
+ "Before"
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ "header 1"
+ ],
+ "is_special": false,
+ "is_titleblock": false,
+ "level": 1,
+ "type": "heading"
+ },
+ {
+ "children": [
+ "Paragraph with ",
+ {
+ "children": [
+ "bold"
+ ],
+ "type": "strong"
+ },
+ " and ",
+ {
+ "children": [
+ "italic"
+ ],
+ "type": "em"
+ },
+ "\non\nmultiple\nlines."
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ {
+ "children": [
+ "Some citation"
+ ],
+ "type": "paragraph"
+ }
+ ],
+ "type": "blockquote"
+ },
+ {
+ "children": [
+ "A footnote",
+ {
+ "additional_attributes": [],
+ "children": [
+ "^1"
+ ],
+ "destination": "footnote1",
+ "note_id": 0,
+ "type": "link"
+ },
+ " and this also",
+ {
+ "additional_attributes": [],
+ "children": [
+ "^note"
+ ],
+ "destination": "footnote2",
+ "note_id": 0,
+ "type": "link"
+ }
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ "header 2"
+ ],
+ "is_special": false,
+ "is_titleblock": false,
+ "level": 2,
+ "type": "heading"
+ },
+ {
+ "fence_length": 0,
+ "fence_offset": 0,
+ "info": "jq",
+ "is_fenced": true,
+ "literal": "code\nblock\n",
+ "type": "code_block"
+ },
+ {
+ "fence_length": 0,
+ "fence_offset": 0,
+ "is_fenced": false,
+ "literal": "also\ncode\n",
+ "type": "code_block"
+ },
+ {
+ "children": [
+ "header 3"
+ ],
+ "is_special": false,
+ "is_titleblock": false,
+ "level": 3,
+ "type": "heading"
+ },
+ {
+ "children": [
+ "Some text with ",
+ {
+ "literal": "code",
+ "type": "code"
+ }
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ "header 4"
+ ],
+ "is_special": false,
+ "is_titleblock": false,
+ "level": 4,
+ "type": "heading"
+ },
+ {
+ "children": [
+ "Some text ",
+ {
+ "additional_attributes": [],
+ "children": [
+ "with a link"
+ ],
+ "destination": "http://host/path",
+ "note_id": 0,
+ "type": "link"
+ }
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ "An image ",
+ {
+ "children": [
+ "img alt text"
+ ],
+ "destination": "path/image.png",
+ "type": "image"
+ }
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ "header 5"
+ ],
+ "is_special": false,
+ "is_titleblock": false,
+ "level": 5,
+ "type": "heading"
+ },
+ {
+ "children": [
+ {
+ "bullet_char": "-",
+ "children": [
+ {
+ "children": [
+ "list of"
+ ],
+ "type": "paragraph"
+ }
+ ],
+ "delimiter": ".",
+ "is_footnotes_list": false,
+ "list_flags": [],
+ "tight": false,
+ "type": "list_item"
+ },
+ {
+ "bullet_char": "-",
+ "children": [
+ {
+ "children": [
+ "things"
+ ],
+ "type": "paragraph"
+ }
+ ],
+ "delimiter": ".",
+ "is_footnotes_list": false,
+ "list_flags": [],
+ "tight": false,
+ "type": "list_item"
+ }
+ ],
+ "delimiter": ".",
+ "is_footnotes_list": false,
+ "list_flags": [],
+ "start": 0,
+ "tight": true,
+ "type": "list"
+ },
+ {
+ "children": [
+ "a table"
+ ],
+ "type": "paragraph"
+ },
+ {
+ "children": [
+ {
+ "children": [
+ {
+ "children": [
+ {
+ "children": [
+ "a"
+ ],
+ "col_span": 0,
+ "is_header": true,
+ "type": "table_cell"
+ },
+ {
+ "children": [
+ "b"
+ ],
+ "col_span": 0,
+ "is_header": true,
+ "type": "table_cell"
+ },
+ {
+ "children": [
+ "c"
+ ],
+ "col_span": 0,
+ "is_header": true,
+ "type": "table_cell"
+ }
+ ],
+ "type": "table_row"
+ }
+ ],
+ "type": "table_header"
+ },
+ {
+ "children": [
+ {
+ "children": [
+ {
+ "children": [
+ "1"
+ ],
+ "col_span": 0,
+ "is_header": false,
+ "type": "table_cell"
+ },
+ {
+ "children": [
+ "2"
+ ],
+ "col_span": 0,
+ "is_header": false,
+ "type": "table_cell"
+ },
+ {
+ "children": [
+ "3"
+ ],
+ "col_span": 0,
+ "is_header": false,
+ "type": "table_cell"
+ }
+ ],
+ "type": "table_row"
+ }
+ ],
+ "type": "table_body"
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "children": [
+ "header 6"
+ ],
+ "is_special": false,
+ "is_titleblock": false,
+ "level": 6,
+ "type": "heading"
+ },
+ {
+ "children": [
+ "Some text with line ",
+ {
+          "literal": "<br>",
+ "type": "html_span"
+ },
+ " break and ",
+ {
+          "literal": "<b>",
+ "type": "html_span"
+ },
+ "bold",
+ {
+          "literal": "</b>",
+ "type": "html_span"
+ }
+ ],
+ "type": "paragraph"
+ }
+ ],
+ "type": "document"
+}
diff --git a/format/markdown/testdata/test.md b/format/markdown/testdata/test.md
new file mode 100644
index 00000000..e131e5a9
--- /dev/null
+++ b/format/markdown/testdata/test.md
@@ -0,0 +1,50 @@
+Before
+
+# header 1
+
+Paragraph with **bold** and *italic*
+on
+multiple
+lines.
+
+> Some citation
+
+A footnote[^1] and this also[^note]
+
+## header 2
+
+```jq
+code
+block
+```
+
+ also
+ code
+
+### header 3
+
+Some text with `code`
+
+#### header 4
+
+Some text [with a link](http://host/path)
+
+An image ![img alt text](path/image.png)
+
+##### header 5
+
+- list of
+- things
+
+a table
+
+| a | b | c |
+| --- | --- | --- |
+| 1 | 2 | 3 |
+
+###### header 6
+
+Some text with line <br> break and <b>bold</b>
+
+[^1]: footnote1
+[^note]: footnote2
diff --git a/go.mod b/go.mod
index d3b54095..1b565b9f 100644
--- a/go.mod
+++ b/go.mod
@@ -25,6 +25,10 @@ require (
// bump: gomod-golang-snappy link "Source diff $CURRENT..$LATEST" https://github.com/golang/snappy/compare/v$CURRENT..v$LATEST
github.com/golang/snappy v0.0.4
+ // has no tags
+ // go get -d github.com/gomarkdown/markdown@master && go mod tidy
+ github.com/gomarkdown/markdown v0.0.0-20220627144906-e9a81102ebeb
+
// has no tags yet
// bump-disabled: gomod-gopacket /github\.com\/gopacket\/gopacket v(.*)/ https://github.com/gopacket/gopacket.git|^1
// bump-disabled: gomod-gopacket command go get -d github.com/gopacket/gopacket@v$LATEST && go mod tidy
diff --git a/go.sum b/go.sum
index cd320c9f..3d064f28 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/creasty/defaults v1.6.0 h1:ltuE9cfphUtlrBeomuu8PEyISTXnxqkBIoQfXgv7BS
github.com/creasty/defaults v1.6.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gomarkdown/markdown v0.0.0-20220627144906-e9a81102ebeb h1:5b/eFaSaKPFG9ygDBaPKkydKU5nFJYk08g9jPIVogMg=
+github.com/gomarkdown/markdown v0.0.0-20220627144906-e9a81102ebeb/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/gopacket/gopacket v0.0.0-20220819214934-ee81b8c880da h1:AAwDU9N39fQNYUtg270aiU6N7U2ZVsGZKiRwsCMsWEo=
github.com/gopacket/gopacket v0.0.0-20220819214934-ee81b8c880da/go.mod h1:DlRRfaM/QjAu2ADqraIure1Eif0HpNL8hmyVQ+qci5Y=
github.com/itchyny/timefmt-go v0.1.3 h1:7M3LGVDsqcd0VZH2U+x393obrzZisp7C0uEe921iRkU=
diff --git a/pkg/interp/testdata/args.fqtest b/pkg/interp/testdata/args.fqtest
index ef3272e5..5e03684e 100644
--- a/pkg/interp/testdata/args.fqtest
+++ b/pkg/interp/testdata/args.fqtest
@@ -162,6 +162,7 @@ json JavaScript Object Notation
jsonl JavaScript Object Notation Lines
macho Mach-O macOS executable
macho_fat Fat Mach-O macOS executable (multi-architecture)
+markdown Markdown
matroska Matroska file
mp3 MP3 file
mp3_frame MPEG audio layer 3 frame