mirror of
https://github.com/wader/fq.git
synced 2024-11-28 03:02:55 +03:00
cae288e6be
json, yaml, toml, xml, html, csv are now normal formats and most of them also participate in probing (not html and csv). Also fixes a bunch of bugs in to/fromxml, to/fromjq etc.
651 lines
9.8 KiB
Plaintext
651 lines
9.8 KiB
Plaintext
/test:
|
|
test
|
|
$ fq -d html . /test
|
|
{
|
|
"html": {
|
|
"body": "test",
|
|
"head": ""
|
|
}
|
|
}
|
|
$ fq -d raw -ni . all.xml multi_diff.xml multi_same.xml ns.xml simple.xml escape.xml noscript.html
|
|
null> inputs | {name: input_filename, str: (tobytes | tostring)} | slurp("files")
|
|
null> spew("files") | .name, (.str | fromhtml | ., (toxml({indent: 2}) | println))
|
|
"all.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"elm": {
|
|
"first": {
|
|
"#comment": "comment"
|
|
},
|
|
"last": {
|
|
"#comment": "comment1 comment2",
|
|
"#text": "text1\n \n text2",
|
|
"-attr1": "v1",
|
|
"-attr2": "v2"
|
|
},
|
|
"middle": "text"
|
|
}
|
|
},
|
|
"head": ""
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<elm>
|
|
<first>
|
|
<!--comment--></first>
|
|
<last attr1="v1" attr2="v2">text1
 
 text2
|
|
<!--comment1 comment2--></last>
|
|
<middle>text</middle>
|
|
</elm>
|
|
</body>
|
|
<head></head>
|
|
</html>
|
|
"multi_diff.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"elm1": "",
|
|
"elm2": ""
|
|
},
|
|
"head": ""
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<elm1></elm1>
|
|
<elm2></elm2>
|
|
</body>
|
|
<head></head>
|
|
</html>
|
|
"multi_same.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"elm": [
|
|
"",
|
|
""
|
|
]
|
|
},
|
|
"head": ""
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<elm></elm>
|
|
<elm></elm>
|
|
</body>
|
|
<head></head>
|
|
</html>
|
|
"ns.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"elm": {
|
|
"-xmlns:ns1": "http://test1",
|
|
"-xmlns:ns2": "http://test2",
|
|
"aaa": "3",
|
|
"ns1:aaa": {
|
|
"#text": "1",
|
|
"-ns1:attr1": "v1"
|
|
},
|
|
"ns2:aaa": {
|
|
"#text": "2",
|
|
"-ns2:attr2": "v2",
|
|
"ns1:ccc": {
|
|
"-ns1:attr3": "v3"
|
|
},
|
|
"ns2:ccc": {
|
|
"-ns2:attr4": "v4"
|
|
},
|
|
"ns3:ccc": {
|
|
"-ns2:attr5": "v5"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"head": ""
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<elm xmlns:ns1="http://test1" xmlns:ns2="http://test2">
|
|
<aaa>3</aaa>
|
|
<ns1:aaa ns1:attr1="v1">1</ns1:aaa>
|
|
<ns2:aaa ns2:attr2="v2">2
|
|
<ns1:ccc ns1:attr3="v3"></ns1:ccc>
|
|
<ns2:ccc ns2:attr4="v4"></ns2:ccc>
|
|
<ns3:ccc ns2:attr5="v5"></ns3:ccc>
|
|
</ns2:aaa>
|
|
</elm>
|
|
</body>
|
|
<head></head>
|
|
</html>
|
|
"simple.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"elm": ""
|
|
},
|
|
"head": ""
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<elm></elm>
|
|
</body>
|
|
<head></head>
|
|
</html>
|
|
"escape.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"a": {
|
|
"#text": "&<>",
|
|
"-attr": "&<>"
|
|
}
|
|
},
|
|
"head": ""
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<a attr="&<>">&<></a>
|
|
</body>
|
|
<head></head>
|
|
</html>
|
|
"noscript.html"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"a": "text"
|
|
},
|
|
"head": {
|
|
"noscript": ""
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<body>
|
|
<a>text</a>
|
|
</body>
|
|
<head>
|
|
<noscript></noscript>
|
|
</head>
|
|
</html>
|
|
null> spew("files") | .name, (.str | fromhtml({seq: true}) | ., (toxml({indent: 2}) | println))
|
|
"all.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"elm": {
|
|
"first": {
|
|
"#comment": "comment",
|
|
"#seq": 0
|
|
},
|
|
"last": {
|
|
"#comment": "comment1 comment2",
|
|
"#seq": 2,
|
|
"#text": "text1\n \n text2",
|
|
"-attr1": "v1",
|
|
"-attr2": "v2"
|
|
},
|
|
"middle": {
|
|
"#seq": 1,
|
|
"#text": "text"
|
|
}
|
|
}
|
|
},
|
|
"head": {
|
|
"#seq": 0
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm>
|
|
<first>
|
|
<!--comment--></first>
|
|
<middle>text</middle>
|
|
<last attr1="v1" attr2="v2">text1
 
 text2
|
|
<!--comment1 comment2--></last>
|
|
</elm>
|
|
</body>
|
|
</html>
|
|
"multi_diff.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"elm1": {
|
|
"#seq": 0
|
|
},
|
|
"elm2": {
|
|
"#seq": 1
|
|
}
|
|
},
|
|
"head": {
|
|
"#seq": 0
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm1></elm1>
|
|
<elm2></elm2>
|
|
</body>
|
|
</html>
|
|
"multi_same.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"elm": [
|
|
{
|
|
"#seq": 0
|
|
},
|
|
{
|
|
"#seq": 1
|
|
}
|
|
]
|
|
},
|
|
"head": {
|
|
"#seq": 0
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm></elm>
|
|
<elm></elm>
|
|
</body>
|
|
</html>
|
|
"ns.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"elm": {
|
|
"-xmlns:ns1": "http://test1",
|
|
"-xmlns:ns2": "http://test2",
|
|
"aaa": {
|
|
"#seq": 2,
|
|
"#text": "3"
|
|
},
|
|
"ns1:aaa": {
|
|
"#seq": 0,
|
|
"#text": "1",
|
|
"-ns1:attr1": "v1"
|
|
},
|
|
"ns2:aaa": {
|
|
"#seq": 1,
|
|
"#text": "2",
|
|
"-ns2:attr2": "v2",
|
|
"ns1:ccc": {
|
|
"#seq": 0,
|
|
"-ns1:attr3": "v3"
|
|
},
|
|
"ns2:ccc": {
|
|
"#seq": 1,
|
|
"-ns2:attr4": "v4"
|
|
},
|
|
"ns3:ccc": {
|
|
"#seq": 2,
|
|
"-ns2:attr5": "v5"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"head": {
|
|
"#seq": 0
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm xmlns:ns1="http://test1" xmlns:ns2="http://test2">
|
|
<ns1:aaa ns1:attr1="v1">1</ns1:aaa>
|
|
<ns2:aaa ns2:attr2="v2">2
|
|
<ns1:ccc ns1:attr3="v3"></ns1:ccc>
|
|
<ns2:ccc ns2:attr4="v4"></ns2:ccc>
|
|
<ns3:ccc ns2:attr5="v5"></ns3:ccc>
|
|
</ns2:aaa>
|
|
<aaa>3</aaa>
|
|
</elm>
|
|
</body>
|
|
</html>
|
|
"simple.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"elm": ""
|
|
},
|
|
"head": {
|
|
"#seq": 0
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm></elm>
|
|
</body>
|
|
</html>
|
|
"escape.xml"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"a": {
|
|
"#text": "&<>",
|
|
"-attr": "&<>"
|
|
}
|
|
},
|
|
"head": {
|
|
"#seq": 0
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<a attr="&<>">&<></a>
|
|
</body>
|
|
</html>
|
|
"noscript.html"
|
|
{
|
|
"html": {
|
|
"body": {
|
|
"#seq": 1,
|
|
"a": "text"
|
|
},
|
|
"head": {
|
|
"#seq": 0,
|
|
"noscript": ""
|
|
}
|
|
}
|
|
}
|
|
<html>
|
|
<head>
|
|
<noscript></noscript>
|
|
</head>
|
|
<body>
|
|
<a>text</a>
|
|
</body>
|
|
</html>
|
|
null> spew("files") | .name, (.str | fromhtml({array: true}) | ., (toxml({indent: 2}) | println))
|
|
"all.xml"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head"
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"elm",
|
|
[
|
|
[
|
|
"first",
|
|
{
|
|
"#comment": "comment"
|
|
}
|
|
],
|
|
[
|
|
"middle",
|
|
{
|
|
"#text": "text"
|
|
}
|
|
],
|
|
[
|
|
"last",
|
|
{
|
|
"#comment": "comment1 comment2",
|
|
"#text": "text1\n \n text2",
|
|
"attr1": "v1",
|
|
"attr2": "v2"
|
|
}
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm>
|
|
<first>
|
|
<!--comment--></first>
|
|
<middle>text</middle>
|
|
<last attr1="v1" attr2="v2">text1
 
 text2
|
|
<!--comment1 comment2--></last>
|
|
</elm>
|
|
</body>
|
|
</html>
|
|
"multi_diff.xml"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head"
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"elm1"
|
|
],
|
|
[
|
|
"elm2"
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm1></elm1>
|
|
<elm2></elm2>
|
|
</body>
|
|
</html>
|
|
"multi_same.xml"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head"
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"elm"
|
|
],
|
|
[
|
|
"elm"
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm></elm>
|
|
<elm></elm>
|
|
</body>
|
|
</html>
|
|
"ns.xml"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head"
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"elm",
|
|
{
|
|
"xmlns:ns1": "http://test1",
|
|
"xmlns:ns2": "http://test2"
|
|
},
|
|
[
|
|
[
|
|
"ns1:aaa",
|
|
{
|
|
"#text": "1",
|
|
"ns1:attr1": "v1"
|
|
}
|
|
],
|
|
[
|
|
"ns2:aaa",
|
|
{
|
|
"#text": "2",
|
|
"ns2:attr2": "v2"
|
|
},
|
|
[
|
|
[
|
|
"ns1:ccc",
|
|
{
|
|
"ns1:attr3": "v3"
|
|
}
|
|
],
|
|
[
|
|
"ns2:ccc",
|
|
{
|
|
"ns2:attr4": "v4"
|
|
}
|
|
],
|
|
[
|
|
"ns3:ccc",
|
|
{
|
|
"ns2:attr5": "v5"
|
|
}
|
|
]
|
|
]
|
|
],
|
|
[
|
|
"aaa",
|
|
{
|
|
"#text": "3"
|
|
}
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm xmlns:ns1="http://test1" xmlns:ns2="http://test2">
|
|
<ns1:aaa ns1:attr1="v1">1</ns1:aaa>
|
|
<ns2:aaa ns2:attr2="v2">2
|
|
<ns1:ccc ns1:attr3="v3"></ns1:ccc>
|
|
<ns2:ccc ns2:attr4="v4"></ns2:ccc>
|
|
<ns3:ccc ns2:attr5="v5"></ns3:ccc>
|
|
</ns2:aaa>
|
|
<aaa>3</aaa>
|
|
</elm>
|
|
</body>
|
|
</html>
|
|
"simple.xml"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head"
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"elm"
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<elm></elm>
|
|
</body>
|
|
</html>
|
|
"escape.xml"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head"
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"a",
|
|
{
|
|
"#text": "&<>",
|
|
"attr": "&<>"
|
|
}
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head></head>
|
|
<body>
|
|
<a attr="&<>">&<></a>
|
|
</body>
|
|
</html>
|
|
"noscript.html"
|
|
[
|
|
"html",
|
|
[
|
|
[
|
|
"head",
|
|
[
|
|
[
|
|
"noscript"
|
|
]
|
|
]
|
|
],
|
|
[
|
|
"body",
|
|
[
|
|
[
|
|
"a",
|
|
{
|
|
"#text": "text"
|
|
}
|
|
]
|
|
]
|
|
]
|
|
]
|
|
]
|
|
<html>
|
|
<head>
|
|
<noscript></noscript>
|
|
</head>
|
|
<body>
|
|
<a>text</a>
|
|
</body>
|
|
</html>
|
|
null> ^D
|