diff --git a/format/all/all.go b/format/all/all.go index 1f820865..e15a5759 100644 --- a/format/all/all.go +++ b/format/all/all.go @@ -25,6 +25,7 @@ import ( _ "github.com/wader/fq/format/flac" _ "github.com/wader/fq/format/gif" _ "github.com/wader/fq/format/gzip" + _ "github.com/wader/fq/format/http" _ "github.com/wader/fq/format/icc" _ "github.com/wader/fq/format/id3" _ "github.com/wader/fq/format/inet" diff --git a/format/format.go b/format/format.go index 4df45e97..07f7fd3b 100644 --- a/format/format.go +++ b/format/format.go @@ -53,6 +53,7 @@ var ( TCP_Stream = &decode.Group{Name: "tcp_stream", DefaultInArg: TCP_Stream_In{}} // ex: http UDP_Payload = &decode.Group{Name: "udp_payload", DefaultInArg: UDP_Payload_In{}} // ex: dns MP3_Frame_Tags = &decode.Group{Name: "mp3_frame_tags"} + Content_Type = &decode.Group{Name: "content_type", DefaultInArg: Content_Type_In{}} Bytes = &decode.Group{Name: "bytes"} Bits = &decode.Group{Name: "bits"} @@ -114,6 +115,8 @@ var ( HEVC_SPS = &decode.Group{Name: "hevc_sps"} HEVC_VPS = &decode.Group{Name: "hevc_vps"} HTML = &decode.Group{Name: "html"} + HTTP = &decode.Group{Name: "http"} + HTTP_Chunked = &decode.Group{Name: "http_chunked"} ICC_Profile = &decode.Group{Name: "icc_profile"} ICMP = &decode.Group{Name: "icmp"} ICMPv6 = &decode.Group{Name: "icmpv6"} @@ -133,6 +136,7 @@ var ( MachO_Fat = &decode.Group{Name: "macho_fat"} Markdown = &decode.Group{Name: "markdown"} Matroska = &decode.Group{Name: "matroska"} + MIME_Multi_Part = &decode.Group{Name: "mime_multipart"} MOC3 = &decode.Group{Name: "moc3"} MP3 = &decode.Group{Name: "mp3"} MP3_Frame = &decode.Group{Name: "mp3_frame"} @@ -141,10 +145,10 @@ var ( MP4 = &decode.Group{Name: "mp4"} MPEG_ASC = &decode.Group{Name: "mpeg_asc"} MPEG_ES = &decode.Group{Name: "mpeg_es"} - MPES_PES = &decode.Group{Name: "mpeg_pes"} MPEG_PES_Packet = &decode.Group{Name: "mpeg_pes_packet"} MPEG_SPU = &decode.Group{Name: "mpeg_spu"} MPEG_TS = &decode.Group{Name: "mpeg_ts"} + MPES_PES = 
&decode.Group{Name: "mpeg_pes"} MsgPack = &decode.Group{Name: "msgpack"} Ogg = &decode.Group{Name: "ogg"} Ogg_Page = &decode.Group{Name: "ogg_page"} @@ -164,6 +168,7 @@ var ( SLL2_Packet = &decode.Group{Name: "sll2_packet"} TAR = &decode.Group{Name: "tar"} TCP_Segment = &decode.Group{Name: "tcp_segment"} + TextProto = &decode.Group{Name: "textproto"} TIFF = &decode.Group{Name: "tiff"} TLS = &decode.Group{Name: "tls"} TOML = &decode.Group{Name: "toml"} @@ -359,6 +364,9 @@ type AVI_In struct { DecodeExtendedChunks bool `doc:"Decode extended chunks"` } +type Bitcoin_Block_In struct { + HasHeader bool `doc:"Has blkdat header"` +} type Zip_In struct { Uncompress bool `doc:"Uncompress and probe files"` } @@ -380,8 +388,32 @@ type CSV_In struct { Comment string `doc:"Comment line character"` } -type Bitcoin_Block_In struct { - HasHeader bool `doc:"Has blkdat header"` +type Mime_Multipart_In struct { + Boundary string `doc:"Part boundary"` +} + +type TextProto_In struct { + Name string `doc:"Name of key/value"` +} + +type TextProto_Out struct { + Pairs map[string][]string +} + +type Content_Type_In struct { + ContentType string + Pairs map[string]string +} + +type Http_Chunked_In struct { + ContentEncoding string + ContentType string + Pairs map[string]string +} + +type Gzip_In struct { + ContentType string + Pairs map[string]string } type TLS_In struct { diff --git a/format/gzip/gzip.go b/format/gzip/gzip.go index c8e70aca..95d8e7d4 100644 --- a/format/gzip/gzip.go +++ b/format/gzip/gzip.go @@ -18,16 +18,19 @@ import ( ) var probeGroup decode.Group +var contentTypeGroup decode.Group func init() { interp.RegisterFormat( format.Gzip, &decode.Format{ - Description: "gzip compression", - Groups: []*decode.Group{format.Probe}, - DecodeFn: gzipDecode, + Description: "gzip compression", + Groups: []*decode.Group{format.Probe}, + DecodeFn: gzipDecode, + DefaultInArg: format.Gzip_In{}, Dependencies: []decode.Dependency{ {Groups: []*decode.Group{format.Probe}, Out: &probeGroup}, + 
{Groups: []*decode.Group{format.Content_Type}, Out: &contentTypeGroup}, }, }) } @@ -130,6 +133,9 @@ func gzipDecodeMember(d *decode.D) bitio.ReaderAtSeeker { } func gzipDecode(d *decode.D) any { + var gzi format.Gzip_In + d.ArgAs((&gzi)) + d.Endian = decode.LittleEndian var brs []bitio.ReadAtSeeker @@ -151,7 +157,10 @@ func gzipDecode(d *decode.D) any { if err != nil { d.IOPanic(err, "NewMultiReader") } - dv, _, _ := d.TryFieldFormatBitBuf("uncompressed", cbr, &probeGroup, format.Probe_In{}) + dv, _, _ := d.TryFieldFormatBitBuf("uncompressed", cbr, &probeGroup, format.Content_Type_In{ + ContentType: gzi.ContentType, + Pairs: gzi.Pairs, + }) if dv == nil { d.FieldRootBitBuf("uncompressed", cbr) } diff --git a/format/http/http.go b/format/http/http.go new file mode 100644 index 00000000..751b2c10 --- /dev/null +++ b/format/http/http.go @@ -0,0 +1,347 @@ +package http + +// TODO: pipeline +// TODO: range request reassembly? +// TODO: Trailer, only chunked? +// TODO: mime_multi_part decoder? +// TODO: content_type group? pass gzip pass along content_type? +// TODO: text/* and encoding? ISO-8859-1? +// TODO: PRI * HTTP/2.0, h2? +// TODO: 101 Switch protocol, Connection: Upgrade + +/* + +echo reqbody | curl --trace bla -H "Transfer-Encoding: chunked" -d @- http://0:8080 +while true ; do echo -e 'HTTP/1.0 200 OK\r\nrespbody' | nc -v -l 8080 ; done + +split("\n") | reduce .[] as $l ({state: "send", send: [], recv: []}; if $l | startswith("=>") then .state="send" elif $l | startswith("<=") then .state="recv" elif $l | test("^\\d") then .[.state] += [$l] end) | .["send", "recv"] |= (map(capture(": (?.{1,47})").hex | gsub(" "; "")) | add | hex) | .send | http | d + +*/ + +/* + + + Content-Type: multipart/form-data; boundary=AaB03x + + --AaB03x + Content-Disposition: form-data; name="submit-name" + + Larry + --AaB03x + Content-Disposition: form-data; name="files"; filename="file1.txt" + Content-Type: text/plain + + ... contents of file1.txt ... 
+ --AaB03x-- + + Multi file: + + Content-Type: multipart/form-data; boundary=AaB03x + + --AaB03x + Content-Disposition: form-data; name="submit-name" + + Larry + --AaB03x + Content-Disposition: form-data; name="files" + Content-Type: multipart/mixed; boundary=BbC04y + + --BbC04y + Content-Disposition: file; filename="file1.txt" + Content-Type: text/plain + + ... contents of file1.txt ... + --BbC04y + Content-Disposition: file; filename="file2.gif" + Content-Type: image/gif + Content-Transfer-Encoding: binary + + ...contents of file2.gif... + --BbC04y-- + --AaB03x-- + +*/ + +import ( + "encoding/csv" + "fmt" + "log" + "strconv" + "strings" + + "github.com/wader/fq/format" + "github.com/wader/fq/internal/lazyre" + "github.com/wader/fq/pkg/decode" + "github.com/wader/fq/pkg/interp" + "github.com/wader/fq/pkg/scalar" +) + +var httpContentTypeGroup decode.Group +var httpTextprotoGroup decode.Group +var httpHttpChunkedGroup decode.Group +var httpGzipGroup decode.Group + +func init() { + interp.RegisterFormat( + format.HTTP, + &decode.Format{ + Description: "Hypertext Transfer Protocol 1 and 1.1", // TODO: and v1.1? 
// headersFirst returns the first value of the header called key. Header names
// are matched case-insensitively. It returns "" when no such header exists or
// when the matching header carries no values.
func headersFirst(m map[string][]string, key string) string {
	for k, vs := range m {
		// Guard against an empty value slice so a present-but-empty header
		// cannot cause an index-out-of-range panic.
		if strings.EqualFold(k, key) && len(vs) > 0 {
			return vs[0]
		}
	}
	return ""
}

// Pairs is a parsed header value of the form
//
//	scheme; key=value; key="quoted value"
//
// as used by for example Content-Type and Content-Disposition.
//
// https://www.rfc-editor.org/rfc/rfc6750#section-3
type Pairs struct {
	// Scheme is the part before the first ";", for example "multipart/form-data".
	Scheme string
	// Params holds the key/value parameters following Scheme. It is nil when
	// the value has no ";" at all.
	Params map[string]string
}

// quoteSplit splits s on sep using a CSV reader with lazy quotes, so a field
// that starts with a double quote is read as a quoted field and has its quotes
// removed. It returns the reader's error unchanged; an empty s yields io.EOF.
func quoteSplit(s string, sep rune) ([]string, error) {
	r := csv.NewReader(strings.NewReader(s))
	// allows mix quotes and explicit ","
	r.LazyQuotes = true
	r.Comma = sep
	return r.Read()
}

// parsePairs parses header values such as
//
//	multipart/form-data; boundary=...
//	form-data; name="aaa_file"; filename="aaa"
//
// into a scheme and its parameters. Whitespace around the scheme, around each
// parameter and around parameter names is ignored, and empty parameters (for
// example after a trailing ";") are skipped. A parameter that is not exactly
// one key=value pair results in an error and a zero Pairs.
func parsePairs(s string) (Pairs, error) {
	scheme, rest, hasParams := strings.Cut(s, ";")
	w := Pairs{Scheme: strings.TrimSpace(scheme)}
	if !hasParams {
		return w, nil
	}
	w.Params = map[string]string{}

	rest = strings.TrimSpace(rest)
	if rest == "" {
		// "scheme;" with nothing after it: quoteSplit would report io.EOF.
		return w, nil
	}

	pairs, err := quoteSplit(rest, ';')
	if err != nil {
		return Pairs{}, err
	}

	for _, p := range pairs {
		// Parameters are normally written as "; key=value", so every pair
		// after the first starts with a space that must not end up in the key.
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		kv, err := quoteSplit(p, '=')
		if err != nil {
			return Pairs{}, err
		}
		if len(kv) != 2 {
			return Pairs{}, fmt.Errorf("invalid pair %q", p)
		}
		w.Params[strings.TrimSpace(kv[0])] = kv[1]
	}

	return w, nil
}
+var probePrefixRE = &lazyre.RE{S: `` + + `^` + + `(?:` + + // response + `HTTP/1` + + `|` + + // request + `(?P` + + // http methods + `CONNECT` + + `|DELETE` + + `|GET` + + `|HEAD` + + `|PATCH` + + `|POST` + + `|PUT` + + `|TRACE` + + `|OPTIONS` + + // dav methods + `|COPY` + + `|LOCK` + + `|MKCOL` + + `|MOVE` + + `|PROPFIND` + + `|PROPPATCH` + + `|UNLOCK` + + `)` + + ` [[:graph:]]` + // + `)`, +} + +func httpDecodeMessage(d *decode.D, isRequest bool) { + matches := map[string]string{} + if isRequest { + d.FieldStruct("request_line", func(d *decode.D) { + d.FieldRE(requestLineRE.Must(), &matches, scalar.ActualTrimSpace) + }) + } else { + d.FieldStruct("status_line", func(d *decode.D) { + d.FieldRE(statusLineRE.Must(), &matches, scalar.ActualTrimSpace) + }) + } + log.Printf("matches: %#+v\n", matches) + // no body, seems to happen + if d.End() { + return + } + + isHTTPv11 := matches["version"] == "HTTP/1.1" + isHEAD := matches["method"] == "HEAD" + + _, tpoV := d.FieldFormat("headers", &httpTextprotoGroup, format.TextProto_In{Name: "header"}) + tpo, ok := tpoV.(format.TextProto_Out) + if !ok { + panic(fmt.Sprintf("expected TextProtoOut got %#+v", tpoV)) + } + headers := tpo.Pairs + d.FieldRE(headersEndLineRE.Must(), nil) + + contentLength := headersFirst(headers, "content-length") + connection := headersFirst(headers, "connection") + transferEncoding := headersFirst(headers, "transfer-encoding") + contentEncoding := headersFirst(headers, "content-encoding") + contentType := headersFirst(headers, "content-type") + + bodyLen := int64(-1) + + if connection == "Upgrade" { + upgrade := headersFirst(headers, "upgrade") + // TODO: h2, h2c + // TODO: h2c would need HTTP2-Settings from request? 
+ // h2 => http2 over tls + // h2c => http2 cleartext + _ = upgrade + + } else { + if isHEAD { + // assume zero content-length for HEAD + bodyLen = 0 + } else { + if contentLength != "" { + if n, err := strconv.ParseInt(contentLength, 10, 64); err == nil { + bodyLen = n + } + } else { + if isHTTPv11 && connection != "closed" { + // http 1.1 is persistent by default + bodyLen = 0 + } else { + // TODO: assume reset? + } + } + } + } + + if bodyLen < 0 { + bodyLen = d.BitsLeft() / 8 + } + + // log.Printf("headers: %#+v\n", headers) + + // log.Printf("contentType: %#+v\n", contentType) + // TODO: content-range + // TODO: Transfer-Encoding + // chunked + trailer + + // TODO: gzip format hint for subformat? + + contentTypeValues, _ := parsePairs(contentType) + + switch transferEncoding { + case "chunked": + d.FieldFormat("body", &httpHttpChunkedGroup, format.Http_Chunked_In{ + ContentEncoding: contentEncoding, + ContentType: contentTypeValues.Scheme, + Pairs: contentTypeValues.Params, + }) + default: + bodyGroup := &httpContentTypeGroup + bodyGroupInArg := format.Content_Type_In{ + ContentType: contentTypeValues.Scheme, + Pairs: contentTypeValues.Params, + } + + d.FramedFn(bodyLen*8, func(d *decode.D) { + switch contentEncoding { + case "gzip": + if dv, _, _ := d.TryFieldFormat("body", &httpGzipGroup, nil); dv == nil { + d.FieldRawLen("body", d.BitsLeft()) + } + default: + if bodyGroup != nil { + log.Printf("bodyGroup: %#+v\n", bodyGroup) + log.Printf("http bodyGroupInArg: %#+v\n", bodyGroupInArg) + d.FieldFormatOrRawLen("body", d.BitsLeft(), bodyGroup, bodyGroupInArg) + + } else { + d.FieldRawLen("body", d.BitsLeft()) + } + } + + // Transfer-Encoding: chunked + // Transfer-Encoding: compress + // Transfer-Encoding: deflate + // Transfer-Encoding: gzip + + // // Several values can be listed, separated by a comma + // Transfer-Encoding: gzip, chunked + + }) + } +} + +func httpDecode(d *decode.D) any { + var isRequest bool + var tsi format.TCP_Stream_In + + if d.ArgAs(&tsi) 
{ + m := d.RE(probePrefixRE.Must()) + if m == nil { + d.Fatalf("no request or response prefix found") + } + isRequest = tsi.IsClient + } else { + isRequest = string(d.PeekBytes(5)) != "HTTP/" + } + + name := "response" + if isRequest { + name = "request" + } + for !d.End() { + d.FieldStruct(name, func(d *decode.D) { + httpDecodeMessage(d, isRequest) + }) + } + + return nil +} diff --git a/format/http/http_chunked.go b/format/http/http_chunked.go new file mode 100644 index 00000000..7478f751 --- /dev/null +++ b/format/http/http_chunked.go @@ -0,0 +1,95 @@ +package http + +import ( + "log" + "strconv" + + "github.com/wader/fq/format" + "github.com/wader/fq/internal/lazyre" + "github.com/wader/fq/pkg/bitio" + "github.com/wader/fq/pkg/decode" + "github.com/wader/fq/pkg/interp" + "github.com/wader/fq/pkg/scalar" +) + +var httpChunkedContentTypeGroup decode.Group +var httpChunkedGzipGroup decode.Group + +func init() { + interp.RegisterFormat( + format.HTTP_Chunked, + &decode.Format{ + Description: "HTTP chunked encoding", + DecodeFn: httpChunkedDecode, + DefaultInArg: format.Http_Chunked_In{}, + Dependencies: []decode.Dependency{ + {Groups: []*decode.Group{format.Content_Type}, Out: &httpChunkedContentTypeGroup}, + {Groups: []*decode.Group{format.Gzip}, Out: &httpChunkedGzipGroup}, + }, + }) +} + +var chunkStartLineRE = &lazyre.RE{S: `(?P.*\r?\n)`} + +func httpChunkedDecode(d *decode.D) any { + var hci format.Http_Chunked_In + hciOk := d.ArgAs(&hci) + + var chunkBRs []bitio.ReadAtSeeker + + d.FieldArray("chunks", func(d *decode.D) { + seenEnd := false + for !seenEnd { + d.FieldStruct("chunk", func(d *decode.D) { + // TODO: chunk extension + cm := map[string]string{} + d.FieldRE(chunkStartLineRE.Must(), &cm, scalar.ActualTrimSpace) + + lengthStr := cm["length"] + length, err := strconv.ParseInt(lengthStr, 16, 64) + if err != nil { + d.Fatalf("failed to parse length %q", lengthStr) + } + + br := d.FieldRawLen("data", length*8) + chunkBRs = append(chunkBRs, br) + 
d.FieldUTF8("new_line", 2) + if length == 0 { + // TODO: trailer + seenEnd = true + return + } + }) + } + }) + + mbr, err := bitio.NewMultiReader(chunkBRs...) + if err != nil { + d.IOPanic(err, "bitio.NewMultiReader") + } + + if hciOk { + log.Printf("chunked bodyGroupInArg: %#+v\n", hci) + + d.FieldStructRootBitBufFn("data", mbr, func(d *decode.D) { + // TODO: http content encoding group? + switch hci.ContentEncoding { + case "gzip": + d.FieldFormatOrRaw("body", &httpChunkedGzipGroup, format.Gzip_In{ + ContentType: hci.ContentType, + Pairs: hci.Pairs, + }) + default: + d.FieldFormatOrRaw("body", &httpChunkedContentTypeGroup, format.Content_Type_In{ + ContentType: hci.ContentType, + Pairs: hci.Pairs, + }) + } + }) + + } else { + d.FieldRootBitBuf("data", mbr) + } + + return nil +} diff --git a/format/http/mime_multipart.go b/format/http/mime_multipart.go new file mode 100644 index 00000000..ed51efb6 --- /dev/null +++ b/format/http/mime_multipart.go @@ -0,0 +1,105 @@ +package http + +import ( + "fmt" + "log" + "regexp" + + "github.com/wader/fq/format" + "github.com/wader/fq/pkg/decode" + "github.com/wader/fq/pkg/interp" +) + +var mimeMultipartTextprotoGroup decode.Group + +func init() { + interp.RegisterFormat( + format.MIME_Multi_Part, + &decode.Format{ + Description: "MIME multipart", + Groups: []*decode.Group{format.Content_Type}, + DecodeFn: mimeMultipartDecode, + DefaultInArg: format.Mime_Multipart_In{}, + Dependencies: []decode.Dependency{ + {Groups: []*decode.Group{format.TextProto}, Out: &mimeMultipartTextprotoGroup}, + }, + }) +} + +const multipartContentType = "multipart/form-data" + +func mimeMultipartDecode(d *decode.D) any { + var boundary string + + log.Println("multipart:") + + var cti format.Content_Type_In + var mmpi format.Mime_Multipart_In + if d.ArgAs(&cti) { + log.Printf("cti: %#+v\n", cti) + if cti.ContentType != multipartContentType { + d.Fatalf("content-type not " + multipartContentType) + } + boundary = cti.Pairs["boundary"] + } else if 
d.ArgAs(&mmpi) { + log.Printf("mmpi: %#+v\n", mmpi) + boundary = mmpi.Boundary + } + + if boundary == "" { + d.Fatalf("no boundary set") + } + + const boundaryREEndGroup = 1 + var boundaryRE = regexp.MustCompile(fmt.Sprintf("--%s(?P--)?\r\n", regexp.QuoteMeta(boundary))) + var endBoundaryLen int64 + + firstBoundaryRs := d.RE(boundaryRE) + if firstBoundaryRs == nil { + d.Fatalf("first boundary %q not found", boundary) + } + firstBoundaryR := firstBoundaryRs[0] + + d.FieldUTF8("preamble", int(firstBoundaryR.Start/8)) + d.FieldArray("parts", func(d *decode.D) { + for { + boundaryStartRs := d.RE(boundaryRE) + boundaryStartR := boundaryStartRs[0] + boundaryStartEnd := boundaryStartRs[boundaryREEndGroup] + + if boundaryStartRs == nil { + d.Fatalf("boundary %q not found", boundary) + } + if boundaryStartEnd.Start != -1 { + // found a boundary with ending "--" + endBoundaryLen = boundaryStartR.Len + break + } + + d.FieldStruct("part", func(d *decode.D) { + d.FieldUTF8("start_boundary", int(boundaryStartR.Len/8)) + + boundaryEndRs := d.RE(boundaryRE) + if boundaryEndRs == nil { + d.Fatalf("boundary end %q not found", boundary) + } + boundaryEndR := boundaryEndRs[0] + + partLen := (boundaryEndR.Start - boundaryStartR.Stop()) /* \r\n */ + d.FramedFn(partLen, func(d *decode.D) { + d.FieldFormat("headers", &mimeMultipartTextprotoGroup, format.TextProto_In{Name: "header"}) + d.FieldUTF8("header_end", 2) + d.FieldRawLen("data", d.BitsLeft()-16) + d.FieldUTF8("data_end", 2) + }) + + d.SeekAbs(boundaryEndRs[0].Start) + }) + } + }) + + d.FieldUTF8("end_boundary", int(endBoundaryLen/8)) + d.FieldUTF8("epilogue", int(d.BitsLeft()/8)) + + return nil +} diff --git a/format/http/testdata/http/Caddyfile b/format/http/testdata/http/Caddyfile new file mode 100644 index 00000000..955bb90f --- /dev/null +++ b/format/http/testdata/http/Caddyfile @@ -0,0 +1,17 @@ +{ + auto_https off + debug +} + +http://:8080 { + root * ./static + encode gzip { + minimum_length 1 + match { + status 2xx + } + } 
+ + respond /ok ok 200 + file_server +} diff --git a/format/http/testdata/http/curltrace.jq b/format/http/testdata/http/curltrace.jq new file mode 100644 index 00000000..ff2e561e --- /dev/null +++ b/format/http/testdata/http/curltrace.jq @@ -0,0 +1,24 @@ +# convert curl trace to {send: , recv: } +# Trace format: +# == Info: Trying 0.0.0.0:8080... +# == Info: Connected to 0.0.0.0 (127.0.0.1) port 8080 (#0) +# => Send header, 156 bytes (0x9c) +# 0000: 50 4f 53 54 20 2f 6f 6b 20 48 54 54 50 2f 31 2e POST /ok HTTP/1. +# <= Recv header, 17 bytes (0x11) +# 0000: 48 54 54 50 2f 31 2e 31 20 32 30 30 20 4f 4b 0d HTTP/1.1 200 OK. +# 0010: 0a . +def from_curl_trace: + ( reduce split("\n")[] as $l ( + {state: "send", send: [], recv: []}; + if $l | startswith("=>") then .state="send" + elif $l | startswith("<=") then .state="recv" + elif $l | test("^\\d") then .[.state] += [$l] + end + ) + | (.send, .recv) |= + ( map(capture(": (?.{1,47})").hex + | gsub(" "; "")) + | add + | hex + ) + ); diff --git a/format/http/testdata/http/gzip_client b/format/http/testdata/http/gzip_client new file mode 100644 index 00000000..45c6f0c5 --- /dev/null +++ b/format/http/testdata/http/gzip_client @@ -0,0 +1,6 @@ +GET /ccc HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* +Accept-Encoding: deflate, gzip + diff --git a/format/http/testdata/http/gzip_client.fqtest b/format/http/testdata/http/gzip_client.fqtest new file mode 100644 index 00000000..d2c172d4 --- /dev/null +++ b/format/http/testdata/http/gzip_client.fqtest @@ -0,0 +1 @@ +$ fq -d http dv gzip_client diff --git a/format/http/testdata/http/gzip_png_client b/format/http/testdata/http/gzip_png_client new file mode 100644 index 00000000..59a6d2d6 --- /dev/null +++ b/format/http/testdata/http/gzip_png_client @@ -0,0 +1,6 @@ +GET /4x4.png HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* +Accept-Encoding: deflate, gzip + diff --git a/format/http/testdata/http/gzip_png_client.fqtest 
b/format/http/testdata/http/gzip_png_client.fqtest new file mode 100644 index 00000000..2c78b361 --- /dev/null +++ b/format/http/testdata/http/gzip_png_client.fqtest @@ -0,0 +1 @@ +$ fq -d http dv gzip_png_client diff --git a/format/http/testdata/http/gzip_png_server b/format/http/testdata/http/gzip_png_server new file mode 100644 index 00000000..7d45c05d Binary files /dev/null and b/format/http/testdata/http/gzip_png_server differ diff --git a/format/http/testdata/http/gzip_png_server.fqtest b/format/http/testdata/http/gzip_png_server.fqtest new file mode 100644 index 00000000..d1eb9a11 --- /dev/null +++ b/format/http/testdata/http/gzip_png_server.fqtest @@ -0,0 +1 @@ +$ fq -d http dv gzip_png_server diff --git a/format/http/testdata/http/gzip_server b/format/http/testdata/http/gzip_server new file mode 100644 index 00000000..78c40090 Binary files /dev/null and b/format/http/testdata/http/gzip_server differ diff --git a/format/http/testdata/http/gzip_server.fqtest b/format/http/testdata/http/gzip_server.fqtest new file mode 100644 index 00000000..f17451a2 --- /dev/null +++ b/format/http/testdata/http/gzip_server.fqtest @@ -0,0 +1 @@ +$ fq -d http dv gzip_server diff --git a/format/http/testdata/http/multi_part_multi_client b/format/http/testdata/http/multi_part_multi_client new file mode 100644 index 00000000..96e26243 --- /dev/null +++ b/format/http/testdata/http/multi_part_multi_client @@ -0,0 +1,18 @@ +POST /ok HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* +Content-Length: 358 +Content-Type: multipart/form-data; boundary=------------------------3eaaa05c7138e96b + +--------------------------3eaaa05c7138e96b +Content-Disposition: form-data; name="aaa_file"; filename="aaa" +Content-Type: application/octet-stream + +aaa +--------------------------3eaaa05c7138e96b +Content-Disposition: form-data; name="bbb_file"; filename="bbb" +Content-Type: application/octet-stream + +bbb +--------------------------3eaaa05c7138e96b-- diff --git 
a/format/http/testdata/http/multi_part_multi_client.fqtest b/format/http/testdata/http/multi_part_multi_client.fqtest new file mode 100644 index 00000000..df1031a6 --- /dev/null +++ b/format/http/testdata/http/multi_part_multi_client.fqtest @@ -0,0 +1 @@ +$ fq -d http dv multi_part_multi_client diff --git a/format/http/testdata/http/multi_part_multi_server b/format/http/testdata/http/multi_part_multi_server new file mode 100644 index 00000000..8b33d505 --- /dev/null +++ b/format/http/testdata/http/multi_part_multi_server @@ -0,0 +1,6 @@ +HTTP/1.1 200 OK +Server: Caddy +Date: Tue, 05 Apr 2022 13:17:44 GMT +Content-Length: 2 + +ok \ No newline at end of file diff --git a/format/http/testdata/http/multi_part_multi_server.fqtest b/format/http/testdata/http/multi_part_multi_server.fqtest new file mode 100644 index 00000000..eb995818 --- /dev/null +++ b/format/http/testdata/http/multi_part_multi_server.fqtest @@ -0,0 +1 @@ +$ fq -d http dv multi_part_multi_server diff --git a/format/http/testdata/http/multi_part_single_client b/format/http/testdata/http/multi_part_single_client new file mode 100644 index 00000000..2b116cd1 --- /dev/null +++ b/format/http/testdata/http/multi_part_single_client @@ -0,0 +1,13 @@ +POST /ok HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* +Content-Length: 202 +Content-Type: multipart/form-data; boundary=------------------------86670d766d5fa801 + +--------------------------86670d766d5fa801 +Content-Disposition: form-data; name="aaa_file"; filename="aaa" +Content-Type: application/octet-stream + +aaa +--------------------------86670d766d5fa801-- diff --git a/format/http/testdata/http/multi_part_single_client.fqtest b/format/http/testdata/http/multi_part_single_client.fqtest new file mode 100644 index 00000000..a176d87e --- /dev/null +++ b/format/http/testdata/http/multi_part_single_client.fqtest @@ -0,0 +1 @@ +$ fq -d http dv multi_part_single_client diff --git a/format/http/testdata/http/multi_part_single_server 
b/format/http/testdata/http/multi_part_single_server new file mode 100644 index 00000000..8b33d505 --- /dev/null +++ b/format/http/testdata/http/multi_part_single_server @@ -0,0 +1,6 @@ +HTTP/1.1 200 OK +Server: Caddy +Date: Tue, 05 Apr 2022 13:17:44 GMT +Content-Length: 2 + +ok \ No newline at end of file diff --git a/format/http/testdata/http/multi_part_single_server.fqtest b/format/http/testdata/http/multi_part_single_server.fqtest new file mode 100644 index 00000000..03f40a03 --- /dev/null +++ b/format/http/testdata/http/multi_part_single_server.fqtest @@ -0,0 +1 @@ +$ fq -d http dv multi_part_single_server diff --git a/format/http/testdata/http/multi_request_client b/format/http/testdata/http/multi_request_client new file mode 100644 index 00000000..61430900 --- /dev/null +++ b/format/http/testdata/http/multi_request_client @@ -0,0 +1,10 @@ +GET /aaa HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* + +GET /bbb HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* + diff --git a/format/http/testdata/http/multi_request_client.fqtest b/format/http/testdata/http/multi_request_client.fqtest new file mode 100644 index 00000000..61bda7ac --- /dev/null +++ b/format/http/testdata/http/multi_request_client.fqtest @@ -0,0 +1 @@ +$ fq -d http dv multi_request_client diff --git a/format/http/testdata/http/multi_request_server b/format/http/testdata/http/multi_request_server new file mode 100644 index 00000000..05304da3 --- /dev/null +++ b/format/http/testdata/http/multi_request_server @@ -0,0 +1,17 @@ +HTTP/1.1 200 OK +Accept-Ranges: bytes +Content-Length: 3 +Etag: "r9v8og3" +Last-Modified: Tue, 05 Apr 2022 12:14:40 GMT +Server: Caddy +Date: Tue, 05 Apr 2022 13:17:44 GMT + +aaaHTTP/1.1 200 OK +Accept-Ranges: bytes +Content-Length: 3 +Etag: "r9v8og3" +Last-Modified: Tue, 05 Apr 2022 12:14:40 GMT +Server: Caddy +Date: Tue, 05 Apr 2022 13:17:44 GMT + +bbb \ No newline at end of file diff --git 
a/format/http/testdata/http/multi_request_server.fqtest b/format/http/testdata/http/multi_request_server.fqtest new file mode 100644 index 00000000..43b41c82 --- /dev/null +++ b/format/http/testdata/http/multi_request_server.fqtest @@ -0,0 +1 @@ +$ fq -d http dv multi_request_server diff --git a/format/http/testdata/http/run.sh b/format/http/testdata/http/run.sh new file mode 100644 index 00000000..6aac1225 --- /dev/null +++ b/format/http/testdata/http/run.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +# run caddy in other terminal +# caddy run + +gen_test() { + fq -Rs -L . 'include "curltrace"; from_curl_trace.send' $1.trace >$1_client + fq -Rs -L . 'include "curltrace"; from_curl_trace.recv' $1.trace >$1_server + echo "\$ fq -d http dv $1_client" >$1_client.fqtest + echo "\$ fq -d http dv $1_server" >$1_server.fqtest +} + +echo reqbody | curl -s --trace sinple_request.trace -d @- http://0:8080/ok >/dev/null +gen_test sinple_request +rm -f sinple_request.trace + +curl -s --trace multi_request.trace http://0:8080/aaa http://0:8080/bbb >/dev/null +gen_test multi_request +rm -f multi_request.trace + +curl -s --trace multi_part_single.trace --form aaa_file='@static/aaa' http://0:8080/ok >/dev/null +gen_test multi_part_single +rm -f multi_part_single.trace + +curl -s --trace multi_part_multi.trace --form aaa_file='@static/aaa' --form bbb_file='@static/bbb' http://0:8080/ok >/dev/null +gen_test multi_part_multi +rm -f multi_part_multi.trace + +curl -s --trace gzip.trace --compressed http://0:8080/ccc >/dev/null +gen_test gzip +rm -f gzip.trace + +curl -s --trace gzip_png.trace --compressed http://0:8080/4x4.png >/dev/null +gen_test gzip_png +rm -f gzip_png.trace diff --git a/format/http/testdata/http/sinple_request_client b/format/http/testdata/http/sinple_request_client new file mode 100644 index 00000000..75e4cd18 --- /dev/null +++ b/format/http/testdata/http/sinple_request_client @@ -0,0 +1,8 @@ +POST /ok HTTP/1.1 +Host: 0.0.0.0:8080 +User-Agent: curl/7.77.0 +Accept: */* 
+Content-Length: 7 +Content-Type: application/x-www-form-urlencoded + +reqbody \ No newline at end of file diff --git a/format/http/testdata/http/sinple_request_client.fqtest b/format/http/testdata/http/sinple_request_client.fqtest new file mode 100644 index 00000000..9c824549 --- /dev/null +++ b/format/http/testdata/http/sinple_request_client.fqtest @@ -0,0 +1 @@ +$ fq -d http dv sinple_request_client diff --git a/format/http/testdata/http/sinple_request_server b/format/http/testdata/http/sinple_request_server new file mode 100644 index 00000000..d2e48eae --- /dev/null +++ b/format/http/testdata/http/sinple_request_server @@ -0,0 +1,6 @@ +HTTP/1.1 200 OK +Server: Caddy +Date: Tue, 05 Apr 2022 13:17:43 GMT +Content-Length: 2 + +ok \ No newline at end of file diff --git a/format/http/testdata/http/sinple_request_server.fqtest b/format/http/testdata/http/sinple_request_server.fqtest new file mode 100644 index 00000000..f453ee12 --- /dev/null +++ b/format/http/testdata/http/sinple_request_server.fqtest @@ -0,0 +1 @@ +$ fq -d http dv sinple_request_server diff --git a/format/http/testdata/http/static/4x4.png b/format/http/testdata/http/static/4x4.png new file mode 100644 index 00000000..bf5305bc Binary files /dev/null and b/format/http/testdata/http/static/4x4.png differ diff --git a/format/http/testdata/http/static/aaa b/format/http/testdata/http/static/aaa new file mode 100644 index 00000000..7c4a013e --- /dev/null +++ b/format/http/testdata/http/static/aaa @@ -0,0 +1 @@ +aaa \ No newline at end of file diff --git a/format/http/testdata/http/static/bbb b/format/http/testdata/http/static/bbb new file mode 100644 index 00000000..01f02e32 --- /dev/null +++ b/format/http/testdata/http/static/bbb @@ -0,0 +1 @@ +bbb \ No newline at end of file diff --git a/format/http/testdata/http/static/ccc b/format/http/testdata/http/static/ccc new file mode 100644 index 00000000..72426338 --- /dev/null +++ b/format/http/testdata/http/static/ccc @@ -0,0 +1,2 @@ 
+ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc diff --git a/format/http/testdata/http_chunked/test b/format/http/testdata/http_chunked/test new file mode 100644 index 00000000..f47b118c --- /dev/null +++ b/format/http/testdata/http_chunked/test @@ -0,0 +1,10 @@ +4 +Wiki +6 +pedia +E +in + +chunks. +0 + diff --git a/format/http/testdata/http_chunked/test.fqtest b/format/http/testdata/http_chunked/test.fqtest new file mode 100644 index 00000000..fc55328a --- /dev/null +++ b/format/http/testdata/http_chunked/test.fqtest @@ -0,0 +1 @@ +$ fq -d http_chunked dv test diff --git a/format/http/testdata/mime_multipart/rfc1341_1 b/format/http/testdata/mime_multipart/rfc1341_1 new file mode 100644 index 00000000..edfe8a7b --- /dev/null +++ b/format/http/testdata/mime_multipart/rfc1341_1 @@ -0,0 +1,15 @@ +This is the preamble. It is to be ignored, though it +is a handy place for mail composers to include an +explanatory note to non-MIME compliant readers. +--simple boundary + +This is implicitly typed plain ASCII text. +It does NOT end with a linebreak. +--simple boundary +Content-type: text/plain; charset=us-ascii + +This is explicitly typed plain ASCII text. +It DOES end with a linebreak. + +--simple boundary-- +This is the epilogue. It is also to be ignored. 
\ No newline at end of file diff --git a/format/http/testdata/mime_multipart/test b/format/http/testdata/mime_multipart/test new file mode 100644 index 00000000..b9a3007e --- /dev/null +++ b/format/http/testdata/mime_multipart/test @@ -0,0 +1,11 @@ +--------------------------3eaaa05c7138e96b +Content-Disposition: form-data; name="aaa_file"; filename="aaa" +Content-Type: application/octet-stream + +aaa +--------------------------3eaaa05c7138e96b +Content-Disposition: form-data; name="bbb_file"; filename="bbb" +Content-Type: application/octet-stream + +bbb +--------------------------3eaaa05c7138e96b-- diff --git a/format/http/textproto.go b/format/http/textproto.go new file mode 100644 index 00000000..b09eef5a --- /dev/null +++ b/format/http/textproto.go @@ -0,0 +1,61 @@ +package http + +import ( + "github.com/wader/fq/format" + "github.com/wader/fq/internal/lazyre" + "github.com/wader/fq/pkg/decode" + "github.com/wader/fq/pkg/interp" + "github.com/wader/fq/pkg/scalar" +) + +func init() { + interp.RegisterFormat( + format.TextProto, + &decode.Format{ + Description: "Generic text-based protocol (HTTP,SMTP-like)", + RootArray: true, + DecodeFn: decodeTextProto, + DefaultInArg: format.TextProto_In{ + Name: "pair", + }, + }) +} + +// TODO: line folding correct? +// TODO: move to decode? also make d.FieldArray/Struct return T? + +var textprotoLineRE = &lazyre.RE{S: `` + + (`(?P<name>[\w-]+:)`) + + (`(?P<value>` + + `\s*` + // eagerly skip leading whitespace + `(?:` + + `.*?(?:\r?\n[\t ].*?)*` + + `)` + + `\r?\n` + + `)` + + ``)} + +func decodeTextProto(d *decode.D) any { + var tpi format.TextProto_In + d.ArgAs(&tpi) + + m := map[string][]string{} + + for !d.End() { + c := d.PeekBytes(1)[0] + if c == '\n' || c == '\r' { + break + } + + d.FieldStruct(tpi.Name, func(d *decode.D) { + cm := map[string]string{} + // TODO: don't strip :?
+ d.FieldRE(textprotoLineRE.Must(), &cm, scalar.StrActualTrim(" :\r\n")) + name := cm["name"] + value := cm["value"] + m[name] = append(m[name], value) + }) + } + + return format.TextProto_Out{Pairs: m} +} diff --git a/format/inet.go b/format/inet.go index c6ef0008..3c337bf8 100644 --- a/format/inet.go +++ b/format/inet.go @@ -1181,6 +1181,7 @@ var UDPPortMap = scalar.UintMap{ const ( TCPPortDomain = 53 + TCPPortHTTP = 80 TCPPortRTMP = 1935 ) @@ -1240,7 +1241,7 @@ var TCPPortMap = scalar.UintMap{ 76: {Sym: "deos", Description: "Distributed External Object Store"}, 78: {Sym: "vettcp", Description: "vettcp"}, 79: {Sym: "finger", Description: "Finger"}, - 80: {Sym: "http", Description: "World Wide Web HTTP"}, + TCPPortHTTP: {Sym: "http", Description: "World Wide Web HTTP"}, 81: {Sym: "hosts2-ns", Description: "HOSTS2 Name Server"}, 82: {Sym: "xfer", Description: "XFER Utility"}, 83: {Sym: "mit-ml-dev", Description: "MIT ML Device"}, diff --git a/format/jpeg/jpeg.go b/format/jpeg/jpeg.go index 0f45dc45..71086c05 100644 --- a/format/jpeg/jpeg.go +++ b/format/jpeg/jpeg.go @@ -22,8 +22,12 @@ func init() { format.JPEG, &decode.Format{ Description: "Joint Photographic Experts Group file", - Groups: []*decode.Group{format.Probe, format.Image}, - DecodeFn: jpegDecode, + Groups: []*decode.Group{ + format.Probe, + format.Image, + format.Content_Type, + }, + DecodeFn: jpegDecode, Dependencies: []decode.Dependency{ {Groups: []*decode.Group{format.Exif}, Out: &exifFormat}, {Groups: []*decode.Group{format.ICC_Profile}, Out: &iccProfileFormat}, @@ -166,6 +170,11 @@ var markers = scalar.UintMap{ } func jpegDecode(d *decode.D) any { + var cti format.Content_Type_In + if d.ArgAs(&cti) && cti.ContentType != "image/jpeg" { + d.Fatalf("content-type not image/jpeg") + } + d.AssertLeastBytesLeft(2) if !bytes.Equal(d.PeekBytes(2), []byte{0xff, SOI}) { d.Errorf("no SOI marker") diff --git a/format/mp4/mp4.go b/format/mp4/mp4.go index ba2baea9..28ae3893 100644 --- a/format/mp4/mp4.go +++ 
b/format/mp4/mp4.go @@ -18,6 +18,8 @@ package mp4 import ( "embed" "fmt" + "log" + "strings" "github.com/wader/fq/format" "github.com/wader/fq/internal/cmpex" @@ -63,6 +65,7 @@ func init() { Groups: []*decode.Group{ format.Probe, format.Image, // avif + format.Content_Type, }, DecodeFn: mp4Decode, DefaultInArg: format.MP4_In{ @@ -453,6 +456,13 @@ func mp4Decode(d *decode.D) any { var mi format.MP4_In d.ArgAs(&mi) + log.Printf("mp4 in: %#+v\n", mi) + + var cti format.Content_Type_In + if d.ArgAs(&cti) && !strings.HasSuffix(cti.ContentType, "/mp4") { + d.Fatalf("content-type not */mp4") + } + ctx := &decodeContext{ opts: mi, path: []pathEntry{{typ: "root"}}, diff --git a/format/zip/zip.go b/format/zip/zip.go index d78df744..938ba8d8 100644 --- a/format/zip/zip.go +++ b/format/zip/zip.go @@ -273,6 +273,11 @@ func fieldsExtraFields(d *decode.D) extraFields { } func zipDecode(d *decode.D) any { + var cti format.Content_Type_In + if d.ArgAs(&cti) && cti.ContentType != "application/zip" { + d.Fatalf("content-type not application/zip") + } + var zi format.Zip_In d.ArgAs(&zi) diff --git a/pkg/decode/decode.go b/pkg/decode/decode.go index e4673434..e836cdcb 100644 --- a/pkg/decode/decode.go +++ b/pkg/decode/decode.go @@ -1316,5 +1316,7 @@ func (d *D) FieldRE(re *regexp.Regexp, mRef *map[string]string, sms ...scalar.St } } - d.SeekAbs(rs[0].Stop()) + if len(rs) > 0 { + d.SeekAbs(rs[0].Stop()) + } }