diff --git a/common/httpx/csp.go b/common/httpx/csp.go index 7a68ac9..e1c4831 100644 --- a/common/httpx/csp.go +++ b/common/httpx/csp.go @@ -1,10 +1,12 @@ package httpx import ( - "net/http" + "bytes" "strings" + "github.com/PuerkitoBio/goquery" "github.com/projectdiscovery/httpx/common/slice" + "github.com/projectdiscovery/stringsutil" ) // CSPHeaders is an incomplete list of most common CSP headers @@ -21,22 +23,28 @@ type CSPData struct { } // CSPGrab fills the CSPData -func (h *HTTPX) CSPGrab(r *http.Response) *CSPData { +func (h *HTTPX) CSPGrab(r *Response) *CSPData { domains := make(map[string]struct{}) + // extract from headers for _, cspHeader := range CSPHeaders { - cspRaw := r.Header.Get(cspHeader) - if cspRaw != "" { - rules := strings.Split(cspRaw, ";") - for _, rule := range rules { - // rule is like aa bb domain1 domain2 domain3 - tokens := strings.Split(rule, " ") - // we extracts only potential domains - for _, t := range tokens { - if isPotentialDomain(t) { - domains[t] = struct{}{} + if cspValues, ok := r.Headers[cspHeader]; ok { + for _, cspValue := range cspValues { + parsePotentialDomains(domains, cspValue) + } + } + } + + // extract from body + if len(r.Data) > 0 { + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Data)) + if err == nil { + doc.Find("meta").Each(func(i int, s *goquery.Selection) { + if _, ok := s.Attr("http-equiv"); ok { + if content, ok := s.Attr("content"); ok { + parsePotentialDomains(domains, content) } } - } + }) } } @@ -46,6 +54,17 @@ func (h *HTTPX) CSPGrab(r *http.Response) *CSPData { return nil } +func parsePotentialDomains(domains map[string]struct{}, data string) { + // rule is like aa bb domain1 domain2 domain3 + tokens := stringsutil.SplitAny(data, " ", ";", ",") + // we extracts only potential domains + for _, t := range tokens { + if isPotentialDomain(t) { + domains[t] = struct{}{} + } + } +} + func isPotentialDomain(s string) bool { return strings.Contains(s, ".") || strings.HasPrefix(s, "http") } diff --git a/common/httpx/httpx.go b/common/httpx/httpx.go index a0cc15d..d2b159a 100644 --- a/common/httpx/httpx.go +++ b/common/httpx/httpx.go @@ -230,7 +230,7 @@ get_response: resp.TLSData = h.TLSGrab(httpresp) } - resp.CSPData = h.CSPGrab(httpresp) + resp.CSPData = h.CSPGrab(&resp) // build the redirect flow by reverse cycling the response<-request chain if !h.Options.Unsafe { diff --git a/go.mod b/go.mod index c690ae4..f45e6e8 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/projectdiscovery/rawhttp v0.0.8-0.20210814181734-56cca67b6e7e github.com/projectdiscovery/retryablehttp-go v1.0.2 github.com/projectdiscovery/sliceutil v0.0.0-20210804143453-61f3e7fd43ea - github.com/projectdiscovery/stringsutil v0.0.0-20210830151154-f567170afdd9 + github.com/projectdiscovery/stringsutil v0.0.0-20220208075244-7c05502ca8e9 github.com/projectdiscovery/urlutil v0.0.0-20210805190935-3d83726391c1 github.com/projectdiscovery/wappalyzergo v0.0.33 github.com/remeh/sizedwaitgroup v1.0.0 @@ -48,7 +48,9 @@ require github.com/spaolacci/murmur3 v1.1.0 require github.com/mfonda/simhash v0.0.0-20151007195837-79f94a1100d6 require ( + github.com/PuerkitoBio/goquery v1.8.0 // indirect github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08 // indirect github.com/dimchansky/utfbom v1.1.1 // indirect diff --git a/go.sum b/go.sum index 209ce80..4d760cb 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,15 @@ github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic= github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA= +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= github.com/akrylysov/pogreb v0.10.0/go.mod h1:pNs6QmpQ1UlTJKDezuRWmaqkgUE2TuU0YTWyqJZ7+lI= github.com/akrylysov/pogreb v0.10.1 h1:FqlR8VR7uCbJdfUob916tPM+idpKgeESDXOA1K0DK4w= github.com/akrylysov/pogreb v0.10.1/go.mod h1:pNs6QmpQ1UlTJKDezuRWmaqkgUE2TuU0YTWyqJZ7+lI= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw= @@ -163,6 +167,8 @@ github.com/projectdiscovery/stringsutil v0.0.0-20210804142656-fd3c28dbaafe/go.mo github.com/projectdiscovery/stringsutil v0.0.0-20210823090203-2f5f137e8e1d/go.mod h1:oTRc18WBv9t6BpaN9XBY+QmG28PUpsyDzRht56Qf49I= github.com/projectdiscovery/stringsutil v0.0.0-20210830151154-f567170afdd9 h1:xbL1/7h0k6HE3RzPdYk9W/8pUxESrGWewTaZdIB5Pes= github.com/projectdiscovery/stringsutil v0.0.0-20210830151154-f567170afdd9/go.mod h1:oTRc18WBv9t6BpaN9XBY+QmG28PUpsyDzRht56Qf49I= +github.com/projectdiscovery/stringsutil v0.0.0-20220208075244-7c05502ca8e9 h1:4fvUw6b4sS4GoWbHr60mJo3dI//4mGt3BuLx8Sz9aNw= +github.com/projectdiscovery/stringsutil v0.0.0-20220208075244-7c05502ca8e9/go.mod h1:oTRc18WBv9t6BpaN9XBY+QmG28PUpsyDzRht56Qf49I= github.com/projectdiscovery/urlutil v0.0.0-20210805190935-3d83726391c1 h1:9dYmONRtwy+xP8UAGHxEQ0cxO3umc9qiFmnYsoDUps4= github.com/projectdiscovery/urlutil v0.0.0-20210805190935-3d83726391c1/go.mod h1:oXLErqOpqEAp/ueQlknysFxHO3CUNoSiDNnkiHG+Jpo= github.com/projectdiscovery/wappalyzergo v0.0.33 h1:vlKAil++KSdJ0vkX7/3Xiom0R6L1aeoYDbFITVcYCZs=