Adding CSP parsing from HTML meta

This commit is contained in:
mzack 2022-03-22 16:47:55 +01:00
parent 91493fb59b
commit ffb2079dfb
4 changed files with 42 additions and 15 deletions

View File

@ -1,10 +1,12 @@
package httpx
import (
"net/http"
"bytes"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/projectdiscovery/httpx/common/slice"
"github.com/projectdiscovery/stringsutil"
)
// CSPHeaders is an incomplete list of most common CSP headers
@ -21,22 +23,28 @@ type CSPData struct {
}
// CSPGrab fills the CSPData
func (h *HTTPX) CSPGrab(r *http.Response) *CSPData {
func (h *HTTPX) CSPGrab(r *Response) *CSPData {
domains := make(map[string]struct{})
// extract from headers
for _, cspHeader := range CSPHeaders {
cspRaw := r.Header.Get(cspHeader)
if cspRaw != "" {
rules := strings.Split(cspRaw, ";")
for _, rule := range rules {
// rule is like aa bb domain1 domain2 domain3
tokens := strings.Split(rule, " ")
// we extract only potential domains
for _, t := range tokens {
if isPotentialDomain(t) {
domains[t] = struct{}{}
if cspValues, ok := r.Headers[cspHeader]; ok {
for _, cspValue := range cspValues {
parsePotentialDomains(domains, cspValue)
}
}
}
// extract from body
if len(r.Data) > 0 {
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Data))
if err == nil {
doc.Find("meta").Each(func(i int, s *goquery.Selection) {
if _, ok := s.Attr("http-equiv"); ok {
if content, ok := s.Attr("content"); ok {
parsePotentialDomains(domains, content)
}
}
}
})
}
}
@ -46,6 +54,17 @@ func (h *HTTPX) CSPGrab(r *http.Response) *CSPData {
return nil
}
// parsePotentialDomains splits data with stringsutil.SplitAny on the
// separators " ", ";" and ",", then stores every resulting token accepted by
// isPotentialDomain as a key of domains. The map is modified in place and
// nothing is returned; tokens rejected by isPotentialDomain are ignored.
func parsePotentialDomains(domains map[string]struct{}, data string) {
	// A CSP value looks like: directive source1 source2; directive source3 ...
	for _, candidate := range stringsutil.SplitAny(data, " ", ";", ",") {
		if !isPotentialDomain(candidate) {
			continue
		}
		// The map is used as a set, so repeated candidates collapse into one key.
		domains[candidate] = struct{}{}
	}
}
// isPotentialDomain reports whether s is worth keeping as a candidate domain
// or URL: it returns true when s starts with "http" or contains a "."
// anywhere, and false otherwise (including for the empty string).
func isPotentialDomain(s string) bool {
	if strings.HasPrefix(s, "http") {
		return true
	}
	return strings.Contains(s, ".")
}

View File

@ -230,7 +230,7 @@ get_response:
resp.TLSData = h.TLSGrab(httpresp)
}
resp.CSPData = h.CSPGrab(httpresp)
resp.CSPData = h.CSPGrab(&resp)
// build the redirect flow by reverse cycling the response<-request chain
if !h.Options.Unsafe {

4
go.mod
View File

@ -29,7 +29,7 @@ require (
github.com/projectdiscovery/rawhttp v0.0.8-0.20210814181734-56cca67b6e7e
github.com/projectdiscovery/retryablehttp-go v1.0.2
github.com/projectdiscovery/sliceutil v0.0.0-20210804143453-61f3e7fd43ea
github.com/projectdiscovery/stringsutil v0.0.0-20210830151154-f567170afdd9
github.com/projectdiscovery/stringsutil v0.0.0-20220208075244-7c05502ca8e9
github.com/projectdiscovery/urlutil v0.0.0-20210805190935-3d83726391c1
github.com/projectdiscovery/wappalyzergo v0.0.33
github.com/remeh/sizedwaitgroup v1.0.0
@ -48,7 +48,9 @@ require github.com/spaolacci/murmur3 v1.1.0
require github.com/mfonda/simhash v0.0.0-20151007195837-79f94a1100d6
require (
github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08 // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect

6
go.sum
View File

@ -1,11 +1,15 @@
github.com/Masterminds/glide v0.13.2/go.mod h1:STyF5vcenH/rUqTEv+/hBXlSTo7KYwg2oc2f4tzPWic=
github.com/Masterminds/semver v1.4.2/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=
github.com/Masterminds/vcs v1.13.0/go.mod h1:N09YCmOQr6RLxC6UNHzuVwAdodYbbnycGHSmwVJjcKA=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/akrylysov/pogreb v0.10.0/go.mod h1:pNs6QmpQ1UlTJKDezuRWmaqkgUE2TuU0YTWyqJZ7+lI=
github.com/akrylysov/pogreb v0.10.1 h1:FqlR8VR7uCbJdfUob916tPM+idpKgeESDXOA1K0DK4w=
github.com/akrylysov/pogreb v0.10.1/go.mod h1:pNs6QmpQ1UlTJKDezuRWmaqkgUE2TuU0YTWyqJZ7+lI=
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI=
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
@ -163,6 +167,8 @@ github.com/projectdiscovery/stringsutil v0.0.0-20210804142656-fd3c28dbaafe/go.mo
github.com/projectdiscovery/stringsutil v0.0.0-20210823090203-2f5f137e8e1d/go.mod h1:oTRc18WBv9t6BpaN9XBY+QmG28PUpsyDzRht56Qf49I=
github.com/projectdiscovery/stringsutil v0.0.0-20210830151154-f567170afdd9 h1:xbL1/7h0k6HE3RzPdYk9W/8pUxESrGWewTaZdIB5Pes=
github.com/projectdiscovery/stringsutil v0.0.0-20210830151154-f567170afdd9/go.mod h1:oTRc18WBv9t6BpaN9XBY+QmG28PUpsyDzRht56Qf49I=
github.com/projectdiscovery/stringsutil v0.0.0-20220208075244-7c05502ca8e9 h1:4fvUw6b4sS4GoWbHr60mJo3dI//4mGt3BuLx8Sz9aNw=
github.com/projectdiscovery/stringsutil v0.0.0-20220208075244-7c05502ca8e9/go.mod h1:oTRc18WBv9t6BpaN9XBY+QmG28PUpsyDzRht56Qf49I=
github.com/projectdiscovery/urlutil v0.0.0-20210805190935-3d83726391c1 h1:9dYmONRtwy+xP8UAGHxEQ0cxO3umc9qiFmnYsoDUps4=
github.com/projectdiscovery/urlutil v0.0.0-20210805190935-3d83726391c1/go.mod h1:oXLErqOpqEAp/ueQlknysFxHO3CUNoSiDNnkiHG+Jpo=
github.com/projectdiscovery/wappalyzergo v0.0.33 h1:vlKAil++KSdJ0vkX7/3Xiom0R6L1aeoYDbFITVcYCZs=