feat: added enhancements to favicon + made API public (#1774)

* more additions and enhancements to httpx

* feat: added enhancements to favicon + made API public

* update deps

* misc additions

* fix hasChain with 1 status code not working

* Revert "fix hasChain with 1 status code not working"

This reverts commit fabadcddbc.

---------

Co-authored-by: Sandeep Singh <sandeep@projectdiscovery.io>
This commit is contained in:
Ice3man 2024-06-24 19:07:26 +05:30 committed by GitHub
parent d2fcc0fd9c
commit 3b5554af36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 82 additions and 37 deletions

View File

@ -6,16 +6,16 @@ import (
) )
// CdnCheck verifies if the given ip is part of Cdn/WAF ranges // CdnCheck verifies if the given ip is part of Cdn/WAF ranges
func (h *HTTPX) CdnCheck(ip string) (bool, string, error) { func (h *HTTPX) CdnCheck(ip string) (bool, string, string, error) {
if h.cdn == nil { if h.cdn == nil {
return false, "", fmt.Errorf("cdn client not configured") return false, "", "", fmt.Errorf("cdn client not configured")
} }
// the goal is to check if ip is part of cdn/waf to decide if target should be scanned or not // the goal is to check if ip is part of cdn/waf to decide if target should be scanned or not
// since 'cloud' itemtype does not fit logic here , we consider target is not part of cdn/waf // since 'cloud' itemtype does not fit logic here , we consider target is not part of cdn/waf
matched, value, itemType, err := h.cdn.Check(net.ParseIP((ip))) matched, value, itemType, err := h.cdn.Check(net.ParseIP((ip)))
if itemType == "cloud" { if itemType == "cloud" {
return false, "", err return false, value, itemType, err
} }
return matched, value, err return matched, value, itemType, err
} }

View File

@ -194,8 +194,13 @@ func New(options *Options) (*HTTPX, error) {
httpx.htmlPolicy = bluemonday.NewPolicy() httpx.htmlPolicy = bluemonday.NewPolicy()
httpx.CustomHeaders = httpx.Options.CustomHeaders httpx.CustomHeaders = httpx.Options.CustomHeaders
if options.CdnCheck != "false" || options.ExcludeCdn {
httpx.cdn = cdncheck.New() if options.CDNCheckClient != nil {
httpx.cdn = options.CDNCheckClient
} else {
if options.CdnCheck != "false" || options.ExcludeCdn {
httpx.cdn = cdncheck.New()
}
} }
return httpx, nil return httpx, nil

View File

@ -5,6 +5,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/projectdiscovery/cdncheck"
"github.com/projectdiscovery/networkpolicy" "github.com/projectdiscovery/networkpolicy"
) )
@ -46,6 +47,7 @@ type Options struct {
SniName string SniName string
TlsImpersonate bool TlsImpersonate bool
NetworkPolicy *networkpolicy.NetworkPolicy NetworkPolicy *networkpolicy.NetworkPolicy
CDNCheckClient *cdncheck.Client
Protocol Proto Protocol Proto
} }

View File

@ -310,8 +310,9 @@ type Options struct {
Protocol string Protocol string
// Optional pre-created objects to reduce allocations // Optional pre-created objects to reduce allocations
Wappalyzer *wappalyzer.Wappalyze Wappalyzer *wappalyzer.Wappalyze
Networkpolicy *networkpolicy.NetworkPolicy Networkpolicy *networkpolicy.NetworkPolicy
CDNCheckClient *cdncheck.Client
} }
// ParseOptions parses the command line options for application // ParseOptions parses the command line options for application

View File

@ -89,6 +89,10 @@ type Runner struct {
httpApiEndpoint *Server httpApiEndpoint *Server
} }
func (r *Runner) HTTPX() *httpx.HTTPX {
return r.hp
}
// picked by trial and error, but it seems close to the one used in https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html#c1992 // picked by trial and error, but it seems close to the one used in https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html#c1992
var hammingDistanceThreshold int = 22 var hammingDistanceThreshold int = 22
@ -133,6 +137,7 @@ func New(options *Options) (*Runner, error) {
return nil, err return nil, err
} }
httpxOptions.NetworkPolicy = np httpxOptions.NetworkPolicy = np
httpxOptions.CDNCheckClient = options.CDNCheckClient
// Enables automatically tlsgrab if tlsprobe is requested // Enables automatically tlsgrab if tlsprobe is requested
httpxOptions.TLSGrab = options.TLSGrab || options.TLSProbe httpxOptions.TLSGrab = options.TLSGrab || options.TLSProbe
@ -1895,7 +1900,7 @@ retry:
builder.WriteString(fmt.Sprintf(" [%s]", cnames[0])) builder.WriteString(fmt.Sprintf(" [%s]", cnames[0]))
} }
isCDN, cdnName, err := hp.CdnCheck(ip) isCDN, cdnName, cdnType, err := hp.CdnCheck(ip)
if scanopts.OutputCDN == "true" && isCDN && err == nil { if scanopts.OutputCDN == "true" && isCDN && err == nil {
builder.WriteString(fmt.Sprintf(" [%s]", cdnName)) builder.WriteString(fmt.Sprintf(" [%s]", cdnName))
} }
@ -1943,10 +1948,11 @@ retry:
builder.WriteRune(']') builder.WriteRune(']')
} }
var faviconMMH3, faviconPath string var faviconMMH3, faviconPath, faviconURL string
var faviconData []byte
if scanopts.Favicon { if scanopts.Favicon {
var err error var err error
faviconMMH3, faviconPath, err = r.handleFaviconHash(hp, req, resp) faviconMMH3, faviconPath, faviconData, faviconURL, err = r.HandleFaviconHash(hp, req, resp.Data, true)
if err == nil { if err == nil {
builder.WriteString(" [") builder.WriteString(" [")
if !scanopts.OutputWithNoColor { if !scanopts.OutputWithNoColor {
@ -2188,11 +2194,13 @@ retry:
CNAMEs: cnames, CNAMEs: cnames,
CDN: isCDN, CDN: isCDN,
CDNName: cdnName, CDNName: cdnName,
CDNType: cdnType,
ResponseTime: resp.Duration.String(), ResponseTime: resp.Duration.String(),
Technologies: technologies, Technologies: technologies,
FinalURL: finalURL, FinalURL: finalURL,
FavIconMMH3: faviconMMH3, FavIconMMH3: faviconMMH3,
FaviconPath: faviconPath, FaviconPath: faviconPath,
FaviconURL: faviconURL,
Hashes: hashesMap, Hashes: hashesMap,
Extracts: extractResult, Extracts: extractResult,
Jarm: jarmhash, Jarm: jarmhash,
@ -2210,6 +2218,7 @@ retry:
Resolvers: resolvers, Resolvers: resolvers,
RequestRaw: requestDump, RequestRaw: requestDump,
Response: resp, Response: resp,
FaviconData: faviconData,
} }
if resp.BodyDomains != nil { if resp.BodyDomains != nil {
result.Fqdns = resp.BodyDomains.Fqdns result.Fqdns = resp.BodyDomains.Fqdns
@ -2248,11 +2257,11 @@ func calculatePerceptionHash(screenshotBytes []byte) (uint64, error) {
return pHash.GetHash(), nil return pHash.GetHash(), nil
} }
func (r *Runner) handleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request, currentResp *httpx.Response) (string, string, error) { func (r *Runner) HandleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request, currentResp []byte, defaultProbe bool) (string, string, []byte, string, error) {
// Check if current URI is ending with .ico => use current body without additional requests // Check if current URI is ending with .ico => use current body without additional requests
if path.Ext(req.URL.Path) == ".ico" { if path.Ext(req.URL.Path) == ".ico" {
hash, err := r.calculateFaviconHashWithRaw(currentResp.Data) hash, err := r.calculateFaviconHashWithRaw(currentResp)
return hash, req.URL.Path, err return hash, req.URL.Path, currentResp, "", err
} }
// search in the response of the requested path for element and rel shortcut/mask/apple-touch icon // search in the response of the requested path for element and rel shortcut/mask/apple-touch icon
@ -2260,36 +2269,57 @@ func (r *Runner) handleFaviconHash(hp *httpx.HTTPX, req *retryablehttp.Request,
// if not, any of link from other icons can be requested // if not, any of link from other icons can be requested
potentialURLs, err := extractPotentialFavIconsURLs(currentResp) potentialURLs, err := extractPotentialFavIconsURLs(currentResp)
if err != nil { if err != nil {
return "", "", err return "", "", nil, "", err
} }
faviconPath := "/favicon.ico" clone := req.Clone(context.Background())
// pick the first - we want only one request
if len(potentialURLs) > 0 { var faviconHash, faviconPath, faviconURL string
URL, err := r.parseURL(potentialURLs[0]) var faviconData []byte
errCount := 0
if len(potentialURLs) == 0 && defaultProbe {
potentialURLs = append(potentialURLs, "/favicon.ico")
}
// We only want up to two favicon requests; if the
// first one fails, we will try the second one
for _, potentialURL := range potentialURLs {
if errCount == 2 {
break
}
URL, err := r.parseURL(potentialURL)
if err != nil { if err != nil {
return "", "", err continue
} }
if URL.IsAbs() { if URL.IsAbs() {
req.SetURL(URL) clone.SetURL(URL)
req.Host = URL.Host clone.Host = URL.Host
faviconPath = "" potentialURL = ""
} else { } else {
faviconPath = URL.String() potentialURL = URL.String()
} }
}
if faviconPath != "" { if potentialURL != "" {
err = req.URL.MergePath(faviconPath, false) err = clone.MergePath(potentialURL, false)
if err != nil {
continue
}
}
resp, err := hp.Do(clone, httpx.UnsafeOptions{})
if err != nil { if err != nil {
return "", "", errorutil.NewWithTag("favicon", "failed to add %v to url got %v", faviconPath, err) errCount++
continue
} }
hash, err := r.calculateFaviconHashWithRaw(resp.Data)
if err != nil {
continue
}
faviconURL = clone.URL.String()
faviconPath = potentialURL
faviconHash = hash
faviconData = resp.Data
break
} }
resp, err := hp.Do(req, httpx.UnsafeOptions{}) return faviconHash, faviconPath, faviconData, faviconURL, nil
if err != nil {
return "", "", errors.Wrap(err, "could not fetch favicon")
}
hash, err := r.calculateFaviconHashWithRaw(resp.Data)
return hash, req.URL.Path, err
} }
func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, error) { func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, error) {
@ -2300,9 +2330,9 @@ func (r *Runner) calculateFaviconHashWithRaw(data []byte) (string, error) {
return fmt.Sprintf("%d", hashNum), nil return fmt.Sprintf("%d", hashNum), nil
} }
func extractPotentialFavIconsURLs(resp *httpx.Response) ([]string, error) { func extractPotentialFavIconsURLs(resp []byte) ([]string, error) {
var potentialURLs []string var potentialURLs []string
document, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data)) document, err := goquery.NewDocumentFromReader(bytes.NewReader(resp))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -2314,6 +2344,10 @@ func extractPotentialFavIconsURLs(resp *httpx.Response) ([]string, error) {
potentialURLs = append(potentialURLs, href) potentialURLs = append(potentialURLs, href)
} }
}) })
// Sort and prefer icon with .ico extension
sort.Slice(potentialURLs, func(i, j int) bool {
return !strings.HasSuffix(potentialURLs[i], ".ico")
})
return potentialURLs, nil return potentialURLs, nil
} }
@ -2412,7 +2446,7 @@ func (r *Runner) skipCDNPort(host string, port string) bool {
// pick the first ip as target // pick the first ip as target
hostIP := dnsData.A[0] hostIP := dnsData.A[0]
isCdnIP, _, err := r.hp.CdnCheck(hostIP) isCdnIP, _, _, err := r.hp.CdnCheck(hostIP)
if err != nil { if err != nil {
return false return false
} }

View File

@ -40,6 +40,7 @@ type Result struct {
Hashes map[string]interface{} `json:"hash,omitempty" csv:"hash"` Hashes map[string]interface{} `json:"hash,omitempty" csv:"hash"`
ExtractRegex []string `json:"extract_regex,omitempty" csv:"extract_regex"` ExtractRegex []string `json:"extract_regex,omitempty" csv:"extract_regex"`
CDNName string `json:"cdn_name,omitempty" csv:"cdn_name"` CDNName string `json:"cdn_name,omitempty" csv:"cdn_name"`
CDNType string `json:"cdn_type,omitempty" csv:"cdn_type"`
SNI string `json:"sni,omitempty" csv:"sni"` SNI string `json:"sni,omitempty" csv:"sni"`
Port string `json:"port,omitempty" csv:"port"` Port string `json:"port,omitempty" csv:"port"`
Raw string `json:"-" csv:"-"` Raw string `json:"-" csv:"-"`
@ -59,6 +60,7 @@ type Result struct {
Path string `json:"path,omitempty" csv:"path"` Path string `json:"path,omitempty" csv:"path"`
FavIconMMH3 string `json:"favicon,omitempty" csv:"favicon"` FavIconMMH3 string `json:"favicon,omitempty" csv:"favicon"`
FaviconPath string `json:"favicon_path,omitempty" csv:"favicon_path"` FaviconPath string `json:"favicon_path,omitempty" csv:"favicon_path"`
FaviconURL string `json:"favicon_url,omitempty" csv:"favicon_url"`
FinalURL string `json:"final_url,omitempty" csv:"final_url"` FinalURL string `json:"final_url,omitempty" csv:"final_url"`
ResponseHeaders map[string]interface{} `json:"header,omitempty" csv:"header"` ResponseHeaders map[string]interface{} `json:"header,omitempty" csv:"header"`
RawHeaders string `json:"raw_header,omitempty" csv:"raw_header"` RawHeaders string `json:"raw_header,omitempty" csv:"raw_header"`
@ -96,6 +98,7 @@ type Result struct {
TechnologyDetails map[string]wappalyzer.AppInfo `json:"-" csv:"-"` TechnologyDetails map[string]wappalyzer.AppInfo `json:"-" csv:"-"`
RequestRaw []byte `json:"-" csv:"-"` RequestRaw []byte `json:"-" csv:"-"`
Response *httpx.Response `json:"-" csv:"-"` Response *httpx.Response `json:"-" csv:"-"`
FaviconData []byte `json:"-" csv:"-"`
} }
// function to get dsl variables from result struct // function to get dsl variables from result struct