add error page classifier

This commit is contained in:
Doğan Can Bakır 2023-06-20 14:13:48 +03:00
parent c11993908b
commit 900f5623e8
12 changed files with 723 additions and 10 deletions

View File

@ -131,13 +131,14 @@ EXTRACTOR:
FILTERS:
-fc, -filter-code string filter response with specified status code (-fc 403,401)
-fep, -filter-error-page filter response with ML based error page detection
-fl, -filter-length string filter response with specified content length (-fl 23,33)
-flc, -filter-line-count string filter response body with specified line count (-flc 423,532)
-fwc, -filter-word-count string filter response body with specified word count (-fwc 423,532)
-ffc, -filter-favicon string[] filter response with specified favicon hash (-ffc 1494302000)
-fs, -filter-string string filter response with specified string (-fs admin)
-fe, -filter-regex string filter response with specified regex (-fe admin)
-fcdn, -filter-cdn string[] filter host with specified cdn provider (incapsula, oracle, google, azure, cloudflare, cloudfront, fastly, akamai, sucuri, leaseweb)
-fcdn, -filter-cdn string[] filter host with specified cdn provider (google, leaseweb, stackpath, cloudfront, fastly)
-frt, -filter-response-time string filter response with specified response time in seconds (-frt '> 1')
-fdc, -filter-condition string filter response with dsl expression condition

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -0,0 +1,105 @@
package errorpageclassifier
import "fmt"
// ConfusionMatrix tallies how often each actual label was predicted as each
// label. matrix[i][j] counts the samples whose actual label is labels[i] and
// whose predicted label is labels[j].
type ConfusionMatrix struct {
	matrix [][]int
	labels []string
}

// NewConfusionMatrix builds a confusion matrix over labels from the paired
// actual and predicted slices (actual[i] is compared with predicted[i]).
//
// A pair in which either label is absent from labels is skipped instead of
// being silently counted under labels[0]. Only as many pairs as the shorter of
// the two slices holds are read, so mismatched lengths do not panic.
func NewConfusionMatrix(actual, predicted []string, labels []string) *ConfusionMatrix {
	n := len(labels)
	matrix := make([][]int, n)
	for i := range matrix {
		matrix[i] = make([]int, n)
	}

	labelIndices := make(map[string]int, n)
	for i, label := range labels {
		labelIndices[label] = i
	}

	pairs := len(actual)
	if len(predicted) < pairs {
		pairs = len(predicted)
	}
	for i := 0; i < pairs; i++ {
		row, knownActual := labelIndices[actual[i]]
		col, knownPredicted := labelIndices[predicted[i]]
		if !knownActual || !knownPredicted {
			// A missing key would otherwise yield index 0 and inflate labels[0].
			continue
		}
		matrix[row][col]++
	}

	return &ConfusionMatrix{
		matrix: matrix,
		labels: labels,
	}
}
// PrintConfusionMatrix writes the matrix to standard output as a table of
// left-aligned, 15-character-wide cells. Rows correspond to actual labels and
// columns to predicted labels.
func (cm *ConfusionMatrix) PrintConfusionMatrix() {
	fmt.Printf("%30s\n", "Confusion Matrix")
	fmt.Println()

	// Header row: an empty corner cell followed by one column title per label.
	fmt.Printf("%-15s", "")
	for col := range cm.labels {
		fmt.Printf("%-15s", cm.labels[col])
	}
	fmt.Println()

	// Body: each row starts with its label, then the counts for that row.
	for r := range cm.matrix {
		fmt.Printf("%-15s", cm.labels[r])
		for c := range cm.matrix[r] {
			fmt.Printf("%-15d", cm.matrix[r][c])
		}
		fmt.Println()
	}
	fmt.Println()
}
// PrintClassificationReport writes per-label precision, recall, F1 score and
// support to standard output, followed by overall accuracy, the macro average
// (unweighted mean over labels) and the weighted average (mean over labels
// weighted by each label's support).
//
// For a label i: true positives are matrix[i][i], false positives are the
// other entries of column i, and false negatives are the other entries of
// row i. Any ratio whose denominator is zero is reported as 0 rather than NaN.
func (cm *ConfusionMatrix) PrintClassificationReport() {
	// ratio is a division that yields 0 for a zero denominator, so labels with
	// no predictions or no samples do not poison the averages with NaN.
	ratio := func(num, den float64) float64 {
		if den == 0 {
			return 0
		}
		return num / den
	}

	fmt.Printf("%30s\n", "Classification Report")
	fmt.Println()

	fmt.Printf("\n%-15s %-10s %-10s %-10s %-10s\n", "", "precision", "recall", "f1-score", "support")

	var (
		totalSupport, totalCorrect                    int
		macroPrecision, macroRecall, macroF1          float64
		weightedPrecision, weightedRecall, weightedF1 float64
	)

	for i, label := range cm.labels {
		truePos := cm.matrix[i][i]
		falsePos, falseNeg := 0, 0
		for j := range cm.labels {
			if i != j {
				falsePos += cm.matrix[j][i]
				falseNeg += cm.matrix[i][j]
			}
		}

		precision := ratio(float64(truePos), float64(truePos+falsePos))
		recall := ratio(float64(truePos), float64(truePos+falseNeg))
		f1Score := ratio(2*precision*recall, precision+recall)
		support := truePos + falseNeg

		fmt.Printf("%-15s %-10.2f %-10.2f %-10.2f %-10d\n", label, precision, recall, f1Score, support)

		totalSupport += support
		totalCorrect += truePos

		macroPrecision += precision
		macroRecall += recall
		macroF1 += f1Score

		// Weighted averages scale each label's metric by its share of samples.
		weight := float64(support)
		weightedPrecision += precision * weight
		weightedRecall += recall * weight
		weightedF1 += f1Score * weight
	}

	labelCount := float64(len(cm.labels))
	total := float64(totalSupport)

	accuracy := ratio(float64(totalCorrect), total)
	fmt.Printf("\n%-26s %-10s %-10.2f %-10d", "accuracy", "", accuracy, totalSupport)

	fmt.Printf("\n%-15s %-10.2f %-10.2f %-10.2f %-10d\n", "macro avg",
		ratio(macroPrecision, labelCount),
		ratio(macroRecall, labelCount),
		ratio(macroF1, labelCount),
		totalSupport)

	fmt.Printf("%-15s %-10.2f %-10.2f %-10.2f %-10d\n", "weighted avg",
		ratio(weightedPrecision, total),
		ratio(weightedRecall, total),
		ratio(weightedF1, total),
		totalSupport)

	fmt.Println()
}

View File

@ -0,0 +1,201 @@
The Forum page seems to have a glitch. Our technicians are on it.||error
There was a problem with the Product Details page. Try reloading.||error
Error 500: The E-books page is experiencing a problem.||error
Unfortunately, the Video Tutorials page is down for maintenance.||error
Our Archive page is currently unavailable. We apologize for the inconvenience.||error
We're having trouble loading the Membership Details page.||error
An error occurred while trying to access the Profile Settings page.||error
Error 404: The Team page could not be found.||error
Our Project Highlights page seems to be having some technical issues.||error
We're sorry, but we can't seem to find the Donations page.||error
You've landed on our Forum page. Engage in interesting discussions.||nonerror
Welcome to the Product Details page. Learn more about our products here.||nonerror
You are now on our E-books page. Enjoy a wealth of knowledge.||nonerror
This is the Video Tutorials page. Learn with our easy-to-follow videos.||nonerror
Welcome to our Archive. Dive into our rich history.||nonerror
You're now on the Membership Details page. See the benefits of joining us.||nonerror
This is your Profile Settings page. Update your personal details as needed.||nonerror
You're on the Team page. Meet the people behind our organization.||nonerror
Welcome to our Project Highlights page. See what we've been up to.||nonerror
You've landed on the Donations page. Every contribution helps us do more.||nonerror
500 - Server Error This is highly unusual! Our tech team have been notified and are working on it.||error
Sorry this page is currently under maintenance.||error
Access Denied - You don't have permission to access this page.||error
This page seems to be missing 404 Error!||error
Sorry something went wrong. Please try again later.||error
We're sorry this page could not be found 404.||error
The page you requested could not be found on our site.||error
500 - Internal server error. There is a problem with the resource you are looking for and it cannot be displayed.||error
Error 401 Unauthorized: Access is denied due to invalid credentials.||error
Bad request 400. Your browser sent a request that this server could not understand.||error
This is a 404 error page||error
Sorry this page does not exist||error
Error 500: Internal Server Error||error
Oops! That page cant be found.Try searching from the field above or go to the home page.||error
An error has occurred while processing your request. It happens to the best of us! Don't worry! There are no bugs without a fix! Let's try again! What were you looking for? If you are an adventurer search this site! If difficulties persist please contact the website administrator and report the error below. 404 Page not found||error
Whoops our bad... The page you requested was not found and we have a fine guess why. If you typed the URL directly please make sure the spelling is correct. If you clicked on a link to get here the link is outdated. What can you do? Have no fear help is near! There are many ways you can get back on track with Magento Store. Go back to the previous page. Use the search bar at the top of the page to search for your products. Follow these links to get you back on track! Store Home My Account||error
404 - Page not found Unfortunately the requested page could not be found.||error
PAGE NOT FOUND The page you're looking for doesn't seem to exist anymore… Return to the homepage||error
Who moved my... lemon? Oh no - looks like we can't find the page you are looking for. But you know the saying; when life gives you lemons... okay we can't find a clever way to end that sentence but we do have 2 suggestions to help you find what you were looking for: Go to the front page Or Search for a specific topic If something you need really is missing we would love it if you would let us know ❤️️||error
404—page not found||error
Apologies but there's a 503 Service Unavailable error. The server cannot handle the request.||error
Sorry you don't have access rights to this page. Error 403: Forbidden.||error
404 - Oops! The page you are looking for has been misplaced.||error
Sorry the server encountered an unexpected condition that prevented it from fulfilling the request. Error 500: Internal Server Error.||error
Whoa! The page you're looking for seems to have vanished. Error 404.||error
Sorry this page has moved or doesn't exist anymore. Error 404.||error
Sorry but your request timed out. Please try again. Error 504: Gateway Timeout.||error
We're sorry but an unknown error occurred while processing your request.||error
Error 502: Bad Gateway. The server encountered a temporary error and could not complete your request.||error
The requested resource could not be found on this server. Please verify your request and try again. Error 404.||error
This Help Center page is temporarily unavailable.||error
Privacy Policy page not found. Please try again later.||error
There seems to be an error on our Services page. We're working to fix it.||error
An error occurred while loading the Search Results page.||error
Category page not found. It might have been removed or relocated.||error
There was a problem loading the Cart page. Please try again.||error
Our Terms of Service page is currently down for maintenance.||error
We're sorry, but the Sitemap is not available at the moment.||error
We're having trouble loading the Reviews page.||error
An error occurred while trying to access the Partners page.||error
Settings page is currently unavailable. We apologize for the inconvenience.||error
Error 404: Resources page not found.||error
Our Press Releases page seems to be having some technical issues.||error
We're sorry, but we can't seem to find the Case Studies page.||error
There was a problem loading the Community page. Please refresh the page.||error
Error 503: The Subscriptions page is temporarily unavailable.||error
There's a problem with our Customer Support page. We're on it.||error
We're having trouble finding the Notifications page. It may have been moved.||error
There was a problem with the Feedback page. Try again later.||error
Our Transactions page is currently experiencing some issues. We appreciate your patience.||error
Your request has been successfully submitted.||nonerror
You have successfully logged out.||nonerror
Congratulations on successfully completing the course!||nonerror
The payment has been processed successfully.||nonerror
Thank you for your feedback!||nonerror
Your download will start shortly.||nonerror
Profile updated successfully.||nonerror
Thanks for contacting us! We'll get back to you as soon as possible.||nonerror
Sign-up successful. Welcome to our community!||nonerror
Your booking has been confirmed. Check your email for details.||nonerror
Welcome! Your registration was successful.||nonerror
Congratulations! You've successfully updated your profile.||nonerror
Great! Your order was placed successfully. We'll send you an email confirmation soon.||nonerror
Welcome back! Your login was successful.||nonerror
Success! You've added the item to your cart.||nonerror
Your request was sent successfully. We'll get back to you as soon as possible.||nonerror
Great job! Your settings have been saved.||nonerror
Your message has been submitted successfully. We appreciate your feedback.||nonerror
Thank you for subscribing to our newsletter!||nonerror
Great news! Your transaction was successful.||nonerror
Welcome to our homepage. Feel free to browse around||nonerror
Thanks for signing up! You're now a registered user.||nonerror
Your order has been placed successfully! You'll receive a confirmation email shortly||nonerror
Congratulations your account has been successfully created||nonerror
Thank you for your inquiry. We will respond to your message within 24 hours||nonerror
You've successfully added the item to your cart!||nonerror
Success! Your password has been updated||nonerror
Welcome back! You have successfully logged in||nonerror
Great job! Your profile has been updated||nonerror
Your message was sent successfully. We'll get back to you shortly||nonerror
Welcome to our website. Explore and enjoy our services.||nonerror
Thank you for visiting our About Us page. Learn more about our journey and team.||nonerror
You are now browsing our Products page. Check out our latest offerings.||nonerror
This is our Contact Us page. Feel free to reach out with any queries or feedback.||nonerror
You have reached the end of the page. Scroll up to continue browsing.||nonerror
Welcome to the News section. Stay updated with our latest announcements.||nonerror
Now viewing: Image Gallery. Enjoy a visual tour of our activities.||nonerror
You're on our FAQ page. Get answers to common questions.||nonerror
Welcome to the Blog section. Engage with our thoughts and insights.||nonerror
This is the Discussion Forum. Join in, ask questions, or help others.||nonerror
You're on the Login page. Enter your credentials to access your account.||nonerror
Welcome to the Sign-Up page. Join our community today.||nonerror
This is your User Dashboard. Manage your account and settings here.||nonerror
You've reached the Checkout page. Review your order and proceed to payment.||nonerror
Welcome to the Download section . Access our digital resources here.||nonerror
This is the Careers page. Explore job opportunities with us.||nonerror
You're viewing the Events Calendar. Keep track of upcoming activities.||nonerror
This is the User Profile page. Update your information as needed.||nonerror
Welcome to our Testimonials page. Read reviews and stories from our users.||nonerror
You are now on the Home page. Start exploring from here.||nonerror
Welcome to home page||nonerror
You're now on our Help Center page. Find answers to common questions here.||nonerror
Welcome to our Privacy Policy page. Learn how we protect your personal information.||nonerror
You've landed on the Services page. Explore what we have to offer.||nonerror
This is the Search Results page. Did you find what you were looking for?||nonerror
Now browsing the Category page. View all items in this category.||nonerror
You're now on the Cart page. Review your selections before proceeding to checkout.||nonerror
Welcome to our Terms of Service page. Understand our conditions for providing services.||nonerror
You are currently on our Sitemap. Navigate our website with ease.||nonerror
You are on the Reviews page. Check out what others have to say about us.||nonerror
Now viewing the Partners page. Meet the organizations we collaborate with.||nonerror
You're on the Settings page. Customize your user experience.||nonerror
This is our Resources page. Access useful documents and guides.||nonerror
You've landed on the Press Releases page. Stay updated with our latest news.||nonerror
Welcome to our Case Studies page. Discover our past projects and achievements.||nonerror
You're now on the Community page. Connect and interact with other members.||nonerror
You are currently on the Subscriptions page. Manage your preferences here.||nonerror
Now viewing the Customer Support page. We're here to help.||nonerror
This is the Notifications page. Keep track of your updates and alerts.||nonerror
You've landed on the Feedback page. Share your thoughts with us.||nonerror
Welcome to the Transactions page. Monitor your past and current transactions.||nonerror
500 - Server Error This is highly unusual! Our tech team have been notified and are working on it.||error
Sorry this page is currently under maintenance.||error
Access Denied - You don't have permission to access this page.||error
This page seems to be missing 404 Error!||error
Sorry something went wrong. Please try again later.||error
We're sorry this page could not be found 404.||error
The page you requested could not be found on our site.||error
500 - Internal server error. There is a problem with the resource you are looking for and it cannot be displayed.||error
Error 401 Unauthorized: Access is denied due to invalid credentials.||error
Bad request 400. Your browser sent a request that this server could not understand.||error
Your request has been successfully submitted.||nonerror
You have successfully logged out.||nonerror
Congratulations on successfully completing the course!||nonerror
The payment has been processed successfully.||nonerror
Thank you for your feedback!||nonerror
Your download will start shortly.||nonerror
Profile updated successfully.||nonerror
Thanks for contacting us! We'll get back to you as soon as possible.||nonerror
Sign-up successful. Welcome to our community!||nonerror
Your booking has been confirmed. Check your email for details.||nonerror
This is a 404 error page||error
Sorry this page does not exist||error
Error 500: Internal Server Error||error
Oops! That page cant be found.Try searching from the field above or go to the home page.||error
An error has occurred while processing your request. It happens to the best of us! Don't worry! There are no bugs without a fix! Let's try again! What were you looking for? If you are an adventurer search this site! If difficulties persist please contact the website administrator and report the error below. 404 Page not found||error
Whoops our bad... The page you requested was not found and we have a fine guess why. If you typed the URL directly please make sure the spelling is correct. If you clicked on a link to get here the link is outdated. What can you do? Have no fear help is near! There are many ways you can get back on track with Magento Store. Go back to the previous page. Use the search bar at the top of the page to search for your products. Follow these links to get you back on track! Store Home | My Account||error
404 - Page not found Unfortunately the requested page could not be found.||error
PAGE NOT FOUND The page you're looking for doesn't seem to exist anymore… Return to the homepage||error
Who moved my... lemon? Oh no - looks like we can't find the page you are looking for. But you know the saying; when life gives you lemons... okay we can't find a clever way to end that sentence but we do have 2 suggestions to help you find what you were looking for: Go to the front page Or Search for a specific topic If something you need really is missing we would love it if you would let us know ❤️️||error
404—page not found||error
Apologies but there's a 503 Service Unavailable error. The server cannot handle the request.||error
Sorry you don't have access rights to this page. Error 403: Forbidden.||error
404 - Oops! The page you are looking for has been misplaced.||error
Sorry the server encountered an unexpected condition that prevented it from fulfilling the request. Error 500: Internal Server Error.||error
Whoa! The page you're looking for seems to have vanished. Error 404.||error
Sorry this page has moved or doesn't exist anymore. Error 404.||error
Sorry but your request timed out. Please try again. Error 504: Gateway Timeout.||error
We're sorry but an unknown error occurred while processing your request.||error
Error 502: Bad Gateway. The server encountered a temporary error and could not complete your request.||error
The requested resource could not be found on this server. Please verify your request and try again. Error 404.||error
Welcome! Your registration was successful.||nonerror
Congratulations! You've successfully updated your profile.||nonerror
Great! Your order was placed successfully. We'll send you an email confirmation soon.||nonerror
Welcome back! Your login was successful.||nonerror
Success! You've added the item to your cart.||nonerror
Your request was sent successfully. We'll get back to you as soon as possible.||nonerror
Great job! Your settings have been saved.||nonerror
Your message has been submitted successfully. We appreciate your feedback.||nonerror
Thank you for subscribing to our newsletter!||nonerror
Great news! Your transaction was successful.||nonerror
Welcome to our homepage. Feel free to browse around||nonerror
Thanks for signing up! You're now a registered user.||nonerror
Your order has been placed successfully! You'll receive a confirmation email shortly||nonerror
Congratulations your account has been successfully created||nonerror
Thank you for your inquiry. We will respond to your message within 24 hours||nonerror
You've successfully added the item to your cart!||nonerror
Success! Your password has been updated||nonerror
Welcome back! You have successfully logged in||nonerror
Great job! Your profile has been updated||nonerror
Your message was sent successfully. We'll get back to you shortly||nonerror

View File

@ -0,0 +1,136 @@
package errorpageclassifier
import (
_ "embed"
"fmt"
"math/rand"
"strings"
"github.com/jaytaylor/html2text"
)
const (
// modelPath is the file name TrainAndSave passes to SaveClassifierToFile.
modelPath = "clf.gob"
// threshold is handed to NewClassifier when training.
// NOTE(review): its exact meaning is defined by the classifier
// implementation, which is not visible here — confirm before tuning.
threshold = 1.1
// testPercentage is the probability with which trainTestSplit assigns a
// dataset line to the test split rather than the training split.
testPercentage = 0.2
)
// categories lists the class labels TrainAndSave passes to NewClassifier.
var categories = []string{"error", "nonerror"}
// Document is one labelled sample produced by trainTestSplit from a dataset
// line of the form "text||label".
type Document struct {
// Class is the label, i.e. the part of the line after "||".
Class string
// Text is the sample text, i.e. the part of the line before "||".
Text string
}
// dataset holds the contents of dataset.txt, embedded at build time.
// trainTestSplit splits it on newlines and reads each line as "text||label".
//
//go:embed dataset.txt
var dataset string
// classifierData holds the bytes of clf.gob, embedded at build time. New
// passes them to NewClassifierFromFileData.
//
//go:embed clf.gob
var classifierData []byte
// ErrorPageClassifier wraps a Classifier and exposes HTML classification
// (Classify) and evaluation against the embedded dataset (Evaluate).
type ErrorPageClassifier struct {
// classifier is the underlying model that Classify and the evaluation
// helpers delegate to.
classifier *Classifier
}
// New returns an ErrorPageClassifier whose model is built by
// NewClassifierFromFileData from the embedded classifierData. It panics if
// that call reports an error.
func New() *ErrorPageClassifier {
	model, loadErr := NewClassifierFromFileData(classifierData)
	if loadErr != nil {
		panic(loadErr)
	}

	epc := &ErrorPageClassifier{classifier: model}
	return epc
}
// Classify converts html to plain text and returns the class the underlying
// classifier assigns to that text.
//
// It returns "other" when the HTML cannot be converted to text or when the
// conversion yields an empty string. A conversion failure is deliberately not
// a panic: this method is called on response data, and one unconvertible body
// must not bring down the caller.
func (epc *ErrorPageClassifier) Classify(html string) string {
	text, err := htmlToText(html)
	if err != nil || text == "" {
		return "other"
	}
	return epc.classifier.Classify(text)
}
// Evaluate splits the embedded dataset with trainTestSplit and prints a
// confusion matrix and classification report to standard output twice: once
// for the test split, and once for at most the first 100 documents of the
// training split.
func (epc *ErrorPageClassifier) Evaluate() {
	train, test := trainTestSplit()
	fmt.Println("no of docs in TRAIN dataset:", len(train))
	fmt.Println("no of docs in TEST dataset:", len(test))

	fmt.Println("Evaluating classifier on test set:")
	actualTest, predictedTest := epc.testClf(test)
	confusionMatrixTest := NewConfusionMatrix(actualTest, predictedTest, categories)
	confusionMatrixTest.PrintConfusionMatrix()
	confusionMatrixTest.PrintClassificationReport()

	fmt.Println("Evaluating classifier on the first 100 docs in the train set:")
	// The split is random, so the training set may hold fewer than 100 docs;
	// clamp the bound instead of slicing out of range.
	limit := 100
	if len(train) < limit {
		limit = len(train)
	}
	actualValidate, predictedValidate := epc.validateClf(train[:limit])
	confusionMatrixValidate := NewConfusionMatrix(actualValidate, predictedValidate, categories)
	confusionMatrixValidate.PrintConfusionMatrix()
	confusionMatrixValidate.PrintClassificationReport()
}
// testClf classifies the text of every document in test and returns two
// parallel slices: the documents' own labels and the labels the classifier
// predicted, in input order.
func (epc *ErrorPageClassifier) testClf(test []Document) ([]string, []string) {
	actual, predicted := []string{}, []string{}
	for i := range test {
		guess := epc.classifier.Classify(test[i].Text)
		actual = append(actual, test[i].Class)
		predicted = append(predicted, guess)
	}
	return actual, predicted
}
// validateClf classifies the text of every document in validation and returns
// two parallel slices: the documents' own labels and the predicted labels.
//
// The procedure is the same as for the test split, so it delegates to testClf
// rather than repeating the loop.
func (epc *ErrorPageClassifier) validateClf(validation []Document) ([]string, []string) {
	return epc.testClf(validation)
}
// TrainAndSave creates a classifier for categories with the configured
// threshold, trains it on the training portion returned by trainTestSplit,
// and saves it to modelPath. The sizes of both splits are printed to standard
// output. It panics if saving reports an error.
func TrainAndSave() {
	trainDocs, testDocs := trainTestSplit()
	model := NewClassifier(categories, threshold)

	fmt.Println("no of docs in TRAIN dataset:", len(trainDocs))
	fmt.Println("no of docs in TEST dataset:", len(testDocs))

	for i := range trainDocs {
		model.Train(trainDocs[i].Class, trainDocs[i].Text)
	}

	if saveErr := model.SaveClassifierToFile(modelPath); saveErr != nil {
		panic(saveErr)
	}
}
// trainTestSplit parses the embedded dataset, one "text||label" record per
// line, and randomly assigns each record to the test split with probability
// testPercentage and to the training split otherwise.
//
// Lines that do not contain the "||" separator (for example the empty string
// left by a trailing newline) are skipped instead of causing an index-out-of-
// range panic. A trailing carriage return is stripped so CRLF line endings do
// not leak into the label.
func trainTestSplit() (train, test []Document) {
	for _, line := range strings.Split(dataset, "\n") {
		line = strings.TrimSuffix(line, "\r")

		fields := strings.Split(line, "||")
		if len(fields) < 2 {
			continue
		}
		text, class := fields[0], fields[1]

		if rand.Float64() > testPercentage {
			train = append(train, Document{Class: class, Text: text})
		} else {
			test = append(test, Document{Class: class, Text: text})
		}
	}
	return train, test
}
// htmlToText renders html as plain text using html2text with the TextOnly
// option enabled. On failure it returns an empty string together with the
// error from html2text.
func htmlToText(html string) (string, error) {
	opts := html2text.Options{TextOnly: true}

	plain, convErr := html2text.FromString(html, opts)
	if convErr == nil {
		return plain, nil
	}
	return "", convErr
}

View File

@ -0,0 +1,53 @@
package errorpageclassifier
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestErrorPageClassifier checks that New returns a usable classifier and that
// Classify labels a sample non-error HTML page as "nonerror" and a sample
// error HTML page as "error". Each subtest builds its own classifier via New.
func TestErrorPageClassifier(t *testing.T) {
// Construction must yield a non-nil classifier.
t.Run("test creation of new ErrorPageClassifier", func(t *testing.T) {
epc := New()
assert.NotNil(t, epc)
})
// A plain "Terms of Service" page is expected to classify as "nonerror".
t.Run("test classification non error page text", func(t *testing.T) {
epc := New()
assert.Equal(t, "nonerror", epc.Classify(`<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Terms of Service</title>
</head>
<body>
<h1>Welcome to our Terms of Service page.</h1>
<p>Understand our conditions for providing services.</p>
</body>
</html>
`))
})
// A "403 Forbidden" page, including an inline <style> block, is expected to
// classify as "error".
t.Run("test classification on error page text", func(t *testing.T) {
epc := New()
assert.Equal(t, "error", epc.Classify(`<!DOCTYPE html>
<html>
<head>
<title>Error 403: Forbidden</title>
<style>
.error-message {
text-align: center;
color: #333;
}
</style>
</head>
<body>
<div class="error-message">
<h1>Error 403: Forbidden</h1>
<p>Sorry you don't have access rights to this page.</p>
</div>
</body>
</html>
`))
})
}

3
go.mod
View File

@ -82,9 +82,11 @@ require (
github.com/google/go-querystring v1.1.0 // indirect
github.com/gorilla/css v1.0.0 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kataras/jwt v0.1.8 // indirect
github.com/klauspost/compress v1.15.15 // indirect
github.com/kljensen/snowball v0.8.0 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
@ -111,6 +113,7 @@ require (
github.com/sashabaranov/go-openai v1.9.1 // indirect
github.com/shirou/gopsutil/v3 v3.23.5 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/syndtr/goleveldb v1.0.0 // indirect
github.com/tidwall/btree v1.6.0 // indirect
github.com/tidwall/buntdb v1.3.0 // indirect

6
go.sum
View File

@ -104,6 +104,8 @@ github.com/hbakhtiyor/strsim v0.0.0-20190107154042-4d2bbb273edf/go.mod h1:V99KdS
github.com/hdm/jarm-go v0.0.7 h1:Eq0geenHrBSYuKrdVhrBdMMzOmA+CAMLzN2WrF3eL6A=
github.com/hdm/jarm-go v0.0.7/go.mod h1:kinGoS0+Sdn1Rr54OtanET5E5n7AlD6T6CrJAKDjJSQ=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U=
@ -114,6 +116,8 @@ github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/kljensen/snowball v0.8.0 h1:WU4cExxK6sNW33AiGdbn4e8RvloHrhkAssu2mVJ11kg=
github.com/kljensen/snowball v0.8.0/go.mod h1:OGo5gFWjaeXqCu4iIrMl5OYip9XUJHGOU5eSkPjVg2A=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
@ -247,6 +251,8 @@ github.com/sirupsen/logrus v1.3.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=

View File

@ -158,6 +158,7 @@ type Options struct {
OutputMatchStatusCode string
OutputMatchContentLength string
OutputFilterStatusCode string
OutputFilterErrorPage bool
OutputFilterContentLength string
InputRawRequest string
rawRequest string
@ -334,6 +335,7 @@ func ParseOptions() *Options {
flagSet.CreateGroup("filters", "Filters",
flagSet.StringVarP(&options.OutputFilterStatusCode, "filter-code", "fc", "", "filter response with specified status code (-fc 403,401)"),
flagSet.BoolVarP(&options.OutputFilterErrorPage, "filter-error-page", "fep", false, "filter response with ML based error page detection"),
flagSet.StringVarP(&options.OutputFilterContentLength, "filter-length", "fl", "", "filter response with specified content length (-fl 23,33)"),
flagSet.StringVarP(&options.OutputFilterLinesCount, "filter-line-count", "flc", "", "filter response body with specified line count (-flc 423,532)"),
flagSet.StringVarP(&options.OutputFilterWordsCount, "filter-word-count", "fwc", "", "filter response body with specified word count (-fwc 423,532)"),

View File

@ -27,6 +27,7 @@ import (
asnmap "github.com/projectdiscovery/asnmap/libs"
dsl "github.com/projectdiscovery/dsl"
"github.com/projectdiscovery/httpx/common/customextract"
"github.com/projectdiscovery/httpx/common/errorpageclassifier"
"github.com/projectdiscovery/httpx/common/hashes/jarm"
"github.com/projectdiscovery/mapcidr/asn"
errorutil "github.com/projectdiscovery/utils/errors"
@ -75,6 +76,7 @@ type Runner struct {
ratelimiter ratelimit.Limiter
HostErrorsCache gcache.Cache[string, int]
browser *Browser
errorPageClassifier *errorpageclassifier.ErrorPageClassifier
}
// New creates a new client for running enumeration process.
@ -308,6 +310,8 @@ func New(options *Options) (*Runner, error) {
runner.HostErrorsCache = gc
}
runner.errorPageClassifier = errorpageclassifier.New()
return runner, nil
}
@ -735,6 +739,9 @@ func (r *Runner) RunEnumeration() {
}
}
if r.options.OutputFilterErrorPage && resp.KnowledgeBase["PageType"] == "error" {
continue
}
if len(r.options.filterStatusCode) > 0 && slice.IntSliceContains(r.options.filterStatusCode, resp.StatusCode) {
continue
}
@ -1753,6 +1760,9 @@ retry:
ScreenshotBytes: screenshotBytes,
ScreenshotPath: screenshotPath,
HeadlessBody: headlessBody,
KnowledgeBase: map[string]interface{}{
"PageType": r.errorPageClassifier.Classify(respData),
},
}
if r.options.OnResult != nil {
r.options.OnResult(result)

View File

@ -76,6 +76,7 @@ type Result struct {
ScreenshotBytes []byte `json:"screenshot_bytes,omitempty" csv:"screenshot_bytes"`
StoredResponsePath string `json:"stored_response_path,omitempty" csv:"stored_response_path"`
ScreenshotPath string `json:"screenshot_path,omitempty" csv:"screenshot_path"`
KnowledgeBase map[string]interface{} `json:"knowledgebase,omitempty" csv:"knowledgebase"`
}
// function to get dsl variables from result struct