mirror of
https://github.com/projectdiscovery/httpx.git
synced 2024-12-01 12:13:00 +03:00
Adding support for euc-kr charset
This commit is contained in:
parent
24c20f2f45
commit
eb3787518f
@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/transform"
|
||||
@ -43,3 +44,8 @@ func Encodebig5(s []byte) ([]byte, error) {
|
||||
}
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func DecodeKorean(s []byte) ([]byte, error) {
|
||||
koreanDecoder := korean.EUCKR.NewDecoder()
|
||||
return koreanDecoder.Bytes(s)
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/projectdiscovery/stringsutil"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@ -39,16 +40,21 @@ func ExtractTitle(r *Response) (title string) {
|
||||
|
||||
// Non UTF-8
|
||||
if contentTypes, ok := r.Headers["Content-Type"]; ok {
|
||||
contentType := strings.Join(contentTypes, ";")
|
||||
contentType := strings.ToLower(strings.Join(contentTypes, ";"))
|
||||
|
||||
// special cases
|
||||
if strings.Contains(strings.ToLower(contentType), "charset=gb2312") ||
|
||||
strings.Contains(strings.ToLower(contentType), "charset=gbk") {
|
||||
switch {
|
||||
case stringsutil.ContainsAny(contentType, "charset=gb2312", "charset=gbk"):
|
||||
titleUtf8, err := Decodegbk([]byte(title))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return string(titleUtf8)
|
||||
case stringsutil.ContainsAny(contentType, "euc-kr"):
|
||||
titleUtf8, err := DecodeKorean([]byte(title))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return string(titleUtf8)
|
||||
}
|
||||
|
||||
@ -63,12 +69,12 @@ func ExtractTitle(r *Response) (title string) {
|
||||
}
|
||||
mcontentType = strings.ToLower(mcontentType)
|
||||
}
|
||||
if strings.Contains(mcontentType, "gb2312") || strings.Contains(mcontentType, "gbk") {
|
||||
switch {
|
||||
case stringsutil.ContainsAny(mcontentType, "gb2312", "gbk"):
|
||||
titleUtf8, err := Decodegbk([]byte(title))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return string(titleUtf8)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user