Fix charset parsing logic of Content-Type header.

This commit is contained in:
jcamiel 2024-03-17 16:12:47 +01:00
parent dd6434e48d
commit b05f70855f
No known key found for this signature in database
GPG Key ID: 07FF11CFD55356CC
3 changed files with 53 additions and 6 deletions

View File

@ -11,6 +11,13 @@ Content-Type: text/html; charset=latin1
body == "<p>café</p>"
# The charset parameter name is sent in upper case (CHARSET); the body must
# still be decoded as latin1 for the assert below to match.
GET http://localhost:8000/charset/latin1/uppercase-key
HTTP 200
Content-Type: text/html; CHARSET=latin1
[Asserts]
body == "<p>café</p>"
GET http://localhost:8000/charset/gb2312
HTTP 200
Content-Type: text/html; charset=gb2312
@ -25,8 +32,15 @@ Content-Type: text/html; charset=cp1256
body == "<p>مرحبا بالعالم</p>"
GET http://localhost:8000/charset/uppercase
# The charset value is sent in upper case (UTF-8).
GET http://localhost:8000/charset/uppercase-value
HTTP 200
Content-Type: text/html; charset=UTF-8
[Asserts]
body == "<p>Hello World!</p>"
# The charset parameter sits between other Content-Type parameters
# (version before it, escaping after it).
GET http://localhost:8000/charset/many-keys
HTTP 200
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
[Asserts]
body == "<p>Hello World!</p>"

View File

@ -7,13 +7,22 @@ def charset_default():
return "<p>Hello World!</p>"
@app.route("/charset/uppercase")
def charset_uppercase():
@app.route("/charset/uppercase-value")
def charset_uppercase_value():
    """Serve the greeting with an upper-case charset value in Content-Type."""
    response = make_response("<p>Hello World!</p>")
    response.headers["Content-Type"] = "text/html; charset=UTF-8"
    return response
@app.route("/charset/many-keys")
def charset_uppercase_many_keys():
    """Serve the greeting with charset placed among other Content-Type parameters."""
    content_type = "text/plain; version=0.0.4; charset=utf-8; escaping=values"
    response = make_response("<p>Hello World!</p>")
    response.headers["Content-Type"] = content_type
    return response
@app.route("/charset/latin1")
def charset_latin1():
resp = make_response("<p>café</p>".encode("latin1"))
@ -21,6 +30,13 @@ def charset_latin1():
return resp
@app.route("/charset/latin1/uppercase-key")
def charset_latin1_uppercase_key():
    """Serve latin1-encoded bytes with an upper-case CHARSET parameter name."""
    payload = "<p>café</p>".encode("latin1")
    response = make_response(payload)
    response.headers["Content-Type"] = "text/html; CHARSET=latin1"
    return response
@app.route("/charset/gb2312")
def charset_gb2312():
resp = make_response("<p>你好世界</p>".encode("gb2312"))

View File

@ -36,9 +36,14 @@ pub fn is_html(content_type: &str) -> bool {
/// Extracts the charset from a `Content-Type` header value.
///
/// The value is split on `;` and every segment containing `=` is treated as a
/// `name=value` parameter (split at the first `=`). The parameter name is
/// compared case-insensitively, so `charset=` and `CHARSET=` are both
/// recognised, and the charset may appear anywhere among other parameters.
/// Whitespace around the name and the value is trimmed, and a value written as
/// a quoted string (`charset="utf-8"`) is returned without its quotes.
///
/// Returns `None` when no `charset` parameter is present.
pub fn charset(mime_type: &str) -> Option<String> {
    mime_type
        .split(';')
        // Segments without `=` (the media type itself, empty trailing parts) are not parameters.
        .filter_map(|segment| segment.split_once('='))
        .find(|(name, _)| name.trim().eq_ignore_ascii_case("charset"))
        .map(|(_, value)| {
            let value = value.trim();
            // Parameter values may be sent as a quoted string; drop one matching pair of quotes.
            value
                .strip_prefix('"')
                .and_then(|inner| inner.strip_suffix('"'))
                .unwrap_or(value)
                .to_string()
        })
}
#[cfg(test)]
@ -51,10 +56,22 @@ pub mod tests {
charset("text/plain; charset=utf-8"),
Some("utf-8".to_string())
);
assert_eq!(
charset("text/plain; charset=ISO-8859-1"),
Some("ISO-8859-1".to_string())
);
assert_eq!(charset("text/plain;"), None);
assert_eq!(
charset("text/plain; CHARSET=ISO-8859-1"),
Some("ISO-8859-1".to_string())
);
assert_eq!(
charset("text/plain; version=0.0.4; charset=utf-8; escaping=values"),
Some("utf-8".to_string())
);
}
}