http_client: improve header parsing

Summary: Add a new `Header` type that represents a parsed header line. Notably, `libcurl` treats both the initial status line and trailing CRLF as headers; the new struct handles these cases in a strongly-typed way. This is particularly useful when working with streaming responses, as this means that the `Receiver` can tell the status code upfront rather than waiting until the end of the request, and the `Receiver` can tell once all headers (except for trailers) have been received.

Reviewed By: quark-zju

Differential Revision: D22228632

fbshipit-source-id: 06f1a21d7af25b37269bb449a1e53237ec74490a
This commit is contained in:
Arun Kulshreshtha 2020-06-30 21:03:21 -07:00 committed by Facebook GitHub Bot
parent 282f7729b6
commit 8c116f9067
3 changed files with 164 additions and 0 deletions

View File

@ -20,6 +20,7 @@ env_logger = "0.7"
http = "0.2"
log = { version = "0.4.8", features = ["kv_unstable"] }
once_cell = "1.4"
regex = "1.3.7"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_cbor = "0.11"
serde_json = "1.0"

View File

@ -0,0 +1,161 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::str;
use http::{
header::{HeaderName, HeaderValue},
status::StatusCode,
version::Version,
};
use once_cell::sync::Lazy;
use regex::Regex;
use thiserror::Error;
static STATUS_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)HTTP/([0-9.]+) ([0-9]+)").unwrap());
/// A parsed header line.
///
/// This enum represents a line from the header section of a
/// response. Note that in addition to the headers, libcurl
/// also passes the initial status line and trailing blank
/// line to the user-specified header callback, so we must
/// be able to handle those cases.
#[derive(Eq, PartialEq, Debug)]
pub enum Header {
Status(Version, StatusCode),
Header(HeaderName, HeaderValue),
EndOfHeaders,
}
#[derive(Debug, Error)]
#[error("Malformed header: {:?}", String::from_utf8_lossy(.0))]
pub struct BadHeader<'a>(&'a [u8]);
impl Header {
/// Parse a header line. The input is expected to be a CRLF-terminated
/// line which can be decoded as UTF-8. Note that per RFC 7230, header
/// values can sometimes contain arbitrary binary data, but in practice
/// they are limited to ASCII characters, so for simplicity we reject
/// non-UTF-8 header values. Aside from the values, the specification
/// restricts all other parts of a header line to be limited to ASCII.
pub fn parse(line: &[u8]) -> Result<Self, BadHeader> {
let header = str::from_utf8(line)
.map_err(|_| BadHeader(line))?
.trim_end(); // Strip off trailing CRLF.
if header.is_empty() {
return Ok(Self::EndOfHeaders);
}
if let Some((name, value)) = parse_header(header) {
return Ok(Self::Header(name, value));
}
if let Some((version, code)) = parse_status(header) {
return Ok(Self::Status(version, code));
}
Err(BadHeader(line))
}
}
/// Parse a status line, e.g. "HTTP/1.1 200 OK".
fn parse_status(line: &str) -> Option<(Version, StatusCode)> {
if let Some(captures) = STATUS_REGEX.captures(&line) {
let version_str = captures.get(1).map(|m| m.as_str())?;
let code_str = captures.get(2).map(|m| m.as_str())?;
let version = match version_str {
"0.9" => Version::HTTP_09,
"1.0" => Version::HTTP_10,
"1.1" => Version::HTTP_11,
"2" | "2.0" => Version::HTTP_2,
"3" | "3.0 " => Version::HTTP_3,
_ => return None,
};
let code = StatusCode::from_u16(code_str.parse().ok()?).ok()?;
Some((version, code))
} else {
None
}
}
/// Parse a header name-value pair, e.g. "Content-Length: 42\r\n".
fn parse_header(header: &str) -> Option<(HeaderName, HeaderValue)> {
let parts = header.splitn(2, ':').collect::<Vec<_>>();
let (name, value) = if parts.len() > 1 {
(parts[0], parts[1].trim_start())
} else {
(parts[0], "")
};
let name = HeaderName::from_bytes(name.as_bytes()).ok()?;
let value = HeaderValue::from_bytes(value.as_bytes()).ok()?;
Some((name, value))
}
#[cfg(test)]
mod test {
use super::*;
use anyhow::Result;
use http::header;
#[test]
fn test_parse_header() -> Result<()> {
let header = Header::parse(b"Content-Length: 42\r\n")?;
let expected = Header::Header(header::CONTENT_LENGTH, HeaderValue::from_static("42"));
assert_eq!(header, expected);
let header = Header::parse(b"X-Non-Standard: test\r\n")?;
let expected = Header::Header(
HeaderName::from_static("x-non-standard"),
HeaderValue::from_static("test"),
);
assert_eq!(header, expected);
let header = Header::parse(b"X-No-Value\r\n")?;
let expected = Header::Header(
HeaderName::from_static("x-no-value"),
HeaderValue::from_static(""),
);
assert_eq!(header, expected);
let header = Header::parse(b"X-Whitespace: hello world \r\n")?;
let expected = Header::Header(
HeaderName::from_static("x-whitespace"),
HeaderValue::from_static("hello world"),
);
assert_eq!(header, expected);
let header = Header::parse("X-Non-ASCII-Value: \u{1F980}\r\n".as_ref())?;
let expected = Header::Header(
HeaderName::from_static("x-non-ascii-value"),
HeaderValue::from_bytes("\u{1F980}".as_ref())?,
);
assert_eq!(header, expected);
assert!(Header::parse("\u{1F980}: Non-ASCII name\r\n".as_ref()).is_err());
Ok(())
}
#[test]
fn test_parse_status() -> Result<()> {
let status = Header::parse(b"HTTP/2 201 CREATED\r\n")?;
let expected = Header::Status(Version::HTTP_2, StatusCode::CREATED);
assert_eq!(status, expected);
Ok(())
}
#[test]
fn test_parse_crlf() -> Result<()> {
assert_eq!(Header::parse(b"\r\n")?, Header::EndOfHeaders);
Ok(())
}
}

View File

@ -13,6 +13,7 @@ mod client;
mod driver;
mod errors;
mod handler;
mod header;
mod progress;
mod receiver;
mod request;
@ -21,6 +22,7 @@ mod stats;
pub use client::HttpClient;
pub use errors::{Abort, CertOrKeyMissing, HttpClientError};
pub use header::Header;
pub use progress::Progress;
pub use receiver::Receiver;
pub use request::{Request, StreamRequest};