/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2022, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
2022-10-01 18:11:36 +03:00
# include <AK/CharacterTypes.h>
2021-01-16 17:51:56 +03:00
# include <AK/Debug.h>
2022-07-04 14:44:51 +03:00
# include <AK/JsonObject.h>
2023-01-25 22:19:05 +03:00
# include <AK/MemoryStream.h>
2023-01-02 14:16:39 +03:00
# include <AK/Try.h>
2022-03-31 22:08:10 +03:00
# include <LibCompress/Brotli.h>
2021-03-04 00:54:07 +03:00
# include <LibCompress/Gzip.h>
2021-03-22 05:41:13 +03:00
# include <LibCompress/Zlib.h>
2021-05-12 02:56:25 +03:00
# include <LibCore/Event.h>
2020-05-05 08:17:40 +03:00
# include <LibHTTP/HttpResponse.h>
# include <LibHTTP/Job.h>
# include <stdio.h>
# include <unistd.h>
namespace HTTP {
2023-01-02 14:16:39 +03:00
// Decodes a fully-buffered response body according to its Content-Encoding header.
// Supports "gzip", "deflate" (with or without the zlib wrapper), and "br" (Brotli);
// any other encoding value returns the buffer unchanged.
// Returns the decompressed bytes, or an Error if decompression fails.
static ErrorOr<ByteBuffer> handle_content_encoding(ByteBuffer const& buf, DeprecatedString const& content_encoding)
{
    dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf has content_encoding={}", content_encoding);

    // FIXME: Actually do the decompression of the data using streams, instead of all at once when everything has been
    //        received. This will require that some of the decompression algorithms are implemented in a streaming way.
    //        Gzip and Deflate are implemented using Stream, while Brotli uses the newer Core::Stream. The Gzip and
    //        Deflate implementations will likely need to be changed to LibCore::Stream for this to work easily.

    if (content_encoding == "gzip") {
        if (!Compress::GzipDecompressor::is_likely_compressed(buf)) {
            // Not fatal: the sniff is a heuristic, so we only log and still attempt decompression.
            dbgln("Job::handle_content_encoding: buf is not gzip compressed!");
        }

        dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is gzip compressed!");

        auto uncompressed = TRY(Compress::GzipDecompressor::decompress_all(buf));

        if constexpr (JOB_DEBUG) {
            dbgln("Job::handle_content_encoding: Gzip::decompress() successful.");
            dbgln("  Input size: {}", buf.size());
            dbgln("  Output size: {}", uncompressed.size());
        }

        return uncompressed;
    } else if (content_encoding == "deflate") {
        dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is deflate compressed!");

        // Even though the content encoding is "deflate", it's actually deflate with the zlib wrapper.
        // https://tools.ietf.org/html/rfc7230#section-4.2.2
        auto uncompressed = Compress::ZlibDecompressor::decompress_all(buf);
        if (!uncompressed.has_value()) {
            // From the RFC:
            // "Note: Some non-conformant implementations send the "deflate"
            //        compressed data without the zlib wrapper."
            dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: ZlibDecompressor::decompress_all() failed. Trying DeflateDecompressor::decompress_all()");
            uncompressed = TRY(Compress::DeflateDecompressor::decompress_all(buf));
        }

        if constexpr (JOB_DEBUG) {
            dbgln("Job::handle_content_encoding: Deflate decompression successful.");
            dbgln("  Input size: {}", buf.size());
            dbgln("  Output size: {}", uncompressed.value().size());
        }

        return uncompressed.release_value();
    } else if (content_encoding == "br") {
        dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is brotli compressed!");

        // Brotli decompression is stream-based, so wrap the buffer in a memory stream first.
        FixedMemoryStream bufstream { buf };
        auto brotli_stream = Compress::BrotliDecompressionStream { bufstream };

        auto uncompressed = TRY(brotli_stream.read_until_eof());

        if constexpr (JOB_DEBUG) {
            dbgln("Job::handle_content_encoding: Brotli::decompress() successful.");
            dbgln("  Input size: {}", buf.size());
            dbgln("  Output size: {}", uncompressed.size());
        }

        return uncompressed;
    }

    // Unknown or identity encoding: pass the body through untouched.
    return buf;
}
2023-02-10 03:00:18 +03:00
// Constructs a Job that will perform `request` and stream the response body
// into `output_stream` (ownership of the request is taken by move).
Job::Job(HttpRequest&& request, Stream& output_stream)
    : Core::NetworkJob(output_stream)
    , m_request(move(request))
{
}
2023-02-09 01:05:44 +03:00
// Begins the HTTP exchange on an already-connected socket.
// The socket must outlive this job; the cast assumes the caller always hands
// us a Core::BufferedSocketBase-derived socket — TODO confirm against callers.
void Job::start(Core::Socket& socket)
{
    VERIFY(!m_socket);
    m_socket = static_cast<Core::BufferedSocketBase*>(&socket);
    dbgln_if(HTTPJOB_DEBUG, "Reusing previous connection for {}", url());
    // Defer the "connected" callback so it runs from the event loop rather than
    // synchronously inside start().
    deferred_invoke([this] {
        dbgln_if(HTTPJOB_DEBUG, "HttpJob: on_connected callback");
        on_socket_connected();
    });
}
void Job : : shutdown ( ShutdownMode mode )
{
if ( ! m_socket )
return ;
if ( mode = = ShutdownMode : : CloseSocket ) {
m_socket - > close ( ) ;
2022-09-26 21:09:38 +03:00
m_socket - > on_ready_to_read = nullptr ;
2022-02-02 18:51:55 +03:00
} else {
m_socket - > on_ready_to_read = nullptr ;
m_socket = nullptr ;
}
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
// Writes as much buffered response data as possible to the output stream.
// Invariant: m_buffered_size is the total byte count across m_received_buffers.
// Only runs when streaming is allowed (m_can_stream_response) and data is pending.
void Job::flush_received_buffers()
{
    if (!m_can_stream_response || m_buffered_size == 0)
        return;
    dbgln_if(JOB_DEBUG, "Job: Flushing received buffers: have {} bytes in {} buffers for {}", m_buffered_size, m_received_buffers.size(), m_request.url());
    for (size_t i = 0; i < m_received_buffers.size(); ++i) {
        auto& payload = m_received_buffers[i]->pending_flush;
        auto result = do_write(payload);
        if (result.is_error()) {
            if (!result.error().is_errno()) {
                // Non-errno error: skip this buffer and try the next one.
                dbgln_if(JOB_DEBUG, "Job: Failed to flush received buffers: {}", result.error());
                continue;
            }
            if (result.error().code() == EINTR) {
                // Interrupted by a signal: retry the same buffer (i-- cancels the ++i).
                i--;
                continue;
            }
            // Any other errno (e.g. would-block): stop flushing for now.
            break;
        }
        auto written = result.release_value();
        m_buffered_size -= written;
        if (written == payload.size()) {
            // Whole buffer flushed: remove it from the front and compensate the index.
            // FIXME: Make this a take-first-friendly object?
            (void)m_received_buffers.take_first();
            --i;
            continue;
        }
        // Partial write: keep the unwritten tail for next time and stop,
        // since the sink clearly can't accept more right now.
        VERIFY(written < payload.size());
        payload = payload.slice(written, payload.size() - written);
        break;
    }
    dbgln_if(JOB_DEBUG, "Job: Flushing received buffers done: have {} bytes in {} buffers for {}", m_buffered_size, m_received_buffers.size(), m_request.url());
}
2022-02-02 18:51:55 +03:00
// Installs `callback` as the socket's ready-to-read handler.
// After each invocation, if the buffered socket still holds readable data,
// the handler re-arms itself via deferred_invoke so buffered bytes are not
// left waiting for an OS notification that will never come.
void Job::register_on_ready_to_read(Function<void()> callback)
{
    m_socket->on_ready_to_read = [this, callback = move(callback)] {
        callback();

        // As `m_socket` is a buffered object, we might not get notifications for data in the buffer
        // so exhaust the buffer to ensure we don't end up waiting forever.
        auto can_read_without_blocking = m_socket->can_read_without_blocking();
        if (can_read_without_blocking.is_error())
            return deferred_invoke([this] { did_fail(Core::NetworkJob::Error::TransmissionFailed); });
        if (can_read_without_blocking.value() && m_state != State::Finished && !has_error()) {
            deferred_invoke([this] {
                // Re-check both pointers: shutdown() may have run in the meantime.
                if (m_socket && m_socket->on_ready_to_read)
                    m_socket->on_ready_to_read();
            });
        }
    };
}
2021-01-16 17:51:56 +03:00
2022-12-04 21:02:33 +03:00
// Reads one CRLF-terminated line from the socket, up to `size` bytes,
// and returns it as an owned DeprecatedString.
ErrorOr<DeprecatedString> Job::read_line(size_t size)
{
    auto line_buffer = TRY(ByteBuffer::create_uninitialized(size));
    auto line_bytes = TRY(m_socket->read_until(line_buffer, "\r\n"sv));
    return DeprecatedString::copy(line_bytes);
}
2022-02-04 13:49:59 +03:00
// Reads up to `size` bytes from the socket, retrying reads that are
// interrupted by a signal (EINTR). Returns the bytes actually read,
// which may be fewer than requested; `size == 0` yields an empty buffer.
ErrorOr<ByteBuffer> Job::receive(size_t size)
{
    if (size == 0)
        return ByteBuffer {};

    auto buffer = TRY(ByteBuffer::create_uninitialized(size));
    size_t bytes_read = 0;
    for (;;) {
        auto read_result = m_socket->read(buffer);
        // A signal interrupted the read; try again.
        if (read_result.is_error() && read_result.error().is_errno() && read_result.error().code() == EINTR)
            continue;
        bytes_read = TRY(read_result).size();
        break;
    }
    return buffer.slice(0, bytes_read);
}
void Job : : on_socket_connected ( )
{
auto raw_request = m_request . to_raw_request ( ) ;
if constexpr ( JOB_DEBUG ) {
dbgln ( " Job: raw_request: " ) ;
2022-12-04 21:02:33 +03:00
dbgln ( " {} " , DeprecatedString : : copy ( raw_request ) ) ;
2022-02-02 18:51:55 +03:00
}
2022-12-11 21:21:36 +03:00
bool success = ! m_socket - > write_entire_buffer ( raw_request ) . is_error ( ) ;
2022-02-02 18:51:55 +03:00
if ( ! success )
deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2021-01-16 17:51:56 +03:00
2020-05-05 08:17:40 +03:00
register_on_ready_to_read ( [ & ] {
2021-09-18 02:18:22 +03:00
dbgln_if ( JOB_DEBUG , " Ready to read for {}, state = {}, cancelled = {} " , m_request . url ( ) , to_underlying ( m_state ) , is_cancelled ( ) ) ;
2020-05-05 08:17:40 +03:00
if ( is_cancelled ( ) )
return ;
2021-01-06 21:39:29 +03:00
if ( m_state = = State : : Finished ) {
2021-05-20 00:35:53 +03:00
// We have everything we want, at this point, we can either get an EOF, or a bunch of extra newlines
// (unless "Connection: close" isn't specified)
// So just ignore everything after this.
2021-01-06 21:39:29 +03:00
return ;
}
2022-02-02 18:51:55 +03:00
if ( m_socket - > is_eof ( ) ) {
dbgln_if ( JOB_DEBUG , " Read failure: Actually EOF! " ) ;
2021-10-04 14:32:38 +03:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2022-02-02 18:51:55 +03:00
}
2021-10-04 14:32:38 +03:00
2022-02-02 18:51:55 +03:00
while ( m_state = = State : : InStatus ) {
2022-02-04 13:49:59 +03:00
auto can_read_line = m_socket - > can_read_line ( ) ;
if ( can_read_line . is_error ( ) ) {
dbgln_if ( JOB_DEBUG , " Job {} could not figure out whether we could read a line " , m_request . url ( ) ) ;
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
}
if ( ! can_read_line . value ( ) ) {
2022-12-12 20:30:29 +03:00
dbgln_if ( JOB_DEBUG , " Job {} cannot read a full line " , m_request . url ( ) ) ;
// TODO: Should we retry here instead of failing instantly?
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2021-09-18 02:18:22 +03:00
}
2022-12-12 20:30:29 +03:00
2022-02-04 13:49:59 +03:00
auto maybe_line = read_line ( PAGE_SIZE ) ;
if ( maybe_line . is_error ( ) ) {
dbgln_if ( JOB_DEBUG , " Job {} could not read line: {} " , m_request . url ( ) , maybe_line . error ( ) ) ;
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
}
auto line = maybe_line . release_value ( ) ;
2021-09-18 02:18:22 +03:00
dbgln_if ( JOB_DEBUG , " Job {} read line of length {} " , m_request . url ( ) , line . length ( ) ) ;
2020-05-05 08:17:40 +03:00
if ( line . is_null ( ) ) {
2021-09-06 01:59:52 +03:00
dbgln ( " Job: Expected HTTP status " ) ;
2021-08-30 21:12:48 +03:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2020-05-05 08:17:40 +03:00
}
2020-12-13 13:44:53 +03:00
auto parts = line . split_view ( ' ' ) ;
2022-02-11 21:53:34 +03:00
if ( parts . size ( ) < 2 ) {
dbgln ( " Job: Expected 2-part or 3-part HTTP status line, got '{}' " , line ) ;
2021-08-30 21:12:48 +03:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 08:17:40 +03:00
}
2022-10-01 18:11:36 +03:00
if ( ! parts [ 0 ] . matches ( " HTTP/?.? " sv , CaseSensitivity : : CaseSensitive ) | | ! is_ascii_digit ( parts [ 0 ] [ 5 ] ) | | ! is_ascii_digit ( parts [ 0 ] [ 7 ] ) ) {
dbgln ( " Job: Expected HTTP-Version to be of the form 'HTTP/X.Y', got '{}' " , parts [ 0 ] ) ;
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
}
auto http_major_version = parse_ascii_digit ( parts [ 0 ] [ 5 ] ) ;
auto http_minor_version = parse_ascii_digit ( parts [ 0 ] [ 7 ] ) ;
m_legacy_connection = http_major_version < 1 | | ( http_major_version = = 1 & & http_minor_version = = 0 ) ;
2020-06-12 22:07:52 +03:00
auto code = parts [ 1 ] . to_uint ( ) ;
if ( ! code . has_value ( ) ) {
2021-09-06 01:59:52 +03:00
dbgln ( " Job: Expected numeric HTTP status " ) ;
2021-08-30 21:12:48 +03:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 08:17:40 +03:00
}
2020-06-12 22:07:52 +03:00
m_code = code . value ( ) ;
2020-05-05 08:17:40 +03:00
m_state = State : : InHeaders ;
2022-02-04 13:49:59 +03:00
auto can_read_without_blocking = m_socket - > can_read_without_blocking ( ) ;
if ( can_read_without_blocking . is_error ( ) )
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
if ( ! can_read_without_blocking . value ( ) )
2022-02-02 18:51:55 +03:00
return ;
2020-05-05 08:17:40 +03:00
}
2022-02-02 18:51:55 +03:00
while ( m_state = = State : : InHeaders | | m_state = = State : : Trailers ) {
2022-02-04 13:49:59 +03:00
auto can_read_line = m_socket - > can_read_line ( ) ;
if ( can_read_line . is_error ( ) ) {
dbgln_if ( JOB_DEBUG , " Job {} could not figure out whether we could read a line " , m_request . url ( ) ) ;
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
}
if ( ! can_read_line . value ( ) ) {
2022-02-02 18:51:55 +03:00
dbgln_if ( JOB_DEBUG , " Can't read lines anymore :( " ) ;
2020-05-05 08:17:40 +03:00
return ;
2022-02-02 18:51:55 +03:00
}
2022-02-04 13:49:59 +03:00
2021-10-04 17:00:45 +03:00
// There's no max limit defined on headers, but for our sanity, let's limit it to 32K.
2022-02-04 13:49:59 +03:00
auto maybe_line = read_line ( 32 * KiB ) ;
if ( maybe_line . is_error ( ) ) {
dbgln_if ( JOB_DEBUG , " Job {} could not read a header line: {} " , m_request . url ( ) , maybe_line . error ( ) ) ;
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
}
auto line = maybe_line . release_value ( ) ;
2020-05-05 08:17:40 +03:00
if ( line . is_null ( ) ) {
2020-08-19 05:34:15 +03:00
if ( m_state = = State : : Trailers ) {
2020-07-26 05:08:33 +03:00
// Some servers like to send two ending chunks
// use this fact as an excuse to ignore anything after the last chunk
// that is not a valid trailing header.
return finish_up ( ) ;
}
2021-09-06 01:59:52 +03:00
dbgln ( " Job: Expected HTTP header " ) ;
2020-05-05 08:17:40 +03:00
return did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ;
}
2020-12-13 13:44:53 +03:00
if ( line . is_empty ( ) ) {
2020-08-19 05:34:15 +03:00
if ( m_state = = State : : Trailers ) {
2020-05-12 01:25:10 +03:00
return finish_up ( ) ;
}
2022-02-02 18:51:55 +03:00
if ( on_headers_received ) {
if ( ! m_set_cookie_headers . is_empty ( ) )
2022-12-06 04:12:49 +03:00
m_headers . set ( " Set-Cookie " , JsonArray { m_set_cookie_headers } . to_deprecated_string ( ) ) ;
2022-02-02 18:51:55 +03:00
on_headers_received ( m_headers , m_code > 0 ? m_code : Optional < u32 > { } ) ;
}
m_state = State : : InBody ;
2021-09-19 01:01:40 +03:00
// We've reached the end of the headers, there's a possibility that the server
// responds with nothing (content-length = 0 with normal encoding); if that's the case,
// quit early as we won't be reading anything anyway.
if ( auto result = m_headers . get ( " Content-Length " sv ) . value_or ( " " sv ) . to_uint ( ) ; result . has_value ( ) ) {
if ( result . value ( ) = = 0 & & ! m_headers . get ( " Transfer-Encoding " sv ) . value_or ( " " sv ) . view ( ) . trim_whitespace ( ) . equals_ignoring_case ( " chunked " sv ) )
return finish_up ( ) ;
}
2022-02-09 22:51:17 +03:00
// There's also the possibility that the server responds with 204 (No Content),
// and manages to set a Content-Length anyway, in such cases ignore Content-Length and quit early;
// As the HTTP spec explicitly prohibits presence of Content-Length when the response code is 204.
if ( m_code = = 204 )
return finish_up ( ) ;
2022-02-04 13:49:59 +03:00
2022-02-02 18:51:55 +03:00
break ;
2020-05-05 08:17:40 +03:00
}
2020-12-13 13:44:53 +03:00
auto parts = line . split_view ( ' : ' ) ;
2020-05-05 08:17:40 +03:00
if ( parts . is_empty ( ) ) {
2020-08-19 05:34:15 +03:00
if ( m_state = = State : : Trailers ) {
2020-07-26 05:08:33 +03:00
// Some servers like to send two ending chunks
// use this fact as an excuse to ignore anything after the last chunk
// that is not a valid trailing header.
return finish_up ( ) ;
}
2021-09-06 01:59:52 +03:00
dbgln ( " Job: Expected HTTP header with key/value " ) ;
2021-08-30 21:12:48 +03:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 08:17:40 +03:00
}
auto name = parts [ 0 ] ;
2020-12-13 13:44:53 +03:00
if ( line . length ( ) < name . length ( ) + 2 ) {
2020-08-19 05:34:15 +03:00
if ( m_state = = State : : Trailers ) {
2020-07-26 05:08:33 +03:00
// Some servers like to send two ending chunks
// use this fact as an excuse to ignore anything after the last chunk
// that is not a valid trailing header.
return finish_up ( ) ;
}
2021-09-06 01:59:52 +03:00
dbgln ( " Job: Malformed HTTP header: '{}' ({}) " , line , line . length ( ) ) ;
2021-08-30 21:12:48 +03:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 08:17:40 +03:00
}
2020-12-13 13:44:53 +03:00
auto value = line . substring ( name . length ( ) + 2 , line . length ( ) - name . length ( ) - 2 ) ;
2022-07-11 20:32:29 +03:00
if ( name . equals_ignoring_case ( " Set-Cookie " sv ) ) {
2021-08-11 01:09:35 +03:00
dbgln_if ( JOB_DEBUG , " Job: Received Set-Cookie header: '{}' " , value ) ;
m_set_cookie_headers . append ( move ( value ) ) ;
2022-02-04 13:49:59 +03:00
auto can_read_without_blocking = m_socket - > can_read_without_blocking ( ) ;
if ( can_read_without_blocking . is_error ( ) )
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
if ( ! can_read_without_blocking . value ( ) )
2022-02-02 18:51:55 +03:00
return ;
} else if ( auto existing_value = m_headers . get ( name ) ; existing_value . has_value ( ) ) {
2021-08-11 01:09:35 +03:00
StringBuilder builder ;
builder . append ( existing_value . value ( ) ) ;
builder . append ( ' , ' ) ;
builder . append ( value ) ;
2023-01-26 21:58:09 +03:00
m_headers . set ( name , builder . to_deprecated_string ( ) ) ;
2021-08-11 01:09:35 +03:00
} else {
m_headers . set ( name , value ) ;
}
2022-07-11 20:32:29 +03:00
if ( name . equals_ignoring_case ( " Content-Encoding " sv ) ) {
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
// Assume that any content-encoding means that we can't decode it as a stream :(
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Content-Encoding {} detected, cannot stream output :( " , value ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
m_can_stream_response = false ;
2022-07-11 20:32:29 +03:00
} else if ( name . equals_ignoring_case ( " Content-Length " sv ) ) {
2021-10-16 21:17:18 +03:00
auto length = value . to_uint ( ) ;
if ( length . has_value ( ) )
m_content_length = length . value ( ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
}
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: [{}] = '{}' " , name , value ) ;
2022-02-04 13:49:59 +03:00
auto can_read_without_blocking = m_socket - > can_read_without_blocking ( ) ;
if ( can_read_without_blocking . is_error ( ) )
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
if ( ! can_read_without_blocking . value ( ) ) {
2022-02-02 18:51:55 +03:00
dbgln_if ( JOB_DEBUG , " Can't read headers anymore, byebye :( " ) ;
return ;
}
2020-05-05 08:17:40 +03:00
}
2021-02-23 22:42:32 +03:00
VERIFY ( m_state = = State : : InBody ) ;
2022-02-04 13:49:59 +03:00
while ( true ) {
auto can_read_without_blocking = m_socket - > can_read_without_blocking ( ) ;
if ( can_read_without_blocking . is_error ( ) )
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
if ( ! can_read_without_blocking . value ( ) )
break ;
AK: Rename KB, MB, GB to KiB, MiB, GiB
The SI prefixes "k", "M", "G" mean "10^3", "10^6", "10^9".
The IEC prefixes "Ki", "Mi", "Gi" mean "2^10", "2^20", "2^30".
Let's use the correct name, at least in code.
Only changes the name of the constants, no other behavior change.
2020-08-15 20:55:00 +03:00
auto read_size = 64 * KiB ;
2020-05-12 01:25:10 +03:00
if ( m_current_chunk_remaining_size . has_value ( ) ) {
read_chunk_size : ;
auto remaining = m_current_chunk_remaining_size . value ( ) ;
if ( remaining = = - 1 ) {
// read size
2022-02-04 13:49:59 +03:00
auto maybe_size_data = read_line ( PAGE_SIZE ) ;
if ( maybe_size_data . is_error ( ) ) {
dbgln_if ( JOB_DEBUG , " Job: Could not receive chunk: {} " , maybe_size_data . error ( ) ) ;
}
auto size_data = maybe_size_data . release_value ( ) ;
2021-04-11 23:17:33 +03:00
if ( m_should_read_chunk_ending_line ) {
VERIFY ( size_data . is_empty ( ) ) ;
m_should_read_chunk_ending_line = false ;
2022-02-02 18:51:55 +03:00
continue ;
2021-04-11 23:17:33 +03:00
}
2020-12-13 13:44:53 +03:00
auto size_lines = size_data . view ( ) . lines ( ) ;
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: Received a chunk with size '{}' " , size_data ) ;
2020-05-12 01:25:10 +03:00
if ( size_lines . size ( ) = = 0 ) {
2022-02-02 18:51:55 +03:00
if ( ! m_socket - > is_eof ( ) )
break ;
2021-01-09 20:51:44 +03:00
dbgln ( " Job: Reached end of stream " ) ;
2020-08-19 05:34:15 +03:00
finish_up ( ) ;
2022-02-02 18:51:55 +03:00
break ;
2020-05-12 01:25:10 +03:00
} else {
2022-10-22 16:38:21 +03:00
auto chunk = size_lines [ 0 ] . split_view ( ' ; ' , SplitBehavior : : KeepEmpty ) ;
2022-12-04 21:02:33 +03:00
DeprecatedString size_string = chunk [ 0 ] ;
2020-05-20 23:06:54 +03:00
char * endptr ;
auto size = strtoul ( size_string . characters ( ) , & endptr , 16 ) ;
if ( * endptr ) {
// invalid number
2021-08-30 21:12:48 +03:00
deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2022-02-02 18:51:55 +03:00
break ;
2020-05-20 23:06:54 +03:00
}
if ( size = = 0 ) {
2020-05-12 01:25:10 +03:00
// This is the last chunk
// '0' *[; chunk-ext-name = chunk-ext-value]
// We're going to ignore _all_ chunk extensions
read_size = 0 ;
m_current_chunk_total_size = 0 ;
m_current_chunk_remaining_size = 0 ;
2021-01-16 17:51:56 +03:00
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: Received the last chunk with extensions '{}' " , size_string . substring_view ( 1 , size_string . length ( ) - 1 ) ) ;
2020-05-12 01:25:10 +03:00
} else {
m_current_chunk_total_size = size ;
m_current_chunk_remaining_size = size ;
read_size = size ;
2021-01-16 17:51:56 +03:00
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: Chunk of size '{}' started " , size ) ;
2020-05-12 01:25:10 +03:00
}
}
} else {
read_size = remaining ;
2021-01-16 17:51:56 +03:00
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: Resuming chunk with '{}' bytes left over " , remaining ) ;
2020-05-12 01:25:10 +03:00
}
} else {
auto transfer_encoding = m_headers . get ( " Transfer-Encoding " ) ;
if ( transfer_encoding . has_value ( ) ) {
2021-10-16 21:17:29 +03:00
// HTTP/1.1 3.3.3.3:
// If a message is received with both a Transfer-Encoding and a Content-Length header field, the Transfer-Encoding overrides the Content-Length. [...]
// https://httpwg.org/specs/rfc7230.html#message.body.length
m_content_length = { } ;
2021-04-14 07:50:25 +03:00
// Note: Some servers add extra spaces around 'chunked', see #6302.
auto encoding = transfer_encoding . value ( ) . trim_whitespace ( ) ;
2021-01-16 17:51:56 +03:00
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: This content has transfer encoding '{}' " , encoding ) ;
2022-07-11 20:32:29 +03:00
if ( encoding . equals_ignoring_case ( " chunked " sv ) ) {
2020-05-12 01:25:10 +03:00
m_current_chunk_remaining_size = - 1 ;
goto read_chunk_size ;
} else {
2021-01-16 17:51:56 +03:00
dbgln ( " Job: Unknown transfer encoding '{}', the result will likely be wrong! " , encoding ) ;
2020-05-12 01:25:10 +03:00
}
}
}
2022-02-04 13:49:59 +03:00
can_read_without_blocking = m_socket - > can_read_without_blocking ( ) ;
if ( can_read_without_blocking . is_error ( ) )
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
if ( ! can_read_without_blocking . value ( ) )
2022-02-02 18:51:55 +03:00
break ;
2021-09-18 02:18:22 +03:00
dbgln_if ( JOB_DEBUG , " Waiting for payload for {} " , m_request . url ( ) ) ;
2022-02-04 13:49:59 +03:00
auto maybe_payload = receive ( read_size ) ;
if ( maybe_payload . is_error ( ) ) {
dbgln_if ( JOB_DEBUG , " Could not read the payload: {} " , maybe_payload . error ( ) ) ;
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
}
auto payload = maybe_payload . release_value ( ) ;
2022-02-02 18:51:55 +03:00
if ( payload . is_empty ( ) & & m_socket - > is_eof ( ) ) {
finish_up ( ) ;
break ;
2020-05-05 08:17:40 +03:00
}
2020-05-12 01:25:10 +03:00
2021-10-16 21:17:25 +03:00
bool read_everything = false ;
if ( m_content_length . has_value ( ) ) {
auto length = m_content_length . value ( ) ;
if ( m_received_size + payload . size ( ) > = length ) {
2021-10-29 15:16:25 +03:00
payload . resize ( length - m_received_size ) ;
2021-10-16 21:17:25 +03:00
read_everything = true ;
}
}
2022-02-11 22:25:15 +03:00
m_received_buffers . append ( make < ReceivedBuffer > ( payload ) ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
m_buffered_size + = payload . size ( ) ;
2020-05-05 08:17:40 +03:00
m_received_size + = payload . size ( ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
flush_received_buffers ( ) ;
2020-05-05 08:17:40 +03:00
2021-10-16 21:17:25 +03:00
deferred_invoke ( [ this ] { did_progress ( m_content_length , m_received_size ) ; } ) ;
if ( read_everything ) {
2021-10-29 15:16:25 +03:00
VERIFY ( m_received_size < = m_content_length . value ( ) ) ;
2021-10-16 21:17:25 +03:00
finish_up ( ) ;
2022-02-02 18:51:55 +03:00
break ;
2021-10-16 21:17:25 +03:00
}
2022-03-20 00:29:12 +03:00
// Check after reading all the buffered data if we have reached the end of stream
// for cases where the server didn't send a content length, chunked encoding but is
// directly closing the connection.
if ( ! m_content_length . has_value ( ) & & ! m_current_chunk_remaining_size . has_value ( ) & & m_socket - > is_eof ( ) ) {
finish_up ( ) ;
break ;
}
2020-05-12 01:25:10 +03:00
if ( m_current_chunk_remaining_size . has_value ( ) ) {
auto size = m_current_chunk_remaining_size . value ( ) - payload . size ( ) ;
2021-01-16 17:51:56 +03:00
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: We have {} bytes left over in this chunk " , size ) ;
2020-05-12 01:25:10 +03:00
if ( size = = 0 ) {
2021-02-07 15:03:24 +03:00
dbgln_if ( JOB_DEBUG , " Job: Finished a chunk of {} bytes " , m_current_chunk_total_size . value ( ) ) ;
2020-08-19 05:34:15 +03:00
if ( m_current_chunk_total_size . value ( ) = = 0 ) {
m_state = State : : Trailers ;
2022-02-02 18:51:55 +03:00
break ;
2020-08-19 05:34:15 +03:00
}
2020-05-12 01:25:10 +03:00
// we've read everything, now let's get the next chunk
size = - 1 ;
2022-02-04 13:49:59 +03:00
auto can_read_line = m_socket - > can_read_line ( ) ;
if ( can_read_line . is_error ( ) )
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
if ( can_read_line . value ( ) ) {
auto maybe_line = read_line ( PAGE_SIZE ) ;
if ( maybe_line . is_error ( ) ) {
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
}
VERIFY ( maybe_line . value ( ) . is_empty ( ) ) ;
2021-04-11 23:17:33 +03:00
} else {
m_should_read_chunk_ending_line = true ;
}
2020-05-12 01:25:10 +03:00
}
m_current_chunk_remaining_size = size ;
}
2022-02-02 18:51:55 +03:00
}
2020-05-12 01:25:10 +03:00
2022-02-02 18:51:55 +03:00
if ( ! m_socket - > is_open ( ) ) {
2021-05-01 22:10:08 +03:00
dbgln_if ( JOB_DEBUG , " Connection appears to have closed, finishing up " ) ;
2020-05-05 08:17:40 +03:00
finish_up ( ) ;
}
} ) ;
}
2021-05-12 02:56:25 +03:00
void Job : : timer_event ( Core : : TimerEvent & event )
{
event . accept ( ) ;
finish_up ( ) ;
if ( m_buffered_size = = 0 )
stop_timer ( ) ;
}
2020-05-05 08:17:40 +03:00
void Job : : finish_up ( )
{
2021-06-29 00:10:18 +03:00
VERIFY ( ! m_has_scheduled_finish ) ;
2020-05-05 08:17:40 +03:00
m_state = State : : Finished ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
if ( ! m_can_stream_response ) {
2022-02-04 13:49:59 +03:00
auto maybe_flattened_buffer = ByteBuffer : : create_uninitialized ( m_buffered_size ) ;
if ( maybe_flattened_buffer . is_error ( ) )
return did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ;
auto flattened_buffer = maybe_flattened_buffer . release_value ( ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
u8 * flat_ptr = flattened_buffer . data ( ) ;
for ( auto & received_buffer : m_received_buffers ) {
2023-03-06 19:16:25 +03:00
memcpy ( flat_ptr , received_buffer - > pending_flush . data ( ) , received_buffer - > pending_flush . size ( ) ) ;
flat_ptr + = received_buffer - > pending_flush . size ( ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
}
m_received_buffers . clear ( ) ;
// For the time being, we cannot stream stuff with content-encoding set to _anything_.
2021-03-04 00:54:07 +03:00
// FIXME: LibCompress exposes a streaming interface, so this can be resolved
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
auto content_encoding = m_headers . get ( " Content-Encoding " ) ;
if ( content_encoding . has_value ( ) ) {
2023-01-02 14:16:39 +03:00
if ( auto result = handle_content_encoding ( flattened_buffer , content_encoding . value ( ) ) ; ! result . is_error ( ) )
2021-10-04 13:52:48 +03:00
flattened_buffer = result . release_value ( ) ;
else
return did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
}
m_buffered_size = flattened_buffer . size ( ) ;
2022-02-11 22:25:15 +03:00
m_received_buffers . append ( make < ReceivedBuffer > ( move ( flattened_buffer ) ) ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
m_can_stream_response = true ;
2020-05-05 08:17:40 +03:00
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
flush_received_buffers ( ) ;
if ( m_buffered_size ! = 0 ) {
2020-12-31 01:12:44 +03:00
// We have to wait for the client to consume all the downloaded data
// before we can actually call `did_finish`. in a normal flow, this should
// never be hit since the client is reading as we are writing, unless there
// are too many concurrent downloads going on.
2021-05-12 02:56:25 +03:00
dbgln_if ( JOB_DEBUG , " Flush finished with {} bytes remaining, will try again later " , m_buffered_size ) ;
if ( ! has_timer ( ) )
start_timer ( 50 ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 16:44:12 +03:00
return ;
2020-05-05 08:17:40 +03:00
}
2021-06-29 00:10:18 +03:00
m_has_scheduled_finish = true ;
2022-02-02 18:51:55 +03:00
auto response = HttpResponse : : create ( m_code , move ( m_headers ) , m_received_size ) ;
2021-08-30 21:12:48 +03:00
deferred_invoke ( [ this , response = move ( response ) ] {
2021-09-30 11:49:54 +03:00
// If the server responded with "Connection: close", close the connection
2022-10-01 18:11:36 +03:00
// as the server may or may not want to close the socket. Also, if this is
// a legacy HTTP server (1.0 or older), assume close is the default value.
if ( auto result = response - > headers ( ) . get ( " Connection " sv ) ; result . has_value ( ) ? result - > equals_ignoring_case ( " close " sv ) : m_legacy_connection )
2021-09-30 11:49:54 +03:00
shutdown ( ShutdownMode : : CloseSocket ) ;
2021-06-29 00:10:18 +03:00
did_finish ( response ) ;
2020-05-05 08:17:40 +03:00
} ) ;
}
}