Functional prototype for paginated APIs.

Only GitHub is supported at the moment.
This commit is contained in:
James Dunkerley 2024-09-03 12:31:39 +01:00
parent ed9a132bf9
commit bfdf876867
4 changed files with 40 additions and 21 deletions

View File

@ -11,12 +11,14 @@ import project.Errors.Illegal_Argument.Illegal_Argument
import project.Errors.Problem_Behavior.Problem_Behavior
import project.Internal.Data_Read_Helpers
import project.Meta
import project.Metadata.Display
import project.Network.HTTP.Header.Header
import project.Network.HTTP.HTTP
import project.Network.HTTP.HTTP_Error.HTTP_Error
import project.Network.HTTP.HTTP_Method.HTTP_Method
import project.Network.HTTP.Request_Body.Request_Body
import project.Network.HTTP.Request_Error
import project.Network.Paginated_URI.Paginated_URI
import project.Network.URI.URI
import project.Nothing.Nothing
import project.Runtime.Context
@ -25,7 +27,7 @@ import project.System.File.Generic.Writable_File.Writable_File
from project.Data.Boolean import Boolean, False, True
from project.Meta.Enso_Project import enso_project
from project.Metadata.Choice import Option
from project.Metadata.Widget import Folder_Browse, Text_Input
from project.Metadata.Widget import Folder_Browse, Single_Choice, Text_Input
from project.System.File_Format import Auto_Detect, File_Format
## ALIAS load, open
@ -194,11 +196,12 @@ list (directory:(Text | File)=enso_project.root) (name_filter:Text="") recursive
import Standard.Base.Data
file = enso_project.data / "spreadsheet.xls"
Data.fetch URL . body . write file
@uri (Single_Choice values=[Option "URI" '""', Option "Paged API" "..Paged_API"] display=Display.Always)
@format Data_Read_Helpers.format_widget_with_raw_response
fetch : (Paginated_URI|URI) -> HTTP_Method -> Vector (Header | Pair Text Text) -> File_Format -> Any ! Request_Error | HTTP_Error
fetch (uri:Paginated_URI|URI) (method:HTTP_Method=..Get) (headers:(Vector (Header | Pair Text Text))=[]) (format = Auto_Detect) =
    # A `Paginated_URI` performs its own multi-page fetch; anything else is
    # handed to `Data_Read_Helpers.fetch_following_data_links`.
    # NOTE(review): the paginated branch receives `format` as given, without
    # going through `handle_legacy_format` - confirm this is intended.
    if uri.is_a Paginated_URI then uri.fetch method headers format else
        Data_Read_Helpers.fetch_following_data_links uri method headers (Data_Read_Helpers.handle_legacy_format "fetch" "format" format)
## ALIAS http post, upload
GROUP Output

View File

@ -106,8 +106,8 @@ type Response
headers_by_name : Text -> Vector Text
headers_by_name self name:Text =
    # A header may be repeated in a response, so the lookup returns every
    # value recorded under `name`. `headerValues` returns a polyglot (Java)
    # array, which is converted to a `Vector` before being returned.
    headers = self.internal_http_response.headerValues name
    Vector.from_polyglot_array headers
## ICON metadata
Get the response content type.

View File

@ -1,13 +1,24 @@
import project.Any.Any
import project.Data.Numbers.Integer
import project.Data.Pair.Pair
import project.Data.Text.Text
import project.Data.Vector.Vector
import project.Errors.Common.Missing_Argument
import project.Network.HTTP.Header.Header
import project.Network.HTTP.HTTP
import project.Network.HTTP.HTTP_Error.HTTP_Error
import project.Network.HTTP.HTTP_Method.HTTP_Method
import project.Network.HTTP.Request.Request
import project.Network.HTTP.Request_Error
import project.Network.HTTP.Response.Response
import project.Network.URI.URI
import project.Nothing.Nothing
import project.Panic.Caught_Panic
import project.Panic.Panic
import project.Warning.Warning
from project.Data.Text.Extensions import all
from project.Logging import all
from project.System.File_Format import Auto_Detect, File_Format
## URI with pagination information.
type Paginated_URI
@ -18,13 +29,14 @@ type Paginated_URI
- max_pages: The maximum number of pages to fetch.
- retries: The number of retries to attempt if a request fails.
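- next_step: A function that takes the current URI, the current headers and the raw response, and returns a `Pair` of the next URI and the headers to use for it, or `Nothing` when there is no next page.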
Paged_URI initial_uri:URI max_pages:Integer=10 retries:Integer=3 next_step=Paginated_URI.next_uri
Paged_API initial_uri:URI=(Missing_Argument.throw "initial_uri") max_pages:Integer=10 retries:Integer=3 next_step=Paginated_URI.next_uri
## Fetches a paginated resource.
fetch : HTTP_Method -> Vector (Header | Pair Text Text) -> File_Format -> Vector Any ! Request_Error | HTTP_Error
fetch self (method:HTTP_Method=..Get) (headers:(Vector (Header | Pair Text Text))=[]) (format = Auto_Detect) =
inner_fetch uri headers left current =
if left==0 then Warning.attach (More_Pages.Warning self.max_pages) current else
uri.log_message "Fetching page "+(self.max_pages-left+1).to_text+" of "+self.max_pages.to_text+" - "+uri.to_text
raw_response = HTTP.fetch uri method headers
next = self.next_step uri headers raw_response
decoded = raw_response.decode format
@ -38,7 +50,7 @@ type Paginated_URI
next_uri current_uri:URI current_headers raw_response:Response =
    # Dispatch on the lower-cased host name so the comparison is
    # case-insensitive.
    host = current_uri.host.to_case ..Lower
    case host of
        "api.github.com" -> Paginated_URI.next_github_uri current_uri current_headers raw_response
        # Any other host has no pagination rule here: report "no next page".
        _ -> Nothing
## Method to get the next URI for GitHub.
@ -46,12 +58,12 @@ type Paginated_URI
next_github_uri current_uri:URI current_headers raw_response:Response =
    # Keeps the signature aligned with `next_step` while marking the
    # arguments as deliberately referenced.
    _ = [current_uri, current_headers]
    # Only the first `Link` header is inspected; `Nothing` means it is absent.
    link = raw_response.headers_by_name "Link" . get 0
    if link.is_nothing then Nothing else
        # The header is a comma-separated list of `<uri>; rel="..."` entries;
        # pick the entry tagged `rel="next"`.
        parts = link.split ','
        next_link = parts.find (_.contains 'rel="next"') if_missing=Nothing
        if next_link.is_nothing then Nothing else
            # Keep the text before the first ';' and strip the surrounding
            # spaces and angle brackets to get the bare URI text.
            next_uri = next_link.take (..Before ';') . trim what=' <>'
            Pair.Value (URI.from next_uri) current_headers
## PRIVATE
Indicates that there are more pages to fetch.
@ -66,25 +78,25 @@ type More_Pages
## PRIVATE
   Fetch with a retry mechanism.

   Arguments:
   - uri: The URI to fetch.
   - method: The HTTP method to use.
   - headers: The headers to send with each attempt.
   - retries: How many failed attempts are tolerated before the final
     attempt, whose result (or error) is returned as is. Zero or a negative
     value means a single attempt.
fetch_with_retries : URI -> HTTP_Method -> Vector (Header | Pair Text Text) -> Integer -> Response ! Request_Error | HTTP_Error
fetch_with_retries uri method headers retries =
    # `<= 0` rather than `== 0`, so a negative count cannot recurse forever.
    if retries <= 0 then HTTP.fetch uri method headers else
        # Both handlers log the failure and return `Nothing`, which is the
        # signal to retry. The panic handler is passed by name so it cannot
        # be taken for the action argument of `Panic.catch`.
        result = Panic.catch Any handler=(c-> panic_handler uri c) <|
            (HTTP.fetch uri method headers) . catch Any e->(error_handler uri e)
        if result.is_nothing.not then result else
            fetch_with_retries uri method headers (retries-1)
## PRIVATE
   Handler for panics.

   Logs the display text of the panic payload against `uri` at warning level
   and returns `Nothing`.
panic_handler : URI -> Caught_Panic -> Nothing
panic_handler uri caught_panic =
    # Logged once, at warning level.
    uri.log_message (caught_panic.payload.to_display_text) ..Warning
    Nothing
## PRIVATE
   Handler for errors.

   Logs the display text of `error` against `uri` at warning level and
   returns `Nothing`.
error_handler : URI -> Any -> Nothing
error_handler uri error =
    # Logged once, at warning level.
    uri.log_message (error.to_display_text) ..Warning
    Nothing

View File

@ -5,4 +5,8 @@ import java.net.URI;
import java.net.http.HttpHeaders;
/** A subset of the HttpResponse to avoid leaking the decrypted Enso secrets. */
public record EnsoHttpResponse(URI uri, HttpHeaders headers, InputStream body, int statusCode) {}
public record EnsoHttpResponse(URI uri, HttpHeaders headers, InputStream body, int statusCode) {
public String[] headerValues(String name) {
return headers.allValues(name).toArray(String[]::new);
}
}