When connecting to a Postgres database through a data link stored on Enso Cloud, the data link's asset ID is included in the audit logs (#11291)

- Closes #9869
Radosław Waśko 2024-10-10 17:18:47 +02:00 committed by GitHub
parent 239a6803bf
commit 2843dcbf4a
10 changed files with 101 additions and 39 deletions
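At a high level, the change threads the Enso Cloud asset id of a data link from the point where the data link is read all the way down to the audited JDBC connection, where it is attached as connection metadata. A condensed, hedged sketch of the user-visible flow, adapted from the test added at the end of this diff (the `Enso_File.new` call and the `enso://` path are assumptions made for illustration):

    from Standard.Base import all
    from Standard.Table import Table
    from Standard.Database import all

    main =
        # A Postgres data link stored as an asset in Enso Cloud (hypothetical path).
        cloud_data_link = Enso_File.new "enso://my-org/audited-db.datalink"
        # Reading the data link opens an audited connection; the data link's asset id
        # is forwarded via the "enso.internal.relatedAssetId" connection option.
        audited_connection = cloud_data_link.read
        # Statements issued through this connection produce audit-log events whose
        # metadata carries the asset id under the "dataLinkAssetId" key.
        mem_table = Table.new [["X", [1, 2]], ["Y", ["my_payload", "foo"]]]
        mem_table.select_into_database_table audited_connection "audited-table" temporary=True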


@ -19,7 +19,8 @@ type S3_Data_Link
Value (uri : Text) format_json (credentials : AWS_Credential)
## PRIVATE
parse json -> S3_Data_Link =
parse json source -> S3_Data_Link =
_ = source
uri = get_required_field "uri" json expected_type=Text
auth = decode_aws_credential (get_required_field "auth" json)
format_json = get_optional_field "format" json


@ -6,8 +6,12 @@ import project.Data.Text.Encoding.Encoding
import project.Data.Text.Text
import project.Data.Vector.Vector
import project.Enso_Cloud.Data_Link.Data_Link
import project.Enso_Cloud.Enso_File.Enso_Asset_Type
import project.Enso_Cloud.Enso_File.Enso_File
import project.Enso_Cloud.Enso_Secret.Enso_Secret
import project.Enso_Cloud.Errors.Missing_Data_Link_Library
import project.Enso_Cloud.Internal.Existing_Enso_Asset.Existing_Enso_Asset
import project.Enso_Cloud.Internal.Utils
import project.Error.Error
import project.Errors.Common.No_Such_Conversion
import project.Errors.Illegal_Argument.Illegal_Argument
@ -15,8 +19,10 @@ import project.Errors.Illegal_State.Illegal_State
import project.Errors.Problem_Behavior.Problem_Behavior
import project.Errors.Unimplemented.Unimplemented
import project.Meta
import project.Network.HTTP.HTTP_Method.HTTP_Method
import project.Nothing.Nothing
import project.Panic.Panic
import project.Runtime
import project.System.File.Data_Link_Access.Data_Link_Access
import project.System.File.File
import project.System.File.File_Access.File_Access
@ -50,40 +56,67 @@ data_link_content_type = "application/x-enso-datalink"
data_link_extension = ".datalink"
## PRIVATE
interpret_json_as_data_link json =
Describes from where the data link is sourced.
type Data_Link_Source_Metadata
## A data link coming from Enso Cloud.
Cloud_Asset id:Text
## No information is available about the source.
Unknown
## PRIVATE
interpret_json_as_data_link json source:Data_Link_Source_Metadata=..Unknown =
typ = get_required_field "type" json expected_type=Text
case DataLinkSPI.findDataLinkType typ of
Nothing ->
library_name = get_required_field "libraryName" json expected_type=Text
Error.throw (Missing_Data_Link_Library.Error library_name typ)
data_link_type ->
data_link_type.parse json
data_link_type.parse json source
## PRIVATE
read_and_interpret_data_link (file : File_Like) =
case file.underlying of
## We have special handling for Cloud files, because for them we want to capture the asset id of the data link so that it can be included in the audit logs.
Only Cloud-backed files get this special handling; all other backends behave uniformly.
cloud_file : Enso_File ->
asset = Existing_Enso_Asset.get_asset_reference_for cloud_file
if asset.asset_type != Enso_Asset_Type.Data_Link then Error.throw (Illegal_Argument.Error "Cannot interpret file "+cloud_file.to_text+" as a data link, because it is "+asset.asset_type.to_text+".") else
interpret_existing_asset_as_data_link asset
_ ->
json = Data_Link.read_config file
## We don't register the source for other backends - we are not interested in a filesystem path or an S3 URI - only the Cloud asset id matters for the audit logs.
interpret_json_as_data_link json source=..Unknown
## PRIVATE
This method should only be called on an asset that is known to be a data link.
interpret_existing_asset_as_data_link asset:Existing_Enso_Asset =
Runtime.assert (asset.asset_type == Enso_Asset_Type.Data_Link)
source = Data_Link_Source_Metadata.Cloud_Asset asset.id
json = Utils.http_request_as_json HTTP_Method.Get asset.internal_uri
interpret_json_as_data_link json source
## PRIVATE
read_data_link (file : File_Like) format (on_problems : Problem_Behavior) =
json = Data_Link.read_config file
data_link_instance = interpret_json_as_data_link json
data_link_instance = read_and_interpret_data_link file
resolved_format = File_Format.resolve format
data_link_instance.read resolved_format on_problems
## PRIVATE
read_data_link_as_stream (file : File_Like) (open_options : Vector) (f : Input_Stream -> Any) =
json = Data_Link.read_config file
data_link_instance = interpret_json_as_data_link json
data_link_instance = read_and_interpret_data_link file
data_link_with_input_stream = Data_Link_With_Input_Stream.find data_link_instance
data_link_with_input_stream.with_input_stream open_options f
## PRIVATE
write_data_link_as_stream (file : File_Like) (open_options : Vector) (f : Output_Stream -> Any) =
json = Data_Link.read_config file
data_link_instance = interpret_json_as_data_link json
data_link_instance = read_and_interpret_data_link file
data_link_with_output_stream = Data_Link_With_Output_Stream.find data_link_instance
data_link_with_output_stream.with_output_stream open_options f
## PRIVATE
interpret_data_link_as_writable_file (file : File_Like) -> Writable_File =
json = Data_Link.read_config file
data_link_instance = interpret_json_as_data_link json
data_link_instance = read_and_interpret_data_link file
## We override the return value to return the original datalink, not the target.
Ideally, we'd do this inside of the `as_writable_file` implementation,
but there the data link does not know its 'origin', so it is impossible
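Under this scheme, a data-link backend that wants to make use of the new parameter can pattern-match on the source metadata the same way `Postgres_Data_Link.parse` does later in this diff, while backends with no use for it simply discard it via `_ = source`. A hedged sketch of such a `parse` implementation (the `My_Data_Link` type is hypothetical; `get_required_field` is the helper imported from `Public_Utils` in the files above):

    parse json source:Data_Link_Source_Metadata -> My_Data_Link =
        # Only Cloud assets carry an id that can be correlated with the audit logs.
        related_asset_id = case source of
            Data_Link_Source_Metadata.Cloud_Asset id -> id
            _ -> Nothing
        uri = get_required_field "uri" json expected_type=Text
        My_Data_Link.Value uri related_asset_id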


@ -381,8 +381,7 @@ type Enso_File
Enso_Asset_Type.Project -> Error.throw (Illegal_Argument.Error "Projects cannot be read within Enso code. Open using the IDE.")
Enso_Asset_Type.Secret -> Error.throw (Illegal_Argument.Error "Secrets cannot be read directly.")
Enso_Asset_Type.Data_Link ->
json = Utils.http_request_as_json HTTP_Method.Get asset.internal_uri
datalink = Data_Link_Helpers.interpret_json_as_data_link json
datalink = Data_Link_Helpers.interpret_existing_asset_as_data_link asset
datalink.read format on_problems
Enso_Asset_Type.Directory -> Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.")
Enso_Asset_Type.File ->


@ -21,7 +21,8 @@ type Enso_File_Data_Link
private Value (path : Text) format_json
## PRIVATE
parse json -> Enso_File_Data_Link =
parse json source -> Enso_File_Data_Link =
_ = source
path = get_required_field "path" json expected_type=Text
format_json = get_optional_field "format" json
Enso_File_Data_Link.Value path format_json


@ -20,7 +20,8 @@ type HTTP_Fetch_Data_Link
Value (request : Request) format_json
## PRIVATE
parse json -> HTTP_Fetch_Data_Link =
parse json source -> HTTP_Fetch_Data_Link =
_ = source
uri = get_required_field "uri" json expected_type=Text
method = HTTP_Method.from (get_required_field "method" json expected_type=Text)
format_json = get_optional_field "format" json


@ -3,7 +3,7 @@ private
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.Enso_Cloud.Data_Link_Helpers import parse_secure_value
from Standard.Base.Enso_Cloud.Data_Link_Helpers import Data_Link_Source_Metadata, parse_secure_value
from Standard.Base.Enso_Cloud.Public_Utils import get_optional_field, get_required_field
import project.Connection.Connection_Options.Connection_Options
@ -14,15 +14,15 @@ import project.Connection.Postgres.Postgres
type Postgres_Data_Link
## PRIVATE
A data-link returning a connection to the specified database.
Connection details:Postgres
Connection details:Postgres related_asset_id:Text|Nothing
## PRIVATE
A data-link returning a query to a specific table within a database.
Table name:Text details:Postgres
Table name:Text details:Postgres related_asset_id:Text|Nothing
## PRIVATE
parse json -> Postgres_Data_Link =
parse json source:Data_Link_Source_Metadata -> Postgres_Data_Link =
host = get_required_field "host" json expected_type=Text
port = get_required_field "port" json expected_type=Integer
db_name = get_required_field "database_name" json expected_type=Text
@ -34,22 +34,25 @@ type Postgres_Data_Link
password = get_required_field "password" credentials_json |> parse_secure_value
Credentials.Username_And_Password username password
related_asset_id = case source of
Data_Link_Source_Metadata.Cloud_Asset id -> id
_ -> Nothing
details = Postgres.Server host=host port=port database=db_name schema=schema credentials=credentials
case get_optional_field "table" json expected_type=Text of
Nothing ->
Postgres_Data_Link.Connection details
Postgres_Data_Link.Connection details related_asset_id
table_name : Text ->
Postgres_Data_Link.Table table_name details
Postgres_Data_Link.Table table_name details related_asset_id
## PRIVATE
read self (format = Auto_Detect) (on_problems : Problem_Behavior) =
_ = on_problems
if format != Auto_Detect then Error.throw (Illegal_Argument.Error "Only Auto_Detect can be used with a Postgres Data Link, as it points to a database.") else
# TODO add related asset id here: https://github.com/enso-org/enso/issues/9869
audit_mode = if Enso_User.is_logged_in then "cloud" else "local"
default_options = Connection_Options.Value [["enso.internal.audit", audit_mode]]
options_vector = [["enso.internal.audit", audit_mode]] + (if self.related_asset_id.is_nothing then [] else [["enso.internal.relatedAssetId", self.related_asset_id]])
default_options = Connection_Options.Value options_vector
connection = self.details.connect default_options allow_data_links=False
case self of
Postgres_Data_Link.Connection _ -> connection
Postgres_Data_Link.Table table_name _ ->
Postgres_Data_Link.Connection _ _ -> connection
Postgres_Data_Link.Table table_name _ _ ->
connection.query table_name
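Concretely, the options vector built above means that a data link read from an Enso Cloud asset connects with something equivalent to the first line below, while a data link read from a local file or S3 only passes the audit option (the asset id value is hypothetical):

    # Logged in to Enso Cloud, reading a data link whose asset id is "abc-123":
    Connection_Options.Value [["enso.internal.audit", "cloud"], ["enso.internal.relatedAssetId", "abc-123"]]
    # Reading a data link from a non-Cloud source (related_asset_id is Nothing):
    Connection_Options.Value [["enso.internal.audit", "cloud"]]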


@ -19,7 +19,8 @@ type Snowflake_Data_Link
Table name:Text details:Snowflake_Details
## PRIVATE
parse json -> Snowflake_Data_Link =
parse json source -> Snowflake_Data_Link =
_ = source
account = get_required_field "account" json expected_type=Text
db_name = get_required_field "database_name" json expected_type=Text
schema = get_optional_field "schema" json if_missing="SNOWFLAKE" expected_type=Text


@ -22,7 +22,7 @@ public final class CloudAuditedConnection extends AuditedConnection {
super(underlying);
metadata = new ObjectNode(JsonNodeFactory.instance);
if (relatedAssetId != null) {
metadata.put("asset_id", relatedAssetId);
metadata.put("dataLinkAssetId", relatedAssetId);
}
try {
metadata.put("connectionUri", underlying.getMetaData().getURL());


@ -81,7 +81,7 @@ get_audit_log_events -> Vector Audit_Log_Event =
## PRIVATE
type Audit_Log_Event
## PRIVATE
Value organization_id:Text user_email:Text timestamp:Date_Time metadata:JS_Object message:Text project_id:Text
Value organization_id:Text user_email:Text timestamp:Date_Time|Nothing metadata:JS_Object message:Text project_id:Text|Nothing
## PRIVATE
from_json json =


@ -1,4 +1,5 @@
from Standard.Base import all
import Standard.Base.Enso_Cloud.Data_Link.Data_Link
import Standard.Base.Enso_Cloud.Internal.Audit_Log.Audit_Log
from Standard.Table import Table
@ -8,6 +9,7 @@ from Standard.Database import all
from Standard.Test import all
import enso_dev.Base_Tests.Network.Enso_Cloud.Cloud_Tests_Setup.Cloud_Tests_Setup
import enso_dev.Base_Tests.Network.Enso_Cloud.Cloud_Tests_Setup.Temporary_Directory
from enso_dev.Base_Tests.Network.Enso_Cloud.Audit_Log_Spec import Audit_Log_Event, get_audit_log_events
import project.Database.Postgres_Spec.Temporary_Data_Link_File
@ -24,6 +26,7 @@ add_specs suite_builder prefix ~datalink_to_connection database_pending =
The local environment is more predictable for running these tests.
The following flag can be changed to `False` to run them against the real cloud (if it is set up in the test context).
This can be used to verify that the mock logic is consistent with the real thing.
TODO Once https://github.com/enso-org/enso/issues/10919 is implemented, we can remove this flag.
always_run_on_mock = True
cloud_setup = if always_run_on_mock then Cloud_Tests_Setup.prepare_mock_setup else Cloud_Tests_Setup.prepare
suite_builder.group prefix+"Audit Logs" pending=(cloud_setup.pending.if_nothing database_pending) group_builder->
@ -37,13 +40,9 @@ add_specs suite_builder prefix ~datalink_to_connection database_pending =
# Retrying is needed as there may be some delay before the background thread finishes processing the logs.
Test.with_retries <|
# We send a synchronous log to block the main thread until all pending async logs have been processed.
Audit_Log.report_event "TestEvent" "test message to sync" async=False . should_succeed
all_events = get_audit_log_events
relevant_events = all_events.filter e-> e.message.contains table_name
Test.with_clue ((relevant_events.map .to_text).join '\n' 'Found relevant events are:\n' '\n') <|
create = relevant_events.find (e-> e.message.contains "CREATE")
create.should_succeed
create.user_email . should_equal Enso_User.current.email
@ -89,13 +88,37 @@ add_specs suite_builder prefix ~datalink_to_connection database_pending =
# We just check that we can read queries through this connection:
locally_audited_connection.read (SQL_Query.Raw_SQL "SELECT 1") . at 0 . to_vector . should_equal [1]
group_builder.specify "should know the asset id of the data link used for the connection" pending="TODO: https://github.com/enso-org/enso/issues/9869" <|
## TODO:
1. write data link to Enso File
2. set up real cloud and establish the connection
3. switch to mock cloud (if wanted) and run some queries
4. inspect logs and search for the asset id
Error.throw "TODO"
# This test can only run on the real cloud, because the mock does not support creating data links.
# Once the tests above can be run on the real cloud too (#10919), we can merge all 3 cloud setups into a single one.
real_cloud = Cloud_Tests_Setup.prepare
test_root = Temporary_Directory.make "Audit-Logs-Datalinks"
group_builder.specify "should know the asset id of the data link used for the connection" pending=real_cloud.real_cloud_pending <| real_cloud.with_prepared_environment <|
# Upload our local reference data link to the cloud
cloud_data_link = test_root.get / "audited-db.datalink"
Data_Link.copy datalink_to_connection cloud_data_link . should_succeed
# Set up an audited connection through the Cloud data link
audited_connection = cloud_data_link.read
# Until https://github.com/enso-org/enso/issues/10919 is implemented, we switch over to the cloud mock to force the audit logs to be sent there:
mock_setup = Cloud_Tests_Setup.prepare_mock_setup
mock_setup.with_prepared_environment <|
table_name = "audited-table-"+Random.uuid
mem_table = Table.new [["X", [1, 2]], ["Y", ["my_payload", "foo"]]]
mem_table.select_into_database_table audited_connection table_name temporary=True . should_succeed
Test.with_retries <|
all_events = get_audit_log_events
relevant_events = all_events.filter e-> e.message.contains table_name
Test.with_clue ((relevant_events.map .to_text).join '\n' 'Found relevant events are:\n' '\n') <|
create = relevant_events.find (e-> e.message.contains "CREATE")
create.should_succeed
create.metadata.get "connectionUri" . should_contain "jdbc:"
# Verify that it contains an asset id field:
asset_id = create.metadata.get "dataLinkAssetId"
# We cannot assume much about the ID other than that it is a string:
asset_id.should_be_a Text
main filter=Nothing =
connection_details = get_configured_connection_details