From 2843dcbf4a93f72aa1028ebec2bef5f5ce3a1d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 10 Oct 2024 17:18:47 +0200 Subject: [PATCH] When connecting to a Postgres database through a datalink stored on Enso Cloud, its asset ID is included in the audit logs (#11291) - Closes #9869 --- .../AWS/0.0.0-dev/src/S3/S3_Data_Link.enso | 3 +- .../src/Enso_Cloud/Data_Link_Helpers.enso | 53 +++++++++++++++---- .../0.0.0-dev/src/Enso_Cloud/Enso_File.enso | 3 +- .../Internal/Enso_File_Data_Link.enso | 3 +- .../HTTP/Internal/HTTP_Fetch_Data_Link.enso | 3 +- .../Data_Link/Postgres_Data_Link.enso | 23 ++++---- .../0.0.0-dev/src/Snowflake_Data_Link.enso | 3 +- .../audit/CloudAuditedConnection.java | 2 +- .../Network/Enso_Cloud/Audit_Log_Spec.enso | 2 +- .../src/Database/Common/Audit_Spec.enso | 45 ++++++++++++---- 10 files changed, 101 insertions(+), 39 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_Data_Link.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_Data_Link.enso index 6319c36de5..8cc8580a1a 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_Data_Link.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_Data_Link.enso @@ -19,7 +19,8 @@ type S3_Data_Link Value (uri : Text) format_json (credentials : AWS_Credential) ## PRIVATE - parse json -> S3_Data_Link = + parse json source -> S3_Data_Link = + _ = source uri = get_required_field "uri" json expected_type=Text auth = decode_aws_credential (get_required_field "auth" json) format_json = get_optional_field "format" json diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso index f203d8e8e7..16fa881592 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Data_Link_Helpers.enso @@ -6,8 +6,12 @@ import project.Data.Text.Encoding.Encoding import project.Data.Text.Text import project.Data.Vector.Vector import project.Enso_Cloud.Data_Link.Data_Link +import project.Enso_Cloud.Enso_File.Enso_Asset_Type +import project.Enso_Cloud.Enso_File.Enso_File import project.Enso_Cloud.Enso_Secret.Enso_Secret import project.Enso_Cloud.Errors.Missing_Data_Link_Library +import project.Enso_Cloud.Internal.Existing_Enso_Asset.Existing_Enso_Asset +import project.Enso_Cloud.Internal.Utils import project.Error.Error import project.Errors.Common.No_Such_Conversion import project.Errors.Illegal_Argument.Illegal_Argument @@ -15,8 +19,10 @@ import project.Errors.Illegal_State.Illegal_State import project.Errors.Problem_Behavior.Problem_Behavior import project.Errors.Unimplemented.Unimplemented import project.Meta +import project.Network.HTTP.HTTP_Method.HTTP_Method import project.Nothing.Nothing import project.Panic.Panic +import project.Runtime import project.System.File.Data_Link_Access.Data_Link_Access import project.System.File.File import project.System.File.File_Access.File_Access @@ -50,40 +56,67 @@ data_link_content_type = "application/x-enso-datalink" data_link_extension = ".datalink" ## PRIVATE -interpret_json_as_data_link json = + Describes from where the data link is sourced. +type Data_Link_Source_Metadata + ## A data link coming from Enso Cloud. + Cloud_Asset id:Text + + ## No information is available about the source. + Unknown + +## PRIVATE +interpret_json_as_data_link json source:Data_Link_Source_Metadata=..Unknown = typ = get_required_field "type" json expected_type=Text case DataLinkSPI.findDataLinkType typ of Nothing -> library_name = get_required_field "libraryName" json expected_type=Text Error.throw (Missing_Data_Link_Library.Error library_name typ) data_link_type -> - data_link_type.parse json + data_link_type.parse json source + +## PRIVATE +read_and_interpret_data_link (file : File_Like) = + case file.underlying of + ## We have special handling of Cloud files, because for them we want to correlate the asset id of the data link to be used in the audit logs. + Only the Cloud-backed files have this special handling. All other backends behave uniformly. + cloud_file : Enso_File -> + asset = Existing_Enso_Asset.get_asset_reference_for cloud_file + if asset.asset_type != Enso_Asset_Type.Data_Link then Error.throw (Illegal_Argument.Error "Cannot interpret file "+cloud_file.to_text+" as a data link, because it is "+asset.asset_type.to_text+".") else + interpret_existing_asset_as_data_link asset + _ -> + json = Data_Link.read_config file + ## We don't register the source for other backends - we are not interested in Filesystem path or S3 URI - only Cloud asset id is important in the audit logs. + interpret_json_as_data_link json source=..Unknown + +## PRIVATE + This method should only be called on an asset that is known to be a data link. +interpret_existing_asset_as_data_link asset:Existing_Enso_Asset = + Runtime.assert (asset.asset_type == Enso_Asset_Type.Data_Link) + source = Data_Link_Source_Metadata.Cloud_Asset asset.id + json = Utils.http_request_as_json HTTP_Method.Get asset.internal_uri + interpret_json_as_data_link json source ## PRIVATE read_data_link (file : File_Like) format (on_problems : Problem_Behavior) = - json = Data_Link.read_config file - data_link_instance = interpret_json_as_data_link json + data_link_instance = read_and_interpret_data_link file resolved_format = File_Format.resolve format data_link_instance.read resolved_format on_problems ## PRIVATE read_data_link_as_stream (file : File_Like) (open_options : Vector) (f : Input_Stream -> Any) = - json = Data_Link.read_config file - data_link_instance = interpret_json_as_data_link json + data_link_instance = read_and_interpret_data_link file data_link_with_input_stream = Data_Link_With_Input_Stream.find data_link_instance data_link_with_input_stream.with_input_stream open_options f ## PRIVATE write_data_link_as_stream (file : File_Like) (open_options : Vector) (f : Output_Stream -> Any) = - json = Data_Link.read_config file - data_link_instance = interpret_json_as_data_link json + data_link_instance = read_and_interpret_data_link file data_link_with_output_stream = Data_Link_With_Output_Stream.find data_link_instance data_link_with_output_stream.with_output_stream open_options f ## PRIVATE interpret_data_link_as_writable_file (file : File_Like) -> Writable_File = - json = Data_Link.read_config file - data_link_instance = interpret_json_as_data_link json + data_link_instance = read_and_interpret_data_link file ## We override the return value to return the original datalink, not the target. Ideally, we'd do this inside of the `as_writable_file` implementation, but there the data link does not know its 'origin', so it is impossible diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso index c7c14bb30a..f5fc798054 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Enso_File.enso @@ -381,8 +381,7 @@ type Enso_File Enso_Asset_Type.Project -> Error.throw (Illegal_Argument.Error "Projects cannot be read within Enso code. Open using the IDE.") Enso_Asset_Type.Secret -> Error.throw (Illegal_Argument.Error "Secrets cannot be read directly.") Enso_Asset_Type.Data_Link -> - json = Utils.http_request_as_json HTTP_Method.Get asset.internal_uri - datalink = Data_Link_Helpers.interpret_json_as_data_link json + datalink = Data_Link_Helpers.interpret_existing_asset_as_data_link asset datalink.read format on_problems Enso_Asset_Type.Directory -> Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.") Enso_Asset_Type.File -> diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Data_Link.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Data_Link.enso index d1663c7161..24c6b382fb 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Data_Link.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Enso_Cloud/Internal/Enso_File_Data_Link.enso @@ -21,7 +21,8 @@ type Enso_File_Data_Link private Value (path : Text) format_json ## PRIVATE - parse json -> Enso_File_Data_Link = + parse json source -> Enso_File_Data_Link = + _ = source path = get_required_field "path" json expected_type=Text format_json = get_optional_field "format" json Enso_File_Data_Link.Value path format_json diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Internal/HTTP_Fetch_Data_Link.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Internal/HTTP_Fetch_Data_Link.enso index e1be01b37b..cb087ca360 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Internal/HTTP_Fetch_Data_Link.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Internal/HTTP_Fetch_Data_Link.enso @@ -20,7 +20,8 @@ type HTTP_Fetch_Data_Link Value (request : Request) format_json ## PRIVATE - parse json -> HTTP_Fetch_Data_Link = + parse json source -> HTTP_Fetch_Data_Link = + _ = source uri = get_required_field "uri" json expected_type=Text method = HTTP_Method.from (get_required_field "method" json expected_type=Text) format_json = get_optional_field "format" json diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Data_Link/Postgres_Data_Link.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Data_Link/Postgres_Data_Link.enso index 42e4b7f41a..9c43742a41 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Data_Link/Postgres_Data_Link.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Data_Link/Postgres_Data_Link.enso @@ -3,7 +3,7 @@ private from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.System.Input_Stream.Input_Stream -from Standard.Base.Enso_Cloud.Data_Link_Helpers import parse_secure_value +from Standard.Base.Enso_Cloud.Data_Link_Helpers import Data_Link_Source_Metadata, parse_secure_value from Standard.Base.Enso_Cloud.Public_Utils import get_optional_field, get_required_field import project.Connection.Connection_Options.Connection_Options @@ -14,15 +14,15 @@ import project.Connection.Postgres.Postgres type Postgres_Data_Link ## PRIVATE A data-link returning a connection to the specified database. - Connection details:Postgres + Connection details:Postgres related_asset_id:Text|Nothing ## PRIVATE A data-link returning a query to a specific table within a database. - Table name:Text details:Postgres + Table name:Text details:Postgres related_asset_id:Text|Nothing ## PRIVATE - parse json -> Postgres_Data_Link = + parse json source:Data_Link_Source_Metadata -> Postgres_Data_Link = host = get_required_field "host" json expected_type=Text port = get_required_field "port" json expected_type=Integer db_name = get_required_field "database_name" json expected_type=Text @@ -34,22 +34,25 @@ type Postgres_Data_Link password = get_required_field "password" credentials_json |> parse_secure_value Credentials.Username_And_Password username password + related_asset_id = case source of + Data_Link_Source_Metadata.Cloud_Asset id -> id + _ -> Nothing details = Postgres.Server host=host port=port database=db_name schema=schema credentials=credentials case get_optional_field "table" json expected_type=Text of Nothing -> - Postgres_Data_Link.Connection details + Postgres_Data_Link.Connection details related_asset_id table_name : Text -> - Postgres_Data_Link.Table table_name details + Postgres_Data_Link.Table table_name details related_asset_id ## PRIVATE read self (format = Auto_Detect) (on_problems : Problem_Behavior) = _ = on_problems if format != Auto_Detect then Error.throw (Illegal_Argument.Error "Only Auto_Detect can be used with a Postgres Data Link, as it points to a database.") else - # TODO add related asset id here: https://github.com/enso-org/enso/issues/9869 audit_mode = if Enso_User.is_logged_in then "cloud" else "local" - default_options = Connection_Options.Value [["enso.internal.audit", audit_mode]] + options_vector = [["enso.internal.audit", audit_mode]] + (if self.related_asset_id.is_nothing then [] else [["enso.internal.relatedAssetId", self.related_asset_id]]) + default_options = Connection_Options.Value options_vector connection = self.details.connect default_options allow_data_links=False case self of - Postgres_Data_Link.Connection _ -> connection - Postgres_Data_Link.Table table_name _ -> + Postgres_Data_Link.Connection _ _ -> connection + Postgres_Data_Link.Table table_name _ _ -> connection.query table_name diff --git a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Snowflake_Data_Link.enso b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Snowflake_Data_Link.enso index 4cedef22d8..192e131000 100644 --- a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Snowflake_Data_Link.enso +++ b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Snowflake_Data_Link.enso @@ -19,7 +19,8 @@ type Snowflake_Data_Link Table name:Text details:Snowflake_Details ## PRIVATE - parse json -> Snowflake_Data_Link = + parse json source -> Snowflake_Data_Link = + _ = source account = get_required_field "account" json expected_type=Text db_name = get_required_field "database_name" json expected_type=Text schema = get_optional_field "schema" json if_missing="SNOWFLAKE" expected_type=Text diff --git a/std-bits/database/src/main/java/org/enso/database/audit/CloudAuditedConnection.java b/std-bits/database/src/main/java/org/enso/database/audit/CloudAuditedConnection.java index 0272c23280..9568931f7b 100644 --- a/std-bits/database/src/main/java/org/enso/database/audit/CloudAuditedConnection.java +++ b/std-bits/database/src/main/java/org/enso/database/audit/CloudAuditedConnection.java @@ -22,7 +22,7 @@ public final class CloudAuditedConnection extends AuditedConnection { super(underlying); metadata = new ObjectNode(JsonNodeFactory.instance); if (relatedAssetId != null) { - metadata.put("asset_id", relatedAssetId); + metadata.put("dataLinkAssetId", relatedAssetId); } try { metadata.put("connectionUri", underlying.getMetaData().getURL()); diff --git a/test/Base_Tests/src/Network/Enso_Cloud/Audit_Log_Spec.enso b/test/Base_Tests/src/Network/Enso_Cloud/Audit_Log_Spec.enso index 27bfc3d1d1..a5c6f06e3f 100644 --- a/test/Base_Tests/src/Network/Enso_Cloud/Audit_Log_Spec.enso +++ b/test/Base_Tests/src/Network/Enso_Cloud/Audit_Log_Spec.enso @@ -81,7 +81,7 @@ get_audit_log_events -> Vector Audit_Log_Event = ## PRIVATE type Audit_Log_Event ## PRIVATE - Value organization_id:Text user_email:Text timestamp:Date_Time metadata:JS_Object message:Text project_id:Text + Value organization_id:Text user_email:Text timestamp:Date_Time|Nothing metadata:JS_Object message:Text project_id:Text|Nothing ## PRIVATE from_json json = diff --git a/test/Table_Tests/src/Database/Common/Audit_Spec.enso b/test/Table_Tests/src/Database/Common/Audit_Spec.enso index f5b375f6f6..d159188776 100644 --- a/test/Table_Tests/src/Database/Common/Audit_Spec.enso +++ b/test/Table_Tests/src/Database/Common/Audit_Spec.enso @@ -1,4 +1,5 @@ from Standard.Base import all +import Standard.Base.Enso_Cloud.Data_Link.Data_Link import Standard.Base.Enso_Cloud.Internal.Audit_Log.Audit_Log from Standard.Table import Table @@ -8,6 +9,7 @@ from Standard.Database import all from Standard.Test import all import enso_dev.Base_Tests.Network.Enso_Cloud.Cloud_Tests_Setup.Cloud_Tests_Setup +import enso_dev.Base_Tests.Network.Enso_Cloud.Cloud_Tests_Setup.Temporary_Directory from enso_dev.Base_Tests.Network.Enso_Cloud.Audit_Log_Spec import Audit_Log_Event, get_audit_log_events import project.Database.Postgres_Spec.Temporary_Data_Link_File @@ -24,6 +26,7 @@ add_specs suite_builder prefix ~datalink_to_connection database_pending = The local environment is more predictable for running these tests. The following flag can be changed to `False` to run it on the real cloud (if it is set up in the test context). This can be used to verify that the mock logic is consistent with the real thing. + TODO Once https://github.com/enso-org/enso/issues/10919 is implemented, we can remove this flag. always_run_on_mock = True cloud_setup = if always_run_on_mock then Cloud_Tests_Setup.prepare_mock_setup else Cloud_Tests_Setup.prepare suite_builder.group prefix+"Audit Logs" pending=(cloud_setup.pending.if_nothing database_pending) group_builder-> @@ -37,13 +40,9 @@ add_specs suite_builder prefix ~datalink_to_connection database_pending = # Retrying is needed as there may be some delay before the background thread finishes processing the logs. Test.with_retries <| - # We send a synchronous log to block the main thread until all pending async logs have been processed. - Audit_Log.report_event "TestEvent" "test message to sync" async=False . should_succeed - all_events = get_audit_log_events relevant_events = all_events.filter e-> e.message.contains table_name Test.with_clue ((relevant_events.map .to_text).join '\n' 'Found relevant events are:\n' '\n') <| - create = relevant_events.find (e-> e.message.contains "CREATE") create.should_succeed create.user_email . should_equal Enso_User.current.email @@ -89,13 +88,37 @@ add_specs suite_builder prefix ~datalink_to_connection database_pending = # We just check that we can read queries through this connection: locally_audited_connection.read (SQL_Query.Raw_SQL "SELECT 1") . at 0 . to_vector . should_equal [1] - group_builder.specify "should know the asset id of the data link used for the connection" pending="TODO: https://github.com/enso-org/enso/issues/9869" <| - ## TODO: - 1. write data link to Enso File - 2. set up real cloud and establish the connection - 3. switch to mock cloud (if wanted) and run some queries - 4. inspect logs and search for the asset id - Error.throw "TODO" + # This test may only run on real cloud because the mock does not support creating datalinks. + # Once the tests above can be run on real cloud too (#10919), we can merge all 3 cloud setups into a single one. + real_cloud = Cloud_Tests_Setup.prepare + test_root = Temporary_Directory.make "Audit-Logs-Datalinks" + group_builder.specify "should know the asset id of the data link used for the connection" pending=real_cloud.real_cloud_pending <| real_cloud.with_prepared_environment <| + # Upload our local reference data link to the cloud + cloud_data_link = test_root.get / "audited-db.datalink" + Data_Link.copy datalink_to_connection cloud_data_link . should_succeed + + # Set-up an audited connection through the Cloud data link + audited_connection = cloud_data_link.read + + # Until https://github.com/enso-org/enso/issues/10919 is implemented, we switch over to cloud mock to force the audit logs to be sent there: + mock_setup = Cloud_Tests_Setup.prepare_mock_setup + mock_setup.with_prepared_environment <| + table_name = "audited-table-"+Random.uuid + mem_table = Table.new [["X", [1, 2]], ["Y", ["my_payload", "foo"]]] + mem_table.select_into_database_table audited_connection table_name temporary=True . should_succeed + + Test.with_retries <| + all_events = get_audit_log_events + relevant_events = all_events.filter e-> e.message.contains table_name + Test.with_clue ((relevant_events.map .to_text).join '\n' 'Found relevant events are:\n' '\n') <| + create = relevant_events.find (e-> e.message.contains "CREATE") + create.should_succeed + + create.metadata.get "connectionUri" . should_contain "jdbc:" + # Verify that it contains an asset id field: + asset_id = create.metadata.get "dataLinkAssetId" + # We cannot really assume too much about the ID other than that it is a string: + asset_id.should_be_a Text main filter=Nothing = connection_details = get_configured_connection_details