[DPP-1327] Efficient Oracle ETQ data migration (#15843)

Previous version of etq_array_diff function proved to be vastly inefficient on a synthetic data set
resulting in "SQL Error: ORA-04036: PGA memory used by the instance exceeds PGA_AGGREGATE_LIMIT" error
after many minutes of processing. PGA_AGGREGATE_LIMIT was set to 2GB in that testing setup.

This new implementation of etq_array_diff completed the migration successfully in 03min 34s
which is comparable with the data migration for PostgreSQL. No evidence for excessive consumption
of memory was found ('select * from V$PGASTAT;').
This commit is contained in:
pbatko-da 2022-12-09 12:03:06 +01:00 committed by GitHub
parent e880f26b36
commit fe92e52da7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 21 deletions

View File

@ -1 +1 @@
871a62483f3ea549e765e3b199eb0a9c116b2a6d6408918dd101aaf6d5899059 b58ff4ac8a8a2be798424de9dd246dedb1825bfe468f00fc82ba65c534654af7

View File

@ -2,30 +2,26 @@
-- Removes all elements from a that are present in b, essentially computes a - b. -- Removes all elements from a that are present in b, essentially computes a - b.
CREATE OR REPLACE FUNCTION etq_array_diff( CREATE OR REPLACE FUNCTION etq_array_diff(
arrayClob1 IN CLOB, clobA IN CLOB,
arrayClob2 IN CLOB clobB IN CLOB
) )
RETURN CLOB RETURN CLOB
IS IS
arrayJson1 json_array_t := json_array_t.parse(arrayClob1); aDiffB CLOB;
outputJsonArray json_array_t := json_array_t ('[]');
-- Number type has
-- 999...(38 9's) x10^125 maximum value
-- -999...(38 9's) x10^125 minimum value
-- so 200 characters should be enough to hold it together with the whole filter expression
filterExpression varchar2(200);
BEGIN BEGIN
FOR i IN 0 .. arrayJson1.get_size - 1 SELECT coalesce(JSON_ARRAYAGG(elemA), '[]') foo
LOOP INTO aDiffB
-- `$[*]` selects each element of the array FROM
-- `(@ == v)` is a filter expression that check whether each matched element is equal to some value `v` (
filterExpression := '$[*]?(@ == ' || (arrayJson1.get(i).to_clob()) ||')'; SELECT elemA FROM json_table(clobA, '$[*]' columns (elemA NUMBER PATH '$'))
IF NOT json_exists(arrayClob2, filterExpression) ) arrayA
THEN LEFT JOIN
outputJsonArray.append(arrayJson1.get(i)); (
END IF; SELECT elemB FROM json_table(clobB, '$[*]' columns (elemB NUMBER PATH '$'))
END LOOP; ) arrayB
RETURN outputJsonArray.to_clob(); ON elemA = elemB
WHERE elemB IS NULL;
RETURN aDiffB;
END; END;
/ /