From 8df8cf7583dc262c18cea0dba75dacb63bab1f00 Mon Sep 17 00:00:00 2001 From: Harshal Patel Date: Tue, 21 Jan 2025 18:06:05 +0530 Subject: [PATCH] HIVE-28622: Duplicate Entries in TXN_WRITE_NOTIFICATION_LOG Due to Oracle's Handling of Empty Strings In Oracle, empty strings ('') are treated as NULL values for VARCHAR2 and CHAR data types. This behavior is unique to Oracle and can be confusing, as an empty string is typically considered distinct from NULL in other databases. As a result, the TXN_WRITE_NOTIFICATION_LOG table receives duplicate entries for a single Hive ACID transaction involving MERGE statements. This discrepancy leads to issues: the _files and _dumpmetadata files in a Hive ACID incremental dump will not align if the dump scope includes one or more MERGE statements. Consequently, the Hive ACID incremental LOAD fails at the target (DR), blocking subsequent replication executions. Solution: * Add an additional check for the partition being null Testing: * Tested on a cluster with Oracle and MySQL as the backend databases --- .../hive/hcatalog/listener/DbNotificationListener.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index 7262e7f873e2..7636d646da65 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -1205,7 +1205,7 @@ private void addWriteNotificationLog(List eventBatch, List insertList = new ArrayList<>(); Map> updateMap = new HashMap<>(); @@ -1221,7 +1221,8 @@ private void addWriteNotificationLog(List eventBatch, List