Skip to content

Commit

Permalink
IMPALA-13085: Add warning and NULL out DECIMAL values in Iceberg meta…
Browse files Browse the repository at this point in the history
…data tables

DECIMAL values are not supported in Iceberg metadata tables and Impala
runs on a DCHECK and crashes if it encounters one.

Until this issue is properly fixed (see IMPALA-13080), this commit
introduces a temporary solution: DECIMAL values coming from Iceberg
metadata tables are NULLed out and a warning is issued.

Testing:
 - added a DECIMAL column to the 'iceberg_metadata_alltypes' test table,
   so querying the `files` metadata table will include a DECIMAL in the
   'readable_metrics' struct.

Change-Id: I0c8791805bc4fa2112e092e65366ca2815f3fa22
Reviewed-on: http://gerrit.cloudera.org:8080/21429
Reviewed-by: Daniel Becker <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
  • Loading branch information
d-becker authored and Impala Public Jenkins committed May 28, 2024
1 parent d0237fb commit 2e093bb
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 5 deletions.
22 changes: 20 additions & 2 deletions be/src/exec/iceberg-metadata/iceberg-row-reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ namespace impala {
IcebergRowReader::IcebergRowReader(ScanNode* scan_node,
IcebergMetadataScanner* metadata_scanner)
: scan_node_(scan_node),
metadata_scanner_(metadata_scanner) {}
metadata_scanner_(metadata_scanner),
unsupported_decimal_warning_emitted_(false) {}

Status IcebergRowReader::InitJNI() {
DCHECK(list_cl_ == nullptr) << "InitJNI() already called!";
Expand Down Expand Up @@ -120,7 +121,10 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const jobject* struct_like_row,
} case TYPE_DOUBLE: { // java.lang.Double
RETURN_IF_ERROR(WriteDoubleSlot(env, accessed_value, slot));
break;
} case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
} case TYPE_DECIMAL: {
RETURN_IF_ERROR(WriteDecimalSlot(slot_desc, tuple, state));
break;
}case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
RETURN_IF_ERROR(WriteTimeStampSlot(env, accessed_value, slot));
break;
} case TYPE_STRING: {
Expand Down Expand Up @@ -220,6 +224,20 @@ Status IcebergRowReader::WriteDoubleSlot(JNIEnv* env, const jobject &accessed_va
return Status::OK();
}

Status IcebergRowReader::WriteDecimalSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
RuntimeState* state) {
// TODO IMPALA-13080: Handle DECIMALs without NULLing them out.
constexpr const char* warning = "DECIMAL values from Iceberg metadata tables "
"are displayed as NULL. See IMPALA-13080.";
if (!unsupported_decimal_warning_emitted_) {
unsupported_decimal_warning_emitted_ = true;
LOG(WARNING) << warning;
state->LogError(ErrorMsg(TErrorCode::NOT_IMPLEMENTED_ERROR, warning));
}
tuple->SetNull(slot_desc->null_indicator_offset());
return Status::OK();
}

Status IcebergRowReader::WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value,
void* slot) {
DCHECK(accessed_value != nullptr);
Expand Down
6 changes: 6 additions & 0 deletions be/src/exec/iceberg-metadata/iceberg-row-reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ class IcebergRowReader {
/// IcebergMetadataScanner class, used to get and access values inside java objects.
IcebergMetadataScanner* metadata_scanner_;

/// We want to emit a warning about DECIMAL values being NULLed out at most once. This
/// member keeps track of whether the warning has already been emitted.
bool unsupported_decimal_warning_emitted_;

// Writes a Java value into the target tuple. 'struct_like_row' is only used for struct
// types. It is needed because struct children reside directly in the parent tuple of
// the struct.
Expand All @@ -99,6 +103,8 @@ class IcebergRowReader {
WARN_UNUSED_RESULT;
Status WriteDoubleSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
WARN_UNUSED_RESULT;
Status WriteDecimalSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
RuntimeState* state) WARN_UNUSED_RESULT;
/// Iceberg TimeStamp is parsed into TimestampValue.
Status WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
WARN_UNUSED_RESULT;
Expand Down
5 changes: 4 additions & 1 deletion testdata/datasets/functional/functional_schema_template.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3905,7 +3905,7 @@ CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
dt date,
s string,
bn binary,
-- TODO IMPALA-13080: Add decimal.
dc decimal,
strct struct<i: int>,
arr array<double>,
mp map<int, float>
Expand All @@ -3924,6 +3924,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
to_date("2024-05-14"),
"Some string",
"bin1",
15.48,
named_struct("i", 10),
array(cast(10.0 as double), cast(20.0 as double)),
map(10, cast(10.0 as float), 100, cast(100.0 as float))
Expand All @@ -3938,6 +3939,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
to_date("2025-06-15"),
"A string",
NULL,
5.8,
named_struct("i", -150),
array(cast(-10.0 as double), cast(-2e100 as double)),
map(10, cast(0.5 as float), 101, cast(1e3 as float))
Expand All @@ -3952,6 +3954,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
NULL,
NULL,
"bin2",
NULL,
named_struct("i", -150),
array(cast(-12.0 as double), cast(-2e100 as double)),
map(10, cast(0.5 as float), 101, cast(1e3 as float))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -834,12 +834,11 @@ STRING,DATE,DATE
# Query the `files` metadata table of a table that contains all types - because of lower
# and upper bounds, the 'readable_metrics' struct of the metadata table will also contain
# all types.
# TODO IMPALA-13080: Add DECIMAL.
####
---- QUERY
select readable_metrics from functional_parquet.iceberg_metadata_alltypes.`files`;
---- RESULTS
regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-2e-100,"upper_bound":2e\+100},"dt":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14","upper_bound":"2025-06-15"},"f":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":2.000000026702864e-10,"upper_bound":1999999973982208},"i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":5},"l":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-10,"upper_bound":150},"mp.key":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"mp.value":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":0.5,"upper_bound":1000},"s":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"A string","upper_bound":"Some string"},"strct.i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-150,"upper_bound":10},"ts":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14 14:51:12","upper_bound":"2025-06-15 18:51:12"}}'
regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-2e-100,"upper_bound":2e\+100},"dc":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"dt":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14","upper_bound":"2025-06-15"},"f":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":2.000000026702864e-10,"upper_bound":1999999973982208},"i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":5},"l":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-10,"upper_bound":150},"mp.key":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"mp.value":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":0.5,"upper_bound":1000},"s":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"A string","upper_bound":"Some string"},"strct.i":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":-150,"upper_bound":10},"ts":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"2024-05-14 14:51:12","upper_bound":"2025-06-15 18:51:12"}}'
---- TYPES
STRING
====
Expand Down

0 comments on commit 2e093bb

Please sign in to comment.