Skip to content

Commit

Permalink
HIVE-28673: Fix issues in JSON SerDe implementations related to Decimal (#5584) (Araika Singh, reviewed by Indhumathi Muthumurugesh, Shohei Okumiya)
Browse files Browse the repository at this point in the history
  • Loading branch information
armitage420 authored Feb 6, 2025
1 parent da272b4 commit fcb59c8
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 6 deletions.
2 changes: 1 addition & 1 deletion data/files/jsonserde.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"binarycolumn1" : -2, "binarycolumn2" : false, "binarycolumn3" : null, "binarycolumn4" : true, "binarycolumn5" : 1.23e45, "binarycolumn6" : "value", "booleancaseinsensitive" : "TrUE", "booleanstring" : "true", "booleanboolean" : true, "stringfalse" : "FaLSE", "somestring" : "somestringhere", "booleannull" : null, "booleannumfalse" : 0, "booleannumtrue" : -1}
{"binarycolumn1" : -2, "binarycolumn2" : false, "binarycolumn3" : null, "binarycolumn4" : true, "binarycolumn5" : 1.23e45, "binarycolumn6" : "value", "booleancaseinsensitive" : "TrUE", "booleanstring" : "true", "booleanboolean" : true, "stringfalse" : "FaLSE", "somestring" : "somestringhere", "booleannull" : null, "booleannumfalse" : 0, "booleannumtrue" : -1, "decimalcol1" : -9999999999999999.99, "decimalcol2" : 9999999999999999.99, "decimalcol3" : 1000000000000000000000000000000000000.00, "decimalcol4" : 99.999 , "decimalcol5" : 1e39}
32 changes: 31 additions & 1 deletion ql/src/test/queries/clientpositive/json_serde3.q
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ drop table if exists json_serde3_1;
drop table if exists json_serde3_2;
drop table if exists json_serde3_3;
drop table if exists json_serde3_4;
drop table if exists json_serde3_5;
drop table if exists json_serde3_6;

create table json_serde3_1 (
binarycolumn1 binary,
Expand Down Expand Up @@ -64,7 +66,35 @@ INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true, "FaLSE", "somestri

select * from json_serde3_4;

-- HIVE-28673: decimal columns read and written through
-- org.apache.hadoop.hive.serde2.JsonSerDe.
-- Columns use decimal(18,2) and decimal(38,2) so that the inputs below cover
-- values that fit the declared type, a value with more fractional digits than
-- the scale (99.999), and values whose recorded result is NULL.
create table json_serde3_5 (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';

-- Row 1 comes from the JSON fixture file (read path); row 2 carries the same
-- five values but goes through INSERT ... VALUES (write path, then read back).
LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_5;
INSERT INTO TABLE json_serde3_5 VALUES (-9999999999999999.99, 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);

-- Recorded expected output is the same for both rows: decimalcol1 and
-- decimalcol2 unchanged, decimalcol3 and decimalcol5 NULL, decimalcol4 = 100.00.
select * from json_serde3_5;

-- HIVE-28673: same decimal scenario as json_serde3_5, but using the HCatalog
-- SerDe class org.apache.hive.hcatalog.data.JsonSerDe.
-- Column types and input values are identical to json_serde3_5.
create table json_serde3_6 (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';

-- Row 1 comes from the JSON fixture file (read path); row 2 carries the same
-- five values but goes through INSERT ... VALUES (write path, then read back).
LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_6;
INSERT INTO TABLE json_serde3_6 VALUES (-9999999999999999.99, 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);

-- Recorded expected output is the same for both rows: decimalcol1 and
-- decimalcol2 unchanged, decimalcol3 and decimalcol5 NULL, decimalcol4 = 100.00.
select * from json_serde3_6;

drop table json_serde3_1;
drop table json_serde3_2;
drop table json_serde3_3;
drop table json_serde3_4;
drop table json_serde3_4;
drop table json_serde3_5;
drop table json_serde3_6;
138 changes: 136 additions & 2 deletions ql/src/test/results/clientpositive/llap/json_serde3.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists json_serde3_4
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: drop table if exists json_serde3_5
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists json_serde3_5
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: drop table if exists json_serde3_6
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists json_serde3_6
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: create table json_serde3_1 (
binarycolumn1 binary,
binarycolumn2 binary,
Expand Down Expand Up @@ -77,7 +89,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde3_1
#### A masked pattern was here ####
-2 FALSE NULL TRUE 1.23E45 value
-2 false NULL true 1.23E45 value
-2 false NULL true 1.23E+45 value
PREHOOK: query: create table json_serde3_2 (
binarycolumn1 binary,
binarycolumn2 binary,
Expand Down Expand Up @@ -131,7 +143,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde3_2
#### A masked pattern was here ####
-2 FALSE NULL TRUE 1.23E45 value
-2 false NULL true 1.23E45 value
-2 false NULL true 1.23E+45 value
PREHOOK: query: create table json_serde3_3 (
booleancaseinsensitive boolean,
booleanstring boolean,
Expand Down Expand Up @@ -252,6 +264,108 @@ POSTHOOK: Input: default@json_serde3_4
#### A masked pattern was here ####
true true true false true NULL false true
true true true false true NULL false true
PREHOOK: query: create table json_serde3_5 (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde3_5
POSTHOOK: query: create table json_serde3_5 (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde3_5
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_5
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@json_serde3_5
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_5
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@json_serde3_5
PREHOOK: query: INSERT INTO TABLE json_serde3_5 VALUES (-9999999999999999.99, 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@json_serde3_5
POSTHOOK: query: INSERT INTO TABLE json_serde3_5 VALUES (-9999999999999999.99, 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@json_serde3_5
POSTHOOK: Lineage: json_serde3_5.decimalcol1 SCRIPT []
POSTHOOK: Lineage: json_serde3_5.decimalcol2 SCRIPT []
POSTHOOK: Lineage: json_serde3_5.decimalcol3 SCRIPT []
POSTHOOK: Lineage: json_serde3_5.decimalcol4 SCRIPT []
POSTHOOK: Lineage: json_serde3_5.decimalcol5 SCRIPT []
PREHOOK: query: select * from json_serde3_5
PREHOOK: type: QUERY
PREHOOK: Input: default@json_serde3_5
#### A masked pattern was here ####
POSTHOOK: query: select * from json_serde3_5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde3_5
#### A masked pattern was here ####
-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
PREHOOK: query: create table json_serde3_6 (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde3_6
POSTHOOK: query: create table json_serde3_6 (
decimalcol1 decimal(18,2),
decimalcol2 decimal(38,2),
decimalcol3 decimal(38,2),
decimalcol4 decimal(18,2),
decimalcol5 decimal(38,2))
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde3_6
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_6
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@json_serde3_6
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_6
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@json_serde3_6
PREHOOK: query: INSERT INTO TABLE json_serde3_6 VALUES (-9999999999999999.99, 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@json_serde3_6
POSTHOOK: query: INSERT INTO TABLE json_serde3_6 VALUES (-9999999999999999.99, 9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@json_serde3_6
POSTHOOK: Lineage: json_serde3_6.decimalcol1 SCRIPT []
POSTHOOK: Lineage: json_serde3_6.decimalcol2 SCRIPT []
POSTHOOK: Lineage: json_serde3_6.decimalcol3 SCRIPT []
POSTHOOK: Lineage: json_serde3_6.decimalcol4 SCRIPT []
POSTHOOK: Lineage: json_serde3_6.decimalcol5 SCRIPT []
PREHOOK: query: select * from json_serde3_6
PREHOOK: type: QUERY
PREHOOK: Input: default@json_serde3_6
#### A masked pattern was here ####
POSTHOOK: query: select * from json_serde3_6
POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde3_6
#### A masked pattern was here ####
-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
PREHOOK: query: drop table json_serde3_1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@json_serde3_1
Expand Down Expand Up @@ -292,3 +406,23 @@ POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@json_serde3_4
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde3_4
PREHOOK: query: drop table json_serde3_5
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@json_serde3_5
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde3_5
POSTHOOK: query: drop table json_serde3_5
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@json_serde3_5
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde3_5
PREHOOK: query: drop table json_serde3_6
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@json_serde3_6
PREHOOK: Output: database:default
PREHOOK: Output: default@json_serde3_6
POSTHOOK: query: drop table json_serde3_6
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@json_serde3_6
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde3_6
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.fasterxml.jackson.databind.DeserializationFeature;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hive.common.type.Date;
Expand All @@ -56,6 +57,8 @@
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
import org.apache.hive.common.util.TimestampParser;
Expand Down Expand Up @@ -172,7 +175,7 @@ public HiveJsonReader(ObjectInspector oi) {
public HiveJsonReader(ObjectInspector oi, TimestampParser tsParser) {
this.tsParser = tsParser;
this.oi = oi;
this.objectMapper = new ObjectMapper();
this.objectMapper = new ObjectMapper().enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
}

/**
Expand Down Expand Up @@ -426,7 +429,8 @@ private Object visitLeafNode(final JsonNode leafNode,
case TIMESTAMP:
return tsParser.parseTimestamp(leafNode.asText());
case DECIMAL:
return HiveDecimal.create(leafNode.asText());
HiveDecimal decimal = HiveDecimal.create(leafNode.asText());
return HiveDecimalUtils.enforcePrecisionScale(decimal, (DecimalTypeInfo) typeInfo);
case TIMESTAMPLOCALTZ:
final Timestamp ts = tsParser.parseTimestamp(leafNode.asText());
final ZoneId zid = ((TimestampLocalTZTypeInfo) typeInfo).timeZone();
Expand Down

0 comments on commit fcb59c8

Please sign in to comment.