-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIVE-28586: Iceberg: Support for write order in iceberg tables at CRE…
…ATE TABLE (Zoltan Ratkai, reviewed by Ayush Saxena, Butao Zhang, Denys Kuzmenko, Shohei Okumiya) Closes #5541
- Loading branch information
Showing
12 changed files
with
477 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
iceberg/iceberg-handler/src/test/queries/positive/iceberg_create_locally_ordered_table.q
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
-- Mask neededVirtualColumns due to non-strict order | ||
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/ | ||
-- Mask the totalSize value as it can have slight variability, causing test flakiness | ||
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ | ||
-- Mask random uuid | ||
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask a random snapshot id | ||
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/ | ||
-- Mask added file size | ||
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask total file size | ||
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask removed file size | ||
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask current-snapshot-timestamp-ms | ||
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ | ||
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ | ||
-- Mask iceberg version | ||
--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/ | ||
set hive.llap.io.enabled=true; | ||
set hive.vectorized.execution.enabled=true; | ||
set hive.optimize.shared.work.merge.ts.schema=true; | ||
|
||
create table ice_orc (id int, text string) stored by iceberg stored as orc; | ||
|
||
insert into ice_orc values (3, "3"),(2, "2"),(4, "4"),(5, "5"),(1, "1"),(2, "3"),(3,null),(2,null),(null,"a"); | ||
|
||
describe formatted ice_orc; | ||
describe extended ice_orc; | ||
set hive.fetch.task.conversion=more; | ||
select * from ice_orc; | ||
|
||
create table ice_orc_sorted (id int, text string) write locally ordered by id desc nulls first, text asc nulls last stored by iceberg stored as orc; | ||
|
||
insert into ice_orc_sorted values (3, "3"),(2, "2"),(4, "4"),(5, "5"),(1, "1"),(2, "3"),(3,null),(2,null),(null,"a"); | ||
|
||
describe formatted ice_orc_sorted; | ||
describe extended ice_orc_sorted; | ||
set hive.fetch.task.conversion=more; | ||
select * from ice_orc_sorted; | ||
|
||
drop table ice_orc; | ||
drop table ice_orc_sorted; | ||
|
207 changes: 207 additions & 0 deletions
207
...iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_ordered_table.q.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
PREHOOK: query: create table ice_orc (id int, text string) stored by iceberg stored as orc | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@ice_orc | ||
POSTHOOK: query: create table ice_orc (id int, text string) stored by iceberg stored as orc | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@ice_orc | ||
PREHOOK: query: insert into ice_orc values (3, "3"),(2, "2"),(4, "4"),(5, "5"),(1, "1"),(2, "3"),(3,null),(2,null),(null,"a") | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: _dummy_database@_dummy_table | ||
PREHOOK: Output: default@ice_orc | ||
POSTHOOK: query: insert into ice_orc values (3, "3"),(2, "2"),(4, "4"),(5, "5"),(1, "1"),(2, "3"),(3,null),(2,null),(null,"a") | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: _dummy_database@_dummy_table | ||
POSTHOOK: Output: default@ice_orc | ||
PREHOOK: query: describe formatted ice_orc | ||
PREHOOK: type: DESCTABLE | ||
PREHOOK: Input: default@ice_orc | ||
POSTHOOK: query: describe formatted ice_orc | ||
POSTHOOK: type: DESCTABLE | ||
POSTHOOK: Input: default@ice_orc | ||
# col_name data_type comment | ||
id int | ||
text string | ||
|
||
# Detailed Table Information | ||
Database: default | ||
#### A masked pattern was here #### | ||
Retention: 0 | ||
#### A masked pattern was here #### | ||
Table Type: EXTERNAL_TABLE | ||
Table Parameters: | ||
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"text\":\"true\"}} | ||
EXTERNAL TRUE | ||
bucketing_version 2 | ||
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"text\",\"required\":false,\"type\":\"string\"}]} | ||
current-snapshot-id #Masked# | ||
current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"9\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"9\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} | ||
current-snapshot-timestamp-ms #Masked# | ||
format-version 2 | ||
iceberg.orc.files.only true | ||
#### A masked pattern was here #### | ||
numFiles 1 | ||
numRows 9 | ||
parquet.compression zstd | ||
#### A masked pattern was here #### | ||
rawDataSize 0 | ||
serialization.format 1 | ||
snapshot-count 1 | ||
storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler | ||
table_type ICEBERG | ||
totalSize #Masked# | ||
#### A masked pattern was here #### | ||
uuid #Masked# | ||
write.delete.mode merge-on-read | ||
write.format.default orc | ||
write.merge.mode merge-on-read | ||
write.update.mode merge-on-read | ||
|
||
# Storage Information | ||
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe | ||
InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat | ||
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat | ||
Compressed: No | ||
Sort Columns: [] | ||
PREHOOK: query: describe extended ice_orc | ||
PREHOOK: type: DESCTABLE | ||
PREHOOK: Input: default@ice_orc | ||
POSTHOOK: query: describe extended ice_orc | ||
POSTHOOK: type: DESCTABLE | ||
POSTHOOK: Input: default@ice_orc | ||
id int | ||
text string | ||
|
||
#### A masked pattern was here #### | ||
PREHOOK: query: select * from ice_orc | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@ice_orc | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: select * from ice_orc | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@ice_orc | ||
#### A masked pattern was here #### | ||
3 3 | ||
2 2 | ||
4 4 | ||
5 5 | ||
1 1 | ||
2 3 | ||
3 NULL | ||
2 NULL | ||
NULL a | ||
PREHOOK: query: create table ice_orc_sorted (id int, text string) write locally ordered by id desc nulls first, text asc nulls last stored by iceberg stored as orc | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@ice_orc_sorted | ||
POSTHOOK: query: create table ice_orc_sorted (id int, text string) write locally ordered by id desc nulls first, text asc nulls last stored by iceberg stored as orc | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@ice_orc_sorted | ||
PREHOOK: query: insert into ice_orc_sorted values (3, "3"),(2, "2"),(4, "4"),(5, "5"),(1, "1"),(2, "3"),(3,null),(2,null),(null,"a") | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: _dummy_database@_dummy_table | ||
PREHOOK: Output: default@ice_orc_sorted | ||
POSTHOOK: query: insert into ice_orc_sorted values (3, "3"),(2, "2"),(4, "4"),(5, "5"),(1, "1"),(2, "3"),(3,null),(2,null),(null,"a") | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: _dummy_database@_dummy_table | ||
POSTHOOK: Output: default@ice_orc_sorted | ||
PREHOOK: query: describe formatted ice_orc_sorted | ||
PREHOOK: type: DESCTABLE | ||
PREHOOK: Input: default@ice_orc_sorted | ||
POSTHOOK: query: describe formatted ice_orc_sorted | ||
POSTHOOK: type: DESCTABLE | ||
POSTHOOK: Input: default@ice_orc_sorted | ||
# col_name data_type comment | ||
id int | ||
text string | ||
|
||
# Detailed Table Information | ||
Database: default | ||
#### A masked pattern was here #### | ||
Retention: 0 | ||
#### A masked pattern was here #### | ||
Table Type: EXTERNAL_TABLE | ||
Table Parameters: | ||
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"text\":\"true\"}} | ||
EXTERNAL TRUE | ||
bucketing_version 2 | ||
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"text\",\"required\":false,\"type\":\"string\"}]} | ||
current-snapshot-id #Masked# | ||
current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"9\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"9\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} | ||
current-snapshot-timestamp-ms #Masked# | ||
default-sort-order {\"order-id\":1,\"fields\":[{\"transform\":\"identity\",\"source-id\":1,\"direction\":\"desc\",\"null-order\":\"nulls-first\"},{\"transform\":\"identity\",\"source-id\":2,\"direction\":\"asc\",\"null-order\":\"nulls-last\"}]} | ||
format-version 2 | ||
iceberg.orc.files.only true | ||
#### A masked pattern was here #### | ||
numFiles 1 | ||
numRows 9 | ||
parquet.compression zstd | ||
#### A masked pattern was here #### | ||
rawDataSize 0 | ||
serialization.format 1 | ||
snapshot-count 1 | ||
storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler | ||
table_type ICEBERG | ||
totalSize #Masked# | ||
#### A masked pattern was here #### | ||
uuid #Masked# | ||
write.delete.mode merge-on-read | ||
write.format.default orc | ||
write.merge.mode merge-on-read | ||
write.update.mode merge-on-read | ||
|
||
# Storage Information | ||
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe | ||
InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat | ||
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat | ||
Compressed: No | ||
Sort Columns: [FieldSchema(name:id, type:int, comment:Transform: identity, Sort direction: DESC, Null sort order: NULLS_FIRST), FieldSchema(name:text, type:string, comment:Transform: identity, Sort direction: ASC, Null sort order: NULLS_LAST)] | ||
PREHOOK: query: describe extended ice_orc_sorted | ||
PREHOOK: type: DESCTABLE | ||
PREHOOK: Input: default@ice_orc_sorted | ||
POSTHOOK: query: describe extended ice_orc_sorted | ||
POSTHOOK: type: DESCTABLE | ||
POSTHOOK: Input: default@ice_orc_sorted | ||
id int | ||
text string | ||
|
||
#### A masked pattern was here #### | ||
PREHOOK: query: select * from ice_orc_sorted | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@ice_orc_sorted | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: select * from ice_orc_sorted | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@ice_orc_sorted | ||
#### A masked pattern was here #### | ||
NULL a | ||
5 5 | ||
4 4 | ||
3 3 | ||
3 NULL | ||
2 2 | ||
2 3 | ||
2 NULL | ||
1 1 | ||
PREHOOK: query: drop table ice_orc | ||
PREHOOK: type: DROPTABLE | ||
PREHOOK: Input: default@ice_orc | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@ice_orc | ||
POSTHOOK: query: drop table ice_orc | ||
POSTHOOK: type: DROPTABLE | ||
POSTHOOK: Input: default@ice_orc | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@ice_orc | ||
PREHOOK: query: drop table ice_orc_sorted | ||
PREHOOK: type: DROPTABLE | ||
PREHOOK: Input: default@ice_orc_sorted | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@ice_orc_sorted | ||
POSTHOOK: query: drop table ice_orc_sorted | ||
POSTHOOK: type: DROPTABLE | ||
POSTHOOK: Input: default@ice_orc_sorted | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@ice_orc_sorted |
Oops, something went wrong.