From 42bc228029bd2c438ed3459b94cdd39aec3c7ab5 Mon Sep 17 00:00:00 2001 From: featzhang Date: Sun, 21 Apr 2024 22:28:45 +0800 Subject: [PATCH] [INLONG-9456][Sort] Support Apache Paimon [INLONG-9456][Sort] Support Apache Paimon format with spotless d --- README.md | 65 +-- .../inlong/common/enums/TaskTypeEnum.java | 1 + .../src/plugins/nodes/defaults/Paimon.ts | 84 ++++ .../src/plugins/sinks/defaults/Paimon.ts | 397 ++++++++++++++++++ .../src/plugins/sources/defaults/Paimon.ts | 159 +++++++ .../ui/components/FieldList/FieldTypeConf.tsx | 20 + .../main/assemblies/sort-connectors-v1.15.xml | 8 + .../manager/client/File2PaimonExample.java | 170 ++++++++ .../manager/common/consts/DataNodeType.java | 1 + .../manager/common/consts/SourceType.java | 3 + .../manager/common/consts/StreamType.java | 3 + .../plugin/flink/enums/ConnectorJarType.java | 2 + .../pojo/node/paimon/PaimonDataNodeDTO.java | 78 ++++ .../pojo/node/paimon/PaimonDataNodeInfo.java | 55 +++ .../node/paimon/PaimonDataNodeRequest.java | 50 +++ .../pojo/sink/paimon/PaimonColumnInfo.java | 79 ++++ .../pojo/sink/paimon/PaimonPartition.java | 55 +++ .../sink/paimon/PaimonPartitionField.java | 41 ++ .../manager/pojo/sink/paimon/PaimonSink.java | 93 ++++ .../pojo/sink/paimon/PaimonSinkDTO.java | 118 ++++++ .../pojo/sink/paimon/PaimonSinkRequest.java | 75 ++++ .../pojo/sink/paimon/PaimonTableInfo.java | 41 ++ .../manager/pojo/sink/paimon/PaimonType.java | 64 +++ .../sort/node/ExtractNodeProviderFactory.java | 2 + .../sort/node/LoadNodeProviderFactory.java | 2 + .../sort/node/provider/PaimonProvider.java | 97 +++++ .../pojo/source/paimon/PaimonSource.java | 82 ++++ .../pojo/source/paimon/PaimonSourceDTO.java | 92 ++++ .../source/paimon/PaimonSourceRequest.java | 68 +++ .../node/paimon/PaimonDataNodeOperator.java | 107 +++++ .../sink/paimon/PaimonCatalogClient.java | 287 +++++++++++++ .../sink/paimon/PaimonFileFormat.java | 30 ++ .../sink/paimon/PaimonResourceOperator.java | 182 ++++++++ 
.../sink/paimon/PaimonTypeConverter.java | 55 +++ .../resource/sink/paimon/PaimonUtils.java | 116 +++++ .../sink/paimon/PaimonSinkOperator.java | 163 +++++++ .../source/paimon/PaimonSourceOperator.java | 85 ++++ .../service/sink/PaimonSinkServiceTest.java | 95 +++++ inlong-sort/README.md | 1 + .../protocol/constant/PaimonConstant.java | 174 ++++++++ .../sort/protocol/node/ExtractNode.java | 2 + .../inlong/sort/protocol/node/LoadNode.java | 2 + .../inlong/sort/protocol/node/Node.java | 4 + .../node/extract/PaimonExtractNode.java | 157 +++++++ .../protocol/node/load/PaimonLoadNode.java | 189 +++++++++ .../node/extract/PaimonExtractNodeTest.java | 64 +++ .../node/load/PaimonLoadNodeTest.java | 57 +++ inlong-sort/sort-core/pom.xml | 6 + .../sort/parser/PaimonNodeSqlParserTest.java | 185 ++++++++ .../sort-flink/sort-flink-v1.15/pom.xml | 11 + .../sort-connectors/paimon/pom.xml | 106 +++++ .../table/sink/PaimonTableInlongFactory.java | 55 +++ .../org.apache.flink.table.factories.Factory | 16 + .../sort-flink-v1.15/sort-connectors/pom.xml | 1 + 54 files changed, 4123 insertions(+), 32 deletions(-) create mode 100644 inlong-dashboard/src/plugins/nodes/defaults/Paimon.ts create mode 100644 inlong-dashboard/src/plugins/sinks/defaults/Paimon.ts create mode 100644 inlong-dashboard/src/plugins/sources/defaults/Paimon.ts create mode 100644 inlong-manager/manager-client-examples/src/test/java/org/apache/inlong/manager/client/File2PaimonExample.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeDTO.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeInfo.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeRequest.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonColumnInfo.java create mode 100644 
inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartition.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartitionField.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSink.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkDTO.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkRequest.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonTableInfo.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonType.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/provider/PaimonProvider.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSource.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceDTO.java create mode 100644 inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceRequest.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/node/paimon/PaimonDataNodeOperator.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonCatalogClient.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonFileFormat.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonResourceOperator.java create mode 100644 
inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonTypeConverter.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonUtils.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/sink/paimon/PaimonSinkOperator.java create mode 100644 inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/source/paimon/PaimonSourceOperator.java create mode 100644 inlong-manager/manager-service/src/test/java/org/apache/inlong/manager/service/sink/PaimonSinkServiceTest.java create mode 100644 inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/constant/PaimonConstant.java create mode 100644 inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNode.java create mode 100644 inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNode.java create mode 100644 inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNodeTest.java create mode 100644 inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNodeTest.java create mode 100644 inlong-sort/sort-core/src/test/java/org/apache/inlong/sort/parser/PaimonNodeSqlParserTest.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/pom.xml create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/java/org/apache/inlong/sort/paimon/table/sink/PaimonTableInlongFactory.java create mode 100644 inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory diff --git a/README.md b/README.md index 6c09eca8ba7..dd3de0e3703 100644 --- a/README.md +++ b/README.md @@ -72,38 +72,39 @@ You can use InLong in the following ways: - Visualize your 
operations on [InLong dashboard](https://inlong.apache.org/docs/next/user_guide/dashboard_usage). ## Supported Data Nodes (Updating) -| Type | Name | Version | -|--------------|-------------------|------------------------------| -| Extract Node | Auto Push | None | -| | File | None | -| | Kafka | 2.x | -| | MongoDB | >= 3.6 | -| | MQTT | >= 3.1 | -| | MySQL | 5.6, 5.7, 8.0.x | -| | Oracle | 11,12,19 | -| | PostgreSQL | 9.6, 10, 11, 12 | -| | Pulsar | 2.8.x | -| | Redis | 2.6.x | -| | SQLServer | 2012, 2014, 2016, 2017, 2019 | -| Load Node | Auto Consumption | None | -| | ClickHouse | 20.7+ | -| | Elasticsearch | 6.x, 7.x | -| | Greenplum | 4.x, 5.x, 6.x | -| | HBase | 2.2.x | -| | HDFS | 2.x, 3.x | -| | Hive | 1.x, 2.x, 3.x | -| | Iceberg | 0.12.x | -| | Hudi | 0.12.x | -| | Kafka | 2.x | -| | MySQL | 5.6, 5.7, 8.0.x | -| | Oracle | 11, 12, 19 | -| | PostgreSQL | 9.6, 10, 11, 12 | -| | SQLServer | 2012, 2014, 2016, 2017, 2019 | -| | TDSQL-PostgreSQL | 10.17 | -| | Doris | >= 0.13 | -| | StarRocks | >= 2.0 | -| | Kudu | >= 1.12.0 | -| | Redis | >= 3.0 | +| Type | Name | Version | +|--------------|------------------|------------------------------| +| Extract Node | Auto Push | None | +| | File | None | +| | Kafka | 2.x | +| | MongoDB | >= 3.6 | +| | MQTT | >= 3.1 | +| | MySQL | 5.6, 5.7, 8.0.x | +| | Oracle | 11,12,19 | +| | PostgreSQL | 9.6, 10, 11, 12 | +| | Pulsar | 2.8.x | +| | Redis | 2.6.x | +| | SQLServer | 2012, 2014, 2016, 2017, 2019 | +| Load Node | Auto Consumption | None | +| | ClickHouse | 20.7+ | +| | Elasticsearch | 6.x, 7.x | +| | Greenplum | 4.x, 5.x, 6.x | +| | HBase | 2.2.x | +| | HDFS | 2.x, 3.x | +| | Hive | 1.x, 2.x, 3.x | +| | Iceberg | 0.12.x | +| | Hudi | 0.12.x | +| | Paimon | 0.7.0+ | +| | Kafka | 2.x | +| | MySQL | 5.6, 5.7, 8.0.x | +| | Oracle | 11, 12, 19 | +| | PostgreSQL | 9.6, 10, 11, 12 | +| | SQLServer | 2012, 2014, 2016, 2017, 2019 | +| | TDSQL-PostgreSQL | 10.17 | +| | Doris | >= 0.13 | +| | StarRocks | >= 2.0 | +| | Kudu | >= 
1.12.0 | +| | Redis | >= 3.0 | ## Build InLong More detailed instructions can be found at [Quick Start](https://inlong.apache.org/docs/next/quick_start/how_to_build) section in the documentation. diff --git a/inlong-common/src/main/java/org/apache/inlong/common/enums/TaskTypeEnum.java b/inlong-common/src/main/java/org/apache/inlong/common/enums/TaskTypeEnum.java index c84ea142dbd..ba6b03a4bad 100644 --- a/inlong-common/src/main/java/org/apache/inlong/common/enums/TaskTypeEnum.java +++ b/inlong-common/src/main/java/org/apache/inlong/common/enums/TaskTypeEnum.java @@ -44,6 +44,7 @@ public enum TaskTypeEnum { REDIS(11), MQTT(12), HUDI(13), + PAIMON(14), // only used for unit test MOCK(201) diff --git a/inlong-dashboard/src/plugins/nodes/defaults/Paimon.ts b/inlong-dashboard/src/plugins/nodes/defaults/Paimon.ts new file mode 100644 index 00000000000..18a6d90ca83 --- /dev/null +++ b/inlong-dashboard/src/plugins/nodes/defaults/Paimon.ts @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
import { DataWithBackend } from '@/plugins/DataWithBackend';
import { RenderRow } from '@/plugins/RenderRow';
import { RenderList } from '@/plugins/RenderList';
import { NodeInfo } from '../common/NodeInfo';

const { I18n } = DataWithBackend;
const { FieldDecorator } = RenderRow;

// Status codes for which the connection fields are rendered read-only.
// NOTE(review): the meaning of 110 / 130 is defined outside this file — confirm.
const READ_ONLY_STATUSES = [110, 130];

/** Returns true when the form values carry one of the read-only status codes. */
const isReadOnly = values => READ_ONLY_STATUSES.includes(values?.status);

// Catalog types offered by the select; label and value are the same string.
const CATALOG_TYPES = ['HIVE', 'HADOOP'];

/**
 * Form/list description of a Paimon data node.
 *
 * Each decorated property becomes one form row; the I18n key supplies its label.
 */
export default class PaimonNode extends NodeInfo implements DataWithBackend, RenderRow, RenderList {
  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
  })
  @I18n('meta.Nodes.Paimon.Username')
  username: string;

  // Rendered as a password input; stored under the `token` property.
  @FieldDecorator({
    type: 'password',
    rules: [{ required: true }],
  })
  @I18n('meta.Nodes.Paimon.Password')
  token: string;

  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: values => {
      const disabled = isReadOnly(values);
      return { disabled, placeholder: 'thrift://127.0.0.1:9083' };
    },
  })
  @I18n('meta.Nodes.Paimon.Url')
  url: string;

  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: values => {
      const disabled = isReadOnly(values);
      return { disabled, placeholder: 'hdfs://127.0.0.1:9000/user/paimon/warehouse' };
    },
  })
  @I18n('meta.Nodes.Paimon.Warehouse')
  warehouse: string;

  @FieldDecorator({
    type: 'select',
    initialValue: 'HIVE',
    props: values => {
      // Build a fresh option list on every call, as the inline literal did.
      const options = CATALOG_TYPES.map(type => ({ label: type, value: type }));
      return { disabled: isReadOnly(values), options };
    },
  })
  @I18n('meta.Nodes.Paimon.CatalogType')
  catalogType: string;
}
import { DataWithBackend } from '@/plugins/DataWithBackend';
import { RenderRow } from '@/plugins/RenderRow';
import { RenderList } from '@/plugins/RenderList';
import i18n from '@/i18n';
import EditableTable from '@/ui/components/EditableTable';
import { sourceFields } from '../common/sourceFields';
import { SinkInfo } from '../common/SinkInfo';
import NodeSelect from '@/ui/components/NodeSelect';
import CreateTable from '@/ui/components/CreateTable';

const { I18n } = DataWithBackend;
const { FieldDecorator, SyncField, SyncCreateTableField, IngestionField } = RenderRow;
const { ColumnDecorator } = RenderList;

// Field types selectable for a sink column; label and value are the same string.
const paimonFieldTypes = [
  'int',
  'long',
  'string',
  'float',
  'double',
  'date',
  'timestamp',
  'time',
  'boolean',
  'decimal',
  'timestamptz',
  'binary',
  'fixed',
  'uuid',
].map(item => ({
  label: item,
  value: item,
}));

/**
 * Lists the partition strategies that are allowed for a given column type.
 *
 * @param fieldType the column type chosen in the field table (one of the type names above)
 * @returns the strategy options whose `disabled` flag is false for that type;
 *          'None' and 'Identity' are always included
 */
const matchPartitionStrategies = fieldType => {
  const data = [
    {
      label: 'None',
      value: 'None',
      disabled: false,
    },
    {
      label: 'Identity',
      value: 'Identity',
      disabled: false,
    },
    {
      label: 'Year',
      value: 'Year',
      disabled: !['timestamp', 'date'].includes(fieldType),
    },
    {
      label: 'Month',
      value: 'Month',
      disabled: !['timestamp', 'date'].includes(fieldType),
    },
    {
      label: 'Day',
      value: 'Day',
      disabled: !['timestamp', 'date'].includes(fieldType),
    },
    {
      label: 'Hour',
      value: 'Hour',
      disabled: fieldType !== 'timestamp',
    },
    {
      label: 'Bucket',
      value: 'Bucket',
      disabled: ![
        'string',
        'boolean',
        'short',
        'int',
        'long',
        'float',
        'double',
        'decimal',
      ].includes(fieldType),
    },
    {
      label: 'Truncate',
      value: 'Truncate',
      disabled: !['string', 'int', 'long', 'binary', 'decimal'].includes(fieldType),
    },
  ];

  return data.filter(item => !item.disabled);
};

/**
 * Form/list description of a Paimon sink.
 *
 * Most inputs are disabled when the sink status is 110.
 * NOTE(review): the meaning of status 110 is defined outside this file — confirm.
 */
export default class PaimonSink extends SinkInfo implements DataWithBackend, RenderRow, RenderList {
  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: values => ({
      disabled: [110].includes(values?.status),
    }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sinks.Paimon.DbName')
  dbName: string;

  // The table name is edited through the CreateTable component, which receives
  // the whole sink form state as `sinkObj`.
  @FieldDecorator({
    type: CreateTable,
    rules: [{ required: true }],
    props: values => ({
      disabled: [110].includes(values?.status),
      sinkType: values.sinkType,
      inlongGroupId: values.inlongGroupId,
      inlongStreamId: values.inlongStreamId,
      fieldName: 'tableName',
      sinkObj: {
        ...values,
      },
    }),
  })
  @ColumnDecorator()
  @I18n('meta.Sinks.Paimon.TableName')
  @SyncField()
  @IngestionField()
  tableName: string;

  @FieldDecorator({
    type: 'radio',
    rules: [{ required: true }],
    initialValue: 1,
    tooltip: i18n.t('meta.Sinks.EnableCreateResourceHelp'),
    props: values => ({
      disabled: [110].includes(values?.status),
      options: [
        {
          label: i18n.t('basic.Yes'),
          value: 1,
        },
        {
          label: i18n.t('basic.No'),
          value: 0,
        },
      ],
    }),
  })
  @IngestionField()
  @I18n('meta.Sinks.EnableCreateResource')
  enableCreateResource: number;

  @FieldDecorator({
    type: NodeSelect,
    rules: [{ required: true }],
    props: values => ({
      disabled: [110].includes(values?.status),
      nodeType: 'PAIMON',
    }),
  })
  @I18n('meta.Sinks.DataNodeName')
  @SyncField()
  @IngestionField()
  dataNodeName: string;

  // Only Parquet is selectable; the Orc and Avro options were left commented out
  // in the original and are not offered here.
  @FieldDecorator({
    type: 'select',
    rules: [{ required: true }],
    initialValue: 'Parquet',
    props: values => ({
      disabled: [110].includes(values?.status),
      options: [
        {
          label: 'Parquet',
          value: 'Parquet',
        },
      ],
    }),
  })
  @ColumnDecorator()
  @I18n('meta.Sinks.Paimon.FileFormat')
  @SyncField()
  @IngestionField()
  fileFormat: string;

  // Free-form key/value pairs edited as a two-column table.
  @FieldDecorator({
    type: EditableTable,
    rules: [{ required: false }],
    initialValue: [],
    tooltip: i18n.t('meta.Sinks.Paimon.ExtListHelper'),
    props: values => ({
      size: 'small',
      columns: [
        {
          title: 'Key',
          dataIndex: 'keyName',
          props: {
            disabled: [110].includes(values?.status),
          },
        },
        {
          title: 'Value',
          dataIndex: 'keyValue',
          props: {
            disabled: [110].includes(values?.status),
          },
        },
      ],
    }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sinks.Paimon.ExtList')
  extList: string;

  @FieldDecorator({
    type: 'select',
    rules: [{ required: true }],
    initialValue: 'EXACTLY_ONCE',
    isPro: true,
    props: values => ({
      disabled: [110].includes(values?.status),
      options: [
        {
          label: 'EXACTLY_ONCE',
          value: 'EXACTLY_ONCE',
        },
        {
          label: 'AT_LEAST_ONCE',
          value: 'AT_LEAST_ONCE',
        },
      ],
    }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sinks.Paimon.DataConsistency')
  dataConsistency: string;

  // Source-to-sink field mapping table.
  // The prop name was misspelled `upsetByFieldKey` here; it now matches the
  // `upsertByFieldKey` spelling used by createTableField below.
  @FieldDecorator({
    type: EditableTable,
    props: values => ({
      size: 'small',
      canBatchAdd: true,
      upsertByFieldKey: true,
      editing: ![110].includes(values?.status),
      columns: getFieldListColumns(values),
    }),
  })
  @IngestionField()
  sinkFieldList: Record<string, unknown>[];

  // Same columns as sinkFieldList, minus the two source-side columns.
  @FieldDecorator({
    type: EditableTable,
    initialValue: [],
    props: values => ({
      size: 'small',
      editing: ![110].includes(values?.status),
      columns: getFieldListColumns(values).filter(
        item => item.dataIndex !== 'sourceFieldName' && item.dataIndex !== 'sourceFieldType',
      ),
      canBatchAdd: true,
      upsertByFieldKey: true,
    }),
  })
  @SyncCreateTableField()
  createTableField: Record<string, unknown>[];

  @FieldDecorator({
    type: 'input',
    tooltip: i18n.t('meta.Sinks.Paimon.PrimaryKeyHelper'),
    props: values => ({
      disabled: [110].includes(values?.status),
    }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sinks.Paimon.PrimaryKey')
  primaryKey: string;

  @FieldDecorator({
    type: 'input',
    tooltip: i18n.t('meta.Sinks.Paimon.PartitionKeyHelper'),
    rules: [{ required: false }],
    props: values => ({
      disabled: [110].includes(values?.status),
    }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sinks.Paimon.PartitionKey')
  partitionKey: string;
}

/**
 * Builds the column definitions of the sink field tables.
 *
 * Declared after the class; it is only called from inside the `props` callbacks above.
 *
 * @param sinkValues current sink form values; only `status` is read
 * @returns the shared source-field columns followed by the Paimon-specific ones
 */
const getFieldListColumns = sinkValues => {
  return [
    ...sourceFields,
    {
      title: i18n.t('meta.Sinks.SinkFieldName'),
      width: 110,
      dataIndex: 'fieldName',
      rules: [
        { required: true },
        {
          pattern: /^[a-zA-Z_][a-zA-Z0-9_]*$/,
          message: i18n.t('meta.Sinks.SinkFieldNameRule'),
        },
      ],
      // Existing rows are locked at status 110; rows flagged isNew stay editable.
      props: (text, record, idx, isNew) => ({
        disabled: [110].includes(sinkValues?.status as number) && !isNew,
      }),
    },
    {
      title: i18n.t('meta.Sinks.SinkFieldType'),
      dataIndex: 'fieldType',
      width: 130,
      initialValue: paimonFieldTypes[0].value,
      type: 'select',
      rules: [{ required: true, message: `${i18n.t('meta.Sinks.FieldTypeMessage')}` }],
      props: (text, record, idx, isNew) => ({
        options: paimonFieldTypes,
        // If the row's current partitionStrategy is not allowed for the newly
        // selected type, return the first allowed strategy instead.
        // NOTE(review): presumably EditableTable merges the returned object into the row — confirm.
        onChange: value => {
          const partitionStrategies = matchPartitionStrategies(value);
          if (partitionStrategies.every(item => item.value !== record.partitionStrategy)) {
            return {
              partitionStrategy: partitionStrategies[0].value,
            };
          }
        },
        disabled: [110].includes(sinkValues?.status as number) && !isNew,
      }),
    },
    {
      // Shown only for 'fixed' columns.
      title: 'Length',
      dataIndex: 'fieldLength',
      type: 'inputnumber',
      props: {
        min: 0,
      },
      initialValue: 1,
      rules: [{ type: 'number', required: true }],
      visible: (text, record) => record.fieldType === 'fixed',
    },
    {
      // Shown only for 'decimal' columns.
      title: 'Precision',
      dataIndex: 'fieldPrecision',
      type: 'inputnumber',
      props: {
        min: 0,
      },
      initialValue: 1,
      rules: [{ type: 'number', required: true }],
      visible: (text, record) => record.fieldType === 'decimal',
    },
    {
      // Shown only for 'decimal' columns.
      title: 'Scale',
      dataIndex: 'fieldScale',
      type: 'inputnumber',
      props: {
        min: 0,
      },
      initialValue: 1,
      rules: [{ type: 'number', required: true }],
      visible: (text, record) => record.fieldType === 'decimal',
    },
    {
      title: i18n.t('meta.Sinks.FieldDescription'),
      dataIndex: 'fieldComment',
    },
  ];
};
import { DataWithBackend } from '@/plugins/DataWithBackend';
import { RenderRow } from '@/plugins/RenderRow';
import { RenderList } from '@/plugins/RenderList';
import { SourceInfo } from '../common/SourceInfo';
import i18n from '@/i18n';
import EditableTable from '@/ui/components/EditableTable';

const { I18n } = DataWithBackend;
const { FieldDecorator, SyncField, IngestionField } = RenderRow;
const { ColumnDecorator } = RenderList;

/**
 * True when the form status is 110 or 130.
 * NOTE(review): the meaning of these codes is defined outside this file — confirm.
 */
const isLocked = values => [110, 130].includes(values?.status);

/**
 * Creates a `props` callback for a text input: `disabled` follows isLocked,
 * and any extra props (such as a placeholder) are appended after it.
 */
const lockableProps =
  (extra = {}) =>
  values => ({
    disabled: isLocked(values),
    ...extra,
  });

/** Column definition for one side of the key/value ext table. */
const extColumn = (title, dataIndex, values) => ({
  title,
  dataIndex,
  props: {
    disabled: isLocked(values),
  },
});

/**
 * Form/list description of a Paimon source.
 *
 * Each decorated property becomes one form row; the I18n argument supplies its label.
 */
export default class PaimonSource
  extends SourceInfo
  implements DataWithBackend, RenderRow, RenderList
{
  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: lockableProps(),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sources.Paimon.DbName')
  dbName: string;

  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: lockableProps(),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sources.Paimon.TableName')
  tableName: string;

  // NOTE(review): the label below is a literal string rather than a 'meta.…' key
  // like the other fields — kept as in the original; confirm it is intended.
  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: lockableProps({ placeholder: 'thrift://127.0.0.1:9083' }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('Catalog URI')
  catalogUri: string;

  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    props: lockableProps({ placeholder: 'hdfs://127.0.0.1:9000/user/paimon/warehouse' }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sources.Paimon.Warehouse')
  warehouse: string;

  // This radio is disabled on status 101 only, unlike the other fields.
  @FieldDecorator({
    type: 'radio',
    rules: [{ required: true }],
    initialValue: false,
    tooltip: i18n.t('meta.Sources.Paimon.ReadStreamingSkipCompactionHelp'),
    props: values => {
      const yes = { label: i18n.t('basic.Yes'), value: true };
      const no = { label: i18n.t('basic.No'), value: false };
      return {
        disabled: values?.status === 101,
        options: [yes, no],
      };
    },
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sources.Paimon.ReadStreamingSkipCompaction')
  readStreamingSkipCompaction: boolean;

  @FieldDecorator({
    type: 'input',
    rules: [{ required: true }],
    tooltip: i18n.t('meta.Sources.Paimon.ReadStartCommitHelp'),
    props: lockableProps({ placeholder: '20221213211100' }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sources.Paimon.ReadStartCommit')
  readStartCommit: string;

  // Free-form key/value pairs edited as a two-column table.
  @FieldDecorator({
    type: EditableTable,
    rules: [{ required: false }],
    initialValue: [],
    tooltip: i18n.t('meta.Sources.Paimon.ExtListHelper'),
    props: values => ({
      size: 'small',
      columns: [extColumn('Key', 'keyName', values), extColumn('Value', 'keyValue', values)],
    }),
  })
  @ColumnDecorator()
  @SyncField()
  @IngestionField()
  @I18n('meta.Sources.Paimon.ExtList')
  extList: string;
}
b/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml index dd34470683f..4b0b68a167c 100644 --- a/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml +++ b/inlong-distribution/src/main/assemblies/sort-connectors-v1.15.xml @@ -107,6 +107,14 @@ 0644 + + ../inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/target + inlong-sort/connectors + + sort-connector-paimon-v1.15-${project.version}.jar + + 0644 + ../inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/kafka/target inlong-sort/connectors diff --git a/inlong-manager/manager-client-examples/src/test/java/org/apache/inlong/manager/client/File2PaimonExample.java b/inlong-manager/manager-client-examples/src/test/java/org/apache/inlong/manager/client/File2PaimonExample.java new file mode 100644 index 00000000000..743445c7846 --- /dev/null +++ b/inlong-manager/manager-client-examples/src/test/java/org/apache/inlong/manager/client/File2PaimonExample.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.client; + +import org.apache.inlong.manager.client.api.ClientConfiguration; +import org.apache.inlong.manager.client.api.InlongClient; +import org.apache.inlong.manager.client.api.InlongGroup; +import org.apache.inlong.manager.client.api.InlongGroupContext; +import org.apache.inlong.manager.client.api.InlongStreamBuilder; +import org.apache.inlong.manager.common.enums.FieldType; +import org.apache.inlong.manager.common.util.JsonUtils; +import org.apache.inlong.manager.pojo.group.InlongGroupInfo; +import org.apache.inlong.manager.pojo.sink.SinkField; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonColumnInfo; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonPartition; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSink; +import org.apache.inlong.manager.pojo.source.file.FileSource; +import org.apache.inlong.manager.pojo.stream.StreamField; + +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; +import org.apache.shiro.util.Assert; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Test class for file to paimon. 
+ */ +@Slf4j +@Disabled +public class File2PaimonExample extends BaseExample { + + @Test + public void testCreateGroupForPaimon() { + ClientConfiguration configuration = new ClientConfiguration(); + configuration.setWriteTimeout(10); + configuration.setReadTimeout(10); + configuration.setConnectTimeout(10); + configuration.setTimeUnit(TimeUnit.SECONDS); + configuration.setAuthentication(super.getInlongAuth()); + InlongClient inlongClient = InlongClient.create(super.getServiceUrl(), configuration); + + InlongGroupInfo groupInfo = super.createGroupInfo(); + try { + InlongGroup group = inlongClient.forGroup(groupInfo); + InlongStreamBuilder streamBuilder = group.createStream(createStreamInfo()); + streamBuilder.fields(createStreamFields()); + streamBuilder.source(createAgentFileSource()); + streamBuilder.sink(createPaimonSink()); + streamBuilder.initOrUpdate(); + // start group + InlongGroupContext inlongGroupContext = group.init(); + Assert.notNull(inlongGroupContext); + } catch (Exception e) { + e.printStackTrace(); + } + } + + @Test + public void testStopGroup() { + ClientConfiguration configuration = new ClientConfiguration(); + configuration.setWriteTimeout(10); + configuration.setReadTimeout(10); + configuration.setConnectTimeout(10); + configuration.setTimeUnit(TimeUnit.SECONDS); + configuration.setAuthentication(super.getInlongAuth()); + InlongClient inlongClient = InlongClient.create(super.getServiceUrl(), configuration); + InlongGroupInfo groupInfo = createGroupInfo(); + try { + InlongGroup group = inlongClient.forGroup(groupInfo); + InlongGroupContext groupContext = group.delete(); + Assert.notNull(groupContext); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private FileSource createAgentFileSource() { + FileSource fileSource = new FileSource(); + fileSource.setSourceName("{source.name}"); + fileSource.setAgentIp("{agent.ip}"); + fileSource.setPattern("/a/b/*.txt"); + fileSource.setTimeOffset("-1h"); + return fileSource; + } + + private List 
createStreamFields() { + List streamFieldList = Lists.newArrayList(); + streamFieldList.add(new StreamField(0, FieldType.STRING.toString(), "name", null, null)); + streamFieldList.add(new StreamField(1, FieldType.INT.toString(), "age", null, null)); + streamFieldList.add(new StreamField(2, FieldType.DECIMAL.toString(), "score", null, null)); + streamFieldList.add(new StreamField(3, FieldType.TIMESTAMP.toString(), "ts", null, null)); + return streamFieldList; + } + + /** + * Create Paimon sink + */ + public PaimonSink createPaimonSink() { + PaimonSink sink = new PaimonSink(); + + sink.setSinkName("{sink.name}"); + sink.setDbName("{db.name}"); + sink.setTableName("{table.name}"); + sink.setCatalogUri("thrift://{ip:port}"); + sink.setWarehouse("hdfs://{ip:port}/user/paimon/warehouse/"); + + final SinkField field1 = new SinkField(0, FieldType.INT.toString(), "age", FieldType.INT.toString(), "age"); + final SinkField field2 = new SinkField(1, FieldType.STRING.toString(), "name", FieldType.STRING.toString(), + "name"); + final SinkField field3 = new SinkField(3, FieldType.DECIMAL.toString(), "score", FieldType.DECIMAL.toString(), + "score"); + final SinkField field4 = new SinkField(3, FieldType.TIMESTAMP.toString(), "ts", FieldType.TIMESTAMP.toString(), + "ts"); + + // field ext param + // field1: bucket partition example + PaimonColumnInfo info1 = new PaimonColumnInfo(); + info1.setRequired(true); + info1.setPartitionStrategy(PaimonPartition.BUCKET.toString()); + info1.setBucketNum(10); + field1.setExtParams(JsonUtils.toJsonString(info1)); + + // field3: decimal column example + PaimonColumnInfo info3 = new PaimonColumnInfo(); + info3.setScale(5); + info3.setPrecision(10); // scale must be less than or equal to precision + field3.setExtParams(JsonUtils.toJsonString(info3)); + + // field4: hour partition example + PaimonColumnInfo info4 = new PaimonColumnInfo(); + info4.setPartitionStrategy(PaimonPartition.HOUR.toString()); + 
field4.setExtParams(JsonUtils.toJsonString(info4)); + + List fields = new ArrayList<>(); + fields.add(field1); + fields.add(field2); + fields.add(field3); + fields.add(field4); + sink.setSinkFieldList(fields); + + List> extList = new ArrayList<>(); + HashMap map = new HashMap<>(); + map.put("hoodie.datasource.hive_sync.partition_fields", "name"); + extList.add(map); + sink.setExtList(extList); + sink.setPrimaryKey("name"); + return sink; + } +} diff --git a/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/DataNodeType.java b/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/DataNodeType.java index fda7ee72624..cce78d60857 100644 --- a/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/DataNodeType.java +++ b/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/DataNodeType.java @@ -26,6 +26,7 @@ public class DataNodeType { public static final String KAFKA = "KAFKA"; public static final String ICEBERG = "ICEBERG"; public static final String HUDI = "HUDI"; + public static final String PAIMON = "PAIMON"; public static final String CLICKHOUSE = "CLICKHOUSE"; public static final String ELASTICSEARCH = "ELASTICSEARCH"; public static final String MYSQL = "MYSQL"; diff --git a/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/SourceType.java b/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/SourceType.java index f40593e4218..13ef3834244 100644 --- a/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/SourceType.java +++ b/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/SourceType.java @@ -22,6 +22,8 @@ import java.util.HashMap; import java.util.Map; +import static org.apache.inlong.manager.common.consts.DataNodeType.PAIMON; + /** * Constants of source type. 
*/ @@ -55,6 +57,7 @@ public class SourceType extends StreamType { put(REDIS, TaskTypeEnum.REDIS); put(MQTT, TaskTypeEnum.MQTT); put(HUDI, TaskTypeEnum.HUDI); + put(PAIMON, TaskTypeEnum.PAIMON); } }; diff --git a/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/StreamType.java b/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/StreamType.java index f8f70dfe192..6d2c36e0274 100644 --- a/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/StreamType.java +++ b/inlong-manager/manager-common/src/main/java/org/apache/inlong/manager/common/consts/StreamType.java @@ -28,6 +28,9 @@ public class StreamType { @SupportSortType(sortType = SortType.SORT_FLINK) public static final String HUDI = "HUDI"; + @SupportSortType(sortType = SortType.SORT_FLINK) + public static final String PAIMON = "PAIMON"; + @SupportSortType(sortType = SortType.SORT_FLINK) public static final String POSTGRESQL = "POSTGRESQL"; diff --git a/inlong-manager/manager-plugins/base/src/main/java/org/apache/inlong/manager/plugin/flink/enums/ConnectorJarType.java b/inlong-manager/manager-plugins/base/src/main/java/org/apache/inlong/manager/plugin/flink/enums/ConnectorJarType.java index 52a581a5020..a39a3f0cdc3 100644 --- a/inlong-manager/manager-plugins/base/src/main/java/org/apache/inlong/manager/plugin/flink/enums/ConnectorJarType.java +++ b/inlong-manager/manager-plugins/base/src/main/java/org/apache/inlong/manager/plugin/flink/enums/ConnectorJarType.java @@ -71,6 +71,8 @@ public enum ConnectorJarType { HUDI_SINK("hudiLoad", "hudi"), + PAIMON_SINK("paimonLoad", "paimon"), + HDFS_SINK("fileSystemLoad", ""), REDIS_SINK("redisLoad", "redis"), diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeDTO.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeDTO.java new file mode 100644 index 
00000000000..fda5e277b59 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeDTO.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.node.paimon; + +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.JsonUtils; + +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.validation.constraints.NotNull; + +/** + * Paimon data node info + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +@ApiModel("Paimon data node info") +public class PaimonDataNodeDTO { + + private static final Logger LOGGER = LoggerFactory.getLogger(PaimonDataNodeDTO.class); + + @ApiModelProperty("Catalog type, like: HIVE, HADOOP, default is HIVE") + 
@Builder.Default + private String catalogType = "HIVE"; + + @ApiModelProperty("Paimon data warehouse dir") + private String warehouse; + + /** + * Get the dto instance from the request + */ + public static PaimonDataNodeDTO getFromRequest(PaimonDataNodeRequest request, String extParams) { + PaimonDataNodeDTO dto = StringUtils.isNotBlank(extParams) + ? PaimonDataNodeDTO.getFromJson(extParams) + : new PaimonDataNodeDTO(); + return CommonBeanUtils.copyProperties(request, dto, true); + } + + /** + * Get the dto instance from the JSON string. + */ + public static PaimonDataNodeDTO getFromJson(@NotNull String extParams) { + try { + return JsonUtils.parseObject(extParams, PaimonDataNodeDTO.class); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.GROUP_INFO_INCORRECT, + String.format("Failed to parse extParams for Paimon node: %s", e.getMessage())); + } + } + +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeInfo.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeInfo.java new file mode 100644 index 00000000000..d4c53478d58 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeInfo.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.manager.pojo.node.paimon;
+
+import org.apache.inlong.manager.common.consts.DataNodeType;
+import org.apache.inlong.manager.common.util.CommonBeanUtils;
+import org.apache.inlong.manager.common.util.JsonTypeDefine;
+import org.apache.inlong.manager.pojo.node.DataNodeInfo;
+
+import io.swagger.annotations.ApiModel;
+import io.swagger.annotations.ApiModelProperty;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.ToString;
+
+/**
+ * Paimon data node info, registered under the {@link DataNodeType#PAIMON} type.
+ */
+@Data
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+@JsonTypeDefine(value = DataNodeType.PAIMON)
+@ApiModel("Paimon data node info")
+public class PaimonDataNodeInfo extends DataNodeInfo {
+
+    @ApiModelProperty("Catalog type, like: HIVE, HADOOP, default is HIVE")
+    private String catalogType = "HIVE"; // initial value until a setter overrides it
+
+    @ApiModelProperty("Paimon data warehouse dir")
+    private String warehouse;
+
+    public PaimonDataNodeInfo() {
+        this.setType(DataNodeType.PAIMON); // every new instance starts with the PAIMON node type
+    }
+
+    @Override
+    public PaimonDataNodeRequest genRequest() {
+        return CommonBeanUtils.copyProperties(this, PaimonDataNodeRequest::new); // copy into a new request
+    }
+}
diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeRequest.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeRequest.java
new file mode 100644
index 00000000000..9375a556ce2
--- /dev/null
+++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/node/paimon/PaimonDataNodeRequest.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.manager.pojo.node.paimon;
+
+import org.apache.inlong.manager.common.consts.DataNodeType;
+import org.apache.inlong.manager.common.util.JsonTypeDefine;
+import org.apache.inlong.manager.pojo.node.DataNodeRequest;
+
+import io.swagger.annotations.ApiModel;
+import io.swagger.annotations.ApiModelProperty;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.ToString;
+
+/**
+ * Paimon data node request, registered under the {@link DataNodeType#PAIMON} type.
+ */
+@Data
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+@JsonTypeDefine(value = DataNodeType.PAIMON)
+@ApiModel("Paimon data node request")
+public class PaimonDataNodeRequest extends DataNodeRequest {
+
+    @ApiModelProperty("Catalog type, like: HIVE, HADOOP, default is HIVE")
+    private String catalogType = "HIVE"; // initial value until a setter overrides it
+
+    @ApiModelProperty("Paimon data warehouse dir")
+    private String warehouse;
+
+    public PaimonDataNodeRequest() {
+        this.setType(DataNodeType.PAIMON); // every new instance starts with the PAIMON node type
+    }
+
+}
diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonColumnInfo.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonColumnInfo.java
new
file mode 100644 index 00000000000..c110448b61e --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonColumnInfo.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.JsonUtils; + +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.commons.lang3.StringUtils; + +/** + * Paimon column info + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class PaimonColumnInfo { + + @ApiModelProperty("Length of fixed type") + private Integer length; + + @ApiModelProperty("Precision of decimal type") + private Integer precision; + + @ApiModelProperty("Scale of decimal type") + private Integer scale; + + @ApiModelProperty("Field partition strategy, including: None, Identity, Year, Month, Day, Hour, Bucket, Truncate") + private String partitionStrategy; + + @ApiModelProperty("Bucket 
num param of bucket partition") + private Integer bucketNum; + + @ApiModelProperty("Width param of truncate partition") + private Integer width; + + // The following are passed from base field and need not be part of API for extra param + private String name; + private String type; + private String desc; + private boolean required; + + private boolean isPartition; + + /** + * Get the extra param from the Json + */ + public static PaimonColumnInfo getFromJson(String extParams) { + if (StringUtils.isEmpty(extParams)) { + return new PaimonColumnInfo(); + } + try { + return JsonUtils.parseObject(extParams, PaimonColumnInfo.class); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.SINK_INFO_INCORRECT.getMessage() + ": " + e.getMessage()); + } + } +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartition.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartition.java new file mode 100644 index 00000000000..82ce27f7e79 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartition.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.util.Preconditions; + +/** + * Paimon partition type + */ +public enum PaimonPartition { + + IDENTITY, + BUCKET, + TRUNCATE, + YEAR, + MONTH, + DAY, + HOUR, + NONE, + ; + + /** + * Get partition type from name + */ + public static PaimonPartition forName(String name) { + Preconditions.expectNotBlank(name, ErrorCodeEnum.INVALID_PARAMETER, "PaimonPartition should not be null"); + for (PaimonPartition value : values()) { + if (value.toString().equalsIgnoreCase(name)) { + return value; + } + } + throw new IllegalArgumentException(String.format("Unsupported PaimonPartition : %s", name)); + } + + @Override + public String toString() { + return name(); + } +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartitionField.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartitionField.java new file mode 100644 index 00000000000..6bc15dc1934 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonPartitionField.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.manager.pojo.sink.paimon;
+
+import io.swagger.annotations.ApiModel;
+import io.swagger.annotations.ApiModelProperty;
+import lombok.Data;
+
+/**
+ * Paimon partition field info: the name, type and (mainly time) format of one partition field
+ */
+@Data
+@ApiModel("Paimon partition field")
+public class PaimonPartitionField {
+
+    @ApiModelProperty("Field name")
+    private String fieldName;
+
+    @ApiModelProperty("Field type")
+    private String fieldType;
+
+    @ApiModelProperty("Field format, including: MICROSECONDS, MILLISECONDS, SECONDS, SQL, ISO_8601"
+            + " and custom such as 'yyyy-MM-dd HH:mm:ss'. This is mainly used for time format")
+    private String fieldFormat;
+
+}
diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSink.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSink.java
new file mode 100644
index 00000000000..00815f43857
--- /dev/null
+++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSink.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import org.apache.inlong.manager.common.consts.SinkType; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.JsonTypeDefine; +import org.apache.inlong.manager.pojo.sink.SinkRequest; +import org.apache.inlong.manager.pojo.sink.StreamSink; + +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; +import lombok.experimental.SuperBuilder; + +import java.util.HashMap; +import java.util.List; + +/** + * Paimon sink info + */ +@Data +@SuperBuilder +@AllArgsConstructor +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +@ApiModel(value = "Paimon sink info") +@JsonTypeDefine(value = SinkType.PAIMON) +public class PaimonSink extends StreamSink { + + @ApiModelProperty("Catalog type, like: HIVE, HADOOP, default is HIVE") + @Builder.Default + private String catalogType = "HIVE"; + + @ApiModelProperty("Catalog uri, such as hive metastore thrift://ip:port") + private String catalogUri; + + @ApiModelProperty("Paimon data warehouse dir") + private String warehouse; + + @ApiModelProperty("Target database name") + private String dbName; + + @ApiModelProperty("Target table name") + private String tableName; + + @ApiModelProperty("Data path, such as: hdfs://ip:port/user/hive/warehouse/test.db") + private String dataPath; + + @ApiModelProperty("File format, support: 
Parquet, Orc, Avro") + private String fileFormat; + + @ApiModelProperty("Partition type, like: H-hour, D-day, W-week, M-month, O-once, R-regulation") + private String partitionType; + + @ApiModelProperty("Primary key") + private String primaryKey; + + @ApiModelProperty("Extended properties") + private List> extList; + + @ApiModelProperty("Partition field list") + private String partitionKey; + + public PaimonSink() { + this.setSinkType(SinkType.PAIMON); + } + + @Override + public SinkRequest genSinkRequest() { + return CommonBeanUtils.copyProperties(this, PaimonSinkRequest::new); + } + +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkDTO.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkDTO.java new file mode 100644 index 00000000000..aa8e7a052b7 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkDTO.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.JsonUtils; + +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.commons.lang3.StringUtils; + +import javax.validation.constraints.NotNull; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Paimon sink info + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class PaimonSinkDTO { + + @ApiModelProperty("Catalog type, like: HIVE, HADOOP, default is HIVE") + @Builder.Default + private String catalogType = "HIVE"; + + @ApiModelProperty("Catalog uri, such as hive metastore thrift://ip:port") + private String catalogUri; + + @ApiModelProperty("Paimon data warehouse dir") + private String warehouse; + + @ApiModelProperty("Target database name") + private String dbName; + + @ApiModelProperty("Target table name") + private String tableName; + + @ApiModelProperty("Data path, such as: hdfs://ip:port/user/hive/warehouse/test.db") + private String dataPath; + + @ApiModelProperty("File format, support: Parquet, Orc, Avro") + private String fileFormat; + + @ApiModelProperty("Partition type, like: H-hour, D-day, W-week, M-month, O-once, R-regulation") + private String partitionType; + + @ApiModelProperty("Primary key") + private String primaryKey; + + @ApiModelProperty("Properties for Paimon") + private Map properties; + + @ApiModelProperty("Extended properties") + private List> extList; + + @ApiModelProperty("Partition field list") + private String partitionKey; + + /** + * Get the dto instance from the request + */ + public static PaimonSinkDTO getFromRequest(PaimonSinkRequest request, String 
extParams) { + PaimonSinkDTO dto = + StringUtils.isNotBlank(extParams) ? PaimonSinkDTO.getFromJson(extParams) : new PaimonSinkDTO(); + return CommonBeanUtils.copyProperties(request, dto, true); + } + + public static PaimonSinkDTO getFromJson(@NotNull String extParams) { + try { + return JsonUtils.parseObject(extParams, PaimonSinkDTO.class); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.SINK_INFO_INCORRECT, + String.format("parse extParams of Paimon SinkDTO failure: %s", e.getMessage())); + } + } + + /** + * Get Paimon table info + */ + public static PaimonTableInfo getPaimonTableInfo(PaimonSinkDTO paimonInfo, List columnList) { + PaimonTableInfo tableInfo = new PaimonTableInfo(); + tableInfo.setDbName(paimonInfo.getDbName()); + tableInfo.setTableName(paimonInfo.getTableName()); + + tableInfo.setPartitionKey(paimonInfo.getPartitionKey()); + tableInfo.setColumns(columnList); + tableInfo.setPrimaryKey(paimonInfo.getPrimaryKey()); + tableInfo.setFileFormat(paimonInfo.getFileFormat()); + tableInfo.setTblProperties(paimonInfo.getProperties()); + return tableInfo; + } + +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkRequest.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkRequest.java new file mode 100644 index 00000000000..d458c19ef9d --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonSinkRequest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import org.apache.inlong.manager.common.consts.SinkType; +import org.apache.inlong.manager.common.util.JsonTypeDefine; +import org.apache.inlong.manager.pojo.sink.SinkRequest; + +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +import javax.validation.constraints.Pattern; + +import java.util.HashMap; +import java.util.List; + +/** + * Paimon sink request. + */ +@Data +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +@ApiModel(value = "Paimon sink request") +@JsonTypeDefine(value = SinkType.PAIMON) +public class PaimonSinkRequest extends SinkRequest { + + @ApiModelProperty("Catalog type, like: HIVE, HADOOP, default is HIVE") + private String catalogType = "HIVE"; + + @ApiModelProperty("Catalog uri, such as hive metastore thrift://ip:port") + @Pattern(regexp = "^((?!\\s).)*$", message = "not supports blank in url") + private String catalogUri; + + @ApiModelProperty("Paimon data warehouse dir") + private String warehouse; + + @ApiModelProperty("Target database name") + private String dbName; + + @ApiModelProperty("Target table name") + private String tableName; + + @ApiModelProperty("Data path, such as: hdfs://ip:port/user/hive/warehouse/test.db") + private String dataPath; + + @ApiModelProperty("File format, support: Parquet, Orc, Avro") + private String fileFormat; + + @ApiModelProperty("Extended properties") + private List> extList; + + 
@ApiModelProperty("Partition field list") + private String partitionKey; + + @ApiModelProperty("Primary key") + private String primaryKey; +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonTableInfo.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonTableInfo.java new file mode 100644 index 00000000000..55ef14b6742 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonTableInfo.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import lombok.Data; + +import java.util.List; +import java.util.Map; + +/** + * Paimon table info + */ +@Data +public class PaimonTableInfo { + + private String dbName; + private String tableName; + private String tableDesc; + private String fileFormat; + private Map tblProperties; + private List columns; + + private String primaryKey; + + private String partitionKey; +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonType.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonType.java new file mode 100644 index 00000000000..384ac0bbc2a --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sink/paimon/PaimonType.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.pojo.sink.paimon; + +import lombok.Getter; + +/** + * Paimon data type + */ +public enum PaimonType { + + BOOLEAN("boolean", "boolean"), + INT("int", "int"), + LONG("long", "bigint"), + FLOAT("float", "float"), + DOUBLE("double", "double"), + DATE("date", "date"), + TIME("time", "time(0)"), + TIMESTAMP("timestamp", "timestamp(3)"), + TIMESTAMPT_Z("timestamptz", "timestamp(6)"), + STRING("string", "varchar(" + Integer.MAX_VALUE + ")"), + BINARY("binary", "tinyint"), + UUID("uuid", "uuid"), + FIXED("fixed", null), + DECIMAL("decimal", null); + + @Getter + private final String type; + + @Getter + private final String hiveType; + + PaimonType(String type, String hiveType) { + this.type = type; + this.hiveType = hiveType; + } + + /** + * Get type from name + */ + public static PaimonType forType(String type) { + for (PaimonType ibType : values()) { + if (ibType.getType().equalsIgnoreCase(type)) { + return ibType; + } + } + throw new IllegalArgumentException(String.format("invalid paimon type = %s", type)); + } +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/ExtractNodeProviderFactory.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/ExtractNodeProviderFactory.java index 07da9d3b5a8..3c06cf6d25f 100644 --- a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/ExtractNodeProviderFactory.java +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/ExtractNodeProviderFactory.java @@ -26,6 +26,7 @@ import org.apache.inlong.manager.pojo.sort.node.provider.MongoDBProvider; import org.apache.inlong.manager.pojo.sort.node.provider.MySQLBinlogProvider; import org.apache.inlong.manager.pojo.sort.node.provider.OracleProvider; +import org.apache.inlong.manager.pojo.sort.node.provider.PaimonProvider; import org.apache.inlong.manager.pojo.sort.node.provider.PostgreSQLProvider; import 
org.apache.inlong.manager.pojo.sort.node.provider.PulsarProvider; import org.apache.inlong.manager.pojo.sort.node.provider.RedisProvider; @@ -48,6 +49,7 @@ public class ExtractNodeProviderFactory { static { // The Providers Parsing SourceInfo to ExtractNode which sort needed EXTRACT_NODE_PROVIDER_LIST.add(new HudiProvider()); + EXTRACT_NODE_PROVIDER_LIST.add(new PaimonProvider()); EXTRACT_NODE_PROVIDER_LIST.add(new KafkaProvider()); EXTRACT_NODE_PROVIDER_LIST.add(new MongoDBProvider()); EXTRACT_NODE_PROVIDER_LIST.add(new OracleProvider()); diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/LoadNodeProviderFactory.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/LoadNodeProviderFactory.java index 1df97b77fbc..7874f08c84f 100644 --- a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/LoadNodeProviderFactory.java +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/LoadNodeProviderFactory.java @@ -33,6 +33,7 @@ import org.apache.inlong.manager.pojo.sort.node.provider.KuduProvider; import org.apache.inlong.manager.pojo.sort.node.provider.MySQLProvider; import org.apache.inlong.manager.pojo.sort.node.provider.OracleProvider; +import org.apache.inlong.manager.pojo.sort.node.provider.PaimonProvider; import org.apache.inlong.manager.pojo.sort.node.provider.PostgreSQLProvider; import org.apache.inlong.manager.pojo.sort.node.provider.RedisProvider; import org.apache.inlong.manager.pojo.sort.node.provider.SQLServerProvider; @@ -63,6 +64,7 @@ public class LoadNodeProviderFactory { LOAD_NODE_PROVIDER_LIST.add(new HDFSProvider()); LOAD_NODE_PROVIDER_LIST.add(new HiveProvider()); LOAD_NODE_PROVIDER_LIST.add(new HudiProvider()); + LOAD_NODE_PROVIDER_LIST.add(new PaimonProvider()); LOAD_NODE_PROVIDER_LIST.add(new IcebergProvider()); LOAD_NODE_PROVIDER_LIST.add(new KuduProvider()); LOAD_NODE_PROVIDER_LIST.add(new MySQLProvider()); 
diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/provider/PaimonProvider.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/provider/PaimonProvider.java new file mode 100644 index 00000000000..17fbfb907f9 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/sort/node/provider/PaimonProvider.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.pojo.sort.node.provider; + +import org.apache.inlong.manager.common.consts.StreamType; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSink; +import org.apache.inlong.manager.pojo.sort.node.base.ExtractNodeProvider; +import org.apache.inlong.manager.pojo.sort.node.base.LoadNodeProvider; +import org.apache.inlong.manager.pojo.source.paimon.PaimonSource; +import org.apache.inlong.manager.pojo.stream.StreamField; +import org.apache.inlong.manager.pojo.stream.StreamNode; +import org.apache.inlong.sort.protocol.FieldInfo; +import org.apache.inlong.sort.protocol.constant.PaimonConstant.CatalogType; +import org.apache.inlong.sort.protocol.node.ExtractNode; +import org.apache.inlong.sort.protocol.node.LoadNode; +import org.apache.inlong.sort.protocol.node.extract.PaimonExtractNode; +import org.apache.inlong.sort.protocol.node.load.PaimonLoadNode; +import org.apache.inlong.sort.protocol.transformation.FieldRelation; + +import java.util.List; +import java.util.Map; + +/** + * The Provider for creating Paimon extract or load nodes. 
+ */ +public class PaimonProvider implements ExtractNodeProvider, LoadNodeProvider { + + @Override + public Boolean accept(String streamType) { + return StreamType.PAIMON.equals(streamType); + } + + @Override + public ExtractNode createExtractNode(StreamNode streamNodeInfo) { + PaimonSource source = (PaimonSource) streamNodeInfo; + List fieldInfos = parseStreamFieldInfos(source.getFieldList(), source.getSourceName()); + Map properties = parseProperties(source.getProperties()); + + return new PaimonExtractNode( + source.getSourceName(), + source.getSourceName(), + fieldInfos, + null, + source.getCatalogUri(), + source.getWarehouse(), + source.getDbName(), + source.getTableName(), + CatalogType.HIVE, + source.getCheckIntervalInMinus(), + source.isReadStreamingSkipCompaction(), + source.getReadStartCommit(), + properties, + source.getExtList()); + } + + @Override + public LoadNode createLoadNode(StreamNode nodeInfo, Map constantFieldMap) { + PaimonSink paimonSink = (PaimonSink) nodeInfo; + Map properties = parseProperties(paimonSink.getProperties()); + List fieldInfos = parseSinkFieldInfos(paimonSink.getSinkFieldList(), paimonSink.getSinkName()); + List fieldRelations = parseSinkFields(paimonSink.getSinkFieldList(), constantFieldMap); + CatalogType catalogType = CatalogType.forName(paimonSink.getCatalogType()); + + return new PaimonLoadNode( + paimonSink.getSinkName(), + paimonSink.getSinkName(), + fieldInfos, + fieldRelations, + null, + null, + null, + properties, + paimonSink.getDbName(), + paimonSink.getTableName(), + paimonSink.getPrimaryKey(), + catalogType, + paimonSink.getCatalogUri(), + paimonSink.getWarehouse(), + paimonSink.getExtList(), + paimonSink.getPartitionKey()); + } +} \ No newline at end of file diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSource.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSource.java new file mode 100644 index 
00000000000..f9376f03509 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSource.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.source.paimon; + +import org.apache.inlong.manager.common.consts.SourceType; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.JsonTypeDefine; +import org.apache.inlong.manager.pojo.source.SourceRequest; +import org.apache.inlong.manager.pojo.source.StreamSource; + +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; +import lombok.experimental.SuperBuilder; + +import java.util.HashMap; +import java.util.List; + +/** + * The Paimon source info + */ +@Data +@SuperBuilder +@AllArgsConstructor +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +@ApiModel(value = "Paimon source info") +@JsonTypeDefine(value = SourceType.PAIMON) +public class PaimonSource extends StreamSource { + + @ApiModelProperty("The database name of Paimon") + private String 
dbName; + + @ApiModelProperty("The table name of the Paimon") + private String tableName; + + @ApiModelProperty("The catalog uri of the Paimon") + private String catalogUri; + + @ApiModelProperty("The dfs base path of the Paimon") + private String warehouse; + + @ApiModelProperty("The check file interval in minutes") + private int checkIntervalInMinus; + + @ApiModelProperty("The flag indicate whether skip files in compaction") + private boolean readStreamingSkipCompaction; + + @ApiModelProperty("The start commit id") + private String readStartCommit; + + @ApiModelProperty("Extended properties") + private List> extList; + + public PaimonSource() { + this.setSourceType(SourceType.PAIMON); + } + + @Override + public SourceRequest genSourceRequest() { + return CommonBeanUtils.copyProperties(this, PaimonSourceRequest::new); + } + +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceDTO.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceDTO.java new file mode 100644 index 00000000000..92f07ae1405 --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceDTO.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.pojo.source.paimon; + +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.JsonUtils; + +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.apache.commons.lang3.StringUtils; + +import javax.validation.constraints.NotNull; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Paimon source info + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class PaimonSourceDTO { + + @ApiModelProperty("The database name of Paimon") + private String dbName; + + @ApiModelProperty("The table name of Paimon") + private String tableName; + + @ApiModelProperty("The catalog uri of Paimon") + private String catalogUri; + + @ApiModelProperty("The dfs base path of Paimon") + private String warehouse; + + @ApiModelProperty("The flag indicate whether skip files in compaction") + private boolean readStreamingSkipCompaction; + + @ApiModelProperty("The start commit id") + private String readStartCommit; + + @ApiModelProperty("Extended properties") + private List> extList; + + @ApiModelProperty("Properties for Paimon") + private Map properties; + + /** + * Get the dto instance from the request + */ + public static PaimonSourceDTO getFromRequest(PaimonSourceRequest request, String extParams) { + PaimonSourceDTO dto = StringUtils.isNotBlank(extParams) + ? 
PaimonSourceDTO.getFromJson(extParams) + : new PaimonSourceDTO(); + return CommonBeanUtils.copyProperties(request, dto, true); + } + + /** + * Get the dto instance from the JSON string + */ + public static PaimonSourceDTO getFromJson(@NotNull String extParams) { + try { + return JsonUtils.parseObject(extParams, PaimonSourceDTO.class); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.SOURCE_INFO_INCORRECT, + String.format("parse extParams of PaimonSource failure: %s", e.getMessage())); + } + } +} diff --git a/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceRequest.java b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceRequest.java new file mode 100644 index 00000000000..1978e34cf6f --- /dev/null +++ b/inlong-manager/manager-pojo/src/main/java/org/apache/inlong/manager/pojo/source/paimon/PaimonSourceRequest.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.pojo.source.paimon; + +import org.apache.inlong.manager.common.consts.SourceType; +import org.apache.inlong.manager.common.util.JsonTypeDefine; +import org.apache.inlong.manager.pojo.source.SourceRequest; + +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; + +import java.util.HashMap; +import java.util.List; + +/** + * Request info of the Paimon source + */ +@Data +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +@ApiModel(value = "Request of the Paimon source") +@JsonTypeDefine(value = SourceType.PAIMON) +public class PaimonSourceRequest extends SourceRequest { + + @ApiModelProperty("The database name of Paimon") + private String dbName; + + @ApiModelProperty("The table name of Paimon") + private String tableName; + + @ApiModelProperty("The catalog uri of Paimon") + private String catalogUri; + + @ApiModelProperty("The dfs base path of Paimon") + private String warehouse; + + @ApiModelProperty("The flag indicate whether skip files in compaction") + private boolean readStreamingSkipCompaction; + + @ApiModelProperty("The start commit id") + private String readStartCommit; + + @ApiModelProperty("Extended properties") + private List> extList; + + public PaimonSourceRequest() { + this.setSourceType(SourceType.PAIMON); + } + +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/node/paimon/PaimonDataNodeOperator.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/node/paimon/PaimonDataNodeOperator.java new file mode 100644 index 00000000000..48b3cc07ec2 --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/node/paimon/PaimonDataNodeOperator.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.service.node.paimon; + +import org.apache.inlong.manager.common.consts.DataNodeType; +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.Preconditions; +import org.apache.inlong.manager.dao.entity.DataNodeEntity; +import org.apache.inlong.manager.pojo.node.DataNodeInfo; +import org.apache.inlong.manager.pojo.node.DataNodeRequest; +import org.apache.inlong.manager.pojo.node.paimon.PaimonDataNodeDTO; +import org.apache.inlong.manager.pojo.node.paimon.PaimonDataNodeInfo; +import org.apache.inlong.manager.pojo.node.paimon.PaimonDataNodeRequest; +import org.apache.inlong.manager.service.node.AbstractDataNodeOperator; +import org.apache.inlong.manager.service.resource.sink.paimon.PaimonCatalogClient; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +@Service +public class PaimonDataNodeOperator extends AbstractDataNodeOperator { + + private 
static final Logger LOGGER = LoggerFactory.getLogger(PaimonDataNodeOperator.class); + + @Autowired + private ObjectMapper objectMapper; + + @Override + public Boolean accept(String dataNodeType) { + return getDataNodeType().equals(dataNodeType); + } + + @Override + public String getDataNodeType() { + return DataNodeType.PAIMON; + } + + @Override + public DataNodeInfo getFromEntity(DataNodeEntity entity) { + if (entity == null) { + throw new BusinessException(ErrorCodeEnum.DATA_NODE_NOT_FOUND); + } + + PaimonDataNodeInfo paimonDataNodeInfo = new PaimonDataNodeInfo(); + CommonBeanUtils.copyProperties(entity, paimonDataNodeInfo); + if (StringUtils.isNotBlank(entity.getExtParams())) { + PaimonDataNodeDTO dto = PaimonDataNodeDTO.getFromJson(entity.getExtParams()); + CommonBeanUtils.copyProperties(dto, paimonDataNodeInfo); + } + return paimonDataNodeInfo; + } + + @Override + protected void setTargetEntity(DataNodeRequest request, DataNodeEntity targetEntity) { + PaimonDataNodeRequest paimonNodeRequest = (PaimonDataNodeRequest) request; + CommonBeanUtils.copyProperties(paimonNodeRequest, targetEntity, true); + try { + PaimonDataNodeDTO dto = PaimonDataNodeDTO.getFromRequest(paimonNodeRequest, targetEntity.getExtParams()); + targetEntity.setExtParams(objectMapper.writeValueAsString(dto)); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.SOURCE_INFO_INCORRECT, + String.format("Failed to build extParams for Paimon node: %s", e.getMessage())); + } + } + + @Override + public Boolean testConnection(DataNodeRequest request) { + PaimonDataNodeRequest paimonRequest = (PaimonDataNodeRequest) request; + String metastoreUri = paimonRequest.getUrl(); + String warehouse = paimonRequest.getWarehouse(); + Preconditions.expectNotBlank(metastoreUri, ErrorCodeEnum.INVALID_PARAMETER, "connection url cannot be empty"); + try (PaimonCatalogClient client = new PaimonCatalogClient(metastoreUri, warehouse)) { + client.open(); + client.listAllDatabases(); + LOGGER.info("paimon 
connection not null - connection success for metastoreUri={}, warehouse={}", + metastoreUri, warehouse); + return true; + } catch (Exception e) { + String errMsg = String.format("paimon connection failed for metastoreUri=%s, warehouse=%s", metastoreUri, + warehouse); + LOGGER.error(errMsg, e); + throw new BusinessException(errMsg); + } + } + +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonCatalogClient.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonCatalogClient.java new file mode 100644 index 00000000000..59d1cbbecca --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonCatalogClient.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.service.resource.sink.paimon; + +import org.apache.inlong.manager.pojo.sink.paimon.PaimonColumnInfo; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonTableInfo; + +import com.google.common.collect.Maps; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.apache.inlong.manager.service.resource.sink.paimon.PaimonUtils.*; + +/** + * The Catalog client for Paimon. 
+ */ +public class PaimonCatalogClient implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(PaimonCatalogClient.class); + + private String dbName; + private final String warehouse; + private IMetaStoreClient client; + private final HiveConf hiveConf; + + public PaimonCatalogClient(String uri, String warehouse, String dbName) throws MetaException { + this.warehouse = warehouse; + this.dbName = dbName; + hiveConf = new HiveConf(); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, uri); + hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, false); + } + + public PaimonCatalogClient(String uri, String warehouse) throws MetaException { + this.warehouse = warehouse; + hiveConf = new HiveConf(); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, uri); + hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, false); + } + + /** + * Open the hive metastore connection + */ + public void open() { + if (this.client == null) { + try { + this.client = new HiveMetaStoreClient(hiveConf); + } catch (Exception e) { + throw new RuntimeException("Failed to create hive metastore client", e); + } + LOG.info("Connected to Hive metastore"); + } + } + + private void createDatabase(String warehouse, Map meta, boolean ignoreIfExists) { + Database database = new Database(); + Map parameter = Maps.newHashMap(); + database.setName(dbName); + database.setLocationUri((new Path(warehouse, dbName) + ".db")); + meta.forEach((key, value) -> { + if (key.equals("comment")) { + database.setDescription(value); + } else if (key.equals("location")) { + database.setLocationUri(value); + } else if (value != null) { + parameter.put(key, value); + } + database.setParameters(parameter); + }); + try { + client.createDatabase(database); + } catch (AlreadyExistsException e) { + if (!ignoreIfExists) { + throw new RuntimeException("Database '" + dbName + "' already exist!"); + } + } catch (TException e) { + throw new RuntimeException("Failed to create 
database '" + dbName + + "'", e); + } + } + + /** + * Create the paimon database + * @param warehouse the warehouse directory in dfs + * @param ignoreIfExists not create again if exist + */ + public void createDatabase(String warehouse, boolean ignoreIfExists) { + createDatabase(warehouse, Maps.newHashMap(), ignoreIfExists); + } + + /** + * Check table if exist + * @param tableName the table name of paimon table + * @return return true if exist + */ + public boolean tableExist(String tableName) throws TException { + return client.tableExists(dbName, tableName); + } + + /** + * get column infos of exist paimon table + * @param dbName the database name + * @param tableName the table name + */ + public List getColumns( + String dbName, + String tableName) + throws TException { + Table hiveTable = client.getTable(dbName, tableName); + List allCols = hiveTable.getSd().getCols().stream() + // filter out the metadata columns + .filter(s -> !PaimonUtils.isMetadataFile(s.getName())) + .collect(Collectors.toList()); + + return allCols.stream() + .map((FieldSchema s) -> { + PaimonColumnInfo info = new PaimonColumnInfo(); + info.setName(s.getName()); + info.setType(s.getType()); + return info; + }) + .collect(Collectors.toList()); + } + + /** + * Add column to paimon table at the tail + */ + public void addColumns(String tableName, List columns) throws TException { + Table hiveTable = client.getTable(dbName, tableName); + Table newHiveTable = hiveTable.deepCopy(); + List cols = newHiveTable.getSd().getCols(); + for (PaimonColumnInfo column : columns) { + FieldSchema fieldSchema = new FieldSchema(); + fieldSchema.setName(column.getName()); + fieldSchema.setType(column.getType()); + fieldSchema.setComment(column.getDesc()); + cols.add(fieldSchema); + } + newHiveTable.getSd().setCols(cols); + client.alter_table(dbName, tableName, newHiveTable); + } + + /** + * Create paimon table and register to hive metastore + * @param tableName the paimon table name + * @param tableInfo the 
paimon table info + * @param useRealTimeInputFormat whether to register the realtime input format; its negation is stored in the serde property IS_QUERY_AS_RO_TABLE + */ + public void createTable( + String tableName, + PaimonTableInfo tableInfo, + boolean useRealTimeInputFormat) + throws TException, IOException { + Table hiveTable = this.getEmptyTable(dbName, tableName); + hiveTable.setOwner(UserGroupInformation.getCurrentUser().getUserName()); + hiveTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + + Map properties = new HashMap<>(); + // the table location is warehouse/dbName.db/tableName + String location = this.warehouse + "/" + dbName + ".db" + "/" + tableName; + properties.put("path", location); + + // one FieldSchema per configured column, with the type converted by PaimonTypeConverter + List cols = tableInfo.getColumns() + .stream() + .map(column -> { + FieldSchema fieldSchema = new FieldSchema(); + fieldSchema.setName(column.getName()); + fieldSchema.setType(PaimonTypeConverter.convert(column)); + fieldSchema.setComment(column.getDesc()); + return fieldSchema; + }) + .collect(Collectors.toList()); + + // Build storage of paimon table + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(cols); + hiveTable.setDbName(dbName); + hiveTable.setTableName(tableName); + // FIXME: splitSchemas need config by frontend + + // the base file format is currently fixed to PARQUET + PaimonFileFormat baseFileFormat = PaimonFileFormat.PARQUET; + // ignore uber input Format + String inputFormatClassName = + getInputFormatClassName(baseFileFormat, useRealTimeInputFormat); + String outputFormatClassName = getOutputFormatClassName(baseFileFormat); + String serDeClassName = getSerDeClassName(baseFileFormat); + sd.setInputFormat(inputFormatClassName); + sd.setOutputFormat(outputFormatClassName); + + Map serdeProperties = new HashMap<>(); + serdeProperties.put("path", location); + serdeProperties.put(IS_QUERY_AS_RO_TABLE, String.valueOf(!useRealTimeInputFormat)); + sd.setSerdeInfo(new SerDeInfo(null, serDeClassName, serdeProperties)); + sd.setLocation(location); + hiveTable.setSd(sd); + + hiveTable.setParameters(properties); + + client.createTable(hiveTable); + } + + /** + * List the names of all databases returned by the metastore client + */ + public List listAllDatabases() throws TException { + return 
client.getAllDatabases(); + } + + /** + * Close the connection of hive metastore; does nothing when the client is already null + */ + public void close() { + if (client != null) { + client.close(); + client = null; + LOG.info("Disconnect to hive metastore"); + } + } + + /** + * Build a table skeleton for the given database and table name: empty column, partition and parameter + * collections, a placeholder serde, MANAGED_TABLE type, the current time as create time and bucketing version 2 + * @param databaseName the database name set on the table + * @param tableName the table name set on the table + * @return the pre-initialized table object + */ + public Table getEmptyTable(String databaseName, String tableName) { + StorageDescriptor sd = new StorageDescriptor(); + { + sd.setSerdeInfo(new SerDeInfo()); + sd.setNumBuckets(-1); + sd.setBucketCols(new ArrayList()); + sd.setCols(new ArrayList()); + sd.setParameters(new HashMap()); + sd.setSortCols(new ArrayList()); + sd.getSerdeInfo().setParameters(new HashMap()); + // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does + // not support a table with no columns. + sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.setInputFormat(SequenceFileInputFormat.class.getName()); + SkewedInfo skewInfo = new SkewedInfo(); + skewInfo.setSkewedColNames(new ArrayList()); + skewInfo.setSkewedColValues(new ArrayList>()); + skewInfo.setSkewedColValueLocationMaps(new HashMap, String>()); + sd.setSkewedInfo(skewInfo); + } + + Table t = new Table(); + { + t.setSd(sd); + t.setPartitionKeys(new ArrayList()); + t.setParameters(new HashMap()); + t.setTableType(TableType.MANAGED_TABLE.toString()); + t.setDbName(databaseName); + t.setTableName(tableName); + // set create time + t.setCreateTime((int) (System.currentTimeMillis() / 1000)); + } + // Explicitly set the bucketing version + t.getParameters().put(hive_metastoreConstants.TABLE_BUCKETING_VERSION, + "2"); + return t; + } + +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonFileFormat.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonFileFormat.java new file mode 100644 index 00000000000..d3f749bdf4e --- /dev/null +++ 
b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonFileFormat.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.service.resource.sink.paimon; + +/** + * Paimon file format, used by PaimonUtils to select the input format, output format and SerDe class names. + * NOTE(review): PaimonUtils has no mapping for PAIMON_LOG, so its lookups throw for that value; confirm this is intended. + */ +public enum PaimonFileFormat { + + PARQUET, + PAIMON_LOG, + HFILE, + ORC; + +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonResourceOperator.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonResourceOperator.java new file mode 100644 index 00000000000..b1eadb4e24f --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonResourceOperator.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.service.resource.sink.paimon; + +import org.apache.inlong.manager.common.consts.InlongConstants; +import org.apache.inlong.manager.common.consts.SinkType; +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.enums.SinkStatus; +import org.apache.inlong.manager.common.exceptions.WorkflowException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.common.util.Preconditions; +import org.apache.inlong.manager.dao.entity.StreamSinkFieldEntity; +import org.apache.inlong.manager.dao.mapper.StreamSinkFieldEntityMapper; +import org.apache.inlong.manager.pojo.node.paimon.PaimonDataNodeInfo; +import org.apache.inlong.manager.pojo.sink.SinkInfo; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonColumnInfo; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSinkDTO; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonTableInfo; +import org.apache.inlong.manager.service.node.DataNodeOperateHelper; +import org.apache.inlong.manager.service.resource.sink.SinkResourceOperator; +import org.apache.inlong.manager.service.sink.StreamSinkService; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; + +import static java.util.stream.Collectors.toList; + +/** + * Paimon resource operator: creates the Paimon table of a sink in the hive metastore, or appends the + * newly configured columns when the table already exists. + */ +@Service +public class PaimonResourceOperator implements SinkResourceOperator { + + private static final Logger LOGGER = LoggerFactory.getLogger(PaimonResourceOperator.class); + + private static final String CATALOG_TYPE_HIVE = "HIVE"; + + @Autowired + private StreamSinkService sinkService; + @Autowired + private StreamSinkFieldEntityMapper sinkFieldMapper; + @Autowired + private DataNodeOperateHelper dataNodeHelper; + + @Override + public Boolean accept(String sinkType) { + return SinkType.PAIMON.equals(sinkType); + } + + /** + * Create Paimon table according to the sink config. + * Nothing is done when the sink info is null, the sink is already configured successfully, + * or resource creation is disabled for the sink. + * + * @param sinkInfo the sink whose table should be created + */ + public void createSinkResource(SinkInfo sinkInfo) { + if (sinkInfo == null) { + LOGGER.warn("sink info was null, skip to create resource"); + return; + } + + if (SinkStatus.CONFIG_SUCCESSFUL.getCode().equals(sinkInfo.getStatus())) { + LOGGER.warn("sink resource [{}] already success, skip to create", sinkInfo.getId()); + return; + } else if (InlongConstants.DISABLE_CREATE_RESOURCE.equals(sinkInfo.getEnableCreateResource())) { + LOGGER.warn("create resource was disabled, skip to create for [{}]", sinkInfo.getId()); + return; + } + + this.createTableIfAbsent(sinkInfo); + } + + /** + * Parse the Paimon sink config from the ext params of the sink, fill the catalog uri from the data node + * when it is blank for a HIVE catalog, and derive the data path as warehouse/dbName.db/tableName. + */ + private PaimonSinkDTO getPaimonInfo(SinkInfo sinkInfo) { + PaimonSinkDTO paimonInfo = PaimonSinkDTO.getFromJson(sinkInfo.getExtParams()); + + // read uri from data node if not supplied by user + if (StringUtils.isBlank(paimonInfo.getCatalogUri()) + && CATALOG_TYPE_HIVE.equals(paimonInfo.getCatalogType())) { + String dataNodeName = sinkInfo.getDataNodeName(); + Preconditions.expectNotBlank(dataNodeName, ErrorCodeEnum.INVALID_PARAMETER, + "Paimon catalog uri not specified and data node is empty"); + PaimonDataNodeInfo dataNodeInfo = (PaimonDataNodeInfo) 
dataNodeHelper.getDataNodeInfo( + dataNodeName, sinkInfo.getSinkType()); + CommonBeanUtils.copyProperties(dataNodeInfo, paimonInfo); + paimonInfo.setCatalogUri(dataNodeInfo.getUrl()); + } + + paimonInfo.setDataPath( + paimonInfo.getWarehouse() + "/" + paimonInfo.getDbName() + ".db/" + paimonInfo.getTableName()); + return paimonInfo; + } + + /** + * Create the database and table when absent, otherwise append the columns configured beyond the existing + * ones, then record the outcome on the sink status. + * Any failure is logged, marks the sink as CONFIG_FAILED and is rethrown as a WorkflowException. + */ + private void createTableIfAbsent(SinkInfo sinkInfo) { + LOGGER.info("begin to create paimon table for sinkInfo={}", sinkInfo); + + // Get all info from config + PaimonSinkDTO paimonInfo = getPaimonInfo(sinkInfo); + List columnInfoList = getColumnList(sinkInfo); + if (CollectionUtils.isEmpty(columnInfoList)) { + throw new IllegalArgumentException("no paimon columns specified"); + } + PaimonTableInfo tableInfo = PaimonSinkDTO.getPaimonTableInfo(paimonInfo, columnInfoList); + + String metastoreUri = paimonInfo.getCatalogUri(); + String warehouse = paimonInfo.getWarehouse(); + String dbName = paimonInfo.getDbName(); + String tableName = paimonInfo.getTableName(); + + PaimonCatalogClient client = null; + try { + client = new PaimonCatalogClient(metastoreUri, warehouse, dbName); + client.open(); + + // 1. create database if not exists + client.createDatabase(warehouse, true); + // 2. check if the table exists + boolean tableExists = client.tableExist(tableName); + + if (!tableExists) { + // 3. create table + client.createTable(tableName, tableInfo, true); + } else { + // 4. 
or update table columns + // columns are compared by position only: everything configured beyond the existing column count is appended + List existColumns = client.getColumns(dbName, tableName); + List needAddColumns = tableInfo.getColumns().stream().skip(existColumns.size()) + .collect(toList()); + if (CollectionUtils.isNotEmpty(needAddColumns)) { + client.addColumns(tableName, needAddColumns); + LOGGER.info("{} columns added for table {}", needAddColumns.size(), tableName); + } + } + String info = "success to create Paimon resource"; + sinkService.updateStatus(sinkInfo.getId(), SinkStatus.CONFIG_SUCCESSFUL.getCode(), info); + // log the sink itself; the message text must not be passed again as the placeholder argument + LOGGER.info("{} for sinkInfo = {}", info, sinkInfo); + } catch (Throwable e) { + String errMsg = "create Paimon table failed: " + e.getMessage(); + LOGGER.error(errMsg, e); + sinkService.updateStatus(sinkInfo.getId(), SinkStatus.CONFIG_FAILED.getCode(), errMsg); + throw new WorkflowException(errMsg); + } finally { + if (client != null) { + client.close(); + } + } + } + + /** + * Load the sink fields of the sink and convert each one to a Paimon column: the column is parsed from the + * ext params of the field, then name, type, desc and the required flag are taken from the field itself. + */ + private List getColumnList(SinkInfo sinkInfo) { + List fieldList = sinkFieldMapper.selectBySinkId(sinkInfo.getId()); + + // set columns + List columnList = new ArrayList<>(); + for (StreamSinkFieldEntity field : fieldList) { + PaimonColumnInfo column = PaimonColumnInfo.getFromJson(field.getExtParams()); + column.setName(field.getFieldName()); + column.setType(field.getFieldType()); + column.setDesc(field.getFieldComment()); + column.setRequired(field.getIsRequired() != null && field.getIsRequired() > 0); + columnList.add(column); + } + + return columnList; + } +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonTypeConverter.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonTypeConverter.java new file mode 100644 index 00000000000..a422882a40d --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonTypeConverter.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.service.resource.sink.paimon; + +import org.apache.inlong.manager.pojo.sink.paimon.PaimonColumnInfo; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonType; + +import java.util.Optional; + +/** + * Converter from the field type of a Paimon column to the corresponding Hive field type. + * + * NOTE(review): the remarks below appear to be inherited from the documentation of Flink's legacy type + * converter; confirm that they actually apply to this class. The converter reflects the old behavior that includes: + * - Use old java.sql.* time classes for time data types. + * - Only support millisecond precision for timestamps or day-time intervals. + * - Let variable precision and scale for decimal types pass through the planner. + * + * @see org.apache.flink.table.types.utils.TypeInfoDataTypeConverter + */
+public class PaimonTypeConverter { + + /** + * Convert the field type of the column to a Hive field type. + * DECIMAL becomes decimal(precision, scale), FIXED becomes fixed(length), and every other type uses the + * Hive type of the matching PaimonType. + * + * @param column the column whose type is converted + * @return the Hive type string + * @throws RuntimeException if the column is null or no type can be resolved for it + */ + public static String convert(PaimonColumnInfo column) { + return Optional.ofNullable(column) + .map(col -> PaimonType.forType(col.getType())) + .map(paimonType -> { + if (PaimonType.DECIMAL == paimonType) { + return String.format("decimal(%d, %d)", column.getPrecision(), column.getScale()); + } else if (PaimonType.FIXED == paimonType) { + return String.format("fixed(%d)", column.getLength()); + } else { + return paimonType.getHiveType(); + } + }) + .orElseThrow(() -> new RuntimeException("Can not properly convert type of column: " + column)); + } + +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonUtils.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonUtils.java new file mode 100644 index 00000000000..d6ec485a913 --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/resource/sink/paimon/PaimonUtils.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.manager.service.resource.sink.paimon; + +import com.google.common.collect.Sets; + +import java.util.Set; + +/** + * The utility of paimon: recognizes metadata field names and maps a base file format to the input format, + * output format and SerDe class names. + */ +public class PaimonUtils { + + private static final String COMMIT_TIME_METADATA_FILE_NAME = "_Paimon_commit_time"; + private static final String COMMIT_SEQNO_METADATA_FILE_NAME = "_Paimon_commit_seqno"; + private static final String RECORD_KEY_METADATA_FILE_NAME = "_Paimon_record_key"; + private static final String PARTITION_PATH_METADATA_FILE_NAME = "_Paimon_partition_path"; + private static final String METADATA_FILE_NAME = "_Paimon_file_name"; + private static final String OPERATION_METADATA_FILE_NAME = "_Paimon_operation"; + + public static final String IS_QUERY_AS_RO_TABLE = "Paimon.query.as.ro.table"; + + // all names treated as metadata by isMetadataFile + private static final Set PAIMON_METADATA_FILES = + Sets.newHashSet(COMMIT_TIME_METADATA_FILE_NAME, COMMIT_SEQNO_METADATA_FILE_NAME, + RECORD_KEY_METADATA_FILE_NAME, PARTITION_PATH_METADATA_FILE_NAME, METADATA_FILE_NAME, + OPERATION_METADATA_FILE_NAME); + private static final String PARQUET_REALTIME_INPUT_FORMAT_NAME = + "org.apache.paimon.hadoop.realtime.PaimonParquetRealtimeInputFormat"; + private static final String PARQUET_INPUT_FORMAT_NAME = "org.apache.paimon.hadoop.PaimonParquetInputFormat"; + private static final String HFILE_REALTIME_INPUT_FORMAT_NAME = + "org.apache.paimon.hadoop.realtime.PaimonHFileRealtimeInputFormat"; + private static final String HFILE_INPUT_FORMAT_NAME = "org.apache.paimon.hadoop.PaimonHFileInputFormat"; + private static final String ORC_INPUT_FORMAT_NAME = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"; + private static final String PARQUET_HIVE_SER_DE_CLASS_NAME = + "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"; + private static final String ORC_SER_DE_CLASS_NAME = "org.apache.hadoop.hive.ql.io.orc.OrcSerde"; + private static 
final String MAPRED_PARQUET_OUTPUT_FORMAT_NAME = + "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"; + private static final String ORC_OUTPUT_FORMAT_NAME = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"; + + /** + * Check whether the given name is one of the metadata names in PAIMON_METADATA_FILES. + */ + public static boolean isMetadataFile(String fileName) { + return PAIMON_METADATA_FILES.contains(fileName); + } + + /** + * Get the InputFormat class name for the base file format. + * The realtime flag selects the realtime variant for PARQUET and HFILE and is ignored for ORC; + * any other format throws a RuntimeException. + */ + public static String getInputFormatClassName(PaimonFileFormat baseFileFormat, boolean realtime) { + switch (baseFileFormat) { + case PARQUET: + if (realtime) { + return PARQUET_REALTIME_INPUT_FORMAT_NAME; + } else { + return PARQUET_INPUT_FORMAT_NAME; + } + case HFILE: + if (realtime) { + return HFILE_REALTIME_INPUT_FORMAT_NAME; + } else { + return HFILE_INPUT_FORMAT_NAME; + } + case ORC: + return ORC_INPUT_FORMAT_NAME; + default: + throw new RuntimeException("Paimon InputFormat not implemented for base file format " + baseFileFormat); + } + } + + /** + * Get the OutputFormat class name for the base file format. + * PARQUET and HFILE share the parquet output format; any format other than PARQUET, HFILE and ORC + * throws a RuntimeException. + */ + public static String getOutputFormatClassName(PaimonFileFormat baseFileFormat) { + switch (baseFileFormat) { + case PARQUET: + case HFILE: + return MAPRED_PARQUET_OUTPUT_FORMAT_NAME; + case ORC: + return ORC_OUTPUT_FORMAT_NAME; + default: + throw new RuntimeException("No OutputFormat for base file format " + baseFileFormat); + } + } + + /** + * Get the SerDe (serializer and deserializer) class name for the base file format. 
+ * PARQUET and HFILE share the parquet SerDe; any format other than PARQUET, HFILE and ORC + * throws a RuntimeException. + */ + public static String getSerDeClassName(PaimonFileFormat baseFileFormat) { + switch (baseFileFormat) { + case PARQUET: + case HFILE: + return PARQUET_HIVE_SER_DE_CLASS_NAME; + case ORC: + return ORC_SER_DE_CLASS_NAME; + default: + throw new RuntimeException("No SerDe for base file format " + baseFileFormat); + } + } +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/sink/paimon/PaimonSinkOperator.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/sink/paimon/PaimonSinkOperator.java new file mode 100644 index 00000000000..78a5c3b89c4 --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/sink/paimon/PaimonSinkOperator.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.service.sink.paimon; + +import org.apache.inlong.manager.common.consts.InlongConstants; +import org.apache.inlong.manager.common.consts.SinkType; +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.enums.FieldType; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.dao.entity.StreamSinkEntity; +import org.apache.inlong.manager.pojo.node.paimon.PaimonDataNodeInfo; +import org.apache.inlong.manager.pojo.sink.SinkField; +import org.apache.inlong.manager.pojo.sink.SinkRequest; +import org.apache.inlong.manager.pojo.sink.StreamSink; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonColumnInfo; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSink; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSinkDTO; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSinkRequest; +import org.apache.inlong.manager.service.sink.AbstractSinkOperator; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Paimon sink operator, such as save or update paimon field, etc. 
+ */ +@Service +public class PaimonSinkOperator extends AbstractSinkOperator { + + private static final Logger LOGGER = LoggerFactory.getLogger(PaimonSinkOperator.class); + + private static final String CATALOG_TYPE_HIVE = "HIVE"; + + @Autowired + private ObjectMapper objectMapper; + + @Override + public Boolean accept(String sinkType) { + return SinkType.PAIMON.equals(sinkType); + } + + @Override + protected String getSinkType() { + return SinkType.PAIMON; + } + + /** + * Validate the request and serialize the Paimon specific config into the ext params of the target entity. + * Every comma separated name in the partition key and the primary key must be a field of the sink field list. + * + * @throws BusinessException if the sink type does not match, a key is not among the sink fields, + * or the ext params cannot be serialized + */ + @Override + protected void setTargetEntity(SinkRequest request, StreamSinkEntity targetEntity) { + if (!this.getSinkType().equals(request.getSinkType())) { + throw new BusinessException(ErrorCodeEnum.SINK_TYPE_NOT_SUPPORT, + ErrorCodeEnum.SINK_TYPE_NOT_SUPPORT.getMessage() + ": " + getSinkType()); + } + PaimonSinkRequest sinkRequest = (PaimonSinkRequest) request; + + String partitionKey = sinkRequest.getPartitionKey(); + String primaryKey = sinkRequest.getPrimaryKey(); + boolean primaryKeyExist = StringUtils.isNotBlank(primaryKey); + boolean partitionKeyExist = StringUtils.isNotBlank(partitionKey); + // the field names are only collected when at least one key has to be checked + if (primaryKeyExist || partitionKeyExist) { + Set fieldNames = sinkRequest.getSinkFieldList().stream().map(SinkField::getFieldName) + .collect(Collectors.toSet()); + if (partitionKeyExist) { + List partitionKeys = Arrays.asList(partitionKey.split(InlongConstants.COMMA)); + if (!CollectionUtils.isSubCollection(partitionKeys, fieldNames)) { + throw new BusinessException(ErrorCodeEnum.SINK_SAVE_FAILED, + String.format("The partitionKey(%s) must be included in the sinkFieldList(%s)", + partitionKey, fieldNames)); + } + } + if (primaryKeyExist) { + List primaryKeys = Arrays.asList(primaryKey.split(InlongConstants.COMMA)); + if (!CollectionUtils.isSubCollection(primaryKeys, fieldNames)) { + throw new BusinessException(ErrorCodeEnum.SINK_SAVE_FAILED, + String.format("The primaryKey(%s) must be included in the 
sinkFieldList(%s)", + primaryKey, fieldNames)); + } + } + } + + try { + PaimonSinkDTO dto = PaimonSinkDTO.getFromRequest(sinkRequest, targetEntity.getExtParams()); + targetEntity.setExtParams(objectMapper.writeValueAsString(dto)); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.SINK_SAVE_FAILED, + String.format("serialize extParams of Paimon SinkDTO failure: %s", e.getMessage())); + } + } + + /** + * Build the Paimon sink from the entity; a null entity yields an empty sink. + * When the catalog uri is blank for a HIVE catalog, the non-null properties and the url of the + * data node are used instead. + * + * @throws BusinessException if the catalog uri has to be read from the data node but the data node name is blank + */ + @Override + public StreamSink getFromEntity(StreamSinkEntity entity) { + PaimonSink sink = new PaimonSink(); + if (entity == null) { + return sink; + } + + PaimonSinkDTO dto = PaimonSinkDTO.getFromJson(entity.getExtParams()); + if (StringUtils.isBlank(dto.getCatalogUri()) && CATALOG_TYPE_HIVE.equals(dto.getCatalogType())) { + if (StringUtils.isBlank(entity.getDataNodeName())) { + throw new BusinessException(ErrorCodeEnum.SINK_INFO_INCORRECT, + "paimon catalog uri unspecified and data node is blank"); + } + PaimonDataNodeInfo dataNodeInfo = (PaimonDataNodeInfo) dataNodeHelper.getDataNodeInfo( + entity.getDataNodeName(), entity.getSinkType()); + CommonBeanUtils.copyProperties(dataNodeInfo, dto, true); + dto.setCatalogUri(dataNodeInfo.getUrl()); + } + + CommonBeanUtils.copyProperties(entity, sink, true); + CommonBeanUtils.copyProperties(dto, sink, true); + List sinkFields = super.getSinkFields(entity.getId()); + sink.setSinkFieldList(sinkFields); + return sink; + } + + /** + * Check a decimal field: precision and scale must both be specified in the ext params of the field, + * and the precision must not be smaller than the scale. Other field types are not checked. + * + * @throws BusinessException if the decimal field violates one of the rules + */ + @Override + protected void checkFieldInfo(SinkField field) { + if (FieldType.forName(field.getFieldType()) == FieldType.DECIMAL) { + PaimonColumnInfo info = PaimonColumnInfo.getFromJson(field.getExtParams()); + if (info.getPrecision() == null || info.getScale() == null) { + String errorMsg = String.format("precision or scale not specified for decimal field (%s)", + field.getFieldName()); + LOGGER.error("field info check error: {}", errorMsg); + throw new BusinessException(errorMsg); + } + if (info.getPrecision() < info.getScale()) { + String errorMsg = String.format( + "precision (%d) must be 
greater or equal than scale (%d) for decimal field (%s)", + info.getPrecision(), info.getScale(), field.getFieldName()); + LOGGER.error("field info check error: {}", errorMsg); + throw new BusinessException(errorMsg); + } + } + } + +} diff --git a/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/source/paimon/PaimonSourceOperator.java b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/source/paimon/PaimonSourceOperator.java new file mode 100644 index 00000000000..12f9576b71b --- /dev/null +++ b/inlong-manager/manager-service/src/main/java/org/apache/inlong/manager/service/source/paimon/PaimonSourceOperator.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.service.source.paimon; + +import org.apache.inlong.manager.common.consts.SourceType; +import org.apache.inlong.manager.common.enums.ErrorCodeEnum; +import org.apache.inlong.manager.common.exceptions.BusinessException; +import org.apache.inlong.manager.common.util.CommonBeanUtils; +import org.apache.inlong.manager.dao.entity.StreamSourceEntity; +import org.apache.inlong.manager.pojo.source.SourceRequest; +import org.apache.inlong.manager.pojo.source.StreamSource; +import org.apache.inlong.manager.pojo.source.paimon.PaimonSource; +import org.apache.inlong.manager.pojo.source.paimon.PaimonSourceDTO; +import org.apache.inlong.manager.pojo.source.paimon.PaimonSourceRequest; +import org.apache.inlong.manager.pojo.stream.StreamField; +import org.apache.inlong.manager.service.source.AbstractSourceOperator; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.List; + +/** + * Paimon source operator: converts between Paimon source requests, stored source entities and + * the Paimon source returned to callers. + */ +@Service +public class PaimonSourceOperator extends AbstractSourceOperator { + + @Autowired + private ObjectMapper objectMapper; + + @Override + public Boolean accept(String sourceType) { + return SourceType.PAIMON.equals(sourceType); + } + + @Override + protected String getSourceType() { + return SourceType.PAIMON; + } + + /** + * Copy the request properties to the target entity and serialize the Paimon specific + * config into its ext params. + * + * @throws BusinessException if building or serializing the ext params fails + */ + @Override + protected void setTargetEntity(SourceRequest request, StreamSourceEntity targetEntity) { + PaimonSourceRequest sourceRequest = (PaimonSourceRequest) request; + CommonBeanUtils.copyProperties(sourceRequest, targetEntity, true); + try { + PaimonSourceDTO dto = PaimonSourceDTO.getFromRequest(sourceRequest, targetEntity.getExtParams()); + targetEntity.setExtParams(objectMapper.writeValueAsString(dto)); + } catch (Exception e) { + throw new BusinessException(ErrorCodeEnum.SOURCE_INFO_INCORRECT, + String.format("serialize extParams of Paimon SourceDTO failure: %s", 
e.getMessage())); + } + } + + /** + * Build the Paimon source from the entity, its ext params and its stored source fields; + * a null entity yields an empty source. + */ + @Override + public StreamSource getFromEntity(StreamSourceEntity entity) { + PaimonSource source = new PaimonSource(); + if (entity == null) { + return source; + } + PaimonSourceDTO dto = PaimonSourceDTO.getFromJson(entity.getExtParams()); + CommonBeanUtils.copyProperties(entity, source, true); + CommonBeanUtils.copyProperties(dto, source, true); + List sourceFields = super.getSourceFields(entity.getId()); + source.setFieldList(sourceFields); + return source; + } + +} diff --git a/inlong-manager/manager-service/src/test/java/org/apache/inlong/manager/service/sink/PaimonSinkServiceTest.java b/inlong-manager/manager-service/src/test/java/org/apache/inlong/manager/service/sink/PaimonSinkServiceTest.java new file mode 100644 index 00000000000..0acb5833683 --- /dev/null +++ b/inlong-manager/manager-service/src/test/java/org/apache/inlong/manager/service/sink/PaimonSinkServiceTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.manager.service.sink; + +import org.apache.inlong.manager.common.consts.InlongConstants; +import org.apache.inlong.manager.common.consts.SinkType; +import org.apache.inlong.manager.pojo.sink.SinkRequest; +import org.apache.inlong.manager.pojo.sink.StreamSink; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSink; +import org.apache.inlong.manager.pojo.sink.paimon.PaimonSinkRequest; +import org.apache.inlong.manager.service.ServiceBaseTest; +import org.apache.inlong.manager.service.core.impl.InlongStreamServiceTest; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Paimon stream sink service test. + */ +public class PaimonSinkServiceTest extends ServiceBaseTest { + + private final String globalGroupId = "b_group1"; + private final String globalStreamId = "stream1_paimon"; + private final String globalOperator = "admin"; + + @Autowired + private StreamSinkService sinkService; + @Autowired + private InlongStreamServiceTest streamServiceTest; + + /** + * Save sink info: saves the test stream first, then a Paimon sink with resource creation disabled. 
+ * + * @param sinkName the name of the sink to save + * @return the value returned by sinkService.save + */ + public Integer saveSink(String sinkName) { + streamServiceTest.saveInlongStream(globalGroupId, globalStreamId, globalOperator); + PaimonSinkRequest sinkInfo = new PaimonSinkRequest(); + sinkInfo.setInlongGroupId(globalGroupId); + sinkInfo.setInlongStreamId(globalStreamId); + sinkInfo.setSinkType(SinkType.PAIMON); + sinkInfo.setEnableCreateResource(InlongConstants.DISABLE_CREATE_RESOURCE); + sinkInfo.setDataPath("hdfs://127.0.0.1:8020/data"); + sinkInfo.setSinkName(sinkName); + // random id in the range [1, 100000] + sinkInfo.setId((int) (Math.random() * 100000 + 1)); + sinkInfo.setCatalogUri("thrift://127.0.0.1:9000"); + return sinkService.save(sinkInfo, globalOperator); + } + + /** + * A saved sink gets a non-null id and can be deleted. + */ + @Test + public void testSaveAndDelete() { + Integer id = this.saveSink("default1"); + Assertions.assertNotNull(id); + boolean result = sinkService.delete(id, false, globalOperator); + Assertions.assertTrue(result); + } + + /** + * A saved sink can be fetched by id and carries the group id it was saved with. + */ + @Test + public void testListByIdentifier() { + Integer id = this.saveSink("default2"); + StreamSink sink = sinkService.get(id); + Assertions.assertEquals(globalGroupId, sink.getInlongGroupId()); + sinkService.delete(id, false, globalOperator); + } + + /** + * A fetched sink can be converted back to a request and updated. + */ + @Test + public void testGetAndUpdate() { + Integer sinkId = this.saveSink("default3"); + StreamSink streamSink = sinkService.get(sinkId); + Assertions.assertEquals(globalGroupId, streamSink.getInlongGroupId()); + + PaimonSink sink = (PaimonSink) streamSink; + sink.setEnableCreateResource(InlongConstants.DISABLE_CREATE_RESOURCE); + SinkRequest request = sink.genSinkRequest(); + boolean result = sinkService.update(request, globalOperator); + Assertions.assertTrue(result); + + sinkService.delete(sinkId, false, globalOperator); + } + +} diff --git a/inlong-sort/README.md b/inlong-sort/README.md index c7d23e160a8..1667fd7e31d 100644 --- a/inlong-sort/README.md +++ b/inlong-sort/README.md @@ -30,6 +30,7 @@ InLong Sort can be used together with the Manager to manage metadata, or it can | | HDFS | | | TDSQL Postgres | | | Hudi | +| | Paimon | ## Build ### For 
Apache Flink 1.13 (default) diff --git a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/constant/PaimonConstant.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/constant/PaimonConstant.java new file mode 100644 index 00000000000..558e77e1809 --- /dev/null +++ b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/constant/PaimonConstant.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.protocol.constant; + +/** + * PAIMON option constant + */ +public class PaimonConstant { + + /** + * Connector key + */ + public static final String CONNECTOR_KEY = "connector"; + + /** + * 'connector' = 'paimon-inlong' + */ + public static final String CONNECTOR = "paimon-inlong"; + + public static final String ENABLE_CODE = "true"; + + /** + * Asynchronously sync Hive meta to HMS, default false + */ + public static final String PAIMON_OPTION_HIVE_SYNC_ENABLED = "hive_sync.enabled"; + + /** + * Database name for hive sync, default 'default' + */ + public static final String PAIMON_OPTION_HIVE_SYNC_DB = "hive_sync.db"; + + /** + * Table name for hive sync, default 'unknown' + */ + public static final String PAIMON_OPTION_HIVE_SYNC_TABLE = "hive_sync.table"; + + /** + * File format for hive sync, default 'PARQUET' + */ + public static final String PAIMON_OPTION_HIVE_SYNC_FILE_FORMAT = "hive_sync.file_format"; + + /** + * Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql, default 'hms' + */ + public static final String PAIMON_OPTION_HIVE_SYNC_MODE = "hive_sync.mode"; + + /** + * The HMS mode use the hive meta client to sync metadata. + */ + public static final String PAIMON_OPTION_HIVE_SYNC_MODE_HMS_VALUE = "hms"; + + /** + * Metastore uris for hive sync, default '' + */ + public static final String PAIMON_OPTION_HIVE_SYNC_METASTORE_URIS = "hive_sync.metastore.uris"; + + /** + * Base path for the target paimon table. 
+ * The path would be created if it does not exist, + * otherwise a Paimon table is expected to be initialized successfully + */ + public static final String PAIMON_OPTION_DEFAULT_PATH = "path"; + + /** + * Database name that will be used for incremental query. If different databases have the same table name during + * incremental query, + * we can set it to limit the table name under a specific database + */ + public static final String PAIMON_OPTION_DATABASE_NAME = "paimon.database.name"; + + /** + * Table name that will be used for registering with Hive. Needs to be same across runs. + */ + public static final String PAIMON_OPTION_TABLE_NAME = "paimon.table.name"; + + /** + * Record key field. Value to be used as the record key. + * Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using + * the dot notation, e.g. `a.b.c` + */ + public static final String PAIMON_OPTION_RECORD_KEY_FIELD_NAME = "paimon.datasource.write.recordkey.field"; + + /** + * Partition path field. Value to be used as the partition path. + * Actual value obtained by invoking .toString() + */ + public static final String PAIMON_OPTION_PARTITION_PATH_FIELD_NAME = "paimon.datasource.write.partitionpath.field"; + + /** + * The prefix of ddl attr parsed from frontend advanced properties. + */ + public static final String DDL_ATTR_PREFIX = "ddl."; + + /** + * The property key of advanced properties. + */ + public static final String EXTEND_ATTR_KEY_NAME = "keyName"; + + /** + * The property value of advanced properties. 
+ */ + public static final String EXTEND_ATTR_VALUE_NAME = "keyValue"; + + /** + * Check interval for streaming read, in seconds; default 1 minute + */ + public static final String READ_STREAMING_CHECK_INTERVAL = "read.streaming.check-interval"; + + /** + * Whether to read as streaming source, default false + */ + public static final String READ_AS_STREAMING = "read.streaming.enabled"; + + /** + * Start commit instant for reading, the commit time format should be 'yyyyMMddHHmmss', + * by default reading from the latest instant for streaming read + */ + public static final String READ_START_COMMIT = "read.start-commit"; + + /** + * Whether to skip compaction instants for streaming read, + * there are two cases that this option can be used to avoid reading duplicates: + * 1) you are definitely sure that the consumer reads faster than any compaction instants, + * usually with delta time compaction strategy that is long enough, e.g. one week; + * 2) changelog mode is enabled, this option is a solution to keep data integrity + */ + public static final String READ_STREAMING_SKIP_COMPACT = "read.streaming.skip_compaction"; + + /** + * Catalog types supported by Paimon + */ + public enum CatalogType { + + /** + * Data stored in hive metastore. + */ + HIVE, + /** + * Data stored in hadoop filesystem. + */ + HADOOP, + /** + * Data stored in hybris metastore. 
+ */ + HYBRIS; + + /** + * get catalogType from name + */ + public static CatalogType forName(String name) { + for (CatalogType value : values()) { + if (value.name().equals(name)) { + return value; + } + } + throw new IllegalArgumentException(String.format("Unsupport catalogType:%s", name)); + } + } +} diff --git a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/ExtractNode.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/ExtractNode.java index fc68f0f356f..c3b92af9b04 100644 --- a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/ExtractNode.java +++ b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/ExtractNode.java @@ -26,6 +26,7 @@ import org.apache.inlong.sort.protocol.node.extract.MongoExtractNode; import org.apache.inlong.sort.protocol.node.extract.MySqlExtractNode; import org.apache.inlong.sort.protocol.node.extract.OracleExtractNode; +import org.apache.inlong.sort.protocol.node.extract.PaimonExtractNode; import org.apache.inlong.sort.protocol.node.extract.PostgresExtractNode; import org.apache.inlong.sort.protocol.node.extract.PulsarExtractNode; import org.apache.inlong.sort.protocol.node.extract.RedisExtractNode; @@ -65,6 +66,7 @@ @JsonSubTypes.Type(value = RedisExtractNode.class, name = "redisExtract"), @JsonSubTypes.Type(value = DorisExtractNode.class, name = "dorisExtract"), @JsonSubTypes.Type(value = HudiExtractNode.class, name = "hudiExtract"), + @JsonSubTypes.Type(value = PaimonExtractNode.class, name = "paimonExtract"), @JsonSubTypes.Type(value = IcebergExtractNode.class, name = "icebergExtract"), }) @Data diff --git a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/LoadNode.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/LoadNode.java index 18427241739..4370fc13a91 100644 --- a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/LoadNode.java +++ 
b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/LoadNode.java @@ -31,6 +31,7 @@ import org.apache.inlong.sort.protocol.node.load.KafkaLoadNode; import org.apache.inlong.sort.protocol.node.load.MySqlLoadNode; import org.apache.inlong.sort.protocol.node.load.OracleLoadNode; +import org.apache.inlong.sort.protocol.node.load.PaimonLoadNode; import org.apache.inlong.sort.protocol.node.load.PostgresLoadNode; import org.apache.inlong.sort.protocol.node.load.RedisLoadNode; import org.apache.inlong.sort.protocol.node.load.SqlServerLoadNode; @@ -75,6 +76,7 @@ @JsonSubTypes.Type(value = DorisLoadNode.class, name = "dorisLoad"), @JsonSubTypes.Type(value = StarRocksLoadNode.class, name = "starRocksLoad"), @JsonSubTypes.Type(value = HudiLoadNode.class, name = "hudiLoad"), + @JsonSubTypes.Type(value = PaimonLoadNode.class, name = "paimonLoad"), @JsonSubTypes.Type(value = RedisLoadNode.class, name = "redisLoad"), }) @NoArgsConstructor diff --git a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/Node.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/Node.java index b926efd0dc1..71621ea68ca 100644 --- a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/Node.java +++ b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/Node.java @@ -26,6 +26,7 @@ import org.apache.inlong.sort.protocol.node.extract.MongoExtractNode; import org.apache.inlong.sort.protocol.node.extract.MySqlExtractNode; import org.apache.inlong.sort.protocol.node.extract.OracleExtractNode; +import org.apache.inlong.sort.protocol.node.extract.PaimonExtractNode; import org.apache.inlong.sort.protocol.node.extract.PostgresExtractNode; import org.apache.inlong.sort.protocol.node.extract.PulsarExtractNode; import org.apache.inlong.sort.protocol.node.extract.RedisExtractNode; @@ -44,6 +45,7 @@ import org.apache.inlong.sort.protocol.node.load.KuduLoadNode; import 
org.apache.inlong.sort.protocol.node.load.MySqlLoadNode; import org.apache.inlong.sort.protocol.node.load.OracleLoadNode; +import org.apache.inlong.sort.protocol.node.load.PaimonLoadNode; import org.apache.inlong.sort.protocol.node.load.PostgresLoadNode; import org.apache.inlong.sort.protocol.node.load.RedisLoadNode; import org.apache.inlong.sort.protocol.node.load.SqlServerLoadNode; @@ -79,6 +81,7 @@ @JsonSubTypes.Type(value = RedisExtractNode.class, name = "redisExtract"), @JsonSubTypes.Type(value = DorisExtractNode.class, name = "dorisExtract"), @JsonSubTypes.Type(value = HudiExtractNode.class, name = "hudiExtract"), + @JsonSubTypes.Type(value = PaimonExtractNode.class, name = "paimonExtract"), @JsonSubTypes.Type(value = IcebergExtractNode.class, name = "icebergExtract"), @JsonSubTypes.Type(value = TransformNode.class, name = "baseTransform"), @JsonSubTypes.Type(value = DistinctNode.class, name = "distinct"), @@ -97,6 +100,7 @@ @JsonSubTypes.Type(value = GreenplumLoadNode.class, name = "greenplumLoad"), @JsonSubTypes.Type(value = DorisLoadNode.class, name = "dorisLoad"), @JsonSubTypes.Type(value = HudiLoadNode.class, name = "hudiLoad"), + @JsonSubTypes.Type(value = PaimonLoadNode.class, name = "paimonLoad"), @JsonSubTypes.Type(value = DorisLoadNode.class, name = "dorisLoad"), @JsonSubTypes.Type(value = StarRocksLoadNode.class, name = "starRocksLoad"), @JsonSubTypes.Type(value = RedisLoadNode.class, name = "redisLoad"), diff --git a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNode.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNode.java new file mode 100644 index 00000000000..5d2d40c0c63 --- /dev/null +++ b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNode.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.protocol.node.extract; + +import org.apache.inlong.sort.protocol.FieldInfo; +import org.apache.inlong.sort.protocol.node.ExtractNode; +import org.apache.inlong.sort.protocol.transformation.WatermarkField; + +import com.google.common.base.Preconditions; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude.Include; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonTypeName; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.apache.inlong.sort.protocol.constant.PaimonConstant.*; + +/** + * Paimon extract node for extract data from paimon + */ +@EqualsAndHashCode(callSuper = true) +@JsonTypeName("paimonExtract") +@JsonInclude(Include.NON_NULL) +@Data +public class PaimonExtractNode extends ExtractNode implements 
Serializable { + + @JsonProperty("tableName") + @Nonnull + private String tableName; + + @JsonProperty("dbName") + @Nonnull + private String dbName; + + @JsonProperty("catalogType") + private CatalogType catalogType; + + @JsonProperty("uri") + private String uri; + + @JsonProperty("warehouse") + private String warehouse; + + @JsonProperty("readStreamingSkipCompaction") + private boolean readStreamingSkipCompaction; + + @JsonProperty("readStartCommit") + private String readStartCommit; + + @JsonProperty("extList") + private List> extList; + + private static final long serialVersionUID = 1L; + private int checkIntervalInMinus; + + public PaimonExtractNode( + @Nonnull @JsonProperty("id") String id, + @Nonnull @JsonProperty("name") String name, + @Nonnull @JsonProperty("fields") List fields, + @Nullable @JsonProperty("watermarkField") WatermarkField watermarkField, + @Nullable @JsonProperty("uri") String uri, + @Nullable @JsonProperty("warehouse") String warehouse, + @Nonnull @JsonProperty("dbName") String dbName, + @Nonnull @JsonProperty("tableName") String tableName, + @JsonProperty("catalogType") CatalogType catalogType, + @JsonProperty("checkIntervalInMinus") int checkIntervalInMinus, + @JsonProperty("readStreamingSkipCompaction") boolean readStreamingSkipCompaction, + @JsonProperty("readStartCommit") String readStartCommit, + @Nullable @JsonProperty("properties") Map properties, + @JsonProperty("extList") List> extList) { + super(id, name, fields, watermarkField, properties); + + this.tableName = Preconditions.checkNotNull(tableName, "table name is null"); + this.dbName = Preconditions.checkNotNull(dbName, "db name is null"); + this.catalogType = catalogType == null ? 
CatalogType.HIVE : catalogType; + this.uri = uri; + this.warehouse = warehouse; + this.readStreamingSkipCompaction = readStreamingSkipCompaction; + this.readStartCommit = readStartCommit; + this.extList = extList; + this.checkIntervalInMinus = checkIntervalInMinus; + } + + @Override + public Map tableOptions() { + Map options = super.tableOptions(); + + options.put(READ_AS_STREAMING, ENABLE_CODE); + options.put(READ_STREAMING_CHECK_INTERVAL, String.valueOf(checkIntervalInMinus)); + + // Synchronization to Metastore is enabled by default, + // which can be modified in the front-end configuration + options.put(PAIMON_OPTION_HIVE_SYNC_ENABLED, ENABLE_CODE); + options.put(PAIMON_OPTION_HIVE_SYNC_MODE, PAIMON_OPTION_HIVE_SYNC_MODE_HMS_VALUE); + options.put(PAIMON_OPTION_HIVE_SYNC_DB, dbName); + options.put(PAIMON_OPTION_HIVE_SYNC_TABLE, tableName); + options.put(PAIMON_OPTION_HIVE_SYNC_METASTORE_URIS, uri); + + // If the key of an extended attribute starts with "ddl.", + // the rest of the key is passed as an option to the ddl statement of the table + if (CollectionUtils.isNotEmpty(extList)) { + extList.forEach(ext -> { + String keyName = ext.get(EXTEND_ATTR_KEY_NAME); + if (StringUtils.isNoneBlank(keyName) && + keyName.startsWith(DDL_ATTR_PREFIX)) { + String ddlKeyName = keyName.substring(DDL_ATTR_PREFIX.length()); + String ddlValue = ext.get(EXTEND_ATTR_VALUE_NAME); + options.put(ddlKeyName, ddlValue); + } + }); + } + + String path = String.format("%s/%s.db/%s", warehouse, dbName, tableName); + options.put(PAIMON_OPTION_DEFAULT_PATH, path); + + // read options + options.put(READ_START_COMMIT, readStartCommit); + options.put(READ_STREAMING_SKIP_COMPACT, String.valueOf(readStreamingSkipCompaction)); + + options.put(PAIMON_OPTION_DATABASE_NAME, dbName); + options.put(PAIMON_OPTION_TABLE_NAME, tableName); + options.put(CONNECTOR_KEY, CONNECTOR); + + return options; + } + + @Override + public String genTableName() { + return String.format("paimon_table_%s", getId()); + } +} diff --git 
a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNode.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNode.java new file mode 100644 index 00000000000..93b9d344aaf --- /dev/null +++ b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNode.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.protocol.node.load; + +import org.apache.inlong.sort.protocol.FieldInfo; +import org.apache.inlong.sort.protocol.InlongMetric; +import org.apache.inlong.sort.protocol.constant.PaimonConstant.CatalogType; +import org.apache.inlong.sort.protocol.enums.FilterStrategy; +import org.apache.inlong.sort.protocol.node.LoadNode; +import org.apache.inlong.sort.protocol.transformation.FieldRelation; +import org.apache.inlong.sort.protocol.transformation.FilterFunction; + +import com.google.common.base.Preconditions; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonTypeName; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * The load node of paimon. 
+ */ +@JsonTypeName("paimonLoad") +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +public class PaimonLoadNode extends LoadNode implements InlongMetric, Serializable { + + private static final long serialVersionUID = -1L; + + public static final String ENABLE_CODE = "true"; + + private static final String PAIMON_OPTION_HIVE_SYNC_ENABLED = "hive_sync.enabled"; + private static final String PAIMON_OPTION_HIVE_SYNC_DB = "hive_sync.db"; + private static final String PAIMON_OPTION_HIVE_SYNC_TABLE = "hive_sync.table"; + private static final String PAIMON_OPTION_HIVE_SYNC_FILE_FORMAT = "hive_sync.file_format"; + private static final String PAIMON_OPTION_HIVE_SYNC_MODE = "hive_sync.mode"; + private static final String PAIMON_OPTION_HIVE_SYNC_MODE_HMS_VALUE = "hms"; + private static final String PAIMON_OPTION_HIVE_SYNC_METASTORE_URIS = "hive_sync.metastore.uris"; + private static final String PAIMON_OPTION_DEFAULT_PATH = "path"; + private static final String PAIMON_OPTION_DATABASE_NAME = "paimon.database.name"; + private static final String PAIMON_OPTION_TABLE_NAME = "paimon.table.name"; + private static final String PAIMON_OPTION_RECORD_KEY_FIELD_NAME = "paimon.datasource.write.recordkey.field"; + private static final String PAIMON_OPTION_PARTITION_PATH_FIELD_NAME = "paimon.datasource.write.partitionpath.field"; + private static final String DDL_ATTR_PREFIX = "ddl."; + private static final String EXTEND_ATTR_KEY_NAME = "keyName"; + private static final String EXTEND_ATTR_VALUE_NAME = "keyValue"; + + @JsonProperty("tableName") + @Nonnull + private String tableName; + + @JsonProperty("dbName") + @Nonnull + private String dbName; + + @JsonProperty("primaryKey") + private String primaryKey; + + @JsonProperty("catalogType") + private CatalogType catalogType; + + @JsonProperty("uri") + private String uri; + + @JsonProperty("warehouse") + private String warehouse; + + @JsonProperty("extList") + private List> extList; + + @JsonProperty("partitionKey") + private 
String partitionKey; + + @JsonCreator + public PaimonLoadNode( + @JsonProperty("id") String id, + @JsonProperty("name") String name, + @JsonProperty("fields") List fields, + @JsonProperty("fieldRelations") List fieldRelations, + @JsonProperty("filters") List filters, + @JsonProperty("filterStrategy") FilterStrategy filterStrategy, + @Nullable @JsonProperty("sinkParallelism") Integer sinkParallelism, + @JsonProperty("properties") Map properties, + @Nonnull @JsonProperty("dbName") String dbName, + @Nonnull @JsonProperty("tableName") String tableName, + @JsonProperty("primaryKey") String primaryKey, + @JsonProperty("catalogType") CatalogType catalogType, + @JsonProperty("uri") String uri, + @JsonProperty("warehouse") String warehouse, + @JsonProperty("extList") List> extList, + @JsonProperty("partitionKey") String partitionKey) { + super(id, name, fields, fieldRelations, filters, filterStrategy, sinkParallelism, properties); + this.tableName = Preconditions.checkNotNull(tableName, "table name is null"); + this.dbName = Preconditions.checkNotNull(dbName, "db name is null"); + this.primaryKey = primaryKey; + this.catalogType = catalogType == null ? 
CatalogType.HIVE : catalogType; + this.uri = uri; + this.warehouse = warehouse; + this.extList = extList; + this.partitionKey = partitionKey; + } + + @Override + public Map tableOptions() { + Map options = super.tableOptions(); + + // Synchronization to Metastore is enabled by default, + // which can be modified in the front-end configuration + options.put(PAIMON_OPTION_HIVE_SYNC_ENABLED, ENABLE_CODE); + options.put(PAIMON_OPTION_HIVE_SYNC_MODE, PAIMON_OPTION_HIVE_SYNC_MODE_HMS_VALUE); + options.put(PAIMON_OPTION_HIVE_SYNC_DB, dbName); + options.put(PAIMON_OPTION_HIVE_SYNC_TABLE, tableName); + options.put(PAIMON_OPTION_HIVE_SYNC_METASTORE_URIS, uri); + + // partition field + if (StringUtils.isNoneBlank(partitionKey)) { + options.put(PAIMON_OPTION_PARTITION_PATH_FIELD_NAME, partitionKey); + } + + // If the key of an extended attribute starts with "ddl.", + // the rest of the key is passed as an option to the ddl statement of the table + if (CollectionUtils.isNotEmpty(extList)) { + extList.forEach(ext -> { + String keyName = ext.get(EXTEND_ATTR_KEY_NAME); + if (StringUtils.isNoneBlank(keyName) && + keyName.startsWith(DDL_ATTR_PREFIX)) { + String ddlKeyName = keyName.substring(DDL_ATTR_PREFIX.length()); + String ddlValue = ext.get(EXTEND_ATTR_VALUE_NAME); + options.put(ddlKeyName, ddlValue); + } + }); + } + + String path = String.format("%s/%s.db/%s", warehouse, dbName, tableName); + options.put(PAIMON_OPTION_DEFAULT_PATH, path); + + options.put(PAIMON_OPTION_DATABASE_NAME, dbName); + options.put(PAIMON_OPTION_TABLE_NAME, tableName); + if (StringUtils.isNoneBlank(primaryKey)) { + options.put(PAIMON_OPTION_RECORD_KEY_FIELD_NAME, primaryKey); + } + options.put("connector", "paimon-inlong"); + + return options; + } + + @Override + public String genTableName() { + return tableName; + } + + @Override + public String getPrimaryKey() { + return primaryKey; + } + + @Override + public List getPartitionFields() { + return super.getPartitionFields(); + } + +} diff --git 
a/inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNodeTest.java b/inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNodeTest.java new file mode 100644 index 00000000000..d61ba5b49c6 --- /dev/null +++ b/inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/extract/PaimonExtractNodeTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.protocol.node.extract; + +import org.apache.inlong.common.pojo.sort.dataflow.field.format.DecimalFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.DoubleFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.IntFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.StringFormatInfo; +import org.apache.inlong.sort.SerializeBaseTest; +import org.apache.inlong.sort.protocol.FieldInfo; +import org.apache.inlong.sort.protocol.constant.PaimonConstant.CatalogType; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +/** + * Test for {@link PaimonExtractNode} + */ +public class PaimonExtractNodeTest extends SerializeBaseTest { + + @Override + public PaimonExtractNode getTestObject() { + List fields = Arrays.asList( + new FieldInfo("dt", new StringFormatInfo()), + new FieldInfo("id", new IntFormatInfo()), + new FieldInfo("name", new StringFormatInfo()), + new FieldInfo("age", new IntFormatInfo()), + new FieldInfo("price", new DecimalFormatInfo()), + new FieldInfo("sale", new DoubleFormatInfo())); + + return new PaimonExtractNode( + "1", + "doris_input", + fields, + null, + null, + "localhost:8030", + "dbName", + "tableName", + CatalogType.HIVE, + 1, + true, + "-1", + new HashMap<>(), + new ArrayList<>()); + } +} diff --git a/inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNodeTest.java b/inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNodeTest.java new file mode 100644 index 00000000000..f4aa61558a5 --- /dev/null +++ b/inlong-sort/sort-common/src/test/java/org/apache/inlong/sort/protocol/node/load/PaimonLoadNodeTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sort.protocol.node.load; + +import org.apache.inlong.common.pojo.sort.dataflow.field.format.StringFormatInfo; +import org.apache.inlong.sort.SerializeBaseTest; +import org.apache.inlong.sort.protocol.FieldInfo; +import org.apache.inlong.sort.protocol.constant.PaimonConstant.CatalogType; +import org.apache.inlong.sort.protocol.transformation.FieldRelation; + +import com.google.common.collect.Maps; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Map; + +/** + * Test for {@link PaimonLoadNode} + */ +public class PaimonLoadNodeTest extends SerializeBaseTest { + + @Override + public PaimonLoadNode getTestObject() { + Map properties = Maps.newHashMap(); + return new PaimonLoadNode("1", "test_paimon", + Collections.singletonList(new FieldInfo("id", new StringFormatInfo())), + Collections.singletonList(new FieldRelation(new FieldInfo("id", new StringFormatInfo()), + new FieldInfo("id", new StringFormatInfo()))), + null, + null, + 1, + null, + "test_db", + "test_table", + "id", + CatalogType.HIVE, + "thrift://localhost:9083", + "hdfs://localhost:9000/user/paimon/warehouse", + new ArrayList<>(), + "f1"); + } +} diff --git a/inlong-sort/sort-core/pom.xml b/inlong-sort/sort-core/pom.xml index a1be0b20585..769b7a0fd59 100644 --- 
a/inlong-sort/sort-core/pom.xml +++ b/inlong-sort/sort-core/pom.xml @@ -293,6 +293,12 @@ ${project.version} test + + org.apache.inlong + sort-connector-paimon-v1.15 + ${project.version} + test + org.apache.inlong sort-connector-iceberg-v1.15 diff --git a/inlong-sort/sort-core/src/test/java/org/apache/inlong/sort/parser/PaimonNodeSqlParserTest.java b/inlong-sort/sort-core/src/test/java/org/apache/inlong/sort/parser/PaimonNodeSqlParserTest.java new file mode 100644 index 00000000000..407a302a7b2 --- /dev/null +++ b/inlong-sort/sort-core/src/test/java/org/apache/inlong/sort/parser/PaimonNodeSqlParserTest.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sort.parser; + +import org.apache.inlong.common.pojo.sort.dataflow.field.format.FloatFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.IntFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.LongFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.StringFormatInfo; +import org.apache.inlong.common.pojo.sort.dataflow.field.format.TimestampFormatInfo; +import org.apache.inlong.sort.parser.impl.FlinkSqlParser; +import org.apache.inlong.sort.parser.result.FlinkSqlParseResult; +import org.apache.inlong.sort.protocol.FieldInfo; +import org.apache.inlong.sort.protocol.GroupInfo; +import org.apache.inlong.sort.protocol.StreamInfo; +import org.apache.inlong.sort.protocol.constant.PaimonConstant.CatalogType; +import org.apache.inlong.sort.protocol.node.Node; +import org.apache.inlong.sort.protocol.node.extract.MySqlExtractNode; +import org.apache.inlong.sort.protocol.node.load.PaimonLoadNode; +import org.apache.inlong.sort.protocol.transformation.FieldRelation; +import org.apache.inlong.sort.protocol.transformation.relation.NodeRelation; + +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.EnvironmentSettings; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.test.util.AbstractTestBase; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Test for Paimon SQL parser. 
+ */
+public class PaimonNodeSqlParserTest extends AbstractTestBase {
+
+    /** Builds the MySQL source node that feeds the Paimon sink under test. */
+    private MySqlExtractNode buildMySQLExtractNode(String id) {
+        List<FieldInfo> fields = Arrays.asList(new FieldInfo("id", new LongFormatInfo()),
+                new FieldInfo("name", new StringFormatInfo()),
+                new FieldInfo("age", new IntFormatInfo()),
+                new FieldInfo("salary", new FloatFormatInfo()),
+                new FieldInfo("ts", new TimestampFormatInfo()),
+                new FieldInfo("event_type", new StringFormatInfo()));
+        // Add this property when the downstream load node is expected to work in append mode.
+        Map<String, String> map = new HashMap<>();
+        map.put("append-mode", "true");
+        return new MySqlExtractNode(id, "mysql_input", fields, null, map, "id",
+                Collections.singletonList("work1"), "localhost", "root", "123456", "inlong",
+                null, null, null, null);
+    }
+
+    /** Builds a Paimon sink backed by the Hadoop catalog; no test in this class uses it yet. */
+    private PaimonLoadNode buildPaimonLoadNodeWithHadoopCatalog() {
+        List<FieldInfo> fields = Arrays.asList(new FieldInfo("id", new LongFormatInfo()),
+                new FieldInfo("name", new StringFormatInfo()),
+                new FieldInfo("age", new IntFormatInfo()),
+                new FieldInfo("ts", new TimestampFormatInfo()));
+        List<FieldRelation> relations = Arrays
+                .asList(new FieldRelation(new FieldInfo("id", new LongFormatInfo()),
+                        new FieldInfo("id", new LongFormatInfo())),
+                        new FieldRelation(new FieldInfo("name", new StringFormatInfo()),
+                                new FieldInfo("name", new StringFormatInfo())),
+                        new FieldRelation(new FieldInfo("age", new IntFormatInfo()),
+                                new FieldInfo("age", new IntFormatInfo())),
+                        new FieldRelation(new FieldInfo("ts", new TimestampFormatInfo()),
+                                new FieldInfo("ts", new TimestampFormatInfo())));
+
+        List<HashMap<String, String>> extList = new ArrayList<>();
+        HashMap<String, String> map = new HashMap<>();
+        map.put("table.type", "MERGE_ON_READ"); // NOTE(review): looks like a Hudi option; confirm for Paimon
+        extList.add(map);
+
+        return new PaimonLoadNode(
+                "paimon",
+                "paimon_table_name",
+                fields,
+                relations,
+                null,
+                null,
+                null,
+                null,
+                "inlong",
+                "inlong_paimon",
+                null,
+                CatalogType.HADOOP,
+                null,
+                "hdfs://localhost:9000/paimon/warehouse",
+                extList,
+                "f1");
+    }
+
+    /** Builds a Paimon sink backed by the Hive catalog; this is the sink used by {@code testPaimon}. */
+    private PaimonLoadNode buildPaimonLoadNodeWithHiveCatalog() {
+        List<FieldInfo> fields = Arrays.asList(new FieldInfo("id", new LongFormatInfo()),
+                new FieldInfo("name", new StringFormatInfo()),
+                new FieldInfo("age", new IntFormatInfo()),
+                new FieldInfo("ts", new TimestampFormatInfo()));
+        List<FieldRelation> relations = Arrays
+                .asList(new FieldRelation(new FieldInfo("id", new LongFormatInfo()),
+                        new FieldInfo("id", new LongFormatInfo())),
+                        new FieldRelation(new FieldInfo("name", new StringFormatInfo()),
+                                new FieldInfo("name", new StringFormatInfo())),
+                        new FieldRelation(new FieldInfo("age", new IntFormatInfo()),
+                                new FieldInfo("age", new IntFormatInfo())),
+                        new FieldRelation(new FieldInfo("ts", new TimestampFormatInfo()),
+                                new FieldInfo("ts", new TimestampFormatInfo())));
+        List<HashMap<String, String>> extList = new ArrayList<>();
+        HashMap<String, String> map = new HashMap<>();
+        map.put("table.type", "MERGE_ON_READ"); // NOTE(review): looks like a Hudi option; confirm for Paimon
+        extList.add(map);
+
+        // Either export HIVE_CONF_DIR, or pass the metastore uri and the warehouse path explicitly.
+        return new PaimonLoadNode(
+                "paimon",
+                "paimon_table_name",
+                fields,
+                relations,
+                null,
+                null,
+                null,
+                null,
+                "inlong",
+                "inlong_paimon",
+                null,
+                CatalogType.HIVE,
+                "thrift://localhost:9083",
+                "/hive/warehouse",
+                extList,
+                "f1");
+    }
+
+    /**
+     * Builds the relation that connects the given input nodes to the given output nodes by id.
+     *
+     * @param inputs extract nodes whose ids become the relation inputs
+     * @param outputs load nodes whose ids become the relation outputs
+     * @return node relation linking the input ids to the output ids
+     */
+    private NodeRelation buildNodeRelation(List<Node> inputs, List<Node> outputs) {
+        List<String> inputIds = inputs.stream().map(Node::getId).collect(Collectors.toList());
+        List<String> outputIds = outputs.stream().map(Node::getId).collect(Collectors.toList());
+        return new NodeRelation(inputIds, outputIds);
+    }
+
+    /** Parses a MySQL to Paimon (Hive catalog) stream and expects non-empty load and create-table SQL. */
+    @Test
+    public void testPaimon() throws Exception {
+        EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
+        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+        env.setParallelism(1);
+        env.enableCheckpointing(10000);
+        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
+        Node inputNode = buildMySQLExtractNode("1");
+        Node outputNode = buildPaimonLoadNodeWithHiveCatalog();
+        StreamInfo streamInfo = new StreamInfo("1L", Arrays.asList(inputNode, outputNode),
+                Collections.singletonList(buildNodeRelation(Collections.singletonList(inputNode),
+                        Collections.singletonList(outputNode))));
+        GroupInfo groupInfo = new GroupInfo("group_id", Collections.singletonList(streamInfo));
+        FlinkSqlParser parser = FlinkSqlParser.getInstance(tableEnv, groupInfo);
+        FlinkSqlParseResult result = (FlinkSqlParseResult) parser.parse();
+        Assert.assertFalse("no load SQL was generated", result.getLoadSqls().isEmpty());
+        Assert.assertFalse("no create-table SQL was generated", result.getCreateTableSqls().isEmpty());
+    }
+}
diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/pom.xml index 50cd4ee5baf..be4ec16d96f 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/pom.xml @@ -53,6 +53,7 @@ 2.12.4-15.0 7.2.2.jre8 0.12.3 + 0.7.0-incubating 0.9.3 2.7.2 2.3.0 @@ -134,6 +135,16 @@ hudi-flink1.15-bundle ${hudi.version} + + org.apache.paimon + paimon-bundle + ${paimon.version} + + + org.apache.paimon + paimon-flink-1.15 + ${paimon.version} + io.streamnative.connectors flink-protobuf diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/pom.xml new file mode 100644 index 00000000000..1bb309e8fae --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/pom.xml @@ -0,0 +1,106 @@ + + + + 4.0.0 + + org.apache.inlong + sort-connectors-v1.15 + 1.13.0-SNAPSHOT + + + sort-connector-paimon-v1.15 + jar + Apache InLong - Sort-connector-paimon + + + ${project.parent.parent.parent.parent.parent.basedir} + 4.2.1 + + + + + org.apache.inlong + sort-connector-base + ${project.version} + + + org.apache.paimon + paimon-flink-1.15 + + + org.apache.thrift + libfb303 + + + com.google.guava + guava + ${guava.version} + + + com.fasterxml.woodstox + woodstox-core +
${woodstox-core.version} + + + org.codehaus.woodstox + stax2-api + ${stax2-api.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-flink + + shade + + package + + + + org.apache.paimon:* + org.apache.hive:hive-exec + org.apache.hadoop:* + com.fasterxml.woodstox:* + org.codehaus.woodstox:* + com.google.guava:* + com.google.protobuf:* + + + + + org.apache.inlong:sort-connector-* + + org/apache/inlong/** + META-INF/services/org.apache.flink.table.factories.Factory + + + + + + + + + + diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/java/org/apache/inlong/sort/paimon/table/sink/PaimonTableInlongFactory.java b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/java/org/apache/inlong/sort/paimon/table/sink/PaimonTableInlongFactory.java new file mode 100644 index 00000000000..4306023e0d2 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/java/org/apache/inlong/sort/paimon/table/sink/PaimonTableInlongFactory.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sort.paimon.table.sink;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.table.connector.sink.DynamicTableSink;
+import org.apache.paimon.flink.FlinkTableFactory;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.apache.inlong.sort.base.Constants.INLONG_AUDIT;
+import static org.apache.inlong.sort.base.Constants.INLONG_METRIC;
+
+/**
+ * Paimon table factory exposed under the {@code paimon-inlong} identifier; also accepts InLong metric/audit options.
+ */
+public class PaimonTableInlongFactory extends FlinkTableFactory {
+
+    public static final String SORT_CONNECTOR_IDENTIFY_PAIMON = "paimon-inlong";
+
+    @Override
+    public String factoryIdentifier() {
+        return SORT_CONNECTOR_IDENTIFY_PAIMON;
+    }
+
+    @Override
+    public DynamicTableSink createDynamicTableSink(Context context) {
+        return super.createDynamicTableSink(context);
+    }
+
+    @Override
+    public Set<ConfigOption<?>> optionalOptions() {
+        // Copy before adding: nothing guarantees the set returned by the parent is mutable or unshared.
+        Set<ConfigOption<?>> configOptions = new HashSet<>(super.optionalOptions());
+        configOptions.add(INLONG_METRIC);
+        configOptions.add(INLONG_AUDIT);
+        return configOptions;
+    }
+}
diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 00000000000..4e747eab493 --- /dev/null +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/paimon/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.inlong.sort.paimon.table.sink.PaimonTableInlongFactory diff --git a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml index c753aa538f8..4ddf588eef2 100644 --- a/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml +++ b/inlong-sort/sort-flink/sort-flink-v1.15/sort-connectors/pom.xml @@ -42,6 +42,7 @@ tubemq hbase hudi + paimon kafka redis