From 9a9add6f4486520b03570e08cb31bd0a8fda1b34 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Wed, 31 Jul 2019 15:50:39 +0800 Subject: [PATCH 01/62] update --- flinkx-kudu/flinkx-kudu-core/pom.xml | 21 +++++ .../dtstack/flinkx/kudu/core/KuduUtil.java | 46 +++++++++++ flinkx-kudu/flinkx-kudu-reader/pom.xml | 79 +++++++++++++++++++ .../flinkx/kudu/reader/KuduInputFormat.java | 63 +++++++++++++++ .../kudu/reader/KuduInputFormatBuilder.java | 34 ++++++++ .../flinkx/kudu/reader/KuduReader.java | 42 ++++++++++ flinkx-kudu/flinkx-kudu-writer/pom.xml | 79 +++++++++++++++++++ .../flinkx/kudu/writer/KuduOutputFormat.java | 53 +++++++++++++ .../kudu/writer/KuduOutputFormatBuilder.java | 34 ++++++++ .../flinkx/kudu/writer/KuduWriter.java | 42 ++++++++++ flinkx-kudu/pom.xml | 28 +++++++ pom.xml | 1 + 12 files changed, 522 insertions(+) create mode 100644 flinkx-kudu/flinkx-kudu-core/pom.xml create mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java create mode 100644 flinkx-kudu/flinkx-kudu-reader/pom.xml create mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java create mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java create mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java create mode 100644 flinkx-kudu/flinkx-kudu-writer/pom.xml create mode 100644 flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java create mode 100644 flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java create mode 100644 flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java create mode 100644 flinkx-kudu/pom.xml diff --git a/flinkx-kudu/flinkx-kudu-core/pom.xml b/flinkx-kudu/flinkx-kudu-core/pom.xml new file mode 100644 index 0000000000..3adb954226 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-core/pom.xml @@ -0,0 +1,21 @@ + + + + flinkx-kudu + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-kudu-core + + + + org.apache.kudu + kudu-client + 1.10.0 + + + \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java new file mode 100644 index 0000000000..ce0f615722 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.core; + +import org.apache.kudu.client.AsyncKuduClient; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduUtil { + + + public static AsyncKuduClient getKuduClient(String masterAddress){ + AsyncKuduClient.AsyncKuduClientBuilder builder; + if(masterAddress.contains(",")){ + builder = new AsyncKuduClient.AsyncKuduClientBuilder(masterAddress); + } else { + builder = new AsyncKuduClient.AsyncKuduClientBuilder(masterAddress); + } + + return builder.build(); + } + + public static void main(String[] args) { + AsyncKuduClient client = getKuduClient(""); + client.tableExists(""); + } +} diff --git a/flinkx-kudu/flinkx-kudu-reader/pom.xml b/flinkx-kudu/flinkx-kudu-reader/pom.xml new file mode 100644 index 0000000000..524b6236df --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-reader/pom.xml @@ -0,0 +1,79 @@ + + + + flinkx-kudu + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-kudu-reader + + + + com.dtstack.flinkx + flinkx-kudu-core + 1.6 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java new file mode 100644 index 0000000000..0ad7534566 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.reader; + +import com.dtstack.flinkx.inputformat.RichInputFormat; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.io.InputSplit; +import org.apache.flink.types.Row; + +import java.io.IOException; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduInputFormat extends RichInputFormat { + @Override + protected void openInternal(InputSplit inputSplit) throws IOException { + + } + + @Override + protected Row nextRecordInternal(Row row) throws IOException { + return null; + } + + @Override + protected void closeInternal() throws IOException { + + } + + @Override + public void configure(Configuration parameters) { + + } + + @Override + public InputSplit[] createInputSplits(int minNumSplits) throws IOException { + return new InputSplit[0]; + } + + @Override + public boolean reachedEnd() throws IOException { + return false; + } +} diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java new file mode 100644 index 0000000000..46b1c5bb40 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.kudu.reader; + +import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduInputFormatBuilder extends RichInputFormatBuilder { + + @Override + protected void checkFormat() { + + } +} diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java new file mode 100644 index 0000000000..a6839d5912 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.kudu.reader; + +import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.reader.DataReader; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.types.Row; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduReader extends DataReader { + + protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { + super(config, env); + } + + @Override + public DataStream readData() { + return null; + } +} diff --git a/flinkx-kudu/flinkx-kudu-writer/pom.xml b/flinkx-kudu/flinkx-kudu-writer/pom.xml new file mode 100644 index 0000000000..f82f24aecc --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-writer/pom.xml @@ -0,0 +1,79 @@ + + + + flinkx-kudu + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-kudu-writer + + + + com.dtstack.flinkx + flinkx-kudu-core + 1.6 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java new file mode 100644 index 0000000000..d64f692418 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.writer; + +import com.dtstack.flinkx.exception.WriteRecordException; +import com.dtstack.flinkx.outputformat.RichOutputFormat; +import org.apache.flink.types.Row; + +import java.io.IOException; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduOutputFormat extends RichOutputFormat { + + @Override + protected void openInternal(int taskNumber, int numTasks) throws IOException { + + } + + @Override + protected void writeSingleRecordInternal(Row row) throws WriteRecordException { + + } + + @Override + protected void writeMultipleRecordsInternal() throws Exception { + + } + + @Override + public void closeInternal() throws IOException { + super.closeInternal(); + } +} diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java new file mode 100644 index 0000000000..1f5b618273 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.kudu.writer; + +import com.dtstack.flinkx.outputformat.RichOutputFormatBuilder; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduOutputFormatBuilder extends RichOutputFormatBuilder { + + @Override + protected void checkFormat() { + + } +} diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java new file mode 100644 index 0000000000..da4eddcde9 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.writer; + +import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.writer.DataWriter; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamSink; +import org.apache.flink.types.Row; + +/** + * @author jiangbo + * @date 2019/7/31 + */ +public class KuduWriter extends DataWriter { + + public KuduWriter(DataTransferConfig config) { + super(config); + } + + @Override + public DataStreamSink writeData(DataStream dataSet) { + return null; + } +} diff --git a/flinkx-kudu/pom.xml b/flinkx-kudu/pom.xml new file mode 100644 index 0000000000..01ce9e536c --- /dev/null +++ b/flinkx-kudu/pom.xml @@ -0,0 +1,28 @@ + + + + flinkx-all + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-kudu + pom + + flinkx-kudu-core + flinkx-kudu-reader + flinkx-kudu-writer + + + + + com.dtstack.flinkx + flinkx-core + 1.6 + provided + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 989256453c..543e502fbe 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ flinkx-binlog flinkx-kafka09 flinkx-kafka10 + flinkx-kudu From b66e2b0cc17182620e91e098340ec9612b7159c7 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Fri, 2 Aug 2019 19:18:46 +0800 Subject: [PATCH 02/62] update --- .../dtstack/flinkx/kudu/core/KuduConfig.java | 110 ++++++++++++++++++ .../flinkx/kudu/core/KuduConfigBuilder.java | 106 +++++++++++++++++ .../dtstack/flinkx/kudu/core/KuduUtil.java | 35 ++++-- .../flinkx/kudu/reader/KuduInputFormat.java | 81 +++++++++++-- .../kudu/reader/KuduInputFormatBuilder.java | 30 ++++- .../flinkx/kudu/reader/KuduReader.java | 42 ++++++- .../flinkx/kudu/reader/KuduTableSplit.java | 48 ++++++++ .../resources/dev_test_job/stream_hdfs.json | 64 ++++++++++ 8 files changed, 496 insertions(+), 20 deletions(-) create mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java create mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java create mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java create mode 100644 flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java new file mode 100644 index 0000000000..894904072c --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.core; + +import java.io.Serializable; + +/** + * @author jiangbo + * @date 2019/8/2 + */ +public class KuduConfig implements Serializable { + + private String masterAddresses; + + private boolean openKerberos; + + private String user; + + private String keytabPath; + + private Integer workerCount; + + private Integer bossCount; + + private Long operationTimeout; + + private Long adminOperationTimeout; + + + public boolean getOpenKerberos() { + return openKerberos; + } + + public void setOpenKerberos(boolean openKerberos) { + this.openKerberos = openKerberos; + } + + public String getUser() { + return user; + } + + public void setUser(String user) { + this.user = user; + } + + public String getKeytabPath() { + return keytabPath; + } + + public void setKeytabPath(String keytabPath) { + this.keytabPath = keytabPath; + } + + public Integer getBossCount() { + return bossCount; + } + + public void setBossCount(Integer bossCount) { + this.bossCount = bossCount; + } + + public String getMasterAddresses() { + return masterAddresses; + } + + public void setMasterAddresses(String masterAddresses) { + this.masterAddresses = masterAddresses; + } + + public Integer getWorkerCount() { + return workerCount; + } + + public void setWorkerCount(Integer workerCount) { + this.workerCount = workerCount; + } + + public Long getOperationTimeout() { + return operationTimeout; + } + + public void setOperationTimeout(Long operationTimeout) { + this.operationTimeout = operationTimeout; + } + + public Long getAdminOperationTimeout() { + return adminOperationTimeout; + } + + public void setAdminOperationTimeout(Long adminOperationTimeout) { + this.adminOperationTimeout = adminOperationTimeout; + } +} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java new file mode 100644 index 0000000000..602a76ec4d --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.core; + +import org.apache.commons.lang.StringUtils; +import org.apache.flink.util.Preconditions; + + +/** + * @author jiangbo + * @date 2019/8/2 + */ +public final class KuduConfigBuilder { + private String masterAddresses; + private boolean openKerberos; + private String user; + private String keytabPath; + private Integer workerCount; + private Integer bossCount; + private Long operationTimeout; + private Long adminOperationTimeout; + + private KuduConfigBuilder() { + } + + public static KuduConfigBuilder getInstance() { + return new KuduConfigBuilder(); + } + + public KuduConfigBuilder withMasterAddresses(String masterAddresses) { + Preconditions.checkArgument(StringUtils.isNotEmpty(masterAddresses), "Parameter [masterAddresses] can not be null or empty"); + this.masterAddresses = masterAddresses; + return this; + } + + public KuduConfigBuilder withOpenKerberos(boolean openKerberos) { + this.openKerberos = openKerberos; + return this; + } + + public KuduConfigBuilder withUser(String user) { + Preconditions.checkArgument(StringUtils.isNotEmpty(user), "Parameter [user] can not be null or empty"); + this.user = user; + return this; + } + + public KuduConfigBuilder withKeytabPath(String keytabPath) { + Preconditions.checkArgument(StringUtils.isNotEmpty(keytabPath), "Parameter [keytabPath] can not be null or empty"); + this.keytabPath = keytabPath; + return this; + } + + public KuduConfigBuilder withWorkerCount(Integer workerCount) { + Preconditions.checkArgument(workerCount > 0, "Parameter [workerCount] should be greater than 0"); + this.workerCount = workerCount; + return this; + } + + public KuduConfigBuilder withBossCount(Integer bossCount) { + Preconditions.checkArgument(bossCount > 0, "Parameter [bossCount] should be greater than 0"); + this.bossCount = bossCount; + return this; + } + + public KuduConfigBuilder withOperationTimeout(Long operationTimeout) { + Preconditions.checkArgument(operationTimeout > 0, "Parameter [operationTimeout] should be greater than 0"); + this.operationTimeout = operationTimeout; + return this; + } + + public KuduConfigBuilder withAdminOperationTimeout(Long adminOperationTimeout) { + Preconditions.checkArgument(adminOperationTimeout > 0, "Parameter [adminOperationTimeout] should be greater than 0"); + this.adminOperationTimeout = adminOperationTimeout; + return this; + } + + public KuduConfig build() { + KuduConfig kuduConfig = new KuduConfig(); + kuduConfig.setMasterAddresses(masterAddresses); + kuduConfig.setOpenKerberos(openKerberos); + kuduConfig.setUser(user); + kuduConfig.setKeytabPath(keytabPath); + kuduConfig.setWorkerCount(workerCount); + kuduConfig.setBossCount(bossCount); + kuduConfig.setOperationTimeout(operationTimeout); + kuduConfig.setAdminOperationTimeout(adminOperationTimeout); + return kuduConfig; + } +} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index ce0f615722..f3deff144f 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -19,7 +19,13 @@ package com.dtstack.flinkx.kudu.core; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.kudu.client.AsyncKuduClient; +import org.apache.kudu.client.KuduClient; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import 
java.util.Arrays; /** * @author jiangbo @@ -27,20 +33,27 @@ */ public class KuduUtil { - - public static AsyncKuduClient getKuduClient(String masterAddress){ - AsyncKuduClient.AsyncKuduClientBuilder builder; - if(masterAddress.contains(",")){ - builder = new AsyncKuduClient.AsyncKuduClientBuilder(masterAddress); + public static KuduClient getKuduClient(KuduConfig config) throws IOException,InterruptedException { + if(config.getOpenKerberos()){ + UserGroupInformation.loginUserFromKeytab(config.getUser(), config.getKeytabPath()); + return UserGroupInformation.getLoginUser().doAs(new PrivilegedExceptionAction() { + @Override + public KuduClient run() throws Exception { + return getKuduClientInternal(config); + } + }); } else { - builder = new AsyncKuduClient.AsyncKuduClientBuilder(masterAddress); + return getKuduClientInternal(config); } - - return builder.build(); } - public static void main(String[] args) { - AsyncKuduClient client = getKuduClient(""); - client.tableExists(""); + private static KuduClient getKuduClientInternal(KuduConfig config) { + return new AsyncKuduClient.AsyncKuduClientBuilder(Arrays.asList(config.getMasterAddresses().split(","))) + .workerCount(config.getWorkerCount()) + .bossCount(config.getBossCount()) + .defaultAdminOperationTimeoutMs(config.getAdminOperationTimeout()) + .defaultOperationTimeoutMs(config.getOperationTimeout()) + .build() + .syncClient(); } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java index 0ad7534566..4da1fcbe37 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java @@ -20,20 +20,65 @@ package com.dtstack.flinkx.kudu.reader; import com.dtstack.flinkx.inputformat.RichInputFormat; +import com.dtstack.flinkx.kudu.core.KuduConfig; +import com.dtstack.flinkx.kudu.core.KuduUtil; +import com.dtstack.flinkx.reader.MetaColumn; +import com.google.common.collect.Lists; import org.apache.flink.configuration.Configuration; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduScanToken; +import org.apache.kudu.client.KuduScanner; +import org.apache.kudu.client.KuduTable; import java.io.IOException; +import java.util.List; /** * @author jiangbo * @date 2019/7/31 */ public class KuduInputFormat extends RichInputFormat { + + protected List columns; + + protected String table; + + protected String readMode; + + protected KuduConfig kuduConfig; + + private List columnNames; + + private transient KuduClient client; + + private transient KuduTable kuduTable; + + private transient KuduScanner scanner; + @Override - protected void openInternal(InputSplit inputSplit) throws IOException { + public void openInputFormat() throws IOException { + super.openInputFormat(); + + columnNames = Lists.newArrayList(); + for (MetaColumn column : columns) { + columnNames.add(column.getName()); + } + + try { + client = KuduUtil.getKuduClient(kuduConfig); + } catch (IOException | InterruptedException e){ + throw new RuntimeException("Get KuduClient error", e); + } + kuduTable = client.openTable(table); + } + + @Override + protected void openInternal(InputSplit inputSplit) throws IOException { + KuduTableSplit kuduTableSplit = (KuduTableSplit)inputSplit; + scanner = 
KuduScanToken.deserializeIntoScanner(kuduTableSplit.getToken(), client); } @Override @@ -42,22 +87,44 @@ protected Row nextRecordInternal(Row row) throws IOException { } @Override - protected void closeInternal() throws IOException { + public InputSplit[] createInputSplits(int minNumSplits) throws IOException { + List scanTokens = client.newScanTokenBuilder(kuduTable) + .setProjectedColumnNames(columnNames) + .addPredicate(null) + .build(); + KuduTableSplit[] inputSplits = new KuduTableSplit[scanTokens.size()]; + for (int i = 0; i < scanTokens.size(); i++) { + inputSplits[i] = new KuduTableSplit(scanTokens.get(i).serialize(), i); + } + + return inputSplits; } @Override - public void configure(Configuration parameters) { + public boolean reachedEnd() throws IOException { + return false; + } + @Override + protected void closeInternal() throws IOException { + if(scanner != null){ + scanner.close(); + scanner = null; + } } @Override - public InputSplit[] createInputSplits(int minNumSplits) throws IOException { - return new InputSplit[0]; + public void closeInputFormat() throws IOException { + super.closeInputFormat(); + + if (client != null){ + client.close(); + } } @Override - public boolean reachedEnd() throws IOException { - return false; + public void configure(Configuration parameters) { + } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java index 46b1c5bb40..9be6ea228a 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java @@ -20,6 +20,10 @@ package com.dtstack.flinkx.kudu.reader; import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; +import com.dtstack.flinkx.kudu.core.KuduConfig; +import com.dtstack.flinkx.reader.MetaColumn; + +import java.util.List; /** * @author jiangbo @@ -27,8 +31,32 @@ */ public class KuduInputFormatBuilder extends RichInputFormatBuilder { + private KuduInputFormat format; + + public KuduInputFormatBuilder() { + super.format = format = new KuduInputFormat(); + } + + public void setColumns(List columns){ + format.columns = columns; + } + + public void setTable(String table){ + format.table = table; + } + + public void setReadMode(String readMode){ + format.readMode = readMode; + } + + public void setKuduConfig(KuduConfig kuduConfig){ + format.kuduConfig = kuduConfig; + } + @Override protected void checkFormat() { - + if (format.columns == null || format.columns.size() == 0){ + throw new IllegalArgumentException("columns can not be empty"); + } } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java index a6839d5912..a600733cad 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java @@ -20,10 +20,17 @@ package com.dtstack.flinkx.kudu.reader; import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.config.ReaderConfig; +import com.dtstack.flinkx.kudu.core.KuduConfig; +import com.dtstack.flinkx.kudu.core.KuduConfigBuilder; import com.dtstack.flinkx.reader.DataReader; +import 
com.dtstack.flinkx.reader.MetaColumn; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; +import org.apache.kudu.client.AsyncKuduClient; + +import java.util.List; /** * @author jiangbo @@ -31,12 +38,45 @@ */ public class KuduReader extends DataReader { + private String table; + + private List columns; + + private KuduConfig kuduConfig; + + private String readMode; + protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); + + ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader(); + ReaderConfig.ParameterConfig parameterConfig = readerConfig.getParameter(); + + columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); + table = parameterConfig.getStringVal("table"); + readMode = parameterConfig.getStringVal("readMode"); + kuduConfig = KuduConfigBuilder.getInstance() + .withMasterAddresses(parameterConfig.getStringVal("masterAddresses")) + .withOpenKerberos(parameterConfig.getBooleanVal("openKerberos", false)) + .withUser(parameterConfig.getStringVal("user")) + .withKeytabPath(parameterConfig.getStringVal("keytabPath")) + .withWorkerCount(parameterConfig.getIntVal("workerCount", 2 * Runtime.getRuntime().availableProcessors())) + .withBossCount(parameterConfig.getIntVal("bossCount", 1)) + .withOperationTimeout(parameterConfig.getLongVal("operationTimeout", AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) + .withAdminOperationTimeout(parameterConfig.getLongVal("adminOperationTimeout", AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) + .build(); } @Override public DataStream readData() { - return null; + KuduInputFormatBuilder builder = new KuduInputFormatBuilder(); + builder.setColumns(columns); + builder.setMonitorUrls(monitorUrls); + builder.setBytes(bytes); + builder.setTable(table); + builder.setReadMode(readMode); + builder.setKuduConfig(kuduConfig); + + return createInput(builder.finish(), "kudureader"); } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java new file mode 100644 index 0000000000..ae79e26488 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flinkx.kudu.reader; + +import org.apache.flink.core.io.InputSplit; + + +/** + * @author jiangbo + * @date 2019/8/2 + */ +public class KuduTableSplit implements InputSplit { + + private byte[] token; + + private int splitNumber; + + public KuduTableSplit(byte[] token, int splitNumber) { + this.token = token; + this.splitNumber = splitNumber; + } + + @Override + public int getSplitNumber() { + return splitNumber; + } + + public byte[] getToken() { + return token; + } +} diff --git a/flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json b/flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json new file mode 100644 index 0000000000..8a1cac8f51 --- /dev/null +++ b/flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json @@ -0,0 +1,64 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column": [ + { + "name": "id", + "type": "INT" + }, + { + "name": "name", + "index": 1, + "type": "string" + } + ], + "sliceRecordCount": ["100"] + } + }, + "writer": { + "name": "hdfswriter", + "parameter": { + "path": "hdfs://ns1/user/hive/warehouse/impala_test.db/impala_tb2", + "defaultFS": "hdfs://ns1", + "hadoopConfig": { + "dfs.ha.namenodes.ns1":"nn1,nn2", + "dfs.namenode.rpc-address.ns1.nn2":"impala2:9000", + "dfs.client.failover.proxy.provider.ns1":"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider", + "dfs.namenode.rpc-address.ns1.nn1":"impala1:9000", + "dfs.nameservices":"ns1" + }, + "column": [ + { + "name": "id", + "type": "INT" + }, + { + "name": "name", + "type": "string" + } + ], + "fileType": "parquet", + "fieldDelimiter": "\u0001", + "encoding": "utf-8", + "fileName": "pt=1", + "writeMode": "append", + "partition": "pt=1" + } + } + } + ], + "setting": { + "speed": { + "channel": 1, + "bytes": 1048576 + }, + "errorLimit": { + "record": 100 + } + } + } +} \ No newline at end of file From 6cbc9cd151e8eff1249d7288018f1ae3b2716e10 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 12 Aug 2019 12:47:41 +0800 Subject: [PATCH 03/62] update --- flinkx-kudu/flinkx-kudu-core/pom.xml | 13 ++ .../dtstack/flinkx/kudu/core/KuduConfig.java | 39 ++++ .../dtstack/flinkx/kudu/core/KuduUtil.java | 207 +++++++++++++++++- .../flinkx/kudu/core/test/KuduUtilTest.java | 43 ++++ .../flinkx/kudu/reader/KuduInputFormat.java | 19 +- .../kudu/reader/KuduInputFormatBuilder.java | 4 + .../flinkx/kudu/reader/KuduReader.java | 4 + 7 files changed, 312 insertions(+), 17 deletions(-) create mode 100644 flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java diff --git a/flinkx-kudu/flinkx-kudu-core/pom.xml b/flinkx-kudu/flinkx-kudu-core/pom.xml index 3adb954226..abafab1604 100644 --- a/flinkx-kudu/flinkx-kudu-core/pom.xml +++ b/flinkx-kudu/flinkx-kudu-core/pom.xml @@ -17,5 +17,18 @@ kudu-client 1.10.0 + + + com.dtstack.flinkx + flinkx-core + 1.6 + provided + + + junit + junit + 4.12 + test + \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java index 894904072c..a70fa602ea 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java @@ -43,6 +43,45 @@ public class KuduConfig implements Serializable { private Long adminOperationTimeout; + private Long queryTimeout; + + private String table; 
+ + private String readMode; + + private int batchSizeBytes; + + public int getBatchSizeBytes() { + return batchSizeBytes; + } + + public void setBatchSizeBytes(int batchSizeBytes) { + this.batchSizeBytes = batchSizeBytes; + } + + public String getTable() { + return table; + } + + public void setTable(String table) { + this.table = table; + } + + public String getReadMode() { + return readMode; + } + + public void setReadMode(String readMode) { + this.readMode = readMode; + } + + public Long getQueryTimeout() { + return queryTimeout; + } + + public void setQueryTimeout(Long queryTimeout) { + this.queryTimeout = queryTimeout; + } public boolean getOpenKerberos() { return openKerberos; diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index f3deff144f..09cf5d6e76 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -19,13 +19,25 @@ package com.dtstack.flinkx.kudu.core; +import com.dtstack.flinkx.reader.MetaColumn; +import com.google.common.collect.Lists; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.math.NumberUtils; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.kudu.client.AsyncKuduClient; -import org.apache.kudu.client.KuduClient; +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Type; +import org.apache.kudu.client.*; import java.io.IOException; +import java.math.BigDecimal; import java.security.PrivilegedExceptionAction; +import java.sql.Timestamp; import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * @author jiangbo @@ -56,4 +68,195 @@ private static KuduClient getKuduClientInternal(KuduConfig config) { .build() .syncClient(); } + + public static List getKuduScanToken(KuduConfig config, List columns, + String filterString) throws IOException{ + KuduClient client = null; + try { + client = getKuduClient(config); + KuduTable kuduTable = client.openTable(config.getTable()); + + List columnNames = Lists.newArrayList(); + for (MetaColumn column : columns) { + columnNames.add(column.getName()); + } + + KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(kuduTable) + .readMode(getReadMode(config.getReadMode())) + .batchSizeBytes(config.getBatchSizeBytes()) + .setTimeout(config.getQueryTimeout()) + .setProjectedColumnNames(columnNames); + + addPredicates(builder, filterString, columns); + + return builder.build(); + } catch (Exception e){ + throw new IOException("Get ScanToken error", e); + } finally { + if(client != null){ + client.close(); + } + } + } + + private static AsyncKuduScanner.ReadMode getReadMode(String readMode){ + if(AsyncKuduScanner.ReadMode.READ_LATEST.name().equalsIgnoreCase(readMode)){ + return AsyncKuduScanner.ReadMode.READ_LATEST; + } else { + return AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT; + } + } + + private static void addPredicates(KuduScanToken.KuduScanTokenBuilder builder, String filterString, List columns){ + if(StringUtils.isEmpty(filterString)){ + return; + } + + Map nameTypeMap = new HashMap<>(); + for (MetaColumn column : columns) { + nameTypeMap.put(column.getName(), getType(column.getType())); + } + + String[] filters = filterString.split("(?i)\\s+and\\s+"); + for (String filter : 
filters) {
+            ExpressResult expressResult = parseExpress(filter, nameTypeMap);
+            KuduPredicate predicate = KuduPredicate.newComparisonPredicate(expressResult.getColumnSchema(), expressResult.getOp(), expressResult.getValue());
+            builder.addPredicate(predicate);
+        }
+    }
+
+    private static Type getType(String columnType){
+        switch (columnType.toLowerCase()){
+            case "boolean" :
+            case "bool" : return Type.BOOL;
+            case "int8":
+            case "byte" : return Type.INT8;
+            case "int16":
+            case "short" : return Type.INT16;
+            case "int32":
+            case "integer":
+            case "int" : return Type.INT32;
+            case "int64":
+            case "bigint":
+            case "long" : return Type.INT64;
+            case "float" : return Type.FLOAT;
+            case "double" : return Type.DOUBLE;
+            case "decimal" : return Type.DECIMAL;
+            case "char":
+            case "varchar":
+            case "text":
+            case "string" : return Type.STRING;
+            case "timestamp" : return Type.UNIXTIME_MICROS;
+            default:
+                throw new IllegalArgumentException("Not support column type:" + columnType);
+        }
+    }
+
+    public static ExpressResult parseExpress(String express, Map<String, Type> nameTypeMap){
+        String regex = "(?<column>[^=|\\s]+)+\\s*(?<op>(=)|(>)|(>=)|(<)|(<=))\\s*(?<value>.*)";
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(express.trim());
+        if (matcher.find()) {
+            String column = matcher.group("column");
+            String op = matcher.group("op");
+            String value = matcher.group("value");
+
+            Type type = nameTypeMap.get(column);
+            if(type == null){
+                throw new IllegalArgumentException("Can not find column:" + column + " from column list");
+            }
+
+            ColumnSchema columnSchema = new ColumnSchema.ColumnSchemaBuilder(column, type).build();
+
+            ExpressResult result = new ExpressResult();
+            result.setColumnSchema(columnSchema);
+            result.setOp(getOp(op));
+            result.setValue(getValue(value, type));
+
+            return result;
+        } else {
+            throw new IllegalArgumentException("Illegal filter express:" + express);
+        }
+    }
+
+    private static Object getValue(String value, Type type){
+        if(value == null){
+            return null;
+        }
+
+        if(value.startsWith("\"") && value.endsWith("\"")){
+            value = value.substring(1, value.length() - 1);
+        }
+
+        Object objValue;
+        if (Type.BOOL.equals(type)){
+            objValue = Boolean.valueOf(value);
+        } else if(Type.INT8.equals(type)){
+            objValue = Byte.valueOf(value);
+        } else if(Type.INT16.equals(type)){
+            objValue = Short.valueOf(value);
+        } else if(Type.INT32.equals(type)){
+            objValue = Integer.valueOf(value);
+        } else if(Type.INT64.equals(type)){
+            objValue = Long.valueOf(value);
+        } else if(Type.FLOAT.equals(type)){
+            objValue = Float.valueOf(value);
+        } else if(Type.DOUBLE.equals(type)){
+            objValue = Double.valueOf(value);
+        } else if(Type.DECIMAL.equals(type)){
+            objValue = new BigDecimal(value);
+        } else if(Type.UNIXTIME_MICROS.equals(type)){
+            if(NumberUtils.isNumber(value)){
+                objValue = Long.valueOf(value);
+            } else {
+                objValue = Timestamp.valueOf(value);
+            }
+        } else {
+            objValue = value;
+        }
+
+        return objValue;
+    }
+
+    private static KuduPredicate.ComparisonOp getOp(String opExpress){
+        switch (opExpress){
+            case "=" : return KuduPredicate.ComparisonOp.EQUAL;
+            case ">" : return KuduPredicate.ComparisonOp.GREATER;
+            case ">=" : return KuduPredicate.ComparisonOp.GREATER_EQUAL;
+            case "<" : return KuduPredicate.ComparisonOp.LESS;
+            case "<=" : return KuduPredicate.ComparisonOp.LESS_EQUAL;
+            default:
+                throw new IllegalArgumentException("Comparison express only support '=','>','>=','<','<='");
+        }
+    }
+
+    public static class ExpressResult{
+        private ColumnSchema columnSchema;
+        private 
KuduPredicate.ComparisonOp op; + private Object value; + + public ColumnSchema getColumnSchema() { + return columnSchema; + } + + public void setColumnSchema(ColumnSchema columnSchema) { + this.columnSchema = columnSchema; + } + + public KuduPredicate.ComparisonOp getOp() { + return op; + } + + public void setOp(KuduPredicate.ComparisonOp op) { + this.op = op; + } + + public Object getValue() { + return value; + } + + public void setValue(Object value) { + this.value = value; + } + } } diff --git a/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java b/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java new file mode 100644 index 0000000000..e5c2366ade --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.kudu.core.test; + +import com.dtstack.flinkx.kudu.core.KuduUtil; +import org.apache.kudu.Type; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author jiangbo + * @date 2019/8/12 + */ +public class KuduUtilTest { + + @Test + public void parseExpressTest(){ + Map nameTypeMap = new HashMap<>(); + nameTypeMap.put("id", Type.INT32); + + KuduUtil.ExpressResult result = KuduUtil.parseExpress(" id = 1", nameTypeMap); + System.out.println(result); + } +} diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java index 4da1fcbe37..febb95fcef 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java @@ -49,36 +49,29 @@ public class KuduInputFormat extends RichInputFormat { protected KuduConfig kuduConfig; - private List columnNames; + protected String filterString; private transient KuduClient client; - private transient KuduTable kuduTable; - private transient KuduScanner scanner; @Override public void openInputFormat() throws IOException { super.openInputFormat(); - columnNames = Lists.newArrayList(); - for (MetaColumn column : columns) { - columnNames.add(column.getName()); - } - try { client = KuduUtil.getKuduClient(kuduConfig); } catch (IOException | InterruptedException e){ throw new RuntimeException("Get KuduClient error", e); } - - kuduTable = client.openTable(table); } @Override protected void openInternal(InputSplit inputSplit) throws IOException { KuduTableSplit kuduTableSplit = (KuduTableSplit)inputSplit; scanner = 
KuduScanToken.deserializeIntoScanner(kuduTableSplit.getToken(), client); + + scanner.hasMoreRows(); } @Override @@ -88,11 +81,7 @@ protected Row nextRecordInternal(Row row) throws IOException { @Override public InputSplit[] createInputSplits(int minNumSplits) throws IOException { - List scanTokens = client.newScanTokenBuilder(kuduTable) - .setProjectedColumnNames(columnNames) - .addPredicate(null) - .build(); - + List scanTokens = KuduUtil.getKuduScanToken(kuduConfig, columns, filterString); KuduTableSplit[] inputSplits = new KuduTableSplit[scanTokens.size()]; for (int i = 0; i < scanTokens.size(); i++) { inputSplits[i] = new KuduTableSplit(scanTokens.get(i).serialize(), i); diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java index 9be6ea228a..23d85aac62 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java @@ -53,6 +53,10 @@ public void setKuduConfig(KuduConfig kuduConfig){ format.kuduConfig = kuduConfig; } + public void setFilterString(String filterString){ + format.filterString = filterString; + } + @Override protected void checkFormat() { if (format.columns == null || format.columns.size() == 0){ diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java index a600733cad..f0acfb151f 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java @@ -46,6 +46,8 @@ public class KuduReader extends DataReader { private String readMode; + private String filterString; + protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); @@ -55,6 +57,7 @@ protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); table = parameterConfig.getStringVal("table"); readMode = parameterConfig.getStringVal("readMode"); + filterString = parameterConfig.getStringVal("filter"); kuduConfig = KuduConfigBuilder.getInstance() .withMasterAddresses(parameterConfig.getStringVal("masterAddresses")) .withOpenKerberos(parameterConfig.getBooleanVal("openKerberos", false)) @@ -76,6 +79,7 @@ public DataStream readData() { builder.setTable(table); builder.setReadMode(readMode); builder.setKuduConfig(kuduConfig); + builder.setFilterString(filterString); return createInput(builder.finish(), "kudureader"); } From 26843e84fe65d7b6ab385b5637cc7de4d13cbc4b Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 12 Aug 2019 14:03:12 +0800 Subject: [PATCH 04/62] update --- .../dtstack/flinkx/kudu/core/KuduConfig.java | 10 +++ .../flinkx/kudu/core/KuduConfigBuilder.java | 24 ++++++ .../flinkx/kudu/core/KuduConfigKeys.java | 29 +++++++ .../dtstack/flinkx/kudu/core/KuduUtil.java | 18 +++-- .../flinkx/kudu/core/test/KuduUtilTest.java | 26 ++++++- .../flinkx/kudu/reader/KuduInputFormat.java | 76 +++++++++++++++---- .../kudu/reader/KuduInputFormatBuilder.java | 12 --- .../flinkx/kudu/reader/KuduReader.java | 17 ++--- 8 files changed, 162 insertions(+), 50 deletions(-) create mode 100644 
flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java index a70fa602ea..7117ab9456 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java @@ -49,8 +49,18 @@ public class KuduConfig implements Serializable { private String readMode; + private String filterString; + private int batchSizeBytes; + public String getFilterString() { + return filterString; + } + + public void setFilterString(String filterString) { + this.filterString = filterString; + } + public int getBatchSizeBytes() { return batchSizeBytes; } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java index 602a76ec4d..08a22453d3 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java @@ -36,6 +36,9 @@ public final class KuduConfigBuilder { private Integer bossCount; private Long operationTimeout; private Long adminOperationTimeout; + private String table; + private String readMode; + private String filterString; private KuduConfigBuilder() { } @@ -91,6 +94,24 @@ public KuduConfigBuilder withAdminOperationTimeout(Long adminOperationTimeout) { return this; } + public KuduConfigBuilder withTable(String table){ + Preconditions.checkArgument(StringUtils.isNotEmpty(table), "Parameter [table] can not be null or empty"); + this.table = table; + return this; + } + + public KuduConfigBuilder withReadMode(String readMode){ + Preconditions.checkArgument(StringUtils.isNotEmpty(readMode), "Parameter [readMode] can not be null or empty"); + this.readMode = readMode; + return this; + } + + public KuduConfigBuilder withFilter(String filter){ + Preconditions.checkArgument(StringUtils.isNotEmpty(filter), "Parameter [filter] can not be null or empty"); + this.filterString = filter; + return this; + } + public KuduConfig build() { KuduConfig kuduConfig = new KuduConfig(); kuduConfig.setMasterAddresses(masterAddresses); @@ -101,6 +122,9 @@ public KuduConfig build() { kuduConfig.setBossCount(bossCount); kuduConfig.setOperationTimeout(operationTimeout); kuduConfig.setAdminOperationTimeout(adminOperationTimeout); + kuduConfig.setTable(table); + kuduConfig.setReadMode(readMode); + kuduConfig.setFilterString(filterString); return kuduConfig; } } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java new file mode 100644 index 0000000000..058cb22913 --- /dev/null +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.kudu.core; + +/** + * @author jiangbo + * @date 2019/8/12 + */ +public class KuduConfigKeys { + + public final static String KEY_TABLE = "table"; +} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index 09cf5d6e76..249a6530f1 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -45,6 +45,10 @@ */ public class KuduUtil { + private static String FILTER_SPLIT_REGEX = "(?i)\\s+and\\s+"; + private static String EXPRESS_REGEX = "(?<column>[^\\=|\\s]+)+\\s*(?<op>[\\>|\\<|\\=]+)\\s*(?<value>.*)"; + private static Pattern EXPRESS_PATTERN = Pattern.compile(EXPRESS_REGEX); + public static KuduClient getKuduClient(KuduConfig config) throws IOException,InterruptedException { if(config.getOpenKerberos()){ UserGroupInformation.loginUserFromKeytab(config.getUser(), config.getKeytabPath()); @@ -117,7 +121,7 @@ private static void addPredicates(KuduScanToken.KuduScanTokenBuilder builder, St nameTypeMap.put(column.getName(), getType(column.getType())); } - String[] filters = filterString.split("(?i)\\s+and\\s+"); + String[] filters = filterString.split(FILTER_SPLIT_REGEX); for (String filter : filters) { ExpressResult expressResult = parseExpress(filter, nameTypeMap); KuduPredicate predicate = KuduPredicate.newComparisonPredicate(expressResult.getColumnSchema(), expressResult.getOp(), expressResult.getValue()); } } - private static Type getType(String columnType){ + public static Type getType(String columnType){ switch (columnType.toLowerCase()){ case "boolean" : case "bool" : return Type.BOOL; @@ -153,15 +157,13 @@ private static Type getType(String columnType){ } public static ExpressResult parseExpress(String express, Map<String, Type> nameTypeMap){ - String regex = "(?<column>[^=|\\s]+)+\\s*(?<op>(=)|(>)|(>=)|(<)|(<=))\\s*(?<value>.*)"; - Pattern pattern = Pattern.compile(regex); - Matcher matcher = pattern.matcher(express.trim()); + Matcher matcher = EXPRESS_PATTERN.matcher(express.trim()); if (matcher.find()) { String column = matcher.group("column"); String op = matcher.group("op"); String value = matcher.group("value"); - Type type = nameTypeMap.get(column); + Type type = nameTypeMap.get(column.trim()); if(type == null){ throw new IllegalArgumentException("Can not find column:" + column + " from column list"); } @@ -179,12 +181,12 @@ public static ExpressResult parseExpress(String express, Map<String, Type> nameT } } - private static Object getValue(String value, Type type){ + public static Object getValue(String value, Type type){ if(value == null){ return null; } - if(value.startsWith("\"") && value.endsWith("\"")){ + if((value.startsWith("\"") && value.endsWith("\"")) || (value.startsWith("'") && value.endsWith("'"))){ value = value.substring(1, value.length() - 1); }
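The filter grammar these constants define is deliberately small: the filter string is split on a case-insensitive "and", and each clause must match column, operator, value in that order. A minimal sketch of how one parsed clause becomes a pushed-down predicate (assuming a schema map like the one addPredicates builds; parseExpress, getType and ExpressResult are the members shown above):

    Map<String, Type> nameTypeMap = new HashMap<>();
    nameTypeMap.put("id", Type.INT32);

    // "id >= 1" parses into column "id", op GREATER_EQUAL, value 1 (an Integer)
    KuduUtil.ExpressResult parsed = KuduUtil.parseExpress("id >= 1", nameTypeMap);
    KuduPredicate predicate = KuduPredicate.newComparisonPredicate(
            parsed.getColumnSchema(), parsed.getOp(), parsed.getValue());

Single- and double-quoted values are unquoted before conversion, so a quoted timestamp such as '2019-08-12 13:10:12' against a UNIXTIME_MICROS column becomes a java.sql.Timestamp. The unit test that follows exercises exactly these cases.
diff --git a/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java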
b/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java index e5c2366ade..c6a564d50d 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java +++ b/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java @@ -21,8 +21,11 @@ import com.dtstack.flinkx.kudu.core.KuduUtil; import org.apache.kudu.Type; +import org.apache.kudu.client.KuduPredicate; +import org.junit.Assert; import org.junit.Test; +import java.sql.Timestamp; import java.util.HashMap; import java.util.Map; @@ -36,8 +39,27 @@ public class KuduUtilTest { public void parseExpressTest(){ Map<String, Type> nameTypeMap = new HashMap<>(); nameTypeMap.put("id", Type.INT32); + nameTypeMap.put("name", Type.STRING); + nameTypeMap.put("time", Type.UNIXTIME_MICROS); - KuduUtil.ExpressResult result = KuduUtil.parseExpress(" id = 1", nameTypeMap); - System.out.println(result); + KuduUtil.ExpressResult result = KuduUtil.parseExpress(" id >= 1", nameTypeMap); + Assert.assertEquals(result.getColumnSchema().getName(), "id"); + Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.GREATER_EQUAL); + Assert.assertTrue(result.getValue() instanceof Integer); + + result = KuduUtil.parseExpress("name = \"xxxxx\"", nameTypeMap); + Assert.assertEquals(result.getColumnSchema().getName(), "name"); + Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.EQUAL); + Assert.assertTrue(result.getValue() instanceof String); + + result = KuduUtil.parseExpress("time > 1565586665372 ", nameTypeMap); + Assert.assertEquals(result.getColumnSchema().getName(), "time"); + Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.GREATER); + Assert.assertTrue(result.getValue() instanceof Long); + + result = KuduUtil.parseExpress("time <= '2019-08-12 13:10:12'", nameTypeMap); + Assert.assertEquals(result.getColumnSchema().getName(), "time"); + Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.LESS_EQUAL); + Assert.assertTrue(result.getValue() instanceof Timestamp); } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java index febb95fcef..b443a333b2 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java @@ -23,14 +23,11 @@ import com.dtstack.flinkx.kudu.core.KuduConfig; import com.dtstack.flinkx.kudu.core.KuduUtil; import com.dtstack.flinkx.reader.MetaColumn; -import com.google.common.collect.Lists; import org.apache.flink.configuration.Configuration; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; -import org.apache.kudu.client.KuduClient; -import org.apache.kudu.client.KuduScanToken; -import org.apache.kudu.client.KuduScanner; -import org.apache.kudu.client.KuduTable; +import org.apache.kudu.Type; +import org.apache.kudu.client.*; import java.io.IOException; import java.util.List; @@ -43,18 +40,14 @@ public class KuduInputFormat extends RichInputFormat { protected List<MetaColumn> columns; - protected String table; - - protected String readMode; - protected KuduConfig kuduConfig; - protected String filterString; - private transient KuduClient client; private transient KuduScanner scanner; + private transient RowResultIterator iterator; + +
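The iterator field added here drives batch reads: reachedEnd() drains the current RowResultIterator and asks the scanner for a new batch only when the current one is exhausted, so nextRecordInternal() always has a row available when it is called. The underlying Kudu client pattern, condensed into a hypothetical stand-alone helper (a sketch only, not the InputFormat wiring):

    // Drain one scanner batch by batch; each nextRows() call fetches the
    // next block of rows, bounded by the reader's batch size settings.
    private void drainScanner(KuduScanner scanner) throws KuduException {
        while (scanner.hasMoreRows()) {
            RowResultIterator batch = scanner.nextRows();
            while (batch.hasNext()) {
                RowResult rowResult = batch.next();
                // convert each column here, as nextRecordInternal() does below
            }
        }
    }

@Override public void openInputFormat() throws IOException {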
super.openInputFormat(); @@ -70,18 +63,57 @@ public void openInputFormat() throws IOException { protected void openInternal(InputSplit inputSplit) throws IOException { KuduTableSplit kuduTableSplit = (KuduTableSplit)inputSplit; scanner = KuduScanToken.deserializeIntoScanner(kuduTableSplit.getToken(), client); - - scanner.hasMoreRows(); } @Override protected Row nextRecordInternal(Row row) throws IOException { - return null; + row = new Row(columns.size()); + RowResult rowResult = iterator.next(); + + for (int i = 0; i < columns.size(); i++) { + MetaColumn column = columns.get(i); + Type type = KuduUtil.getType(column.getType()); + if(column.getValue() != null){ + row.setField(i, KuduUtil.getValue(column.getValue(), type)); + } else { + row.setField(i, getValue(type, rowResult, column.getName())); + } + } + + return row; + } + + private Object getValue(Type type, RowResult rowResult, String name){ + Object objValue; + + if (Type.BOOL.equals(type)){ + objValue = rowResult.getBoolean(name); + } else if(Type.INT8.equals(type)){ + objValue = rowResult.getByte(name); + } else if(Type.INT16.equals(type)){ + objValue = rowResult.getShort(name); + } else if(Type.INT32.equals(type)){ + objValue = rowResult.getInt(name); + } else if(Type.INT64.equals(type)){ + objValue = rowResult.getLong(name); + } else if(Type.FLOAT.equals(type)){ + objValue = rowResult.getFloat(name); + } else if(Type.DOUBLE.equals(type)){ + objValue = rowResult.getDouble(name); + } else if(Type.DECIMAL.equals(type)){ + objValue = rowResult.getDecimal(name); + } else if(Type.UNIXTIME_MICROS.equals(type)){ + objValue = rowResult.getTimestamp(name); + } else { + objValue = rowResult.getString(name); + } + + return objValue; } @Override public InputSplit[] createInputSplits(int minNumSplits) throws IOException { - List scanTokens = KuduUtil.getKuduScanToken(kuduConfig, columns, filterString); + List scanTokens = KuduUtil.getKuduScanToken(kuduConfig, columns, kuduConfig.getFilterString()); KuduTableSplit[] inputSplits = new KuduTableSplit[scanTokens.size()]; for (int i = 0; i < scanTokens.size(); i++) { inputSplits[i] = new KuduTableSplit(scanTokens.get(i).serialize(), i); @@ -92,7 +124,19 @@ public InputSplit[] createInputSplits(int minNumSplits) throws IOException { @Override public boolean reachedEnd() throws IOException { - return false; + if(iterator == null || !iterator.hasNext()){ + return getNextRows(); + } + + return true; + } + + private boolean getNextRows() throws IOException{ + if(scanner.hasMoreRows()){ + iterator = scanner.nextRows(); + } + + return iterator == null || !iterator.hasNext(); } @Override diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java index 23d85aac62..b9f9c919c8 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java @@ -41,22 +41,10 @@ public void setColumns(List columns){ format.columns = columns; } - public void setTable(String table){ - format.table = table; - } - - public void setReadMode(String readMode){ - format.readMode = readMode; - } - public void setKuduConfig(KuduConfig kuduConfig){ format.kuduConfig = kuduConfig; } - public void setFilterString(String filterString){ - format.filterString = filterString; - } - @Override protected void checkFormat() { if 
(format.columns == null || format.columns.size() == 0){ diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java index f0acfb151f..5929a1fdae 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java @@ -32,22 +32,18 @@ import java.util.List; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_TABLE; + /** * @author jiangbo * @date 2019/7/31 */ public class KuduReader extends DataReader { - private String table; - private List columns; private KuduConfig kuduConfig; - private String readMode; - - private String filterString; - protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); @@ -55,9 +51,6 @@ protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) ReaderConfig.ParameterConfig parameterConfig = readerConfig.getParameter(); columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); - table = parameterConfig.getStringVal("table"); - readMode = parameterConfig.getStringVal("readMode"); - filterString = parameterConfig.getStringVal("filter"); kuduConfig = KuduConfigBuilder.getInstance() .withMasterAddresses(parameterConfig.getStringVal("masterAddresses")) .withOpenKerberos(parameterConfig.getBooleanVal("openKerberos", false)) @@ -67,6 +60,9 @@ protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) .withBossCount(parameterConfig.getIntVal("bossCount", 1)) .withOperationTimeout(parameterConfig.getLongVal("operationTimeout", AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) .withAdminOperationTimeout(parameterConfig.getLongVal("adminOperationTimeout", AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) + .withTable(parameterConfig.getStringVal(KEY_TABLE)) + .withReadMode(parameterConfig.getStringVal("readMode")) + .withFilter(parameterConfig.getStringVal("filter")) .build(); } @@ -76,10 +72,7 @@ public DataStream readData() { builder.setColumns(columns); builder.setMonitorUrls(monitorUrls); builder.setBytes(bytes); - builder.setTable(table); - builder.setReadMode(readMode); builder.setKuduConfig(kuduConfig); - builder.setFilterString(filterString); return createInput(builder.finish(), "kudureader"); } From e247185ca4493d66b3400b6ce6512867e08b7f5f Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 12 Aug 2019 21:47:15 +0800 Subject: [PATCH 05/62] test kudu reader --- .../flinkx/constants/PluginNameConstrant.java | 2 + .../com/dtstack/flinkx/enums/EWriteMode.java | 4 +- .../dtstack/flinkx/kudu/core/KuduConfig.java | 30 ++++---- .../flinkx/kudu/core/KuduConfigBuilder.java | 41 ++++++---- .../flinkx/kudu/core/KuduConfigKeys.java | 12 +++ .../dtstack/flinkx/kudu/core/KuduUtil.java | 6 +- .../flinkx/kudu/reader/KuduInputFormat.java | 3 +- .../flinkx/kudu/reader/KuduReader.java | 26 ++++--- .../flinkx/kudu/writer/KuduOutputFormat.java | 74 +++++++++++++++++++ .../kudu/writer/KuduOutputFormatBuilder.java | 22 +++++- .../flinkx/kudu/writer/KuduWriter.java | 39 +++++++++- flinkx-test/pom.xml | 11 +++ .../com/dtstack/flinkx/test/LocalTest.java | 6 +- .../dev_test_job/kudu_reader_template.json | 46 ++++++++++++ 14 files changed, 273 insertions(+), 49 deletions(-) create mode 100644 flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json diff --git 
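This commit wires the Kudu plugins into flinkx-core and flinkx-test, moves every connection setting behind KuduConfigKeys constants, and replaces the boolean openKerberos flag with an authentication string: when it equals KuduUtil.AUTHENTICATION_TYPE ("Kerberos"), the client is built inside a keytab login, as the KuduUtil change below shows. A condensed sketch of that path (a sketch only; it assumes KuduConfig exposes the usual getters for the renamed fields):

    if ("Kerberos".equals(config.getAuthentication())) {
        UserGroupInformation.loginUserFromKeytab(config.getPrincipal(), config.getKeytabFile());
        // Build the client as the logged-in principal.
        return UserGroupInformation.getLoginUser().doAs(new PrivilegedExceptionAction<KuduClient>() {
            @Override
            public KuduClient run() throws Exception {
                return new KuduClient.KuduClientBuilder(config.getMasterAddresses()).build();
            }
        });
    }

diff --git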
a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java index e02b8e3627..25df51d96b 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java @@ -38,6 +38,7 @@ public class PluginNameConstrant { public static final String POSTGRESQL_READER = "postgresqlreader"; public static final String SQLSERVER_READER = "sqlserverreader"; public static final String GBASE_READER = "gbasereader"; + public static final String KUDU_READER = "kudureader"; public static final String STREAM_WRITER = "streamwriter"; public static final String CARBONDATA_WRITER = "carbondatawriter"; @@ -54,4 +55,5 @@ public class PluginNameConstrant { public static final String REDIS_WRITER = "rediswriter"; public static final String SQLSERVER_WRITER = "sqlserverwriter"; public static final String GBASE_WRITER = "gbasewriter"; + public static final String KUDU_WRITER = "kuduwriter"; } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java b/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java index ac7fcd257b..e04688f2f6 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java @@ -39,5 +39,7 @@ public enum EWriteMode { /** * replace into */ - REPLACE + REPLACE, + + UPSERT } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java index 7117ab9456..493e5ce1a0 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java @@ -29,11 +29,11 @@ public class KuduConfig implements Serializable { private String masterAddresses; - private boolean openKerberos; + private String authentication; - private String user; + private String principal; - private String keytabPath; + private String keytabFile; private Integer workerCount; @@ -93,28 +93,28 @@ public void setQueryTimeout(Long queryTimeout) { this.queryTimeout = queryTimeout; } - public boolean getOpenKerberos() { - return openKerberos; + public String getAuthentication() { + return authentication; } - public void setOpenKerberos(boolean openKerberos) { - this.openKerberos = openKerberos; + public void setAuthentication(String authentication) { + this.authentication = authentication; } - public String getUser() { - return user; + public String getPrincipal() { + return principal; } - public void setUser(String user) { - this.user = user; + public void setPrincipal(String principal) { + this.principal = principal; } - public String getKeytabPath() { - return keytabPath; + public String getKeytabFile() { + return keytabFile; } - public void setKeytabPath(String keytabPath) { - this.keytabPath = keytabPath; + public void setKeytabFile(String keytabFile) { + this.keytabFile = keytabFile; } public Integer getBossCount() { diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java index 08a22453d3..beafa11516 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java +++ 
b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java @@ -29,16 +29,18 @@ */ public final class KuduConfigBuilder { private String masterAddresses; - private boolean openKerberos; - private String user; - private String keytabPath; + private String authentication; + private String principal; + private String keytabFile; private Integer workerCount; private Integer bossCount; private Long operationTimeout; private Long adminOperationTimeout; + private Long queryTimeout; private String table; private String readMode; private String filterString; + private int batchSizeBytes; private KuduConfigBuilder() { } @@ -53,20 +55,18 @@ public KuduConfigBuilder withMasterAddresses(String masterAddresses) { return this; } - public KuduConfigBuilder withOpenKerberos(boolean openKerberos) { - this.openKerberos = openKerberos; + public KuduConfigBuilder withAuthentication(String authentication) { + this.authentication = authentication; return this; } - public KuduConfigBuilder withUser(String user) { - Preconditions.checkArgument(StringUtils.isNotEmpty(user), "Parameter [user] can not be null or empty"); - this.user = user; + public KuduConfigBuilder withprincipal(String principal) { + this.principal = principal; return this; } - public KuduConfigBuilder withKeytabPath(String keytabPath) { - Preconditions.checkArgument(StringUtils.isNotEmpty(keytabPath), "Parameter [keytabPath] can not be null or empty"); - this.keytabPath = keytabPath; + public KuduConfigBuilder withKeytabFile(String keytabFile) { + this.keytabFile = keytabFile; return this; } @@ -107,24 +107,35 @@ public KuduConfigBuilder withReadMode(String readMode){ } public KuduConfigBuilder withFilter(String filter){ - Preconditions.checkArgument(StringUtils.isNotEmpty(filter), "Parameter [filter] can not be null or empty"); this.filterString = filter; return this; } + public KuduConfigBuilder withQueryTimeout(Long queryTimeout){ + this.queryTimeout = queryTimeout; + return this; + } + + public KuduConfigBuilder withBatchSizeBytes(Integer batchSizeBytes){ + this.batchSizeBytes = batchSizeBytes; + return this; + } + public KuduConfig build() { KuduConfig kuduConfig = new KuduConfig(); kuduConfig.setMasterAddresses(masterAddresses); - kuduConfig.setOpenKerberos(openKerberos); - kuduConfig.setUser(user); - kuduConfig.setKeytabPath(keytabPath); + kuduConfig.setAuthentication(authentication); + kuduConfig.setPrincipal(principal); + kuduConfig.setKeytabFile(keytabFile); kuduConfig.setWorkerCount(workerCount); kuduConfig.setBossCount(bossCount); kuduConfig.setOperationTimeout(operationTimeout); kuduConfig.setAdminOperationTimeout(adminOperationTimeout); + kuduConfig.setQueryTimeout(queryTimeout); kuduConfig.setTable(table); kuduConfig.setReadMode(readMode); kuduConfig.setFilterString(filterString); + kuduConfig.setBatchSizeBytes(batchSizeBytes); return kuduConfig; } } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java index 058cb22913..c2f3ae0255 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java @@ -25,5 +25,17 @@ */ public class KuduConfigKeys { + public final static String KEY_MASTER_ADDRESSES = "masterAddresses"; + public final static String KEY_AUTHENTICATION = "authentication"; + public final static String 
KEY_PRINCIPAL = "principal"; + public final static String KEY_KEYTABFILE = "keytabFile"; + public final static String KEY_WORKER_COUNT = "workerCount"; + public final static String KEY_BOSS_COUNT = "bossCount"; + public final static String KEY_OPERATION_TIMEOUT = "operationTimeout"; + public final static String KEY_QUERY_TIMEOUT = "queryTimeout"; + public final static String KEY_ADMIN_OPERATION_TIMEOUT = "adminOperationTimeout"; public final static String KEY_TABLE = "table"; + public final static String KEY_READ_MODE = "readMode"; + public final static String KEY_FILTER = "filter"; + public final static String KEY_BATCH_SIZE_BYTES = "batchSizeBytes"; } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index 249a6530f1..4a802a43d7 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -49,9 +49,11 @@ public class KuduUtil { private static String EXPRESS_REGEX = "(?[^\\=|\\s]+)+\\s*(?[\\>|\\<|\\=]+)\\s*(?.*)"; private static Pattern EXPRESS_PATTERN = Pattern.compile(EXPRESS_REGEX); + public final static String AUTHENTICATION_TYPE = "Kerberos"; + public static KuduClient getKuduClient(KuduConfig config) throws IOException,InterruptedException { - if(config.getOpenKerberos()){ - UserGroupInformation.loginUserFromKeytab(config.getUser(), config.getKeytabPath()); + if(AUTHENTICATION_TYPE.equals(config.getAuthentication())){ + UserGroupInformation.loginUserFromKeytab(config.getPrincipal(), config.getKeytabFile()); return UserGroupInformation.getLoginUser().doAs(new PrivilegedExceptionAction() { @Override public KuduClient run() throws Exception { diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java index b443a333b2..fb29870c57 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java @@ -128,7 +128,7 @@ public boolean reachedEnd() throws IOException { return getNextRows(); } - return true; + return false; } private boolean getNextRows() throws IOException{ @@ -153,6 +153,7 @@ public void closeInputFormat() throws IOException { if (client != null){ client.close(); + client = null; } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java index 5929a1fdae..b030013266 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java @@ -32,7 +32,7 @@ import java.util.List; -import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_TABLE; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*; /** * @author jiangbo @@ -44,7 +44,7 @@ public class KuduReader extends DataReader { private KuduConfig kuduConfig; - protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { + public KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); ReaderConfig readerConfig = 
config.getJob().getContent().get(0).getReader(); @@ -52,17 +52,19 @@ protected KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); kuduConfig = KuduConfigBuilder.getInstance() - .withMasterAddresses(parameterConfig.getStringVal("masterAddresses")) - .withOpenKerberos(parameterConfig.getBooleanVal("openKerberos", false)) - .withUser(parameterConfig.getStringVal("user")) - .withKeytabPath(parameterConfig.getStringVal("keytabPath")) - .withWorkerCount(parameterConfig.getIntVal("workerCount", 2 * Runtime.getRuntime().availableProcessors())) - .withBossCount(parameterConfig.getIntVal("bossCount", 1)) - .withOperationTimeout(parameterConfig.getLongVal("operationTimeout", AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) - .withAdminOperationTimeout(parameterConfig.getLongVal("adminOperationTimeout", AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) + .withMasterAddresses(parameterConfig.getStringVal(KEY_MASTER_ADDRESSES)) + .withAuthentication(parameterConfig.getStringVal(KEY_AUTHENTICATION)) + .withprincipal(parameterConfig.getStringVal(KEY_PRINCIPAL)) + .withKeytabFile(parameterConfig.getStringVal(KEY_KEYTABFILE)) + .withWorkerCount(parameterConfig.getIntVal(KEY_WORKER_COUNT, 2 * Runtime.getRuntime().availableProcessors())) + .withBossCount(parameterConfig.getIntVal(KEY_BOSS_COUNT, 1)) + .withOperationTimeout(parameterConfig.getLongVal(KEY_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) + .withQueryTimeout(parameterConfig.getLongVal(KEY_QUERY_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) + .withAdminOperationTimeout(parameterConfig.getLongVal(KEY_ADMIN_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) .withTable(parameterConfig.getStringVal(KEY_TABLE)) - .withReadMode(parameterConfig.getStringVal("readMode")) - .withFilter(parameterConfig.getStringVal("filter")) + .withReadMode(parameterConfig.getStringVal(KEY_READ_MODE)) + .withBatchSizeBytes(parameterConfig.getIntVal(KEY_BATCH_SIZE_BYTES, 1024*1024)) + .withFilter(parameterConfig.getStringVal(KEY_FILTER)) .build(); } diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java index d64f692418..8ee34fb317 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java @@ -19,11 +19,17 @@ package com.dtstack.flinkx.kudu.writer; +import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; +import com.dtstack.flinkx.kudu.core.KuduConfig; +import com.dtstack.flinkx.kudu.core.KuduUtil; import com.dtstack.flinkx.outputformat.RichOutputFormat; +import com.dtstack.flinkx.reader.MetaColumn; import org.apache.flink.types.Row; +import org.apache.kudu.client.*; import java.io.IOException; +import java.util.List; /** * @author jiangbo @@ -31,23 +37,91 @@ */ public class KuduOutputFormat extends RichOutputFormat { + protected List columns; + + protected KuduConfig kuduConfig; + + protected String writeMode; + + private transient KuduClient client; + + private transient KuduSession session; + + private transient KuduTable kuduTable; + @Override protected void openInternal(int taskNumber, int numTasks) throws IOException { + try{ + client = KuduUtil.getKuduClient(kuduConfig); + } catch (Exception e){ 
+ throw new RuntimeException("Get KuduClient error", e); + } + session = client.newSession(); + kuduTable = client.openTable(kuduConfig.getTable()); } @Override protected void writeSingleRecordInternal(Row row) throws WriteRecordException { + writeData(row); + if(numWriteCounter.getLocalValue() % batchInterval == 0){ + try { + session.flush(); + } catch (KuduException e) { + throw new RuntimeException("Flush data error", e); + } + } + } + + private void writeData(Row row) throws WriteRecordException { + int index = 0; + try { + Operation operation = getOperation(); + for (int i = 0; i < columns.size(); i++) { + index = i; + MetaColumn column = columns.get(i); + operation.getRow().addObject(column.getName(), row.getField(i)); + } + + session.apply(operation); + } catch (Exception e){ + throw new WriteRecordException("Write data error", e, index, row); + } + } + + private Operation getOperation(){ + if(EWriteMode.INSERT.name().equals(writeMode)){ + return kuduTable.newInsert(); + } else if(EWriteMode.UPDATE.name().equals(writeMode)){ + return kuduTable.newUpdate(); + } else if(EWriteMode.UPSERT.name().equals(writeMode)){ + return kuduTable.newUpsert(); + } else { + throw new IllegalArgumentException("Not support writeMode:" + writeMode); + } } @Override protected void writeMultipleRecordsInternal() throws Exception { + for (Row row : rows) { + writeData(row); + } + session.flush(); } @Override public void closeInternal() throws IOException { super.closeInternal(); + + if(session != null){ + session.flush(); + session.close(); + } + + if(client != null){ + client.close(); + } } } diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java index 1f5b618273..d04234664b 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java @@ -19,7 +19,11 @@ package com.dtstack.flinkx.kudu.writer; +import com.dtstack.flinkx.kudu.core.KuduConfig; import com.dtstack.flinkx.outputformat.RichOutputFormatBuilder; +import com.dtstack.flinkx.reader.MetaColumn; + +import java.util.List; /** * @author jiangbo @@ -27,8 +31,24 @@ */ public class KuduOutputFormatBuilder extends RichOutputFormatBuilder { + private KuduOutputFormat format; + + public KuduOutputFormatBuilder() { + super.format = format = new KuduOutputFormat(); + } + + public void setColumns(List columns){ + format.columns = columns; + } + + public void setKuduConfig(KuduConfig kuduConfig){ + format.kuduConfig = kuduConfig; + } + @Override protected void checkFormat() { - + if (format.columns == null || format.columns.size() == 0){ + throw new IllegalArgumentException("columns can not be empty"); + } } } diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java index da4eddcde9..eda8d4c69c 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java @@ -20,10 +20,20 @@ package com.dtstack.flinkx.kudu.writer; import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.config.WriterConfig; +import 
com.dtstack.flinkx.kudu.core.KuduConfig; +import com.dtstack.flinkx.kudu.core.KuduConfigBuilder; +import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.writer.DataWriter; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.DataStreamSink; +import org.apache.flink.streaming.api.functions.sink.DtOutputFormatSinkFunction; import org.apache.flink.types.Row; +import org.apache.kudu.client.AsyncKuduClient; + +import java.util.List; + +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*; /** * @author jiangbo @@ -31,12 +41,39 @@ */ public class KuduWriter extends DataWriter { + private List columns; + + private KuduConfig kuduConfig; + public KuduWriter(DataTransferConfig config) { super(config); + + WriterConfig.ParameterConfig parameterConfig = config.getJob().getContent().get(0).getWriter().getParameter(); + + columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); + kuduConfig = KuduConfigBuilder.getInstance() + .withMasterAddresses(parameterConfig.getStringVal(KEY_MASTER_ADDRESSES)) + .withAuthentication(parameterConfig.getStringVal(KEY_AUTHENTICATION)) + .withprincipal(parameterConfig.getStringVal(KEY_PRINCIPAL)) + .withKeytabFile(parameterConfig.getStringVal(KEY_KEYTABFILE)) + .withWorkerCount(parameterConfig.getIntVal(KEY_WORKER_COUNT, 2 * Runtime.getRuntime().availableProcessors())) + .withBossCount(parameterConfig.getIntVal(KEY_BOSS_COUNT, 1)) + .withOperationTimeout(parameterConfig.getLongVal(KEY_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) + .withAdminOperationTimeout(parameterConfig.getLongVal(KEY_ADMIN_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) + .withTable(parameterConfig.getStringVal(KEY_TABLE)) + .build(); } @Override public DataStreamSink writeData(DataStream dataSet) { - return null; + KuduOutputFormatBuilder builder = new KuduOutputFormatBuilder(); + builder.setMonitorUrls(monitorUrls); + builder.setColumns(columns); + builder.setKuduConfig(kuduConfig); + + DtOutputFormatSinkFunction formatSinkFunction = new DtOutputFormatSinkFunction(builder.finish()); + DataStreamSink dataStreamSink = dataSet.addSink(formatSinkFunction); + dataStreamSink.name("kuduwriter"); + return dataStreamSink; } } diff --git a/flinkx-test/pom.xml b/flinkx-test/pom.xml index e52d6cecab..60c00da27a 100644 --- a/flinkx-test/pom.xml +++ b/flinkx-test/pom.xml @@ -206,6 +206,17 @@ 1.6 + + com.dtstack.flinkx + flinkx-kudu-reader + 1.6 + + + + com.dtstack.flinkx + flinkx-kudu-writer + 1.6 + diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java index c6ffa3b9f4..d434ee8a41 100644 --- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java +++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java @@ -36,6 +36,8 @@ import com.dtstack.flinkx.hbase.writer.HbaseWriter; import com.dtstack.flinkx.hdfs.reader.HdfsReader; import com.dtstack.flinkx.hdfs.writer.HdfsWriter; +import com.dtstack.flinkx.kudu.reader.KuduReader; +import com.dtstack.flinkx.kudu.writer.KuduWriter; import com.dtstack.flinkx.mongodb.reader.MongodbReader; import com.dtstack.flinkx.mongodb.writer.MongodbWriter; import com.dtstack.flinkx.mysql.reader.MysqlReader; @@ -92,7 +94,7 @@ public class LocalTest { public static final String TEST_RESOURCE_DIR = "flinkx-test/src/main/resources/dev_test_job/"; public static void main(String[] args) throws Exception{ - String jobPath = TEST_RESOURCE_DIR + 
"stream_template.json"; + String jobPath = TEST_RESOURCE_DIR + "kudu_reader_template.json"; JobExecutionResult result = LocalTest.runJob(new File(jobPath), null, null); ResultPrintUtil.printResult(result); } @@ -159,6 +161,7 @@ private static DataReader buildDataReader(DataTransferConfig config, StreamExecu case PluginNameConstrant.HDFS_READER : reader = new HdfsReader(config, env); break; case PluginNameConstrant.MONGODB_READER : reader = new MongodbReader(config, env); break; case PluginNameConstrant.ODPS_READER : reader = new OdpsReader(config, env); break; + case PluginNameConstrant.KUDU_READER : reader = new KuduReader(config, env); break; default:throw new IllegalArgumentException("Can not find reader by name:" + readerName); } @@ -184,6 +187,7 @@ private static DataWriter buildDataWriter(DataTransferConfig config){ case PluginNameConstrant.MONGODB_WRITER : writer = new MongodbWriter(config); break; case PluginNameConstrant.ODPS_WRITER : writer = new OdpsWriter(config); break; case PluginNameConstrant.REDIS_WRITER : writer = new RedisWriter(config); break; + case PluginNameConstrant.KUDU_WRITER : writer = new KuduWriter(config); break; default:throw new IllegalArgumentException("Can not find writer by name:" + writerName); } diff --git a/flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json b/flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json new file mode 100644 index 0000000000..9a2bd1ae10 --- /dev/null +++ b/flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json @@ -0,0 +1,46 @@ +{ + "job": { + "content": [ + { + "reader": { + "parameter": { + "column": [ + { + "name": "id", + "type": "long" + }, + { + "name": "user_id", + "type": "long" + }, + { + "name": "name", + "type": "string" + } + ], + "masterAddresses": "impala1:7051,impala2:7051,impala3:7051", + "table": "kudu_range_table", + "readMode": "read_latest", + "filter": "" + }, + "name": "kudureader" + }, + "writer": { + "parameter": { + "print": false + }, + "name": "streamwriter" + } + } + ], + "setting": { + "errorLimit": { + "record": 100 + }, + "speed": { + "bytes": 1048576, + "channel": 1 + } + } + } +} \ No newline at end of file From 245e50c0913bab5d1a135025b7b8ea05391bf2f1 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 12 Aug 2019 21:59:38 +0800 Subject: [PATCH 06/62] test kudu writer --- .../flinkx/kudu/writer/KuduOutputFormat.java | 6 +- .../kudu/writer/KuduOutputFormatBuilder.java | 4 ++ .../flinkx/kudu/writer/KuduWriter.java | 4 ++ .../com/dtstack/flinkx/test/LocalTest.java | 2 +- .../dev_test_job/kudu_writer_template.json | 59 +++++++++++++++++++ 5 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java index 8ee34fb317..872cdd346f 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java @@ -91,11 +91,11 @@ private void writeData(Row row) throws WriteRecordException { } private Operation getOperation(){ - if(EWriteMode.INSERT.name().equals(writeMode)){ + if(EWriteMode.INSERT.name().equalsIgnoreCase(writeMode)){ return kuduTable.newInsert(); - } else if(EWriteMode.UPDATE.name().equals(writeMode)){ + } else 
if(EWriteMode.UPDATE.name().equalsIgnoreCase(writeMode)){ return kuduTable.newUpdate(); - } else if(EWriteMode.UPSERT.name().equals(writeMode)){ + } else if(EWriteMode.UPSERT.name().equalsIgnoreCase(writeMode)){ return kuduTable.newUpsert(); } else { throw new IllegalArgumentException("Not support writeMode:" + writeMode); diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java index d04234664b..a18d17e689 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java @@ -45,6 +45,10 @@ public void setKuduConfig(KuduConfig kuduConfig){ format.kuduConfig = kuduConfig; } + public void setWriteMode(String writeMode){ + format.writeMode = writeMode; + } + @Override protected void checkFormat() { if (format.columns == null || format.columns.size() == 0){ diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java index eda8d4c69c..8c7b9b5e0c 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java @@ -45,12 +45,15 @@ public class KuduWriter extends DataWriter { private KuduConfig kuduConfig; + private String writeMode; + public KuduWriter(DataTransferConfig config) { super(config); WriterConfig.ParameterConfig parameterConfig = config.getJob().getContent().get(0).getWriter().getParameter(); columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); + writeMode = parameterConfig.getStringVal("writeMode"); kuduConfig = KuduConfigBuilder.getInstance() .withMasterAddresses(parameterConfig.getStringVal(KEY_MASTER_ADDRESSES)) .withAuthentication(parameterConfig.getStringVal(KEY_AUTHENTICATION)) @@ -70,6 +73,7 @@ public DataStreamSink writeData(DataStream dataSet) { builder.setMonitorUrls(monitorUrls); builder.setColumns(columns); builder.setKuduConfig(kuduConfig); + builder.setWriteMode(writeMode); DtOutputFormatSinkFunction formatSinkFunction = new DtOutputFormatSinkFunction(builder.finish()); DataStreamSink dataStreamSink = dataSet.addSink(formatSinkFunction); diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java index d434ee8a41..d21007f529 100644 --- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java +++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java @@ -94,7 +94,7 @@ public class LocalTest { public static final String TEST_RESOURCE_DIR = "flinkx-test/src/main/resources/dev_test_job/"; public static void main(String[] args) throws Exception{ - String jobPath = TEST_RESOURCE_DIR + "kudu_reader_template.json"; + String jobPath = TEST_RESOURCE_DIR + "kudu_writer_template.json"; JobExecutionResult result = LocalTest.runJob(new File(jobPath), null, null); ResultPrintUtil.printResult(result); } diff --git a/flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json b/flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json new file mode 100644 index 0000000000..6960765f86 --- /dev/null +++ 
b/flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json @@ -0,0 +1,59 @@ +{ + "job": { + "content": [ + { + "writer": { + "parameter": { + "column": [ + { + "name": "id", + "type": "long" + }, + { + "name": "user_id", + "type": "long" + }, + { + "name": "name", + "type": "string" + } + ], + "masterAddresses": "impala1:7051,impala2:7051,impala3:7051", + "table": "kudu_range_table_write", + "writeMode": "insert" + }, + "name": "kuduwriter" + }, + "reader": { + "parameter": { + "column": [ + { + "name": "id", + "type": "long" + }, + { + "name": "user_id", + "type": "long" + }, + { + "name": "name", + "type": "string" + } + ], + "sliceRecordCount": ["10000"] + }, + "name": "streamreader" + } + } + ], + "setting": { + "errorLimit": { + "record": 100 + }, + "speed": { + "bytes": 1048576, + "channel": 1 + } + } + } +} \ No newline at end of file From dc6aa5bfabde5eef32dc3825c1970bdabb4615f4 Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 20 Sep 2019 13:34:48 +0800 Subject: [PATCH 07/62] temp commit --- .../flinkx/mysql/format/MysqlInputFormat.java | 121 ++++++++ .../flinkx/mysql/reader/MysqlReader.java | 5 +- .../mysql/format/MysqlOutputFormat.java | 12 + .../oracle/format/OracleInputFormat.java | 42 +++ .../flinkx/rdb/loader/JdbcFormatLoader.java | 67 +++++ .../com/dtstack/flinkx/rdb/util/DBUtil.java | 272 +++++------------- .../DistributedJdbcDataReader.java | 3 +- .../IncrementConfig.java | 9 + .../JdbcDataReader.java | 6 +- .../DistributedJdbcInputFormat.java | 3 +- .../JdbcInputFormat.java | 212 ++++++++++---- .../JdbcInputFormatBuilder.java | 6 +- .../JdbcDataWriter.java | 5 +- .../outputformat/JdbcOutputFormatBuilder.java | 6 +- .../format/SqlserverInputFormat.java | 46 +++ 15 files changed, 538 insertions(+), 277 deletions(-) create mode 100644 flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java create mode 100644 flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java create mode 100644 flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java create mode 100644 flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java create mode 100644 flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java new file mode 100644 index 0000000000..85619af16e --- /dev/null +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java @@ -0,0 +1,121 @@ +package com.dtstack.flinkx.mysql.format; + +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; +import com.dtstack.flinkx.reader.MetaColumn; +import com.dtstack.flinkx.util.ClassUtil; +import com.dtstack.flinkx.util.DateUtil; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.core.io.InputSplit; +import org.apache.flink.types.Row; + +import java.io.IOException; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class MysqlInputFormat extends JdbcInputFormat { + + + @Override + public void 
openInternal(InputSplit inputSplit) throws IOException { + try { + LOG.info(inputSplit.toString()); + + ClassUtil.forName(drivername, getClass().getClassLoader()); + + if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){ + getMaxValue(inputSplit); + } + + initMetric(inputSplit); + + if(!canReadData(inputSplit)){ + LOG.warn("Not reading data because the start location equals the end location"); + + hasNext = false; + return; + } + + dbConn = DBUtil.getConnection(dbURL, username, password); + + // Some drivers require auto-commit to be disabled before the fetchSize setting takes effect + dbConn.setAutoCommit(false); + + Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); + + statement.setFetchSize(Integer.MIN_VALUE); + + statement.setQueryTimeout(queryTimeOut); + String querySql = buildQuerySql(inputSplit); + resultSet = statement.executeQuery(querySql); + columnCount = resultSet.getMetaData().getColumnCount(); + + boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); + if(splitWithRowCol){ + columnCount = columnCount-1; + } + + hasNext = resultSet.next(); + + if (StringUtils.isEmpty(customSql)){ + descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns); + } else { + descColumnTypeList = new ArrayList<>(); + for (MetaColumn metaColumn : metaColumns) { + descColumnTypeList.add(metaColumn.getName()); + } + } + + } catch (SQLException se) { + throw new IllegalArgumentException("open() failed. " + se.getMessage(), se); + } + + LOG.info("JdbcInputFormat[" + jobName + "]open: end"); + } + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if(descColumnTypeList != null && descColumnTypeList.size() != 0) { + if(descColumnTypeList.get(pos).equalsIgnoreCase("year")) { + java.util.Date date = (java.util.Date) obj; + obj = DateUtil.dateToYearString(date); + } else if(descColumnTypeList.get(pos).equalsIgnoreCase("tinyint")) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 1 : 0); + } + } else if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ?
1 : 0); + } + } + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } + +} diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java index cd396e4454..27c30a6692 100644 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java @@ -21,8 +21,11 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import java.util.Collections; + /** * MySQL reader plugin * @@ -34,6 +37,6 @@ public class MysqlReader extends JdbcDataReader { public MysqlReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); setDatabaseInterface(new MySqlDatabaseMeta()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); } - } diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java new file mode 100644 index 0000000000..75d0141868 --- /dev/null +++ b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java @@ -0,0 +1,12 @@ +package com.dtstack.flinkx.mysql.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +/** + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class MysqlOutputFormat extends JdbcOutputFormat { +} diff --git a/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java new file mode 100644 index 0000000000..5f3d2f8961 --- /dev/null +++ b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java @@ -0,0 +1,42 @@ +package com.dtstack.flinkx.oracle.format; + +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import org.apache.flink.types.Row; + +import java.io.IOException; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class OracleInputFormat extends JdbcInputFormat { + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if((obj instanceof java.util.Date || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { + obj = resultSet.getTimestamp(pos + 1); + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } +} diff --git 
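The JdbcFormatLoader that follows removes the per-database branching that used to live in DBUtil.getRow() (deleted below): each database now ships its own InputFormat/OutputFormat subclass, such as the MysqlInputFormat and OracleInputFormat above, and the loader derives the class name from the naming convention com.dtstack.flinkx.%s.format.%s and instantiates it reflectively. A usage sketch, assuming the mysql reader module is on the classpath:

    // Resolves to com.dtstack.flinkx.mysql.format.MysqlInputFormat and
    // instantiates it through its no-argument constructor.
    JdbcFormatLoader loader = new JdbcFormatLoader("mysql", JdbcFormatLoader.INPUT_FORMAT);
    JdbcInputFormat inputFormat = (JdbcInputFormat) loader.getFormatInstance();

diff --git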
a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java new file mode 100644 index 0000000000..b2d56f8d48 --- /dev/null +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java @@ -0,0 +1,67 @@ +package com.dtstack.flinkx.rdb.loader; + +import org.apache.flink.util.Preconditions; + +/** + * FlinkX jdbc format loader + * + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class JdbcFormatLoader { + + /** + * Format name + */ + private String formatName; + + /** + * Fully qualified class name of the format + */ + private String formatClassName; + + public static final int INPUT_FORMAT = 0; + public static final int OUTPUT_FORMAT = 1; + + private final String pkgPrefixFormat = "com.dtstack.flinkx.%s.format.%s"; + + private final String INPUT_FORMAT_SUFFIX = "InputFormat"; + + private final String OUTPUT_FORMAT_SUFFIX = "OutputFormat"; + + /** + * JdbcFormatLoader constructor + * @param dataType JDBC data source type + * @param formatType format type: INPUT_FORMAT or OUTPUT_FORMAT + */ + public JdbcFormatLoader(String dataType, int formatType){ + + Preconditions.checkArgument(dataType != null && dataType.trim().length() != 0); + Preconditions.checkArgument(formatType == INPUT_FORMAT || formatType == OUTPUT_FORMAT); + + dataType = dataType.toLowerCase(); + if(formatType == INPUT_FORMAT){ + this.formatName = dataType + INPUT_FORMAT_SUFFIX; + }else{ + this.formatName = dataType + OUTPUT_FORMAT_SUFFIX; + } + this.formatClassName = String.format(pkgPrefixFormat, dataType, this.formatName.substring(0, 1).toUpperCase() + this.formatName.substring(1)); + } + + public Object getFormatInstance() { + Object format = null; + try { + Class<?> clz = Class.forName(formatClassName); + format = clz.newInstance(); + } catch (ClassNotFoundException e) { + throw new RuntimeException("failed to load " + formatClassName, e); + } catch (Exception e) { + throw new RuntimeException(formatClassName + " does not have a no-argument constructor", e); + } + + return format; + } + +} diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java index 6108137732..7ad8fa8b71 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java @@ -17,16 +17,14 @@ */ package com.dtstack.flinkx.rdb.util; -import com.dtstack.flinkx.constants.PluginNameConstrant; -import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.ParameterValuesProvider; -import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.*; +import com.dtstack.flinkx.util.ClassUtil; +import com.dtstack.flinkx.util.SysUtil; +import com.dtstack.flinkx.util.TelnetUtil; import org.apache.commons.lang.StringUtils; -import org.apache.flink.types.Row; +import org.apache.flink.util.CollectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +36,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; /** * @@ -56,6 +55,8 @@ public class DBUtil { private static int MICRO_LENGTH = 16; private static int NANOS_LENGTH = 19;
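The DBUtil changes keep the timestamp helpers but widen their visibility; getMillis() and getNanos() further down infer the unit of a numeric start location from its digit count, following the length constants above (MICRO_LENGTH = 16 and NANOS_LENGTH = 19, with 10 and 13 digits reading as seconds and milliseconds). A small worked example of that dispatch (comment-only sketch; the exact scaling lives in the method bodies):

    long startLocation = 1565586665372L;               // 13 digits -> milliseconds
    int len = String.valueOf(startLocation).length();
    // len == 10 (SECOND_LENGTH) -> seconds
    // len == 13                 -> milliseconds
    // len == 16 (MICRO_LENGTH)  -> microseconds
    // len == 19 (NANOS_LENGTH)  -> nanoseconds

+ public static final Pattern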
DB_PATTERN = Pattern.compile("\\?"); + public static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; public static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; @@ -83,31 +84,27 @@ private static Connection getConnectionInternal(String url, String username, Str } public static Connection getConnection(String url, String username, String password) throws SQLException { - if (!url.startsWith("jdbc:mysql")) { - return getConnectionInternal(url, username, password); - } else { - boolean failed = true; - Connection dbConn = null; - for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) { - try { - dbConn = getConnectionInternal(url, username, password); - dbConn.createStatement().execute("select 111"); - failed = false; - } catch (Exception e) { - if (dbConn != null) { - dbConn.close(); - } - - if (i == MAX_RETRY_TIMES - 1) { - throw e; - } else { - SysUtil.sleep(3000); - } + boolean failed = true; + Connection dbConn = null; + for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) { + try { + dbConn = getConnectionInternal(url, username, password); + dbConn.createStatement().execute("select 111"); + failed = false; + } catch (Exception e) { + if (dbConn != null) { + dbConn.close(); } - } - return dbConn; + if (i == MAX_RETRY_TIMES - 1) { + throw e; + } else { + SysUtil.sleep(3000); + } + } } + + return dbConn; } @@ -305,52 +302,6 @@ public static void setParameterValue(Object param,PreparedStatement statement,in } } - public static void getRow(EDatabaseType dbType, Row row, List descColumnTypeList, ResultSet resultSet, - TypeConverterInterface typeConverter) throws Exception{ - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - if (EDatabaseType.Oracle == dbType) { - if((obj instanceof java.util.Date || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { - obj = resultSet.getTimestamp(pos + 1); - } - } else if(EDatabaseType.MySQL == dbType) { - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - if(descColumnTypeList.get(pos).equalsIgnoreCase("year")) { - java.util.Date date = (java.util.Date) obj; - String year = DateUtil.dateToYearString(date); - System.out.println(year); - obj = year; - } else if(descColumnTypeList.get(pos).equalsIgnoreCase("tinyint")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 1 : 0); - } - } else if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 1 : 0); - } - } - } - } else if(EDatabaseType.SQLServer == dbType) { - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 
1 : 0); - } - } - } - } else if(EDatabaseType.PostgreSQL == dbType){ - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - obj = typeConverter.convert(obj,descColumnTypeList.get(pos)); - } - } - - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - } public static Object clobToString(Object obj) throws Exception{ String dataStr; @@ -370,102 +321,9 @@ public static Object clobToString(Object obj) throws Exception{ return dataStr; } - public static String buildIncrementFilter(DatabaseInterface databaseInterface,String incrementColType,String incrementCol, - String startLocation,String endLocation, String customSql, boolean useMaxFunc){ - StringBuilder filter = new StringBuilder(); - - if (StringUtils.isNotEmpty(customSql)){ - incrementCol = String.format("%s.%s", TEMPORARY_TABLE_NAME, databaseInterface.quoteColumn(incrementCol)); - } else { - incrementCol = databaseInterface.quoteColumn(incrementCol); - } - - String startFilter = buildStartLocationSql(databaseInterface, incrementColType, incrementCol, startLocation, useMaxFunc); - if (StringUtils.isNotEmpty(startFilter)){ - filter.append(startFilter); - } - - String endFilter = buildEndLocationSql(databaseInterface, incrementColType, incrementCol, endLocation); - if (StringUtils.isNotEmpty(endFilter)){ - if (filter.length() > 0){ - filter.append(" and ").append(endFilter); - } else { - filter.append(endFilter); - } - } - - return filter.toString(); - } - - public static String buildStartLocationSql(DatabaseInterface databaseInterface,String incrementColType, - String incrementCol,String startLocation,boolean useMaxFunc){ - if(StringUtils.isEmpty(startLocation) || NULL_STRING.equalsIgnoreCase(startLocation)){ - return null; - } - - String operator = " >= "; - if(!useMaxFunc){ - operator = " > "; - } - - return getLocationSql(databaseInterface, incrementColType, incrementCol, startLocation, operator); - } - - public static String buildEndLocationSql(DatabaseInterface databaseInterface,String incrementColType,String incrementCol, - String endLocation){ - if(StringUtils.isEmpty(endLocation) || NULL_STRING.equalsIgnoreCase(endLocation)){ - return null; - } - - return getLocationSql(databaseInterface, incrementColType, incrementCol, endLocation, " < "); - } - - private static String getLocationSql(DatabaseInterface databaseInterface, String incrementColType, String incrementCol, - String location, String operator) { - String endTimeStr; - String endLocationSql; - boolean isTimeType = ColumnType.isTimeType(incrementColType) - || (databaseInterface.getDatabaseType() == EDatabaseType.SQLServer && ColumnType.NVARCHAR.name().equals(incrementColType)); - if(isTimeType){ - endTimeStr = getTimeStr(databaseInterface.getDatabaseType(), Long.parseLong(location), incrementColType); - endLocationSql = incrementCol + operator + endTimeStr; - } else if(ColumnType.isNumberType(incrementColType)){ - endLocationSql = incrementCol + operator + location; - } else { - endTimeStr = String.format("'%s'",location); - endLocationSql = incrementCol + operator + endTimeStr; - } - - return endLocationSql; - } - - private static String getTimeStr(EDatabaseType databaseType,Long startLocation,String incrementColType){ - String timeStr; - Timestamp ts = new Timestamp(getMillis(startLocation)); - ts.setNanos(getNanos(startLocation)); - timeStr = getNanosTimeStr(ts.toString()); - - if(databaseType == EDatabaseType.SQLServer){ - timeStr = timeStr.substring(0,23); - } else { - timeStr = timeStr.substring(0,26); - } - - if (databaseType == 
EDatabaseType.Oracle){
-            if(ColumnType.TIMESTAMP.name().equals(incrementColType)){
-                timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr);
-            } else {
-                timeStr = timeStr.substring(0, 19);
-                timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr);
-            }
-        } else {
-            timeStr = String.format("'%s'",timeStr);
-        }
-
-        return timeStr;
-    }
-
-    private static String getNanosTimeStr(String timeStr){
+    public static String getNanosTimeStr(String timeStr){
         if(timeStr.length() < 29){
             timeStr += StringUtils.repeat("0",29 - timeStr.length());
         }
@@ -473,7 +331,7 @@ private static String getNanosTimeStr(String timeStr){
         return timeStr;
     }
 
-    private static int getNanos(long startLocation){
+    public static int getNanos(long startLocation){
         String timeStr = String.valueOf(startLocation);
         int nanos;
         if (timeStr.length() == SECOND_LENGTH){
@@ -491,7 +349,7 @@ private static int getNanos(long startLocation){
         return nanos;
     }
 
-    private static long getMillis(long startLocation){
+    public static long getMillis(long startLocation){
         String timeStr = String.valueOf(startLocation);
         long millisSecond;
         if (timeStr.length() == SECOND_LENGTH){
@@ -509,46 +367,48 @@ private static long getMillis(long startLocation){
         return millisSecond;
     }
 
-    public static String formatJdbcUrl(String pluginName,String dbUrl){
-        if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_READER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.POSTGRESQL_READER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.GBASE_WRITER) ){
-            String[] splits = dbUrl.split("\\?");
-
-            Map paramMap = new HashMap();
-            if(splits.length > 1) {
-                String[] pairs = splits[1].split("&");
-                for(String pair : pairs) {
-                    String[] leftRight = pair.split("=");
-                    paramMap.put(leftRight[0], leftRight[1]);
-                }
-            }
-
-            paramMap.put("useCursorFetch", "true");
-            paramMap.put("rewriteBatchedStatements", "true");
-            if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_READER)
-                    || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)){
-                paramMap.put("zeroDateTimeBehavior","convertToNull");
+    /**
+     * Format a jdbc connection URL
+     * @param dbUrl the original jdbc URL
+     * @param extParamMap extra parameters to append
+     * @return the formatted jdbc URL string
+     */
+    public static String formatJdbcUrl(String dbUrl, Map extParamMap){
+//        if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)
+//                || pluginName.equalsIgnoreCase(PluginNameConstrant.POSTGRESQL_READER)
+//                || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER)
+//                || pluginName.equalsIgnoreCase(PluginNameConstrant.GBASE_WRITER) ){
+        String[] splits = DB_PATTERN.split(dbUrl);
+
+        Map paramMap = new HashMap();
+        if(splits.length > 1) {
+            String[] pairs = splits[1].split("&");
+            for(String pair : pairs) {
+                String[] leftRight = pair.split("=");
+                paramMap.put(leftRight[0], leftRight[1]);
             }
+        }
 
-            StringBuffer sb = new StringBuffer(splits[0]);
-            if(paramMap.size() != 0) {
-                sb.append("?");
-                int index = 0;
-                for(Map.Entry entry : paramMap.entrySet()) {
-                    if(index != 0) {
-                        sb.append("&");
-                    }
-                    sb.append(entry.getKey() + "=" + entry.getValue());
-                    index++;
-                }
+        paramMap.put("useCursorFetch", "true");
+        paramMap.put("rewriteBatchedStatements", "true");
+        if(!CollectionUtil.isNullOrEmpty(extParamMap)){
+            paramMap.putAll(extParamMap);
+        }
+//        if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)){
+//            paramMap.put("zeroDateTimeBehavior","convertToNull");
+//        }
+
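A side note on getMillis/getNanos above: the digit count of the numeric location encodes its resolution. A worked sketch of that dispatch, assuming SECOND_LENGTH is 10 and MILLIS_LENGTH is 13 (consistent with the MICRO_LENGTH = 16 and NANOS_LENGTH = 19 fields, but not shown in this hunk):

    public class LocationResolutionDemo {
        public static void main(String[] args) {
            long seconds = 1569000000L;          // 10 digits -> scaled up by 1000
            long millis  = 1569000000123L;       // 13 digits -> used as-is
            long micros  = 1569000000123456L;    // 16 digits -> truncated by /1000
            long nanos   = 1569000000123456789L; // 19 digits -> truncated by /1000000
            System.out.println(seconds * 1000);  // 1569000000000
            System.out.println(millis);          // 1569000000123
            System.out.println(micros / 1000);   // 1569000000123
            System.out.println(nanos / 1000000); // 1569000000123
        }
    }
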
+        StringBuffer sb = new StringBuffer(dbUrl.length() + 128);
+        sb.append(splits[0]).append("?");
+        int index = 0;
+        for(Map.Entry entry : paramMap.entrySet()) {
+            if(index != 0) {
+                sb.append("&");
             }
-
-            dbUrl = sb.toString();
+            sb.append(entry.getKey()).append("=").append(entry.getValue());
+            index++;
         }
-        return dbUrl;
+
+        return sb.toString();
     }
 }
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java
index a3e9da992e..e2701b03cb 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java
@@ -111,7 +111,8 @@ private List buildConnections(){
                     ? username : connectionConfig.getUsername();
             String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0)
                     ? password : connectionConfig.getPassword();
-            String curJdbcUrl = DBUtil.formatJdbcUrl(pluginName,connectionConfig.getJdbcUrl().get(0));
+            //todo pass MySQL-specific extra parameters here
+            String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), null);
             for (String table : connectionConfig.getTable()) {
                 DataSource source = new DataSource();
                 source.setTable(table);
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java
index 103283a9f0..03f8f92458 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java
@@ -27,8 +27,17 @@
  */
 public class IncrementConfig implements Serializable {
 
+    /**
+     * Whether this is an incremental job
+     */
     private boolean increment;
 
+    /**
+     * Marks whether the row(s) sitting exactly at endLocation are saved:
+     * true: not saved
+     * false (default): saved
+     * otherwise the last few rows may, in some cases, be recorded twice
+     */
     private boolean useMaxFunc;
 
     private int columnIndex;
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java
index dc016efb52..e47a5d82b7 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java
@@ -20,11 +20,10 @@
 
 import com.dtstack.flinkx.config.DataTransferConfig;
 import com.dtstack.flinkx.config.ReaderConfig;
+import com.dtstack.flinkx.inputformat.RichInputFormat;
 import com.dtstack.flinkx.rdb.DatabaseInterface;
 import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder;
-import com.dtstack.flinkx.inputformat.RichInputFormat;
 import com.dtstack.flinkx.rdb.type.TypeConverterInterface;
-import com.dtstack.flinkx.rdb.util.DBUtil;
 import com.dtstack.flinkx.reader.DataReader;
 import com.dtstack.flinkx.reader.MetaColumn;
 import org.apache.commons.lang3.StringUtils;
@@ -84,7 +83,6 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env)
 
         ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader();
         dbUrl = readerConfig.getParameter().getConnection().get(0).getJdbcUrl().get(0);
-        dbUrl = DBUtil.formatJdbcUrl(readerConfig.getName(),dbUrl);
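For context, the new two-argument formatJdbcUrl keeps any query parameters already present in the URL, merges in the caller's extParamMap, and always forces cursor fetch and batched statements. A standalone sketch of that contract (illustrative only, not the FlinkX class; a LinkedHashMap is used here just to make the output order deterministic):

    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class JdbcUrlFormatDemo {
        public static void main(String[] args) {
            String url = "jdbc:mysql://localhost:3306/test?characterEncoding=utf8";
            Map<String, String> ext = new HashMap<>();
            ext.put("zeroDateTimeBehavior", "convertToNull");

            String[] splits = url.split("\\?");          // DBUtil precompiles this as DB_PATTERN
            Map<String, String> params = new LinkedHashMap<>();
            if (splits.length > 1) {
                for (String pair : splits[1].split("&")) {
                    String[] kv = pair.split("=");
                    params.put(kv[0], kv[1]);
                }
            }
            params.putAll(ext);                          // caller-supplied extras
            params.put("useCursorFetch", "true");        // always enforced
            params.put("rewriteBatchedStatements", "true");

            StringBuilder sb = new StringBuilder(splits[0]).append("?");
            int i = 0;
            for (Map.Entry<String, String> e : params.entrySet()) {
                if (i++ > 0) { sb.append("&"); }
                sb.append(e.getKey()).append("=").append(e.getValue());
            }
            System.out.println(sb);
        }
    }

With a null extParamMap, as in the DistributedJdbcDataReader call above, only the two enforced defaults are appended.

        username = 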
readerConfig.getParameter().getStringVal(JdbcConfigKeys.KEY_USER_NAME); password = readerConfig.getParameter().getStringVal(JdbcConfigKeys.KEY_PASSWORD); table = readerConfig.getParameter().getConnection().get(0).getTable().get(0); @@ -101,7 +99,7 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env) @Override public DataStream readData() { - JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(); + JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface); builder.setDrivername(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java index a48ff5489b..f3c66da9f3 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java @@ -160,7 +160,8 @@ private boolean readNextRecord() throws IOException{ hasNext = currentResultSet.next(); if (hasNext){ currentRecord = new Row(columnCount); - DBUtil.getRow(databaseInterface.getDatabaseType(),currentRecord,descColumnTypeList,currentResultSet,typeConverter); + //todo +// DBUtil.getRow(databaseInterface.getDatabaseType(),currentRecord,descColumnTypeList,currentResultSet,typeConverter); if(!"*".equals(metaColumns.get(0).getName())){ for (int i = 0; i < columnCount; i++) { Object val = currentRecord.getField(i); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 9a40ccfb23..39cc51efea 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -18,9 +18,9 @@ package com.dtstack.flinkx.rdb.inputformat; -import com.dtstack.flinkx.enums.ColumnType; import com.dtstack.flinkx.constants.Metrics; -import com.dtstack.flinkx.enums.EDatabaseType; +import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; @@ -41,12 +41,6 @@ import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.CloseableHttpClient; import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.HttpClientBuilder; import org.apache.flink.types.Row; -import java.io.IOException; -import java.sql.*; -import java.util.*; -import java.util.Date; - -import com.dtstack.flinkx.inputformat.RichInputFormat; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -54,6 +48,11 @@ import org.apache.hadoop.io.IOUtils; import org.codehaus.jackson.map.ObjectMapper; +import java.io.IOException; +import java.sql.*; +import java.util.Date; +import java.util.*; + /** * InputFormat for reading data from a database and generate Rows. 
 *
@@ -108,6 +107,9 @@ public class JdbcInputFormat extends RichInputFormat {
 
     protected String customSql;
 
+    /**
+     * Incremental job configuration
+     */
     protected IncrementConfig incrementConfig;
 
     protected StringAccumulator tableColAccumulator;
@@ -168,23 +170,22 @@ public void openInternal(InputSplit inputSplit) throws IOException {
             // Some drivers require auto-commit to be disabled before the fetchSize parameter takes effect
             dbConn.setAutoCommit(false);
 
-            // Commit before reading, so that a PG read after an abnormal exit still sees the same order
-            if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){
-                dbConn.commit();
-            }
-
-            Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
-            if(EDatabaseType.MySQL == databaseInterface.getDatabaseType()
-                    || EDatabaseType.GBase == databaseInterface.getDatabaseType()){
-                statement.setFetchSize(Integer.MIN_VALUE);
-            } else {
-                statement.setFetchSize(fetchSize);
-            }
-
-            if(EDatabaseType.Carbondata != databaseInterface.getDatabaseType()) {
-                statement.setQueryTimeout(queryTimeOut);
-            }
-
+//            // Commit before reading, so that a PG read after an abnormal exit still sees the same order
+//            if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){
+//                dbConn.commit();
+//            }
+//
+//            Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+//            if(EDatabaseType.MySQL == databaseInterface.getDatabaseType()
+//                    || EDatabaseType.GBase == databaseInterface.getDatabaseType()){
+//                statement.setFetchSize(Integer.MIN_VALUE);
+//            } else {
+//                statement.setFetchSize(fetchSize);
+//            }
+//
+            statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+            statement.setFetchSize(fetchSize);
+            statement.setQueryTimeout(queryTimeOut);
             String querySql = buildQuerySql(inputSplit);
             resultSet = statement.executeQuery(querySql);
             columnCount = resultSet.getMetaData().getColumnCount();
@@ -240,13 +239,7 @@ public boolean reachedEnd() throws IOException {
 
     @Override
     public Row nextRecordInternal(Row row) throws IOException {
-        row = new Row(columnCount);
         try {
-            if (!hasNext) {
-                return null;
-            }
-
-            DBUtil.getRow(databaseInterface.getDatabaseType(),row,descColumnTypeList,resultSet,typeConverter);
             if(!"*".equals(metaColumns.get(0).getName())){
                 for (int i = 0; i < columnCount; i++) {
                     Object val = row.getField(i);
@@ -269,31 +262,45 @@ public Row nextRecordInternal(Row row) throws IOException {
         //update hasNext after we've read the record
         hasNext = resultSet.next();
         return row;
-        } catch (SQLException se) {
-            throw new IOException("Couldn't read data - " + se.getMessage(), se);
-        } catch (Exception npe) {
-            throw new IOException("Couldn't access resultSet", npe);
+        } catch (SQLException e) {
+            throw new IOException("Couldn't access resultSet", e);
         }
     }
 
-    private void initMetric(InputSplit split){
+    @Override
+    public void closeInternal() throws IOException {
+        if(incrementConfig.isIncrement() && hadoopConfig != null) {
+            uploadMetricData();
+        }
+        DBUtil.closeDBResources(resultSet,statement,dbConn, true);
+    }
+
+    /**
+     * Initialize the metrics of an incremental job
+     * @param split the data split
+     */
+    protected void initMetric(InputSplit split){
        if (!incrementConfig.isIncrement()){
            return;
        }
+        //fetch all registered accumulators
        Map> accumulatorMap = getRuntimeContext().getAllAccumulators();
+        //if the tableCol accumulator is absent, create one that records "table-incrementColumn" and register it with the runtime context
        if(!accumulatorMap.containsKey(Metrics.TABLE_COL)){
            tableColAccumulator = new StringAccumulator();
            tableColAccumulator.add(table + "-" + incrementConfig.getColumnName());
            getRuntimeContext().addAccumulator(Metrics.TABLE_COL,tableColAccumulator);
        }
+        //create an accumulator that records the start location
        startLocationAccumulator = new StringAccumulator();
        if (incrementConfig.getStartLocation() != null){
           startLocationAccumulator.add(incrementConfig.getStartLocation());
        }
        getRuntimeContext().addAccumulator(Metrics.START_LOCATION,startLocationAccumulator);
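As background for the accumulator bookkeeping above and below: StringAccumulator and MaximumAccumulator are FlinkX implementations of Flink's Accumulator interface. A simplified sketch of a max-keeping string accumulator in Flink's API, purely illustrative (it compares lexicographically for brevity; numeric locations would need numeric comparison, and the real MaximumAccumulator is the authoritative version):

    import org.apache.flink.api.common.accumulators.Accumulator;
    import org.apache.flink.api.common.accumulators.SimpleAccumulator;

    public class MaxStringAccumulator implements SimpleAccumulator<String> {
        private String max;

        @Override
        public void add(String value) {
            // keep the largest value seen so far
            if (max == null || (value != null && value.compareTo(max) > 0)) {
                max = value;
            }
        }

        @Override
        public String getLocalValue() { return max; }

        @Override
        public void resetLocal() { max = null; }

        @Override
        public void merge(Accumulator<String, String> other) {
            // merging partial results from other subtasks keeps the global maximum
            add(other.getLocalValue());
        }

        @Override
        public MaxStringAccumulator clone() {
            MaxStringAccumulator copy = new MaxStringAccumulator();
            copy.max = max;
            return copy;
        }
    }
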
+        //create an accumulator that records the end location
        endLocationAccumulator = new MaximumAccumulator();
        String endLocation = ((JdbcInputSplit)split).getEndLocation();
        if(endLocation != null && incrementConfig.isUseMaxFunc()){
@@ -304,7 +311,7 @@ private void initMetric(InputSplit split){
         getRuntimeContext().addAccumulator(Metrics.END_LOCATION,endLocationAccumulator);
     }
 
-    private void getMaxValue(InputSplit inputSplit){
+    protected void getMaxValue(InputSplit inputSplit){
         String maxValue = null;
         if (inputSplit.getSplitNumber() == 0){
             maxValue = getMaxValueFromDb();
@@ -388,7 +395,7 @@ private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[
         return maxValue;
     }
 
-    private boolean canReadData(InputSplit split){
+    protected boolean canReadData(InputSplit split){
         if (!incrementConfig.isIncrement()){
             return true;
         }
@@ -401,7 +408,13 @@ private boolean canReadData(InputSplit split){
         return !StringUtils.equals(jdbcInputSplit.getStartLocation(), jdbcInputSplit.getEndLocation());
     }
 
-    private String buildQuerySql(InputSplit inputSplit){
+    /**
+     * Build the query sql
+     * @param inputSplit the data split
+     * @return the constructed sql string
+     */
+    protected String buildQuerySql(InputSplit inputSplit){
+        //queryTemplate is built by QuerySqlBuilder
         String querySql = queryTemplate;
 
         if (inputSplit == null){
@@ -412,8 +425,7 @@ private String buildQuerySql(InputSplit inputSplit){
         JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit;
 
         if (StringUtils.isNotEmpty(splitKey)){
-            querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions))
-                    .replace("${M}", String.valueOf(indexOfSubtask));
+            querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions)).replace("${M}", String.valueOf(indexOfSubtask));
         }
 
         if (restoreConfig.isRestore()){
@@ -425,8 +437,12 @@ private String buildQuerySql(InputSplit inputSplit){
             } else {
                 String startLocation = getLocation(restoreColumn.getType(), formatState.getState());
-                String restoreFilter = DBUtil.buildIncrementFilter(databaseInterface, restoreColumn.getType(),
-                        restoreColumn.getName(), startLocation, jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc());
+                String restoreFilter = buildIncrementFilter(restoreColumn.getType(),
+                        restoreColumn.getName(),
+                        startLocation,
+                        jdbcInputSplit.getEndLocation(),
+                        customSql,
+                        incrementConfig.isUseMaxFunc());
 
                 if(StringUtils.isNotEmpty(restoreFilter)){
                     restoreFilter = " and " + restoreFilter;
@@ -446,9 +462,12 @@ private String buildQuerySql(InputSplit inputSplit){
     }
 
     private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql){
-        String incrementFilter = DBUtil.buildIncrementFilter(databaseInterface, incrementConfig.getColumnType(),
-                incrementConfig.getColumnName(), jdbcInputSplit.getStartLocation(),
-                jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc());
+        String incrementFilter = buildIncrementFilter(incrementConfig.getColumnType(),
+                incrementConfig.getColumnName(),
+                jdbcInputSplit.getStartLocation(),
+                jdbcInputSplit.getEndLocation(),
+                customSql,
+                incrementConfig.isUseMaxFunc());
 
         if(StringUtils.isNotEmpty(incrementFilter)){
             incrementFilter = " and " + incrementFilter;
@@ -457,6 +476,93 @@ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql)
         return querySql.replace(DBUtil.INCREMENT_FILTER_PLACEHOLDER, incrementFilter);
     }
 
+    protected String buildIncrementFilter(String incrementColType, String incrementCol, String startLocation, String endLocation, String customSql, boolean useMaxFunc){
+        StringBuilder filter = new StringBuilder(128);
+
+        if (StringUtils.isNotEmpty(customSql)){
+            incrementCol = String.format("%s.%s", DBUtil.TEMPORARY_TABLE_NAME, databaseInterface.quoteColumn(incrementCol));
+        } else {
+            incrementCol = databaseInterface.quoteColumn(incrementCol);
+        }
+
+        String startFilter = buildStartLocationSql(incrementColType, incrementCol, startLocation, useMaxFunc);
+        if (StringUtils.isNotEmpty(startFilter)){
+            filter.append(startFilter);
+        }
+
+        String endFilter = buildEndLocationSql(incrementColType, incrementCol, endLocation);
+        if (StringUtils.isNotEmpty(endFilter)){
+            if (filter.length() > 0){
+                filter.append(" and ").append(endFilter);
+            } else {
+                filter.append(endFilter);
+            }
+        }
+
+        return filter.toString();
+    }
+
+    protected String buildStartLocationSql(String incrementColType, String incrementCol, String startLocation, boolean useMaxFunc){
+        if(StringUtils.isEmpty(startLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(startLocation)){
+            return null;
+        }
+
+        String operator = useMaxFunc ? " >= " : " > ";
+
+        return getLocationSql(incrementColType, incrementCol, startLocation, operator);
+    }
+
+    public String buildEndLocationSql(String incrementColType, String incrementCol, String endLocation){
+        if(StringUtils.isEmpty(endLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(endLocation)){
+            return null;
+        }
+
+        return getLocationSql(incrementColType, incrementCol, endLocation, " < ");
+    }
+
+    private String getLocationSql(String incrementColType, String incrementCol, String location, String operator) {
+        String endTimeStr;
+        String endLocationSql;
+//        boolean isTimeType = ColumnType.isTimeType(incrementColType)
+//                || (databaseInterface.getDatabaseType() == EDatabaseType.SQLServer && ColumnType.NVARCHAR.name().equals(incrementColType));
+        if(ColumnType.isTimeType(incrementColType)){
+            endTimeStr = getTimeStr(Long.parseLong(location));
+            endLocationSql = incrementCol + operator + endTimeStr;
+        } else if(ColumnType.isNumberType(incrementColType)){
+            endLocationSql = incrementCol + operator + location;
+        } else {
+            endTimeStr = String.format("'%s'",location);
+            endLocationSql = incrementCol + operator + endTimeStr;
+        }
+
+        return endLocationSql;
+    }
+
+    protected String getTimeStr(Long startLocation){
+        String timeStr;
+        Timestamp ts = new Timestamp(DBUtil.getMillis(startLocation));
+        ts.setNanos(DBUtil.getNanos(startLocation));
+        timeStr = DBUtil.getNanosTimeStr(ts.toString());
+
+//        if(databaseType == EDatabaseType.SQLServer){
+//            timeStr = timeStr.substring(0,23);
+//        }
+//        if (databaseType == EDatabaseType.Oracle){
+//            if(ColumnType.TIMESTAMP.name().equals(incrementColType)){
+//                timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr);
+//            } else {
+//                timeStr = timeStr.substring(0, 19);
+//                timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr);
+//            }
+//        }
+
+        timeStr = timeStr.substring(0,26);
+        timeStr = String.format("'%s'",timeStr);
+
+        return timeStr;
+    }
+
+
     private String getMaxValueFromDb() {
         String maxValue = null;
         Connection conn = null;
@@ -474,8 +580,10 @@ private String getMaxValueFromDb() {
                         databaseInterface.quoteColumn(incrementConfig.getColumnName()),
                         databaseInterface.quoteTable(table));
             }
-            String startSql = DBUtil.buildStartLocationSql(databaseInterface, incrementConfig.getColumnType(),
-                    databaseInterface.quoteColumn(incrementConfig.getColumnName()), incrementConfig.getStartLocation(), incrementConfig.isUseMaxFunc());
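To make the filter construction above concrete, here is a standalone sketch of its output shape for a numeric increment column. The column name "id", the double-quote quoting, and the start/end values are all illustrative stand-ins for what DatabaseInterface and ColumnType provide at runtime:

    public class IncrementFilterDemo {
        // numeric columns use the raw location value on either side of the operator
        static String locationSql(String col, String location, String operator) {
            return col + operator + location;
        }

        public static void main(String[] args) {
            String col = "\"id\"";
            String start = locationSql(col, "100", " > ");  // useMaxFunc=false -> strict >
            String end = locationSql(col, "200", " < ");
            // Prints: "id" > 100 and "id" < 200
            System.out.println(start + " and " + end);
        }
    }

With useMaxFunc=true the start operator becomes >=, so the row at the recorded start location is read again rather than skipped.

+            String startSql = 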
buildStartLocationSql(incrementConfig.getColumnType(), + databaseInterface.quoteColumn(incrementConfig.getColumnName()), + incrementConfig.getStartLocation(), + incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(startSql)){ queryMaxValueSql += " where " + startSql; } @@ -567,12 +675,4 @@ private void uploadMetricData() throws IOException { } } - @Override - public void closeInternal() throws IOException { - if(incrementConfig.isIncrement() && hadoopConfig != null) { - uploadMetricData(); - } - DBUtil.closeDBResources(resultSet,statement,dbConn, true); - } - } \ No newline at end of file diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java index 658c3cf0b0..90e3b0cb19 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; +import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang.StringUtils; @@ -38,8 +39,9 @@ public class JdbcInputFormatBuilder extends RichInputFormatBuilder { private JdbcInputFormat format; - public JdbcInputFormatBuilder() { - super.format = format = new JdbcInputFormat(); + public JdbcInputFormatBuilder(DatabaseInterface databaseInterface) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(databaseInterface.getDatabaseType().name(), JdbcFormatLoader.INPUT_FORMAT); + super.format = format = (JdbcInputFormat) jdbcFormatLoader.getFormatInstance(); } public void setDrivername(String drivername) { diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java index 595baf6ed7..ce5e8e4f27 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java @@ -23,7 +23,6 @@ import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; -import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.writer.DataWriter; import org.apache.flink.streaming.api.datastream.DataStream; @@ -76,8 +75,6 @@ public JdbcDataWriter(DataTransferConfig config) { WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter(); dbUrl = writerConfig.getParameter().getConnection().get(0).getJdbcUrl(); - dbUrl = DBUtil.formatJdbcUrl(writerConfig.getName(), dbUrl); - username = writerConfig.getParameter().getStringVal(KEY_USERNAME); password = writerConfig.getParameter().getStringVal(KEY_PASSWORD); table = writerConfig.getParameter().getConnection().get(0).getTable().get(0); @@ -95,7 +92,7 @@ public JdbcDataWriter(DataTransferConfig config) { @Override public DataStreamSink writeData(DataStream dataSet) { - JdbcOutputFormatBuilder builder = new 
JdbcOutputFormatBuilder(); + JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(databaseInterface); builder.setDriverName(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java index 0bd3adff21..de486d39ee 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java @@ -19,6 +19,7 @@ import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.outputformat.RichOutputFormatBuilder; +import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import java.util.List; @@ -32,8 +33,9 @@ public class JdbcOutputFormatBuilder extends RichOutputFormatBuilder { private JdbcOutputFormat format; - public JdbcOutputFormatBuilder() { - super.format = format = new JdbcOutputFormat(); + public JdbcOutputFormatBuilder(DatabaseInterface databaseInterface) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(databaseInterface.getDatabaseType().name(), JdbcFormatLoader.OUTPUT_FORMAT); + super.format = format = (JdbcOutputFormat) jdbcFormatLoader.getFormatInstance(); } public JdbcOutputFormatBuilder(JdbcOutputFormat format) { diff --git a/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java new file mode 100644 index 0000000000..f0b1abde1b --- /dev/null +++ b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java @@ -0,0 +1,46 @@ +package com.dtstack.flinkx.sqlserver.format; + +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import org.apache.flink.types.Row; + +import java.io.IOException; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class SqlserverInputFormat extends JdbcInputFormat { + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if(descColumnTypeList != null && descColumnTypeList.size() != 0) { + if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 
1 : 0); + } + } + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } +} From 61ca2f79449f580c87ad05b23ddde6f82a8742f9 Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 20 Sep 2019 19:11:32 +0800 Subject: [PATCH 08/62] =?UTF-8?q?=E5=AE=8C=E6=88=90reader=E7=9A=84?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=8B=86=E5=88=86=E4=B8=8E=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/inputformat/RichInputFormat.java | 7 +- .../flinkx/gbase/format/GbaseInputFormat.java | 93 ++++++++++++ .../mysqld/format/MysqldInputFormat.java | 135 ++++++++++++++++++ .../flinkx/mysqld/reader/MysqldReader.java | 47 ++++++ .../flinkx/mysql/format/MysqlInputFormat.java | 30 +++- .../oracle/format/OracleInputFormat.java | 47 +++++- .../format/PostgresqlInputFormat.java | 126 ++++++++++++++++ .../postgresql/reader/PostgresqlReader.java | 2 + .../com/dtstack/flinkx/rdb/DataSource.java | 17 +++ .../flinkx/rdb/ParameterValuesProvider.java | 17 +++ .../flinkx/rdb/loader/JdbcFormatLoader.java | 17 +++ .../com/dtstack/flinkx/rdb/util/DBUtil.java | 12 +- .../DistributedJdbcDataReader.java | 16 +-- .../IncrementConfig.java | 5 +- .../JdbcDataReader.java | 2 +- .../DistributedJdbcInputFormat.java | 35 ++--- .../DistributedJdbcInputFormatBuilder.java | 7 +- .../JdbcInputFormat.java | 133 ++++++++++++----- .../JdbcInputFormatBuilder.java | 4 +- .../format/SqlserverInputFormat.java | 68 ++++++++- 20 files changed, 726 insertions(+), 94 deletions(-) create mode 100644 flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java create mode 100644 flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java create mode 100644 flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java index 973f0d61ea..ac407f8730 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -24,7 +24,6 @@ import com.dtstack.flinkx.metrics.BaseMetric; import com.dtstack.flinkx.reader.ByteRateLimiter; import com.dtstack.flinkx.restore.FormatState; -import org.apache.commons.lang.StringUtils; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.api.common.io.DefaultInputSplitAssigner; import org.apache.flink.api.common.io.statistics.BaseStatistics; @@ -33,6 +32,7 @@ import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.Arrays; import java.util.Map; @@ -57,6 +57,9 @@ public abstract class RichInputFormat extends org.apache.flink.api.common.io.Ric protected long bytes; protected ByteRateLimiter byteRateLimiter; + /** + * 断点续传配置 + */ protected RestoreConfig restoreConfig; protected FormatState formatState; diff --git a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java new file mode 100644 index 0000000000..8ab4bae7da --- /dev/null +++ b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.dtstack.flinkx.gbase.format;
+
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.util.DBUtil;
+import com.dtstack.flinkx.reader.MetaColumn;
+import com.dtstack.flinkx.util.ClassUtil;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.core.io.InputSplit;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.ArrayList;
+
+/**
+ * Date: 2019/09/20
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class GbaseInputFormat extends JdbcInputFormat {
+    @Override
+    public void openInternal(InputSplit inputSplit) throws IOException {
+        try {
+            LOG.info(inputSplit.toString());
+
+            ClassUtil.forName(drivername, getClass().getClassLoader());
+
+            if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){
+                getMaxValue(inputSplit);
+            }
+
+            initMetric(inputSplit);
+
+            if(!canReadData(inputSplit)){
+                LOG.warn("Skip reading data because the start location equals the end location");
+
+                hasNext = false;
+                return;
+            }
+
+            dbConn = DBUtil.getConnection(dbURL, username, password);
+
+            // Some drivers require auto-commit to be disabled before fetchSize takes effect
+            dbConn.setAutoCommit(false);
+
+            statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+            // GBase, like MySQL, only streams results with fetchSize = Integer.MIN_VALUE
+            statement.setFetchSize(Integer.MIN_VALUE);
+            statement.setQueryTimeout(queryTimeOut);
+            String querySql = buildQuerySql(inputSplit);
+            resultSet = statement.executeQuery(querySql);
+            columnCount = resultSet.getMetaData().getColumnCount();
+
+            boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("(");
+            if(splitWithRowCol){
+                columnCount = columnCount-1;
+            }
+
+            hasNext = resultSet.next();
+
+            if (StringUtils.isEmpty(customSql)){
+                descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns);
+            } else {
+                descColumnTypeList = new ArrayList<>();
+                for (MetaColumn metaColumn : metaColumns) {
+                    descColumnTypeList.add(metaColumn.getName());
+                }
+            }
+
+        } catch (SQLException se) {
+            throw new IllegalArgumentException("open() failed. " + se.getMessage(), se);
+        }
+
+        LOG.info("JdbcInputFormat[{}]open: end", jobName);
+    }
+}
diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java
new file mode 100644
index 0000000000..beda91cc76
--- /dev/null
+++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package com.dtstack.flinkx.mysqld.format; + +import com.dtstack.flinkx.rdb.DataSource; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; +import com.dtstack.flinkx.util.DateUtil; +import com.dtstack.flinkx.util.StringUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.flink.types.Row; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class MysqldInputFormat extends DistributedJdbcInputFormat { + + @Override + protected void openNextSource() throws SQLException { + DataSource currentSource = sourceList.get(sourceIndex); + currentConn = DBUtil.getConnection(currentSource.getJdbcUrl(), currentSource.getUserName(), currentSource.getPassword()); + currentConn.setAutoCommit(false); + String queryTemplate = new QuerySqlBuilder(databaseInterface, currentSource.getTable(),metaColumns,splitKey, + where, currentSource.isSplitByKey(), false, false).buildSql(); + currentStatement = currentConn.createStatement(resultSetType, resultSetConcurrency); + + if (currentSource.isSplitByKey()){ + String n = currentSource.getParameterValues()[0].toString(); + String m = currentSource.getParameterValues()[1].toString(); + queryTemplate = queryTemplate.replace("${N}",n).replace("${M}",m); + + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Executing '%s' with parameters %s", queryTemplate, + Arrays.deepToString(currentSource.getParameterValues()))); + } + } + + currentStatement.setFetchSize(Integer.MIN_VALUE); + currentStatement.setQueryTimeout(queryTimeOut); + currentResultSet = currentStatement.executeQuery(queryTemplate); + columnCount = currentResultSet.getMetaData().getColumnCount(); + + if(descColumnTypeList == null) { + descColumnTypeList = DBUtil.analyzeTable(currentSource.getJdbcUrl(), currentSource.getUserName(), + currentSource.getPassword(),databaseInterface, currentSource.getTable(),metaColumns); + } + + LOG.info("open source: {} ,table: {}", currentSource.getJdbcUrl(), currentSource.getTable()); + } + + @Override + protected boolean readNextRecord() throws IOException { + try{ + if(currentConn == null){ + openNextSource(); + } + + hasNext = currentResultSet.next(); + if (hasNext){ + currentRecord = new Row(columnCount); + + for (int pos = 0; pos < currentRecord.getArity(); pos++) { + Object obj = currentResultSet.getObject(pos + 1); + if(obj != null) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + String columnType = descColumnTypeList.get(pos); + if("year".equalsIgnoreCase(columnType)) { + java.util.Date date = (java.util.Date) obj; + obj = DateUtil.dateToYearString(date); + } else if("tinyint".equalsIgnoreCase(columnType) + || "bit".equalsIgnoreCase(columnType)) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 
1 : 0); + } + } + } + obj = clobToString(obj); + } + currentRecord.setField(pos, obj); + } + + if(!"*".equals(metaColumns.get(0).getName())){ + for (int i = 0; i < columnCount; i++) { + Object val = currentRecord.getField(i); + if(val == null && metaColumns.get(i).getValue() != null){ + val = metaColumns.get(i).getValue(); + } + + if (val instanceof String){ + val = StringUtil.string2col(String.valueOf(val),metaColumns.get(i).getType(),metaColumns.get(i).getTimeFormat()); + currentRecord.setField(i,val); + } + } + } + } else { + if(sourceIndex + 1 < sourceList.size()){ + closeCurrentSource(); + sourceIndex++; + return readNextRecord(); + } + } + + return !hasNext; + }catch (SQLException se) { + throw new IOException("Couldn't read data - " + se.getMessage(), se); + } catch (Exception npe) { + throw new IOException("Couldn't access resultSet", npe); + } + } +} diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java index 56dbb38299..4da26aab5c 100644 --- a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java +++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java @@ -1,14 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.mysqld.reader; import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.config.ReaderConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; +import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.datareader.DistributedJdbcDataReader; +import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + public class MysqldReader extends DistributedJdbcDataReader { public MysqldReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); setDatabaseInterface(new MySqlDatabaseMeta()); } + + @Override + protected List buildConnections(){ + List sourceList = new ArrayList<>(connectionConfigs.size()); + for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { + String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) + ? username : connectionConfig.getUsername(); + String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0) + ? 
password : connectionConfig.getPassword(); + String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); + for (String table : connectionConfig.getTable()) { + DataSource source = new DataSource(); + source.setTable(table); + source.setUserName(curUsername); + source.setPassword(curPassword); + source.setJdbcUrl(curJdbcUrl); + + sourceList.add(source); + } + } + + return sourceList; + } } diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java index 85619af16e..cfdd48dbca 100644 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.mysql.format; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; @@ -5,6 +22,7 @@ import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.DateUtil; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; @@ -93,15 +111,13 @@ public Row nextRecordInternal(Row row) throws IOException { for (int pos = 0; pos < row.getArity(); pos++) { Object obj = resultSet.getObject(pos + 1); if(obj != null) { - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - if(descColumnTypeList.get(pos).equalsIgnoreCase("year")) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + String columnType = descColumnTypeList.get(pos); + if("year".equalsIgnoreCase(columnType)) { java.util.Date date = (java.util.Date) obj; obj = DateUtil.dateToYearString(date); - } else if(descColumnTypeList.get(pos).equalsIgnoreCase("tinyint")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 1 : 0); - } - } else if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { + } else if("tinyint".equalsIgnoreCase(columnType) + || "bit".equalsIgnoreCase(columnType)) { if(obj instanceof Boolean) { obj = ((Boolean) obj ? 
1 : 0); } diff --git a/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java index 5f3d2f8961..2821d45160 100644 --- a/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java +++ b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java @@ -1,9 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.oracle.format; +import com.dtstack.flinkx.enums.ColumnType; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.types.Row; import java.io.IOException; +import java.sql.Timestamp; import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; @@ -26,7 +46,8 @@ public Row nextRecordInternal(Row row) throws IOException { for (int pos = 0; pos < row.getArity(); pos++) { Object obj = resultSet.getObject(pos + 1); if(obj != null) { - if((obj instanceof java.util.Date || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { + if((obj instanceof java.util.Date + || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { obj = resultSet.getTimestamp(pos + 1); } obj = clobToString(obj); @@ -39,4 +60,28 @@ public Row nextRecordInternal(Row row) throws IOException { throw new IOException("Couldn't read data - " + e.getMessage(), e); } } + + /** + * 构建时间边界字符串 + * @param location 边界位置(起始/结束) + * @param incrementColType 增量字段类型 + * @return + */ + @Override + protected String getTimeStr(Long location, String incrementColType){ + String timeStr; + Timestamp ts = new Timestamp(DBUtil.getMillis(location)); + ts.setNanos(DBUtil.getNanos(location)); + timeStr = DBUtil.getNanosTimeStr(ts.toString()); + + if(ColumnType.TIMESTAMP.name().equals(incrementColType)){ + timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr); + } else { + timeStr = timeStr.substring(0, 19); + timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr); + } + timeStr = String.format("'%s'",timeStr); + + return timeStr; + } } diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java new file mode 100644 index 0000000000..63abdd400e --- /dev/null +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.postgresql.format; + +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; +import com.dtstack.flinkx.reader.MetaColumn; +import com.dtstack.flinkx.util.ClassUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.core.io.InputSplit; +import org.apache.flink.types.Row; + +import java.io.IOException; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class PostgresqlInputFormat extends JdbcInputFormat { + + @Override + public void openInternal(InputSplit inputSplit) throws IOException { + try { + LOG.info(inputSplit.toString()); + + ClassUtil.forName(drivername, getClass().getClassLoader()); + + if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){ + getMaxValue(inputSplit); + } + + initMetric(inputSplit); + + if(!canReadData(inputSplit)){ + LOG.warn("Not read data when the start location are equal to end location"); + + hasNext = false; + return; + } + + dbConn = DBUtil.getConnection(dbURL, username, password); + + // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 + dbConn.setAutoCommit(false); + + // 读取前先提交事务,确保程序异常退出时,下次再读取PG时的顺序不变 + dbConn.commit(); + Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); + statement.setFetchSize(fetchSize); + statement.setQueryTimeout(queryTimeOut); + String querySql = buildQuerySql(inputSplit); + resultSet = statement.executeQuery(querySql); + columnCount = resultSet.getMetaData().getColumnCount(); + + boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); + if(splitWithRowCol){ + columnCount = columnCount-1; + } + + hasNext = resultSet.next(); + + if (StringUtils.isEmpty(customSql)){ + descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns); + } else { + descColumnTypeList = new ArrayList<>(); + for (MetaColumn metaColumn : metaColumns) { + descColumnTypeList.add(metaColumn.getName()); + } + } + + } catch (SQLException se) { + throw new IllegalArgumentException("open() failed." 
+ se.getMessage(), se); + } + + LOG.info("JdbcInputFormat[{}]open: end", jobName); + } + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + obj = typeConverter.convert(obj,descColumnTypeList.get(pos)); + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } +} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java index d333c77964..4e4516115c 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java @@ -37,5 +37,7 @@ public PostgresqlReader(DataTransferConfig config, StreamExecutionEnvironment en super(config, env); setDatabaseInterface(new PostgresqlDatabaseMeta()); setTypeConverterInterface(new PostgresqlTypeConverter()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); + } } diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java index 1840f866dc..d9f3508798 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.rdb; import java.io.Serializable; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java index 48faab016e..79cac26365 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package com.dtstack.flinkx.rdb;
 
 import java.io.Serializable;
diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java
index b2d56f8d48..ad942fa79a 100644
--- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java
+++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ package com.dtstack.flinkx.rdb.loader; import org.apache.flink.util.Preconditions; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java index 7ad8fa8b71..4b23668d65 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java @@ -302,7 +302,6 @@ public static void setParameterValue(Object param,PreparedStatement statement,in } } - public static Object clobToString(Object obj) throws Exception{ String dataStr; if(obj instanceof Clob){ @@ -374,9 +373,7 @@ public static long getMillis(long startLocation){ * @return 格式化后jdbc连接URL字符串 */ public static String formatJdbcUrl(String dbUrl, Map extParamMap){ -// if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER) -// || pluginName.equalsIgnoreCase(PluginNameConstrant.POSTGRESQL_READER) -// || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER) +// if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER) // || pluginName.equalsIgnoreCase(PluginNameConstrant.GBASE_WRITER) ){ String[] splits = DB_PATTERN.split(dbUrl); @@ -389,14 +386,11 @@ public static String formatJdbcUrl(String dbUrl, Map extParamMap) } } - paramMap.put("useCursorFetch", "true"); - paramMap.put("rewriteBatchedStatements", "true"); if(!CollectionUtil.isNullOrEmpty(extParamMap)){ paramMap.putAll(extParamMap); } -// if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)){ -// paramMap.put("zeroDateTimeBehavior","convertToNull"); -// } + paramMap.put("useCursorFetch", "true"); + paramMap.put("rewriteBatchedStatements", "true"); StringBuffer sb = new StringBuffer(dbUrl.length() + 128); sb.append(splits[0]).append("?"); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java index e2701b03cb..5c265935df 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java @@ -28,6 +28,7 @@ import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.DataReader; import com.dtstack.flinkx.reader.MetaColumn; +import org.apache.commons.lang.StringUtils; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; @@ -63,7 +64,7 @@ public class DistributedJdbcDataReader extends DataReader { protected int queryTimeOut; - private List connectionConfigs; + protected List connectionConfigs; private static String DISTRIBUTED_TAG = "d"; @@ -84,7 +85,7 @@ protected DistributedJdbcDataReader(DataTransferConfig config, StreamExecutionEn @Override public DataStream readData() { - DistributedJdbcInputFormatBuilder builder = new DistributedJdbcInputFormatBuilder(); + DistributedJdbcInputFormatBuilder builder = new DistributedJdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); builder.setDrivername(databaseInterface.getDriverClass()); builder.setUsername(username); builder.setPassword(password); @@ -104,14 +105,11 @@ public DataStream readData() { return createInput(format, (databaseInterface.getDatabaseType() + DISTRIBUTED_TAG + 
"reader").toLowerCase()); } - private List buildConnections(){ - List sourceList = new ArrayList<>(); + protected List buildConnections(){ + List sourceList = new ArrayList<>(connectionConfigs.size()); for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { - String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) - ? username : connectionConfig.getUsername(); - String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0) - ? password : connectionConfig.getPassword(); - //todo mysql添加额外参数 + String curUsername = (StringUtils.isBlank(connectionConfig.getUsername())) ? username : connectionConfig.getUsername(); + String curPassword = (StringUtils.isBlank(connectionConfig.getPassword())) ? password : connectionConfig.getPassword(); String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), null); for (String table : connectionConfig.getTable()) { DataSource source = new DataSource(); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java index 03f8f92458..5ae1fc6a6a 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java @@ -36,7 +36,7 @@ public class IncrementConfig implements Serializable { * 用于标记是否保存endLocation位置的一条或多条数据 * true:不保存 * false(默认):保存 - * 某些情况下可能出现最后几条数据被重复记录的情况,可能 + * 某些情况下可能出现最后几条数据被重复记录的情况,可以将此参数配置为true */ private boolean useMaxFunc; @@ -48,6 +48,9 @@ public class IncrementConfig implements Serializable { private String startLocation; + /** + * 发送查询累加器请求的间隔时间 + */ private int requestAccumulatorInterval; public int getRequestAccumulatorInterval() { diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java index e47a5d82b7..6582818cba 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java @@ -99,7 +99,7 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env) @Override public DataStream readData() { - JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface); + JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); builder.setDrivername(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java index f3c66da9f3..8c4e41df64 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java @@ -18,7 +18,6 @@ package com.dtstack.flinkx.rdb.inputformat; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.inputformat.RichInputFormat; import 
com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.DatabaseInterface; @@ -33,7 +32,10 @@ import org.apache.flink.types.Row; import java.io.*; -import java.sql.*; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -66,15 +68,15 @@ public class DistributedJdbcInputFormat extends RichInputFormat { protected List sourceList; - private transient int sourceIndex; + protected transient int sourceIndex; - private transient Connection currentConn; + protected transient Connection currentConn; - private transient Statement currentStatement; + protected transient Statement currentStatement; - private transient ResultSet currentResultSet; + protected transient ResultSet currentResultSet; - private transient Row currentRecord; + protected transient Row currentRecord; protected String username; @@ -111,10 +113,10 @@ protected void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed." + e.getMessage(), e); } - LOG.info("JdbcInputFormat[" + jobName + "]open: end"); + LOG.info("JdbcInputFormat[{}}]open: end", jobName); } - private void openNextSource() throws SQLException{ + protected void openNextSource() throws SQLException{ DataSource currentSource = sourceList.get(sourceIndex); currentConn = DBUtil.getConnection(currentSource.getJdbcUrl(), currentSource.getUserName(), currentSource.getPassword()); currentConn.setAutoCommit(false); @@ -133,12 +135,7 @@ private void openNextSource() throws SQLException{ } } - if(databaseInterface.getDatabaseType() == EDatabaseType.MySQL){ - currentStatement.setFetchSize(Integer.MIN_VALUE); - } else { - currentStatement.setFetchSize(fetchSize); - } - + currentStatement.setFetchSize(fetchSize); currentStatement.setQueryTimeout(queryTimeOut); currentResultSet = currentStatement.executeQuery(queryTemplate); columnCount = currentResultSet.getMetaData().getColumnCount(); @@ -148,10 +145,10 @@ private void openNextSource() throws SQLException{ currentSource.getPassword(),databaseInterface, currentSource.getTable(),metaColumns); } - LOG.info("open source:" + currentSource.getJdbcUrl() + ",table:" + currentSource.getTable()); + LOG.info("open source: {} ,table: {}", currentSource.getJdbcUrl(), currentSource.getTable()); } - private boolean readNextRecord() throws IOException{ + protected boolean readNextRecord() throws IOException{ try{ if(currentConn == null){ openNextSource(); @@ -160,8 +157,6 @@ private boolean readNextRecord() throws IOException{ hasNext = currentResultSet.next(); if (hasNext){ currentRecord = new Row(columnCount); - //todo -// DBUtil.getRow(databaseInterface.getDatabaseType(),currentRecord,descColumnTypeList,currentResultSet,typeConverter); if(!"*".equals(metaColumns.get(0).getName())){ for (int i = 0; i < columnCount; i++) { Object val = currentRecord.getField(i); @@ -196,7 +191,7 @@ protected Row nextRecordInternal(Row row) throws IOException { return currentRecord; } - private void closeCurrentSource(){ + protected void closeCurrentSource(){ try { DBUtil.closeDBResources(currentResultSet,currentStatement,currentConn, true); currentConn = null; diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java index 1c1ad058e7..f8b4bb04e0 100644 --- 
a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.DatabaseInterface; +import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang.StringUtils; @@ -35,10 +36,12 @@ */ public class DistributedJdbcInputFormatBuilder extends RichInputFormatBuilder { + private static String DISTRIBUTED_TAG = "d"; private DistributedJdbcInputFormat format; - public DistributedJdbcInputFormatBuilder() { - super.format = this.format = new DistributedJdbcInputFormat(); + public DistributedJdbcInputFormatBuilder(String name) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(name + DISTRIBUTED_TAG, JdbcFormatLoader.INPUT_FORMAT); + super.format = format = (DistributedJdbcInputFormat) jdbcFormatLoader.getFormatInstance(); } public void setDrivername(String driverName) { diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 39cc51efea..3ba932e01d 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -101,6 +101,9 @@ public class JdbcInputFormat extends RichInputFormat { protected int fetchSize; + /** + * 各DatabaseMeta.getQueryTimeout()返回的超时时间,默认1000ms + */ protected int queryTimeOut; protected int numPartitions; @@ -167,22 +170,10 @@ public void openInternal(InputSplit inputSplit) throws IOException { dbConn = DBUtil.getConnection(dbURL, username, password); - // 部分驱动需要关闭事务自动提交,featchSize参数才会起作用 + // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 dbConn.setAutoCommit(false); - -// // 读取前先提交事务,确保程序异常退出时,下次再读取PG时的顺序不变 -// if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){ -// dbConn.commit(); -// } -// -// Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); -// if(EDatabaseType.MySQL == databaseInterface.getDatabaseType() -// || EDatabaseType.GBase == databaseInterface.getDatabaseType()){ -// statement.setFetchSize(Integer.MIN_VALUE); -// } else { -// statement.setFetchSize(fetchSize); -// } -// + Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); + statement.setFetchSize(fetchSize); statement.setQueryTimeout(queryTimeOut); String querySql = buildQuerySql(inputSplit); resultSet = statement.executeQuery(querySql); @@ -208,7 +199,7 @@ public void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed." 
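DistributedJdbcInputFormatBuilder now resolves its concrete format through JdbcFormatLoader, keyed by the database name plus the "d" distributed tag. The loader's class-name template is not part of this patch, so the following is a hypothetical sketch of convention-based resolution, with the package layout inferred from the new *InputFormat classes in this series:

// Hypothetical sketch: resolves "db2" -> com.dtstack.flinkx.db2.format.Db2InputFormat.
// The real JdbcFormatLoader's package/class template may differ.
public final class FormatLoaderSketch {
    static Object loadInputFormat(String dbType) throws Exception {
        String name = dbType.toLowerCase();
        String cap = Character.toUpperCase(name.charAt(0)) + name.substring(1);
        String className = "com.dtstack.flinkx." + name + ".format." + cap + "InputFormat";
        return Class.forName(className).getDeclaredConstructor().newInstance();
    }
}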
+ se.getMessage(), se); } - LOG.info("JdbcInputFormat[" + jobName + "]open: end"); + LOG.info("JdbcInputFormat[{}]open: end", jobName); } @@ -311,9 +302,15 @@ protected void initMetric(InputSplit split){ getRuntimeContext().addAccumulator(Metrics.END_LOCATION,endLocationAccumulator); } + /** + * 将增量任务的数据最大值设置到累加器中 + * @param inputSplit 数据分片 + */ protected void getMaxValue(InputSplit inputSplit){ String maxValue = null; + //第0个通道新建累加器并保存最大值,多通道下其他通道从historyServer中获取最大值 if (inputSplit.getSplitNumber() == 0){ + //从数据库中获取当前增量字段的最大值 maxValue = getMaxValueFromDb(); maxValueAccumulator = new StringAccumulator(); maxValueAccumulator.add(maxValue); @@ -343,6 +340,7 @@ protected void getMaxValue(InputSplit inputSplit){ */ int maxAcquireTimes = (queryTimeOut / incrementConfig.getRequestAccumulatorInterval()) + 10; + //当前重试次数 int acquireTimes = 0; while (StringUtils.isEmpty(maxValue) && acquireTimes < maxAcquireTimes){ try { @@ -365,6 +363,13 @@ protected void getMaxValue(InputSplit inputSplit){ ((JdbcInputSplit) inputSplit).setEndLocation(maxValue); } + /** + * 从historyServer中获取增量最大值 + * @param httpClient httpClient + * @param monitors 请求的URL数组 + * @return + */ + @SuppressWarnings("unchecked") private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[] monitors){ String maxValue = null; Gson gson = new Gson(); @@ -395,6 +400,12 @@ private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[ return maxValue; } + /** + * 判断增量任务是否还能继续读取数据 + * 增量任务,startLocation = endLocation且两者都不为null,返回false,其余情况返回true + * @param split 数据分片 + * @return + */ protected boolean canReadData(InputSplit split){ if (!incrementConfig.isIncrement()){ return true; @@ -418,7 +429,7 @@ protected String buildQuerySql(InputSplit inputSplit){ String querySql = queryTemplate; if (inputSplit == null){ - LOG.warn(String.format("Executing sql is: '%s'", querySql)); + LOG.warn("Executing sql is: '{}'", querySql); return querySql; } @@ -428,6 +439,7 @@ protected String buildQuerySql(InputSplit inputSplit){ querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions)) .replace("${M}", String.valueOf(indexOfSubtask)); } + //是否开启断点续传 if (restoreConfig.isRestore()){ if(formatState == null){ querySql = querySql.replace(DBUtil.RESTORE_FILTER_PLACEHOLDER, StringUtils.EMPTY); @@ -456,11 +468,17 @@ protected String buildQuerySql(InputSplit inputSplit){ querySql = buildIncrementSql(jdbcInputSplit, querySql); } - LOG.warn(String.format("Executing sql is: '%s'", querySql)); + LOG.warn("Executing sql is: '{}}'", querySql); return querySql; } + /** + * 构造增量任务查询sql + * @param jdbcInputSplit 数据切片 + * @param querySql 已经创建的查询sql + * @return + */ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql){ String incrementFilter = buildIncrementFilter(incrementConfig.getColumnType(), incrementConfig.getColumnName(), @@ -476,6 +494,16 @@ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql) return querySql.replace(DBUtil.INCREMENT_FILTER_PLACEHOLDER, incrementFilter); } + /** + * 构建增量任务查询sql的过滤条件 + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param startLocation 开始位置 + * @param endLocation 结束位置 + * @param customSql 用户自定义sql + * @param useMaxFunc 是否保存结束位置数据 + * @return + */ protected String buildIncrementFilter(String incrementColType,String incrementCol, String startLocation,String endLocation, String customSql, boolean useMaxFunc){ StringBuilder filter = new StringBuilder(128); @@ -502,6 +530,14 @@ protected String 
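getMaxValue above lets subtask 0 publish the database maximum through a Flink accumulator while the other subtasks poll it back with a bounded number of attempts. The retry arithmetic in isolation (illustrative names, not FlinkX API):

import java.util.function.Supplier;

public final class AccumulatorPollSketch {
    // Non-zero subtasks poll until the max value shows up or attempts run out.
    // With queryTimeOut = 300s and a 2s interval, this allows 160 attempts.
    static String pollForMax(Supplier<String> fetchOnce, int queryTimeOutSeconds, int intervalSeconds)
            throws InterruptedException {
        int maxAcquireTimes = (queryTimeOutSeconds / intervalSeconds) + 10;
        String maxValue = null;
        for (int acquireTimes = 0; (maxValue == null || maxValue.isEmpty()) && acquireTimes < maxAcquireTimes; acquireTimes++) {
            Thread.sleep(intervalSeconds * 1000L);
            maxValue = fetchOnce.get();   // e.g. an HTTP GET against the history server REST API
        }
        return maxValue;
    }
}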
buildIncrementFilter(String incrementColType,String incrementCo return filter.toString(); } + /** + * 构建起始位置sql + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param startLocation 开始位置 + * @param useMaxFunc 是否保存结束位置数据 + * @return + */ protected String buildStartLocationSql(String incrementColType, String incrementCol, String startLocation, boolean useMaxFunc){ if(org.apache.commons.lang.StringUtils.isEmpty(startLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(startLocation)){ return null; @@ -512,6 +548,13 @@ protected String buildStartLocationSql(String incrementColType, String increment return getLocationSql(incrementColType, incrementCol, startLocation, operator); } + /** + * 构建结束位置sql + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param endLocation 结束位置 + * @return + */ public String buildEndLocationSql(String incrementColType, String incrementCol, String endLocation){ if(org.apache.commons.lang.StringUtils.isEmpty(endLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(endLocation)){ return null; @@ -520,13 +563,19 @@ public String buildEndLocationSql(String incrementColType, String incrementCol, return getLocationSql(incrementColType, incrementCol, endLocation, " < "); } - private String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { + /** + * 构建边界位置sql + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param location 边界位置(起始/结束) + * @param operator 判断符( >, >=, <) + * @return + */ + protected String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { String endTimeStr; String endLocationSql; -// boolean isTimeType = ColumnType.isTimeType(incrementColType) -// || (databaseInterface.getDatabaseType() == EDatabaseType.SQLServer && ColumnType.NVARCHAR.name().equals(incrementColType)); if(ColumnType.isTimeType(incrementColType)){ - endTimeStr = getTimeStr(Long.parseLong(location)); + endTimeStr = getTimeStr(Long.parseLong(location), incrementColType); endLocationSql = incrementCol + operator + endTimeStr; } else if(ColumnType.isNumberType(incrementColType)){ endLocationSql = incrementCol + operator + location; @@ -538,31 +587,27 @@ private String getLocationSql(String incrementColType, String incrementCol, Stri return endLocationSql; } - protected String getTimeStr(Long startLocation){ + /** + * 构建时间边界字符串 + * @param location 边界位置(起始/结束) + * @param incrementColType 增量字段类型 + * @return + */ + protected String getTimeStr(Long location, String incrementColType){ String timeStr; - Timestamp ts = new Timestamp(DBUtil.getMillis(startLocation)); - ts.setNanos(DBUtil.getNanos(startLocation)); + Timestamp ts = new Timestamp(DBUtil.getMillis(location)); + ts.setNanos(DBUtil.getNanos(location)); timeStr = DBUtil.getNanosTimeStr(ts.toString()); - -// if(databaseType == EDatabaseType.SQLServer){ -// timeStr = timeStr.substring(0,23); -// } -// if (databaseType == EDatabaseType.Oracle){ -// if(ColumnType.TIMESTAMP.name().equals(incrementColType)){ -// timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr); -// } else { -// timeStr = timeStr.substring(0, 19); -// timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr); -// } -// } - timeStr = timeStr.substring(0,26); timeStr = String.format("'%s'",timeStr); return timeStr; } - + /** + * 从数据库中查询增量字段的最大值 + * @return + */ private String getMaxValueFromDb() { String maxValue = null; Connection conn = null; @@ -607,6 +652,12 @@ private String 
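Taken together, the helpers above emit an interval over the increment column: ">" (or ">=", depending on the useMaxFunc setting) at the start and an exclusive "<" at the end, so the maximum captured at job start stays a stable upper bound. A self-contained sketch of the composition, simplified to numeric columns:

public final class IncrementFilterSketch {
    // Start boundary: ">" normally, ">=" when the boundary row itself must be re-read (cf. useMaxFunc).
    static String startFilter(String col, String start, boolean inclusive) {
        return col + (inclusive ? " >= " : " > ") + start;
    }

    // End boundary is exclusive, matching buildEndLocationSql's " < " operator.
    static String endFilter(String col, String end) {
        return col + " < " + end;
    }

    public static void main(String[] args) {
        // -> "id > 1000 AND id < 2000"
        System.out.println(startFilter("id", "1000", false) + " AND " + endFilter("id", "2000"));
    }
}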
getMaxValueFromDb() { } } + /** + * 边界位置值转字符串 + * @param columnType 边界字段类型 + * @param columnVal 边界值 + * @return + */ private String getLocation(String columnType, Object columnVal){ String location; if (columnVal == null){ @@ -641,6 +692,10 @@ private String getLocation(String columnType, Object columnVal){ return location; } + /** + * 上传累加器数据 + * @throws IOException + */ private void uploadMetricData() throws IOException { FSDataOutputStream out = null; try { diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java index 90e3b0cb19..d0976b6a13 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java @@ -39,8 +39,8 @@ public class JdbcInputFormatBuilder extends RichInputFormatBuilder { private JdbcInputFormat format; - public JdbcInputFormatBuilder(DatabaseInterface databaseInterface) { - JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(databaseInterface.getDatabaseType().name(), JdbcFormatLoader.INPUT_FORMAT); + public JdbcInputFormatBuilder(String dataType) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(dataType, JdbcFormatLoader.INPUT_FORMAT); super.format = format = (JdbcInputFormat) jdbcFormatLoader.getFormatInstance(); } diff --git a/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java index f0b1abde1b..fc95e68534 100644 --- a/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java +++ b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java @@ -1,9 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.dtstack.flinkx.sqlserver.format; +import com.dtstack.flinkx.enums.ColumnType; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; +import org.apache.commons.collections.CollectionUtils; import org.apache.flink.types.Row; import java.io.IOException; +import java.sql.Timestamp; import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; @@ -26,7 +47,7 @@ public Row nextRecordInternal(Row row) throws IOException { for (int pos = 0; pos < row.getArity(); pos++) { Object obj = resultSet.getObject(pos + 1); if(obj != null) { - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { if(obj instanceof Boolean) { obj = ((Boolean) obj ? 1 : 0); @@ -43,4 +64,49 @@ public Row nextRecordInternal(Row row) throws IOException { throw new IOException("Couldn't read data - " + e.getMessage(), e); } } + + /** + * 构建边界位置sql + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param location 边界位置(起始/结束) + * @param operator 判断符( >, >=, <) + * @return + */ + @Override + protected String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { + String endTimeStr; + String endLocationSql; + boolean isTimeType = ColumnType.isTimeType(incrementColType) + || ColumnType.NVARCHAR.name().equals(incrementColType); + if(isTimeType){ + endTimeStr = getTimeStr(Long.parseLong(location), incrementColType); + endLocationSql = incrementCol + operator + endTimeStr; + } else if(ColumnType.isNumberType(incrementColType)){ + endLocationSql = incrementCol + operator + location; + } else { + endTimeStr = String.format("'%s'",location); + endLocationSql = incrementCol + operator + endTimeStr; + } + + return endLocationSql; + } + + /** + * 构建时间边界字符串 + * @param location 边界位置(起始/结束) + * @param incrementColType 增量字段类型 + * @return + */ + @Override + protected String getTimeStr(Long location, String incrementColType){ + String timeStr; + Timestamp ts = new Timestamp(DBUtil.getMillis(location)); + ts.setNanos(DBUtil.getNanos(location)); + timeStr = DBUtil.getNanosTimeStr(ts.toString()); + timeStr = timeStr.substring(0,23); + timeStr = String.format("'%s'",timeStr); + + return timeStr; + } } From fde6891eeb19b4cd8e8d7e54770778b303af36df Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 20 Sep 2019 21:10:36 +0800 Subject: [PATCH 09/62] =?UTF-8?q?=E5=AE=8C=E6=88=90writer=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E6=8B=86=E5=88=86=E4=B8=8E=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dtstack/flinkx/db2/Db2DatabaseMeta.java | 2 - .../flinkx/db2/format/Db2InputFormat.java | 29 +++++ .../dtstack/flinkx/db2/Db2OutputFormat.java | 59 ++++++++++ .../flinkx/gbase/format/GbaseInputFormat.java | 1 + .../gbase/format/GbaseOutputFormat.java | 29 +++++ .../flinkx/gbase/writer/GbaseWriter.java | 2 + .../flinkx/mysql/format/MysqlInputFormat.java | 3 +- .../mysql/format/MysqlOutputFormat.java | 17 +++ .../flinkx/mysql/writer/MysqlWriter.java | 5 + .../oracle/format/OracleOutputFormat.java | 102 ++++++++++++++++++ .../reader/PostgresqlQuerySqlBuilder.java | 78 ++++++++++++++ .../postgresql/reader/PostgresqlReader.java | 33 ++++++ .../PostgresqlOutputFormat.java | 38 +++++-- .../postgresql/writer/PostgresqlWriter.java | 1 + .../com/dtstack/flinkx/rdb/util/DBUtil.java | 2 - .../JdbcDataReader.java | 2 +- .../QuerySqlBuilder.java | 52 ++++----- 
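Back in the SqlserverInputFormat overrides above, getTimeStr truncates the nanosecond literal to 23 characters because SQL Server's DATETIME keeps only three fractional-second digits. String-level illustration (no driver involved):

public final class SqlserverTimeLiteralSketch {
    public static void main(String[] args) {
        String nanoTs = "2019-09-20 21:10:36.123456789";
        // 23 chars = "yyyy-MM-dd HH:mm:ss.SSS", i.e. millisecond precision for DATETIME
        String forSqlServer = nanoTs.substring(0, 23);
        System.out.println("'" + forSqlServer + "'");   // -> '2019-09-20 21:10:36.123'
    }
}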
.../JdbcDataWriter.java | 3 +- .../rdb/outputformat/JdbcOutputFormat.java | 83 +++----------- .../outputformat/JdbcOutputFormatBuilder.java | 4 +- .../format/SqlserverOutputFormat.java | 29 +++++ pom.xml | 30 +++--- 22 files changed, 473 insertions(+), 131 deletions(-) create mode 100644 flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java create mode 100644 flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java create mode 100644 flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java create mode 100644 flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java create mode 100644 flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java rename flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/{writer => format}/PostgresqlOutputFormat.java (75%) create mode 100644 flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java diff --git a/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java b/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java index a549f7fdea..2db50637ce 100644 --- a/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java +++ b/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java @@ -20,9 +20,7 @@ import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.BaseDatabaseMeta; -import org.apache.commons.lang3.StringUtils; -import java.util.ArrayList; import java.util.List; /** diff --git a/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java new file mode 100644 index 0000000000..7434e362b0 --- /dev/null +++ b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.dtstack.flinkx.db2.format;
+
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+
+/**
+ * Date: 2019/09/20
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class Db2InputFormat extends JdbcInputFormat {
+}
diff --git a/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java
new file mode 100644
index 0000000000..ecb005635e
--- /dev/null
+++ b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.db2;
+
+import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Date: 2019/09/20
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class Db2OutputFormat extends JdbcOutputFormat {
+
+    @Override
+    protected Map<String, List<String>> probePrimaryKeys(String table, Connection dbConn) throws SQLException {
+        Map<String, List<String>> map = new HashMap<>();
+        ResultSet rs = dbConn.getMetaData().getIndexInfo(null, null, table.toUpperCase(), true, false);
+        while(rs.next()) {
+            String indexName = rs.getString("INDEX_NAME");
+            if(!map.containsKey(indexName)) {
+                map.put(indexName, new ArrayList<>());
+            }
+            map.get(indexName).add(rs.getString("COLUMN_NAME"));
+        }
+        Map<String, List<String>> retMap = new HashMap<>();
+        for(Map.Entry<String, List<String>> entry : map.entrySet()) {
+            String k = entry.getKey();
+            List<String> v = entry.getValue();
+            if(v != null && v.size() != 0 && v.get(0) != null) {
+                retMap.put(k, v);
+            }
+        }
+        return retMap;
+    }
+}
diff --git a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java
index 8ab4bae7da..02223df8f9 100644
--- a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java
+++ b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java
@@ -36,6 +36,7 @@
  * @author tudou
  */
 public class GbaseInputFormat extends JdbcInputFormat {
+
     @Override
     public void openInternal(InputSplit inputSplit) throws IOException {
         try {
diff --git a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java
new file mode 100644
index 0000000000..3ac78d0036
--- /dev/null
+++
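Db2OutputFormat above probes unique indexes through plain JDBC metadata (DB2 stores identifiers upper-cased, hence table.toUpperCase()). A standalone sketch of the same call, usable against any JDBC connection; the URL, credentials, and table name are placeholders:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;

public final class IndexProbeSketch {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:db2://host:50000/db", "user", "pass")) {
            // unique = true restricts to unique indexes; approximate = false forces exact results
            ResultSet rs = conn.getMetaData().getIndexInfo(null, null, "MY_TABLE", true, false);
            while (rs.next()) {
                System.out.println(rs.getString("INDEX_NAME") + " -> " + rs.getString("COLUMN_NAME"));
            }
        }
    }
}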
b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.gbase.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class GbaseOutputFormat extends JdbcOutputFormat { +} diff --git a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java index 2ba9a9e30e..553179c507 100644 --- a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java +++ b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java @@ -22,6 +22,7 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.gbase.GbaseDatabaseMeta; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; +import com.dtstack.flinkx.rdb.util.DBUtil; /** * @author jiangbo @@ -32,5 +33,6 @@ public class GbaseWriter extends JdbcDataWriter { public GbaseWriter(DataTransferConfig config) { super(config); setDatabaseInterface(new GbaseDatabaseMeta()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); } } diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java index cfdd48dbca..6db68313ef 100644 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java @@ -42,7 +42,6 @@ */ public class MysqlInputFormat extends JdbcInputFormat { - @Override public void openInternal(InputSplit inputSplit) throws IOException { try { @@ -97,7 +96,7 @@ public void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed. 
" + se.getMessage(), se); } - LOG.info("JdbcInputFormat[" + jobName + "]open: end"); + LOG.info("JdbcInputFormat[{}]open: end", jobName); } @Override diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java index 75d0141868..41a2b9df75 100644 --- a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java +++ b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.mysql.format; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java index 15a5294592..52b3b28c85 100644 --- a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java +++ b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java @@ -21,6 +21,9 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; +import com.dtstack.flinkx.rdb.util.DBUtil; + +import java.util.Collections; /** * MySQL writer plugin @@ -33,6 +36,8 @@ public class MysqlWriter extends JdbcDataWriter { public MysqlWriter(DataTransferConfig config) { super(config); setDatabaseInterface(new MySqlDatabaseMeta()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); + } } diff --git a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java new file mode 100644 index 0000000000..77f9d65502 --- /dev/null +++ b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.oracle.format; + +import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; +import com.dtstack.flinkx.util.DateUtil; +import org.apache.flink.types.Row; + +import java.sql.*; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class OracleOutputFormat extends JdbcOutputFormat { + + @Override + protected Object getField(Row row, int index) { + Object field = super.getField(row, index); + String type = columnType.get(index); + + //oracle timestamp to oracle varchar or varchar2 or long field format + if (!(field instanceof Timestamp)){ + return field; + } + + if (type.equalsIgnoreCase(ColumnType.VARCHAR.name()) || type.equalsIgnoreCase(ColumnType.VARCHAR2.name())){ + SimpleDateFormat format = DateUtil.getDateTimeFormatter(); + field= format.format(field); + } + + if (type.equalsIgnoreCase(ColumnType.LONG.name()) ){ + field = ((Timestamp) field).getTime(); + } + return field; + } + + @Override + protected List probeFullColumns(String table, Connection dbConn) throws SQLException { + String schema =null; + + String[] parts = table.split("\\."); + if(parts.length == 2) { + schema = parts[0].toUpperCase(); + table = parts[1]; + } + + List ret = new ArrayList<>(); + ResultSet rs = dbConn.getMetaData().getColumns(null, schema, table, null); + while(rs.next()) { + ret.add(rs.getString("COLUMN_NAME")); + } + return ret; + } + + @Override + protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { + Map> map = new HashMap<>(); + PreparedStatement ps = dbConn.prepareStatement(String.format(GET_ORACLE_INDEX_SQL,table)); + ResultSet rs = ps.executeQuery(); + + while(rs.next()) { + String indexName = rs.getString("INDEX_NAME"); + if(!map.containsKey(indexName)) { + map.put(indexName,new ArrayList<>()); + } + map.get(indexName).add(rs.getString("COLUMN_NAME")); + } + Map> retMap = new HashMap<>(); + for(Map.Entry> entry: map.entrySet()) { + String k = entry.getKey(); + List v = entry.getValue(); + if(v!=null && v.size() != 0 && v.get(0) != null) { + retMap.put(k, v); + } + } + return retMap; + } +} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java new file mode 100644 index 0000000000..6868aeb8e4 --- /dev/null +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
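The Oracle-specific getField above rewrites Timestamp values according to the target column type. The conversion rule in isolation; DateUtil.getDateTimeFormatter is FlinkX's own helper, so a plain SimpleDateFormat stands in here:

import java.sql.Timestamp;
import java.text.SimpleDateFormat;

public final class OracleTimestampSketch {
    static Object convert(Timestamp ts, String columnType) {
        if ("VARCHAR".equalsIgnoreCase(columnType) || "VARCHAR2".equalsIgnoreCase(columnType)) {
            return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(ts);   // text columns get a formatted string
        }
        if ("LONG".equalsIgnoreCase(columnType)) {
            return ts.getTime();                                             // LONG columns get epoch millis
        }
        return ts;                                                           // anything else passes through
    }
}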
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.postgresql.reader; + +import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class PostgresqlQuerySqlBuilder extends QuerySqlBuilder { + + public PostgresqlQuerySqlBuilder(JdbcDataReader reader){ + super(reader); + } + + @Override + protected String buildQuerySql(){ + List selectColumns = buildSelectColumns(databaseInterface, metaColumns); + boolean splitWithRowNum = addRowNumColumn(databaseInterface, selectColumns, isSplitByKey, splitKey); + + StringBuilder sb = new StringBuilder(); + sb.append("SELECT ").append(StringUtils.join(selectColumns,",")).append(" FROM "); + sb.append(databaseInterface.quoteTable(table)); + sb.append(" WHERE 1=1 "); + + StringBuilder filter = new StringBuilder(); + + if(isSplitByKey && !splitWithRowNum) { + filter.append(" AND ").append(databaseInterface.getSplitFilter(splitKey)); + } + + if (customFilter != null){ + customFilter = customFilter.trim(); + if (customFilter.length() > 0){ + filter.append(" AND ").append(customFilter); + } + } + + if(isIncrement){ + filter.append(" ").append(INCREMENT_FILTER_PLACEHOLDER); + } + + if(isRestore){ + filter.append(" ").append(RESTORE_FILTER_PLACEHOLDER); + } + + sb.append(filter); + sb.append(buildOrderSql()); + + if(isSplitByKey && splitWithRowNum){ + return String.format(SQL_SPLIT_WITH_ROW_NUM, sb.toString(), databaseInterface.getSplitFilter(ROW_NUM_COLUMN_ALIAS)); + } else { + return sb.toString(); + } + } +} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java index 4e4516115c..591e31abd5 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java @@ -19,11 +19,16 @@ package com.dtstack.flinkx.postgresql.reader; import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder; import com.dtstack.flinkx.rdb.util.DBUtil; +import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.types.Row; /** * The reader plugin for PostgreSQL database @@ -38,6 +43,34 @@ public 
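For a plain table read, PostgresqlQuerySqlBuilder above yields a statement of roughly the following shape; the bracketed parts are illustrative stand-ins, each filter appears only when its feature is enabled, and the ${...} placeholders are substituted later by JdbcInputFormat.buildQuerySql:

public final class PgQueryShapeSketch {
    public static void main(String[] args) {
        String sql = "SELECT id,name FROM my_table WHERE 1=1"
                + " AND <splitFilter> AND <customFilter>"
                + " ${incrementFilter} ${restoreFilter}"
                + " ORDER BY <increment or restore column>";
        System.out.println(sql);
    }
}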
PostgresqlReader(DataTransferConfig config, StreamExecutionEnvironment en setDatabaseInterface(new PostgresqlDatabaseMeta()); setTypeConverterInterface(new PostgresqlTypeConverter()); dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); + } + + @Override + public DataStream readData() { + JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); + builder.setDrivername(databaseInterface.getDriverClass()); + builder.setDBUrl(dbUrl); + builder.setUsername(username); + builder.setPassword(password); + builder.setBytes(bytes); + builder.setMonitorUrls(monitorUrls); + builder.setTable(table); + builder.setDatabaseInterface(databaseInterface); + builder.setTypeConverter(typeConverter); + builder.setMetaColumn(metaColumns); + builder.setFetchSize(fetchSize == 0 ? databaseInterface.getFetchSize() : fetchSize); + builder.setQueryTimeOut(queryTimeOut == 0 ? databaseInterface.getQueryTimeout() : queryTimeOut); + builder.setIncrementConfig(incrementConfig); + builder.setSplitKey(splitKey); + builder.setNumPartitions(numPartitions); + builder.setCustomSql(customSql); + builder.setRestoreConfig(restoreConfig); + builder.setHadoopConfig(hadoopConfig); + + QuerySqlBuilder sqlBuilder = new PostgresqlQuerySqlBuilder(this); + builder.setQuery(sqlBuilder.buildSql()); + RichInputFormat format = builder.finish(); + return createInput(format, (databaseInterface.getDatabaseType() + "reader").toLowerCase()); } } diff --git a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java similarity index 75% rename from flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java rename to flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java index 4af60e797e..ce3653c985 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java +++ b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java @@ -1,4 +1,21 @@ -package com.dtstack.flinkx.postgresql.writer; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.postgresql.format; import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; @@ -7,8 +24,6 @@ import org.apache.flink.types.Row; import org.postgresql.copy.CopyManager; import org.postgresql.core.BaseConnection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.sql.PreparedStatement; @@ -23,15 +38,15 @@ public class PostgresqlOutputFormat extends JdbcOutputFormat { - private static final Logger LOG = LoggerFactory.getLogger(PostgresqlOutputFormat.class); - private static final String COPY_SQL_TEMPL = "copy %s(%s) from stdin DELIMITER '%s'"; private static final String DEFAULT_FIELD_DELIM = "\001"; private static final String LINE_DELIMITER = "\n"; - /**now just add ext insert mode:copy*/ + /** + * now just add ext insert mode:copy + */ private static final String INSERT_SQL_MODE_TYPE = "copy"; private String copySql = ""; @@ -87,7 +102,7 @@ protected void writeMultipleRecordsInternal() throws Exception { return; } - StringBuilder sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(128); for (Row row : rows) { int lastIndex = row.getArity() - 1; for (int index =0; index < row.getArity(); index++) { @@ -110,6 +125,15 @@ protected void writeMultipleRecordsInternal() throws Exception { } } + @Override + protected Object getField(Row row, int index) { + Object field = super.getField(row, index); + String type = columnType.get(index); + field = typeConverter.convert(field,type); + + return field; + } + private boolean checkIsCopyMode(String insertMode){ if(Strings.isNullOrEmpty(insertMode)){ return false; diff --git a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java index a81de66aed..867fd909bb 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java +++ b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; +import com.dtstack.flinkx.postgresql.format.PostgresqlOutputFormat; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; import org.apache.flink.streaming.api.datastream.DataStream; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java index 4b23668d65..0902308f05 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java @@ -373,8 +373,6 @@ public static long getMillis(long startLocation){ * @return 格式化后jdbc连接URL字符串 */ public static String formatJdbcUrl(String dbUrl, Map extParamMap){ -// if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER) -// || pluginName.equalsIgnoreCase(PluginNameConstrant.GBASE_WRITER) ){ String[] splits = DB_PATTERN.split(dbUrl); Map paramMap = new HashMap(); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java 
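In copy mode the writer above streams \001-delimited, newline-terminated rows into PostgreSQL through the JDBC driver's CopyManager. A minimal standalone sketch; connection details and the table are placeholders, and the real format builds the buffer from Row fields instead of a literal string:

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;

import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;

public final class PgCopySketch {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:postgresql://host:5432/db", "user", "pass")) {
            // Matches the patch's template: copy %s(%s) from stdin DELIMITER '%s'
            String copySql = "copy my_table(id,name) from stdin DELIMITER '\001'";
            String rows = "1\001alice\n2\001bob\n";
            CopyManager mgr = new CopyManager(conn.unwrap(BaseConnection.class));
            mgr.copyIn(copySql, new ByteArrayInputStream(rows.getBytes(StandardCharsets.UTF_8)));
        }
    }
}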
b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java index 6582818cba..0a1d19b5da 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java @@ -140,7 +140,7 @@ private void buildIncrementConfig(ReaderConfig readerConfig){ String incrementColStr = String.valueOf(incrementColumn); if(NumberUtils.isNumber(incrementColStr)){ - MetaColumn metaColumn = metaColumns.get(Integer.valueOf(incrementColStr)); + MetaColumn metaColumn = metaColumns.get(Integer.parseInt(incrementColStr)); type = metaColumn.getType(); name = metaColumn.getName(); index = metaColumn.getIndex(); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java index 3f585c6f0a..8b365ebe72 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java @@ -19,10 +19,8 @@ package com.dtstack.flinkx.rdb.datareader; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.StringUtil; import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; @@ -34,25 +32,25 @@ */ public class QuerySqlBuilder { - private static final String CUSTOM_SQL_TEMPLATE = "select * from (%s) %s"; - private static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; - private static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; - private static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; - private static final String SQL_SPLIT_WITH_ROW_NUM = "SELECT * FROM (%s) tmp WHERE %s"; - private static final String ROW_NUM_COLUMN_ALIAS = "FLINKX_ROWNUM"; - - private DatabaseInterface databaseInterface; - private String table; - private List metaColumns; - private String splitKey; - private String customFilter; - private String customSql; - private boolean isSplitByKey; - private boolean isIncrement; - private String incrementColumn; - private String restoreColumn; - private boolean isRestore; - private String orderByColumn; + protected static final String CUSTOM_SQL_TEMPLATE = "select * from (%s) %s"; + protected static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; + protected static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; + protected static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; + protected static final String SQL_SPLIT_WITH_ROW_NUM = "SELECT * FROM (%s) tmp WHERE %s"; + protected static final String ROW_NUM_COLUMN_ALIAS = "FLINKX_ROWNUM"; + + protected DatabaseInterface databaseInterface; + protected String table; + protected List metaColumns; + protected String splitKey; + protected String customFilter; + protected String customSql; + protected boolean isSplitByKey; + protected boolean isIncrement; + protected String incrementColumn; + protected String restoreColumn; + protected boolean isRestore; + protected String orderByColumn; public QuerySqlBuilder(JdbcDataReader reader) { databaseInterface = reader.databaseInterface; @@ -92,7 +90,7 @@ public String buildSql(){ return query; } - private String buildQuerySql(){ + protected String buildQuerySql(){ List 
selectColumns = buildSelectColumns(databaseInterface, metaColumns); boolean splitWithRowNum = addRowNumColumn(databaseInterface, selectColumns, isSplitByKey, splitKey); @@ -124,10 +122,6 @@ private String buildQuerySql(){ sb.append(filter); - if(EDatabaseType.PostgreSQL.equals(databaseInterface.getDatabaseType())){ - sb.append(buildOrderSql()); - } - if(isSplitByKey && splitWithRowNum){ return String.format(SQL_SPLIT_WITH_ROW_NUM, sb.toString(), databaseInterface.getSplitFilter(ROW_NUM_COLUMN_ALIAS)); } else { @@ -135,7 +129,7 @@ private String buildQuerySql(){ } } - private String buildOrderSql(){ + protected String buildOrderSql(){ String column; if(isIncrement){ column = incrementColumn; @@ -168,7 +162,7 @@ private String buildQuerySqlWithCustomSql(){ return querySql.toString(); } - private static List buildSelectColumns(DatabaseInterface databaseInterface, List metaColumns){ + protected static List buildSelectColumns(DatabaseInterface databaseInterface, List metaColumns){ List selectColumns = new ArrayList<>(); if(metaColumns.size() == 1 && "*".equals(metaColumns.get(0).getName())){ selectColumns.add("*"); @@ -185,7 +179,7 @@ private static List buildSelectColumns(DatabaseInterface databaseInterfa return selectColumns; } - private static boolean addRowNumColumn(DatabaseInterface databaseInterface, List selectColumns, boolean isSplitByKey,String splitKey){ + protected static boolean addRowNumColumn(DatabaseInterface databaseInterface, List selectColumns, boolean isSplitByKey,String splitKey){ if(!isSplitByKey || !splitKey.contains("(")){ return false; } diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java index ce5e8e4f27..0d70d2361a 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java @@ -68,6 +68,7 @@ public void setDatabaseInterface(DatabaseInterface databaseInterface) { this.databaseInterface = databaseInterface; } + @SuppressWarnings("unchecked") public JdbcDataWriter(DataTransferConfig config) { super(config); @@ -92,7 +93,7 @@ public JdbcDataWriter(DataTransferConfig config) { @Override public DataStreamSink writeData(DataStream dataSet) { - JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(databaseInterface); + JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(databaseInterface.getDatabaseType().name()); builder.setDriverName(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 83b5acaa62..bc60fc02ec 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -18,7 +18,6 @@ package com.dtstack.flinkx.rdb.outputformat; import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.outputformat.RichOutputFormat; @@ -28,14 +27,13 @@ import 
com.dtstack.flinkx.restore.FormatState; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.DateUtil; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ObjectUtils; import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.sql.*; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -86,7 +84,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected List fullColumnType; - private List columnType = new ArrayList<>(); + protected List columnType = new ArrayList<>(); protected TypeConverterInterface typeConverter; @@ -96,7 +94,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected long rowsOfCurrentTransaction; - private final static String GET_ORACLE_INDEX_SQL = "SELECT " + + protected final static String GET_ORACLE_INDEX_SQL = "SELECT " + "t.INDEX_NAME," + "t.COLUMN_NAME " + "FROM " + @@ -110,7 +108,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected final static String CONN_CLOSE_ERROR_MSG = "No operations allowed"; protected PreparedStatement prepareTemplates() throws SQLException { - if(fullColumn == null || fullColumn.size() == 0) { + if(CollectionUtils.isEmpty(fullColumn)) { fullColumn = column; } @@ -131,7 +129,7 @@ protected PreparedStatement prepareTemplates() throws SQLException { } @Override - protected void openInternal(int taskNumber, int numTasks) throws IOException { + protected void openInternal(int taskNumber, int numTasks){ try { ClassUtil.forName(driverName, getClass().getClassLoader()); dbConn = DBUtil.getConnection(dbURL, username, password); @@ -140,7 +138,7 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { dbConn.setAutoCommit(false); } - if(fullColumn == null || fullColumn.size() == 0) { + if(CollectionUtils.isEmpty(fullColumn)) { fullColumn = probeFullColumns(table, dbConn); } @@ -166,7 +164,7 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { preparedStatement = prepareTemplates(); readyCheckpoint = false; - LOG.info("subtask[" + taskNumber + "] wait finished"); + LOG.info("subTask[{}}] wait finished", taskNumber); } catch (SQLException sqe) { throw new IllegalArgumentException("open() failed.", sqe); } @@ -184,7 +182,7 @@ private List analyzeTable() { ret.add(rd.getColumnTypeName(i+1)); } - if(fullColumn == null || fullColumn.size() == 0){ + if(CollectionUtils.isEmpty(fullColumn)){ for(int i = 0; i < rd.getColumnCount(); ++i) { fullColumn.add(rd.getColumnName(i+1)); } @@ -273,8 +271,7 @@ public FormatState getFormatState(){ } try { - LOG.info("readyCheckpoint:" + readyCheckpoint); - LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); + LOG.info("readyCheckpoint: {}, rowsOfCurrentTransaction: {}", readyCheckpoint, rowsOfCurrentTransaction); if (readyCheckpoint || rowsOfCurrentTransaction > restoreConfig.getMaxRowNumForCheckpoint()){ @@ -321,75 +318,21 @@ protected Object getField(Row row, int index) { field = ((java.util.Date) field).getTime(); } - field=dealOracleTimestampToVarcharOrLong(databaseInterface.getDatabaseType(),field,type); - - - if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){ - field = typeConverter.convert(field,type); - } - - return field; - } - - /** - * oracle timestamp to oracle varchar or varchar2 or long field format - * @param databaseType - * @param field - * @param type - * @return - */ - private Object 
dealOracleTimestampToVarcharOrLong(EDatabaseType databaseType, Object field, String type) { - if (EDatabaseType.Oracle!=databaseInterface.getDatabaseType()){ - return field; - } - - if (!(field instanceof Timestamp)){ - return field; - } - - if (type.equalsIgnoreCase(ColumnType.VARCHAR.name()) || type.equalsIgnoreCase(ColumnType.VARCHAR2.name())){ - SimpleDateFormat format = DateUtil.getDateTimeFormatter(); - field= format.format(field); - } - - if (type.equalsIgnoreCase(ColumnType.LONG.name()) ){ - field = ((Timestamp) field).getTime(); - } return field; } protected List probeFullColumns(String table, Connection dbConn) throws SQLException { - String schema =null; - if(EDatabaseType.Oracle == databaseInterface.getDatabaseType()) { - String[] parts = table.split("\\."); - if(parts.length == 2) { - schema = parts[0].toUpperCase(); - table = parts[1]; - } - } - List ret = new ArrayList<>(); - ResultSet rs = dbConn.getMetaData().getColumns(null, schema, table, null); + ResultSet rs = dbConn.getMetaData().getColumns(null, null, table, null); while(rs.next()) { ret.add(rs.getString("COLUMN_NAME")); } return ret; } - - protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { Map> map = new HashMap<>(); - ResultSet rs; - if(EDatabaseType.Oracle == databaseInterface.getDatabaseType()){ - PreparedStatement ps = dbConn.prepareStatement(String.format(GET_ORACLE_INDEX_SQL,table)); - rs = ps.executeQuery(); - } else if(EDatabaseType.DB2 == databaseInterface.getDatabaseType()){ - rs = dbConn.getMetaData().getIndexInfo(null, null, table.toUpperCase(), true, false); - } else { - rs = dbConn.getMetaData().getIndexInfo(null, null, table, true, false); - } - + ResultSet rs = dbConn.getMetaData().getIndexInfo(null, null, table, true, false); while(rs.next()) { String indexName = rs.getString("INDEX_NAME"); if(!map.containsKey(indexName)) { @@ -428,7 +371,7 @@ public void closeInternal() { @Override protected boolean needWaitBeforeWriteRecords() { - return preSql != null && preSql.size() != 0; + return CollectionUtils.isNotEmpty(preSql); } @Override @@ -440,7 +383,7 @@ protected void beforeWriteRecords() { @Override protected boolean needWaitBeforeCloseInternal() { - return postSql != null && postSql.size() != 0; + return CollectionUtils.isNotEmpty(postSql); } @Override diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java index de486d39ee..0cc8516216 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java @@ -33,8 +33,8 @@ public class JdbcOutputFormatBuilder extends RichOutputFormatBuilder { private JdbcOutputFormat format; - public JdbcOutputFormatBuilder(DatabaseInterface databaseInterface) { - JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(databaseInterface.getDatabaseType().name(), JdbcFormatLoader.OUTPUT_FORMAT); + public JdbcOutputFormatBuilder(String dataType) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(dataType, JdbcFormatLoader.OUTPUT_FORMAT); super.format = format = (JdbcOutputFormat) jdbcFormatLoader.getFormatInstance(); } diff --git a/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java 
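With the Oracle and DB2 special cases gone, probePrimaryKeys above takes the generic JDBC metadata path for every database. A self-contained sketch of that pattern, assuming only standard `java.sql` behavior:

```
// Standalone sketch of the generic key probe used above: unique indexes
// reported by DatabaseMetaData, grouped by index name.
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class KeyProbeSketch {
    public static Map<String, List<String>> uniqueKeys(Connection conn, String table) throws SQLException {
        Map<String, List<String>> keys = new HashMap<>();
        // unique = true limits the result to unique indexes; approximate = false
        // asks for exact, current metadata.
        try (ResultSet rs = conn.getMetaData().getIndexInfo(null, null, table, true, false)) {
            while (rs.next()) {
                String indexName = rs.getString("INDEX_NAME");
                if (indexName == null) {
                    continue; // statistics rows carry no index name
                }
                keys.computeIfAbsent(indexName, k -> new ArrayList<>())
                        .add(rs.getString("COLUMN_NAME"));
            }
        }
        return keys;
    }
}
```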
b/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java new file mode 100644 index 0000000000..d574881b56 --- /dev/null +++ b/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.sqlserver.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class SqlserverOutputFormat extends JdbcOutputFormat { +} diff --git a/pom.xml b/pom.xml index 1378ebe2b4..be479144b1 100644 --- a/pom.xml +++ b/pom.xml @@ -12,29 +12,29 @@ flinkx-core flinkx-rdb - flinkx-es - flinkx-ftp - flinkx-odps - flinkx-examples + + + + flinkx-mysql - flinkx-hbase - flinkx-hdfs - flinkx-hive + + + flinkx-oracle flinkx-sqlserver - flinkx-redis - flinkx-mongodb + + flinkx-postgresql flinkx-launcher - flinkx-stream - flinkx-carbondata + + flinkx-db2 flinkx-test flinkx-gbase - flinkx-binlog - flinkx-kafka09 - flinkx-kafka10 - flinkx-kafka11 + + + + From afcbf77e7643b3ec1941c05b78b9db26c3bd1b6c Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 23 Sep 2019 10:34:07 +0800 Subject: [PATCH 10/62] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=B3=A8=E9=87=8A?= =?UTF-8?q?=E3=80=81=E6=A0=BC=E5=BC=8F=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/util/ClassUtil.java | 2 +- .../dtstack/flinkx/util/ExceptionUtil.java | 23 ++- .../flinkx/rdb/loader/JdbcFormatLoader.java | 7 +- .../rdb/type/TypeConverterInterface.java | 6 + .../com/dtstack/flinkx/rdb/util/DBUtil.java | 177 ++++++++++++------ 5 files changed, 150 insertions(+), 65 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java index 487c9b6ff5..856b02f816 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java index 179c39b8f6..e6f91a66c7 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java @@ -32,6 +32,11 @@ public class ExceptionUtil { private static Logger logger = LoggerFactory.getLogger(ExceptionUtil.class); + /** + * 获取错误的堆栈信息 + * @param e throwable + * @return 堆栈信息 + */ public static String getErrorMessage(Throwable e) { StringWriter stringWriter = null; PrintWriter writer = null; @@ -47,16 +52,16 @@ public static String getErrorMessage(Throwable e) { logger.error("",ee); }finally { - if(writer!=null){ - writer.close(); - } - if(stringWriter!=null){ - try{ - stringWriter.close(); - }catch (Throwable ee){ - logger.error("",ee); - } + if(writer!=null){ + writer.close(); + } + if(stringWriter!=null){ + try{ + stringWriter.close(); + }catch (Throwable ee){ + logger.error("",ee); } + } } return null; } diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java index ad942fa79a..94c846429c 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java @@ -17,6 +17,7 @@ */ package com.dtstack.flinkx.rdb.loader; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.util.Preconditions; /** @@ -55,7 +56,7 @@ public class JdbcFormatLoader { */ public JdbcFormatLoader(String dataType, int formatType){ - Preconditions.checkArgument(dataType != null && dataType.trim().length() != 0); + Preconditions.checkArgument(StringUtils.isNotBlank(dataType)); Preconditions.checkArgument(formatType == INPUT_FORMAT || formatType == OUTPUT_FORMAT); dataType = dataType.toLowerCase(); @@ -67,6 +68,10 @@ public JdbcFormatLoader(String dataType, int formatType){ this.formatClassName = String.format(pkgPrefixFormat, dataType, this.formatName.substring(0, 1).toUpperCase() + this.formatName.substring(1)); } + /** + * 获取format实例对象 + * @return + */ public Object getFormatInstance() { Object format = null; try { diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java index 8ac4ca6fda..35088110d0 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java @@ -28,6 +28,12 @@ */ public interface TypeConverterInterface extends Serializable { + /** + * 类型转换,将数据库数据某类型的对象转换为对应的Java基本数据对象实例 + * @param data 数据记录 + * @param typeName 数据类型 + * @return + */ Object convert(Object data,String typeName); } diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java index 0902308f05..b007642e5c 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java @@ -21,6 +21,7 @@ import 
com.dtstack.flinkx.rdb.ParameterValuesProvider; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.ClassUtil; +import com.dtstack.flinkx.util.ExceptionUtil; import com.dtstack.flinkx.util.SysUtil; import com.dtstack.flinkx.util.TelnetUtil; import org.apache.commons.lang.StringUtils; @@ -29,7 +30,6 @@ import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.io.Serializable; import java.math.BigDecimal; import java.sql.*; import java.util.ArrayList; @@ -48,23 +48,55 @@ public class DBUtil { private static final Logger LOG = LoggerFactory.getLogger(DBUtil.class); + /** + * 数据库连接的最大重试次数 + */ private static int MAX_RETRY_TIMES = 3; + /** + * 秒级时间戳的长度为10位 + */ private static int SECOND_LENGTH = 10; + /** + * 毫秒级时间戳的长度为13位 + */ private static int MILLIS_LENGTH = 13; + /** + * 微秒级时间戳的长度为16位 + */ private static int MICRO_LENGTH = 16; + /** + * 纳秒级时间戳的长度为19位 + */ private static int NANOS_LENGTH = 19; + /** + * jdbc连接URL的分割正则,用于获取URL?后的连接参数 + */ public static final Pattern DB_PATTERN = Pattern.compile("\\?"); + /** + * 增量任务过滤条件占位符 + */ public static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; + /** + * 断点续传过滤条件占位符 + */ public static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; public static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; public static final String NULL_STRING = "null"; + /** + * 获取jdbc连接(超时10S) + * @param url url + * @param username 账号 + * @param password 密码 + * @return + * @throws SQLException + */ private static Connection getConnectionInternal(String url, String username, String password) throws SQLException { Connection dbConn; synchronized (ClassUtil.lock_str){ @@ -83,6 +115,14 @@ private static Connection getConnectionInternal(String url, String username, Str return dbConn; } + /** + * 获取jdbc连接(重试3次) + * @param url url + * @param username 账号 + * @param password 密码 + * @return + * @throws SQLException + */ public static Connection getConnection(String url, String username, String password) throws SQLException { boolean failed = true; Connection dbConn = null; @@ -107,45 +147,21 @@ public static Connection getConnection(String url, String username, String passw return dbConn; } - - public static List> executeQuery(Connection connection, String sql) { - List> result = com.google.common.collect.Lists.newArrayList(); - ResultSet res = null; - Statement statement = null; - try{ - statement = connection.createStatement(); - res = statement.executeQuery(sql); - int columns = res.getMetaData().getColumnCount(); - List columnName = com.google.common.collect.Lists.newArrayList(); - for(int i = 0; i < columns; i++){ - columnName.add(res.getMetaData().getColumnName(i + 1)); - } - - while(res.next()){ - Map row = com.google.common.collect.Maps.newHashMap(); - for(int i = 0;i < columns; i++){ - row.put(columnName.get(i), res.getObject(i + 1)); - } - result.add(row); - } - }catch(Exception e){ - throw new RuntimeException(e); - } - finally{ - DBUtil.closeDBResources(res, statement, null, false); - } - return result; - } - - public static void closeDBResources(ResultSet rs, Statement stmt, - Connection conn, boolean commit) { + /** + * 关闭连接资源 + * @param rs ResultSet + * @param stmt Statement + * @param conn Connection + * @param commit + */ + public static void closeDBResources(ResultSet rs, Statement stmt, Connection conn, boolean commit) { if (null != rs) { try { LOG.info("Start close resultSet"); rs.close(); LOG.info("Close resultSet successful"); } catch (SQLException e) { - LOG.warn("Close resultSet 
error:{}",e); + LOG.warn("Close resultSet error: {}", ExceptionUtil.getErrorMessage(e)); } } @@ -155,7 +171,7 @@ public static void closeDBResources(ResultSet rs, Statement stmt, stmt.close(); LOG.info("Close statement successful"); } catch (SQLException e) { - LOG.warn("Close statement error:{}",e); + LOG.warn("Close statement error:{}", ExceptionUtil.getErrorMessage(e)); } } @@ -169,11 +185,15 @@ public static void closeDBResources(ResultSet rs, Statement stmt, conn.close(); LOG.info("Close connection successful"); } catch (SQLException e) { - LOG.warn("Close connection error:{}",e); + LOG.warn("Close connection error:{}", ExceptionUtil.getErrorMessage(e)); } } } + /** + * 手动提交事物 + * @param conn Connection + */ public static void commit(Connection conn){ try { if (!conn.isClosed() && !conn.getAutoCommit()){ @@ -182,10 +202,15 @@ public static void commit(Connection conn){ LOG.info("Commit connection successful"); } } catch (SQLException e){ - LOG.warn("commit error:{}",e); + LOG.warn("commit error:{}", ExceptionUtil.getErrorMessage(e)); } } + /** + * 批量执行sql + * @param dbConn Connection + * @param sqls sql列表 + */ public static void executeBatch(Connection dbConn, List sqls) { if(sqls == null || sqls.size() == 0) { return; @@ -204,6 +229,13 @@ public static void executeBatch(Connection dbConn, List sqls) { } } + /** + * 获取某数据库某表的主键和唯一索引 + * @param table 表名 + * @param dbConn 数据库连接 + * @return + * @throws SQLException + */ public static Map> getPrimaryOrUniqueKeys(String table, Connection dbConn) throws SQLException { Map> keyMap = new HashMap<>(); DatabaseMetaData meta = dbConn.getMetaData(); @@ -219,26 +251,38 @@ public static Map> getPrimaryOrUniqueKeys(String table, Conn return keyMap; } + /** + * 封装channel通道顺序 + * @param channels + * @return + */ public static Object[][] getParameterValues(final int channels){ - ParameterValuesProvider provider = new ParameterValuesProvider() { - @Override - public Serializable[][] getParameterValues() { - Integer[][] parameters = new Integer[channels][]; - for(int i = 0; i < channels; ++i) { - parameters[i] = new Integer[2]; - parameters[i][0] = channels; - parameters[i][1] = i; - } - return parameters; + ParameterValuesProvider provider = () -> { + Integer[][] parameters = new Integer[channels][]; + for(int i = 0; i < channels; ++i) { + parameters[i] = new Integer[2]; + parameters[i][0] = channels; + parameters[i][1] = i; } + return parameters; }; return provider.getParameterValues(); } - public static List analyzeTable(String dbURL,String username,String password,DatabaseInterface databaseInterface, - String table,List metaColumns) { - List ret = new ArrayList<>(); + /** + * 获取表列名类型列表 + * @param dbURL jdbc url + * @param username 数据库账号 + * @param password 数据库密码 + * @param databaseInterface DatabaseInterface + * @param table 表名 + * @param metaColumns MetaColumn列表 + * @return + */ + public static List analyzeTable(String dbURL, String username, String password, DatabaseInterface databaseInterface, + String table, List metaColumns) { + List ret = new ArrayList<>(metaColumns.size()); Connection dbConn = null; Statement stmt = null; ResultSet rs = null; @@ -248,7 +292,7 @@ public static List analyzeTable(String dbURL,String username,String pass rs = stmt.executeQuery(databaseInterface.getSQLQueryFields(databaseInterface.quoteTable(table))); ResultSetMetaData rd = rs.getMetaData(); - Map nameTypeMap = new HashMap<>(); + Map nameTypeMap = new HashMap<>((rd.getColumnCount() << 2) / 3); for(int i = 0; i < rd.getColumnCount(); ++i) { 
nameTypeMap.put(rd.getColumnName(i+1),rd.getColumnTypeName(i+1)); } @@ -269,6 +313,13 @@ public static List analyzeTable(String dbURL,String username,String pass return ret; } + /** + * 占位符设值 + * @param param 参数 + * @param statement PreparedStatement + * @param i 占位符位置 + * @throws SQLException + */ public static void setParameterValue(Object param,PreparedStatement statement,int i) throws SQLException{ if (param instanceof String) { statement.setString(i + 1, (String) param); @@ -302,6 +353,12 @@ public static void setParameterValue(Object param,PreparedStatement statement,in } } + /** + * clob转string + * @param obj clob + * @return + * @throws Exception + */ public static Object clobToString(Object obj) throws Exception{ String dataStr; if(obj instanceof Clob){ @@ -320,16 +377,23 @@ public static Object clobToString(Object obj) throws Exception{ return dataStr; } - - + /** + * 获取纳秒字符串 + * @param timeStr + * @return + */ public static String getNanosTimeStr(String timeStr){ if(timeStr.length() < 29){ timeStr += StringUtils.repeat("0",29 - timeStr.length()); } - return timeStr; } + /** + * 将边界位置时间转换成对应饿的纳秒时间 + * @param startLocation 边界位置(起始/结束) + * @return + */ public static int getNanos(long startLocation){ String timeStr = String.valueOf(startLocation); int nanos; @@ -348,6 +412,11 @@ public static int getNanos(long startLocation){ return nanos; } + /** + * 将边界位置时间转换成对应饿的毫秒时间 + * @param startLocation 边界位置(起始/结束) + * @return + */ public static long getMillis(long startLocation){ String timeStr = String.valueOf(startLocation); long millisSecond; From 24b571e0b85ce096c65ad72a0457be6110f96a4a Mon Sep 17 00:00:00 2001 From: jiangbo Date: Wed, 25 Sep 2019 13:41:34 +0800 Subject: [PATCH 11/62] =?UTF-8?q?[=E6=95=B0=E6=8D=AE=E5=90=8C=E6=AD=A5perj?= =?UTF-8?q?ob=E6=A8=A1=E5=BC=8F=E8=BF=90=E8=A1=8C=E6=97=B6=EF=BC=8Capplica?= =?UTF-8?q?tionMaster=E5=A4=B1=E8=B4=A5=E9=87=8D=E8=AF=95=E5=90=8E?= =?UTF-8?q?=E6=B2=A1=E6=9C=89=E6=B8=85=E9=99=A4.data=E7=9B=AE=E5=BD=95][18?= =?UTF-8?q?574]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/hdfs/writer/HdfsOutputFormat.java | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index b6e3c8be70..08bb2d7912 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -46,6 +46,8 @@ public abstract class HdfsOutputFormat extends RichOutputFormat implements Clean protected static final String FINISHED_SUBDIR = ".finished"; + protected static final String ACTION_FINISHED_TAG = ".actionFinished"; + protected static final String SP = "/"; protected FileSystem fs; @@ -149,9 +151,60 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { String dateString = formatter.format(currentTime); tmpPath = outputFilePath + SP + DATA_SUBDIR + SP + taskNumber + "." 
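The getNanos/getMillis helpers documented earlier in this hunk key off the digit count of the incremental position (10 digits = seconds, 13 = milliseconds, 16 = microseconds, 19 = nanoseconds). A compact worked sketch of the millisecond normalization, assuming that convention:

```
// Worked sketch of the digit-length convention described above.
public class TimestampScaleSketch {
    public static long toMillis(long startLocation) {
        switch (String.valueOf(startLocation).length()) {
            case 10: return startLocation * 1000L;     // seconds -> millis
            case 13: return startLocation;             // already millis
            case 16: return startLocation / 1000L;     // micros -> millis
            case 19: return startLocation / 1000000L;  // nanos -> millis
            default: throw new IllegalArgumentException("Unknown timestamp length");
        }
    }

    public static void main(String[] args) {
        System.out.println(toMillis(1565586665L));     // 1565586665000
        System.out.println(toMillis(1565586665372L));  // 1565586665372
    }
}
```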
+ dateString; finishedPath = outputFilePath + SP + FINISHED_SUBDIR + SP + taskNumber; + + beforeWrite(); open(); } + private void beforeWrite(){ + if(numTasks > 0){ + waitBeforeWrite(); + return; + } + + try{ + LOG.info("Delete [.data] dir before write records"); + cleanTemporaryDataFiles(); + } catch (Exception e){ + throw new RuntimeException("Clean .data dir error:", e); + } + + try { + fs.create(new Path(tmpPath + SP + ACTION_FINISHED_TAG)); + } catch (Exception e){ + throw new RuntimeException("Clean .data dir error:", e); + } + } + + private void waitBeforeWrite(){ + try { + Path path = new Path(tmpPath + SP + ACTION_FINISHED_TAG); + boolean readyWrite = fs.exists(path); + int n = 0; + while (!readyWrite){ + if(n > 60){ + throw new RuntimeException("Wait action finished before write timeout"); + } + + SysUtil.sleep(1000); + readyWrite = fs.exists(path); + n++; + } + } catch (Exception e){ + throw new RuntimeException("wait before write error:", e); + } + } + + private void cleanTemporaryDataFiles() throws IOException{ + Path finishedDir = new Path(outputFilePath + SP + FINISHED_SUBDIR); + fs.delete(finishedDir, true); + LOG.info("Delete .finished dir:{}", finishedDir); + + Path tmpDir = new Path(outputFilePath + SP + DATA_SUBDIR); + fs.delete(tmpDir, true); + LOG.info("Delete .data dir:{}", tmpDir); + } + protected abstract void open() throws IOException; @Override From 69cd6ccc2438719fd5d85b55131dc5a96bb13e8c Mon Sep 17 00:00:00 2001 From: tudou Date: Sun, 29 Sep 2019 10:17:57 +0800 Subject: [PATCH 12/62] =?UTF-8?q?=E4=BC=98=E5=8C=961.5=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E8=AF=BB=E5=86=99=E9=80=9F=E5=BA=A6=EF=BC=8C=E5=AE=8C=E5=96=84?= =?UTF-8?q?=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 + docs/kudureader.md | 183 +++++++++++++++++ docs/kuduwriter.md | 187 ++++++++++++++++++ .../dtstack/flinkx/util/ExceptionUtil.java | 63 ++++++ .../dtstack/flinkx/kudu/core/KuduConfig.java | 67 +++++++ .../flinkx/kudu/core/KuduConfigBuilder.java | 7 + .../flinkx/kudu/core/KuduConfigKeys.java | 3 +- .../dtstack/flinkx/kudu/core/KuduUtil.java | 25 +-- .../flinkx/kudu/reader/KuduInputFormat.java | 45 +++-- .../flinkx/kudu/writer/KuduOutputFormat.java | 17 +- .../flinkx/kudu/writer/KuduWriter.java | 5 + 11 files changed, 570 insertions(+), 36 deletions(-) create mode 100644 docs/kudureader.md create mode 100644 docs/kuduwriter.md create mode 100644 flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java diff --git a/README.md b/README.md index 4ee91d03bf..53ef2f6cdd 100644 --- a/README.md +++ b/README.md @@ -218,6 +218,8 @@ reader和writer包括name和parameter,分别表示插件名称和插件参数 * [MongoDB读取插件](docs/mongodbreader.md) * [Stream读取插件](docs/streamreader.md) * [Carbondata读取插件](docs/carbondatareader.md) +* [Kudu读取插件](docs/kudureader.md) + ### 5.2 写入插件 @@ -231,6 +233,8 @@ reader和writer包括name和parameter,分别表示插件名称和插件参数 * [Redis写入插件](docs/rediswriter.md) * [Stream写入插件](docs/streamwriter.md) * [Carbondata写入插件](docs/carbondatawriter.md) +* [Kudu写入插件](docs/kuduwriter.md) + ## 6.版本说明 diff --git a/docs/kudureader.md b/docs/kudureader.md new file mode 100644 index 0000000000..6de48bcd1c --- /dev/null +++ b/docs/kudureader.md @@ -0,0 +1,183 @@ +# Kudu读取插件(kudureader) + +## 1. 
配置样例 + +``` +{ + "job": { + "content": [ + { + "reader": { + "name": "kudureader", + "parameter": { + "column": [ + { + "name": "id", + "type": "long" + } + ], + "masterAddresses": "kudu1:7051,kudu2:7051,kudu3:7051", + "table": "kudu", + "readMode": "read_latest", + "authentication": "", + "principal": "", + "keytabFile": "", + "workerCount": 2, + "bossCount": 1, + "operationTimeout": 30000, + "adminOperationTimeout": 30000, + "queryTimeout": 30000, + "where": " id >= 1 ", + "batchSizeBytes": 1048576 + } + }, + "writer": {} + } + ], + "setting": {} + } +} +``` + +## 2. 参数说明 + +* **name** + + * 描述:插件名,此处填写插件名称,kudureader。 + + * 必选:是 + + * 默认值:无 + +* **column** + + * 描述:需要生成的字段。 + + * 属性说明: + + * name:字段名称; + + * type:字段类型; + + * 必选:是 + + * 默认值:无 + +* **masterAddresses** + + * 描述: master节点地址:端口,多个以,隔开。 + + * 必选:是 + + * 默认值:无 + +* **table** + + * 描述: kudu表名。 + + * 必选:是 + + * 默认值:无 + +* **readMode** + + * 描述: kudu读取模式: + + * 1、read_latest + 默认的读取模式。 + 该模式下,服务器将始终在收到请求时返回已提交的写操作。 + 这种类型的读取不会返回快照时间戳,并且不可重复。 + 用ACID术语表示,它对应于隔离模式:“读已提交”。 + + * 2、read_at_snapshot + 该模式下,服务器将尝试在提供的时间戳上执行读取。 + 如果未提供时间戳,则服务器将当前时间作为快照时间戳。 + 在这种模式下,读取是可重复的,即将来所有在相同时间戳记下的读取将产生相同的数据。 + 执行此操作的代价是等待时间戳小于快照的时间戳的正在进行的正在进行的事务,因此可能会导致延迟损失。用ACID术语,这本身就相当于隔离模式“可重复读取”。 + 如果对已扫描tablet的所有写入均在外部保持一致,则这对应于隔离模式“严格可序列化”。 + 注意:当前存在“空洞”,在罕见的边缘条件下会发生,通过这种空洞有时即使在采取措施使写入如此时,它们在外部也不一致。 + 在这些情况下,隔离可能会退化为“读取已提交”模式。 + + * 必选:是 + + * 默认值:无 + +* **authentication** + + * 描述: 认证方式,如:Kerberos。 + + * 必选:否 + + * 默认值:无 + +* **principal** + + * 描述: 用户名。 + + * 必选:否 + + * 默认值:无 + +* **keytabFile** + + * 描述: keytab文件路径。 + + * 必选:否 + + * 默认值:无 + +* **workerCount** + + * 描述: worker线程数。 + + * 必选:否 + + * 默认值:默认为cpu*2 + +* **bossCount** + + * 描述: boss线程数。 + + * 必选:否 + + * 默认值:1 + +* **operationTimeout** + + * 描述: 普通操作超时时间。 + + * 必选:否 + + * 默认值:30000 + +* **adminOperationTimeout** + + * 描述: 管理员操作(建表,删表)超时时间。 + + * 必选:否 + + * 默认值:30000 + +* **queryTimeout** + + * 描述: 连接scan token的超时时间。 + + * 必选:否 + + * 默认值:与operationTimeout一致 + +* **where** + + * 描述: 过滤条件字符串,多个以and连接。 + + * 必选:否 + + * 默认值:无 + +* **batchSizeBytes** + + * 描述: kudu scan一次性最大读取字节数。 + + * 必选:否 + + * 默认值:1048576 diff --git a/docs/kuduwriter.md b/docs/kuduwriter.md new file mode 100644 index 0000000000..0bbb3b0850 --- /dev/null +++ b/docs/kuduwriter.md @@ -0,0 +1,187 @@ +# Kudu写入插件(kuduwriter) + +## 1. 配置样例 + +``` +{ + "job": { + "content": [ + { + "writer": { + "parameter": { + "column": [ + { + "name": "id", + "type": "long" + } + ], + "masterAddresses": "kudu1:7051,kudu2:7051,kudu3:7051", + "table": "kudu", + "writeMode": "insert", + "flushMode": "manual_flush", + "batchInterval": 10000, + "authentication": "", + "principal": "", + "keytabFile": "", + "workerCount": 2, + "bossCount": 1, + "operationTimeout": 30000, + "adminOperationTimeout": 30000, + "queryTimeout": 30000, + "batchSizeBytes": 1048576 + }, + "reader": {} + } + ], + "setting": {} + } +} +``` + +## 2. 
参数说明 + +* **name** + + * 描述:插件名,此处填写插件名称,kuduwriter。 + + * 必选:是 + + * 默认值:无 + +* **column** + + * 描述:需要生成的字段。 + + * 属性说明: + + * name:字段名称; + + * type:字段类型; + + * 必选:是 + + * 默认值:无 + +* **masterAddresses** + + * 描述: master节点地址:端口,多个以,隔开。 + + * 必选:是 + + * 默认值:无 + +* **table** + + * 描述: kudu表名。 + + * 必选:是 + + * 默认值:无 + +* **writeMode** + + * 描述: kudu数据写入模式: + + * 1、insert + + * 2、update + + * 3、upsert + + * 必选:是 + + * 默认值:无 + +* **flushMode** + + * 描述: kudu session刷新模式: + + * 1、auto_flush_sync + + * 2、auto_flush_background + + * 3、manual_flush + + * 必选:否 + + * 默认值:auto_flush_sync + +* **batchInterval** + + * 描述: 单次批量写入数据条数 + + * 必选:否 + + * 默认值:1 + +* **authentication** + + * 描述: 认证方式,如:Kerberos。 + + * 必选:否 + + * 默认值:无 + +* **principal** + + * 描述: 用户名。 + + * 必选:否 + + * 默认值:无 + +* **keytabFile** + + * 描述: keytab文件路径。 + + * 必选:否 + + * 默认值:无 + +* **workerCount** + + * 描述: worker线程数。 + + * 必选:否 + + * 默认值:默认为cpu*2 + +* **bossCount** + + * 描述: boss线程数。 + + * 必选:否 + + * 默认值:1 + +* **operationTimeout** + + * 描述: 普通操作超时时间。 + + * 必选:否 + + * 默认值:30000 + +* **adminOperationTimeout** + + * 描述: 管理员操作(建表,删表)超时时间。 + + * 必选:否 + + * 默认值:30000 + +* **queryTimeout** + + * 描述: 连接scan token的超时时间。 + + * 必选:否 + + * 默认值:与operationTimeout一致 + +* **batchSizeBytes** + + * 描述: kudu scan一次性最大读取字节数。 + + * 必选:否 + + * 默认值:1048576 diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java new file mode 100644 index 0000000000..179c39b8f6 --- /dev/null +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
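The flushMode values documented above map one-to-one onto the Kudu client's SessionConfiguration, with auto_flush_sync as the fallback. A minimal sketch of that mapping, mirroring the session setup this patch applies in KuduOutputFormat later in the series:

```
// Minimal sketch: how the three documented flushMode values select a Kudu
// SessionConfiguration.FlushMode.
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.SessionConfiguration;

public class FlushModeSketch {
    public static KuduSession openSession(KuduClient client, String flushMode, int batchInterval) {
        KuduSession session = client.newSession();
        // Buffer as many operations as one batch before a flush.
        session.setMutationBufferSpace(batchInterval);
        switch (flushMode.toLowerCase()) {
            case "auto_flush_background":
                session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
                break;
            case "manual_flush":
                session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
                break;
            default:
                session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        }
        return session;
    }
}
```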
+ */ + +package com.dtstack.flinkx.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.PrintWriter; +import java.io.StringWriter; + +/** + * @author jiangbo + * @date 2019/8/17 + */ +public class ExceptionUtil { + + private static Logger logger = LoggerFactory.getLogger(ExceptionUtil.class); + + public static String getErrorMessage(Throwable e) { + StringWriter stringWriter = null; + PrintWriter writer = null; + try{ + stringWriter= new StringWriter(); + writer = new PrintWriter(stringWriter); + e.printStackTrace(writer); + writer.flush(); + stringWriter.flush(); + StringBuffer buffer= stringWriter.getBuffer(); + return buffer.toString(); + }catch(Throwable ee){ + logger.error("",ee); + + }finally { + if(writer!=null){ + writer.close(); + } + if(stringWriter!=null){ + try{ + stringWriter.close(); + }catch (Throwable ee){ + logger.error("",ee); + } + } + } + return null; + } +} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java index 493e5ce1a0..ea65d6c90c 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java @@ -27,32 +27,91 @@ */ public class KuduConfig implements Serializable { + /** + * master节点地址:端口,多个以,隔开 + */ private String masterAddresses; + /** + * 认证方式,如:Kerberos + */ private String authentication; + /** + * 用户名 + */ private String principal; + /** + * keytab文件路径 + */ private String keytabFile; + /** + * worker线程数,默认为cpu*2 + */ private Integer workerCount; + /** + * boss线程数,默认为1 + */ private Integer bossCount; + /** + * 设置普通操作超时时间,默认30S + */ private Long operationTimeout; + /** + * 设置管理员操作(建表,删表)超时时间,默认30S + */ private Long adminOperationTimeout; + /** + * 连接scan token的超时时间,如果不设置,则与operationTimeout一致 + */ private Long queryTimeout; + /** + * kudu表名 + */ private String table; + /** + * kudu读取模式: + * 1、READ_LATEST 默认的读取模式 + * 该模式下,服务器将始终在收到请求时返回已提交的写操作。这种类型的读取不会返回快照时间戳,并且不可重复。 + * 用ACID术语表示,它对应于隔离模式:“读已提交” + * + * 2、READ_AT_SNAPSHOT + * 该模式下,服务器将尝试在提供的时间戳上执行读取。如果未提供时间戳,则服务器将当前时间作为快照时间戳。 + * 在这种模式下,读取是可重复的,即将来所有在相同时间戳记下的读取将产生相同的数据。 + * 执行此操作的代价是等待时间戳小于快照的时间戳的正在进行的正在进行的事务,因此可能会导致延迟损失。用ACID术语,这本身就相当于隔离模式“可重复读取”。 + * 如果对已扫描tablet的所有写入均在外部保持一致,则这对应于隔离模式“严格可序列化”。 + * 注意:当前存在“空洞”,在罕见的边缘条件下会发生,通过这种空洞有时即使在采取措施使写入如此时,它们在外部也不一致。 + * 在这些情况下,隔离可能会退化为“读取已提交”模式。 + * 3、READ_YOUR_WRITES 不支持该模式 + */ private String readMode; + /** + * 过滤条件字符串,如:id >= 1 and time > 1565586665372 + */ private String filterString; + /** + * kudu scan一次性最大读取字节数,默认为1MB + */ private int batchSizeBytes; + /** + * writer写入时session刷新模式 + * auto_flush_sync(默认) + * auto_flush_background + * manual_flush + */ + private String flushMode; + public String getFilterString() { return filterString; } @@ -156,4 +215,12 @@ public Long getAdminOperationTimeout() { public void setAdminOperationTimeout(Long adminOperationTimeout) { this.adminOperationTimeout = adminOperationTimeout; } + + public String getFlushMode() { + return flushMode; + } + + public void setFlushMode(String flushMode) { + this.flushMode = flushMode; + } } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java index beafa11516..999434ac2a 100644 --- 
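The getErrorMessage helper above closes its writers by hand in a finally block. A hedged alternative with try-with-resources — a sketch of an equivalent, not what the patch does:

```
// Sketch of an equivalent helper using try-with-resources; PrintWriter.close()
// also closes the wrapped StringWriter, whose close() declares IOException.
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;

public class StackTraceSketch {
    public static String getErrorMessage(Throwable e) {
        try (StringWriter stringWriter = new StringWriter();
             PrintWriter writer = new PrintWriter(stringWriter)) {
            e.printStackTrace(writer);
            writer.flush();
            return stringWriter.toString();
        } catch (IOException ioe) {
            return null;
        }
    }
}
```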
a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java @@ -39,6 +39,7 @@ public final class KuduConfigBuilder { private Long queryTimeout; private String table; private String readMode; + private String flushMode; private String filterString; private int batchSizeBytes; @@ -106,6 +107,11 @@ public KuduConfigBuilder withReadMode(String readMode){ return this; } + public KuduConfigBuilder withFlushMode(String flushMode){ + this.flushMode = flushMode; + return this; + } + public KuduConfigBuilder withFilter(String filter){ this.filterString = filter; return this; @@ -134,6 +140,7 @@ public KuduConfig build() { kuduConfig.setQueryTimeout(queryTimeout); kuduConfig.setTable(table); kuduConfig.setReadMode(readMode); + kuduConfig.setFlushMode(flushMode); kuduConfig.setFilterString(filterString); kuduConfig.setBatchSizeBytes(batchSizeBytes); return kuduConfig; diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java index c2f3ae0255..ec89e847be 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java @@ -36,6 +36,7 @@ public class KuduConfigKeys { public final static String KEY_ADMIN_OPERATION_TIMEOUT = "adminOperationTimeout"; public final static String KEY_TABLE = "table"; public final static String KEY_READ_MODE = "readMode"; - public final static String KEY_FILTER = "filter"; + public final static String KEY_FLUSH_MODE = "flushMode"; + public final static String KEY_FILTER = "where"; public final static String KEY_BATCH_SIZE_BYTES = "batchSizeBytes"; } diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index 4a802a43d7..6ac58227c7 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -20,7 +20,6 @@ package com.dtstack.flinkx.kudu.core; import com.dtstack.flinkx.reader.MetaColumn; -import com.google.common.collect.Lists; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.math.NumberUtils; import org.apache.hadoop.security.UserGroupInformation; @@ -32,10 +31,7 @@ import java.math.BigDecimal; import java.security.PrivilegedExceptionAction; import java.sql.Timestamp; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -75,14 +71,13 @@ private static KuduClient getKuduClientInternal(KuduConfig config) { .syncClient(); } - public static List getKuduScanToken(KuduConfig config, List columns, - String filterString) throws IOException{ - KuduClient client = null; - try { - client = getKuduClient(config); + public static List getKuduScanToken(KuduConfig config, List columns, String filterString) throws IOException{ + try ( + KuduClient client = getKuduClient(config) + ) { KuduTable kuduTable = client.openTable(config.getTable()); - List columnNames = Lists.newArrayList(); + List columnNames = new ArrayList<>(columns.size()); for (MetaColumn column : 
columns) { columnNames.add(column.getName()); } @@ -93,15 +88,12 @@ public static List getKuduScanToken(KuduConfig config, List scanTokens = KuduUtil.getKuduScanToken(kuduConfig, columns, kuduConfig.getFilterString()); KuduTableSplit[] inputSplits = new KuduTableSplit[scanTokens.size()]; for (int i = 0; i < scanTokens.size(); i++) { @@ -124,15 +130,17 @@ public InputSplit[] createInputSplits(int minNumSplits) throws IOException { @Override public boolean reachedEnd() throws IOException { - if(iterator == null || !iterator.hasNext()){ + LOG.info("execute reachedEnd, indexOfSubtask = {}", indexOfSubtask); + if (iterator == null || !iterator.hasNext()) { return getNextRows(); } return false; } - private boolean getNextRows() throws IOException{ - if(scanner.hasMoreRows()){ + private boolean getNextRows() throws IOException { + LOG.info("execute getNextRows, scanner is closed : {}", scanner.isClosed()); + if (scanner.hasMoreRows()) { iterator = scanner.nextRows(); } @@ -141,7 +149,8 @@ private boolean getNextRows() throws IOException{ @Override protected void closeInternal() throws IOException { - if(scanner != null){ + LOG.info("execute closeInternal, indexOfSubtask = {}", indexOfSubtask); + if (scanner != null) { scanner.close(); scanner = null; } @@ -151,7 +160,7 @@ protected void closeInternal() throws IOException { public void closeInputFormat() throws IOException { super.closeInputFormat(); - if (client != null){ + if (client != null) { client.close(); client = null; } diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java index 872cdd346f..5cbbd82fb9 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java @@ -25,6 +25,7 @@ import com.dtstack.flinkx.kudu.core.KuduUtil; import com.dtstack.flinkx.outputformat.RichOutputFormat; import com.dtstack.flinkx.reader.MetaColumn; +import com.dtstack.flinkx.util.ExceptionUtil; import org.apache.flink.types.Row; import org.apache.kudu.client.*; @@ -58,7 +59,19 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { } session = client.newSession(); + session.setMutationBufferSpace(batchInterval); kuduTable = client.openTable(kuduConfig.getTable()); + + switch (kuduConfig.getFlushMode().toLowerCase()){ + case "auto_flush_background": + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + break; + case "manual_flush": + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + break; + default: + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + } } @Override @@ -66,6 +79,7 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException { writeData(row); if(numWriteCounter.getLocalValue() % batchInterval == 0){ + LOG.info("writeSingleRecordInternal, numWriteCounter = {}", numWriteCounter.getLocalValue()); try { session.flush(); } catch (KuduException e) { @@ -86,6 +100,7 @@ private void writeData(Row row) throws WriteRecordException { session.apply(operation); } catch (Exception e){ + LOG.error("Write data error, index = {}, row = {}, e = {}", index, row, ExceptionUtil.getErrorMessage(e)); throw new WriteRecordException("Write data error", e, index, row); } } @@ -104,10 +119,10 @@ private Operation getOperation(){ @Override protected void 
writeMultipleRecordsInternal() throws Exception { + LOG.info("writeRecordInternal, row size = {}", rows.size()); for (Row row : rows) { writeData(row); } - session.flush(); } diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java index 8c7b9b5e0c..2a4e626b3b 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java @@ -47,6 +47,8 @@ public class KuduWriter extends DataWriter { private String writeMode; + private int batchInterval; + public KuduWriter(DataTransferConfig config) { super(config); @@ -54,6 +56,7 @@ public KuduWriter(DataTransferConfig config) { columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); writeMode = parameterConfig.getStringVal("writeMode"); + batchInterval = parameterConfig.getIntVal("batchInterval", 1); kuduConfig = KuduConfigBuilder.getInstance() .withMasterAddresses(parameterConfig.getStringVal(KEY_MASTER_ADDRESSES)) .withAuthentication(parameterConfig.getStringVal(KEY_AUTHENTICATION)) @@ -64,6 +67,7 @@ public KuduWriter(DataTransferConfig config) { .withOperationTimeout(parameterConfig.getLongVal(KEY_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) .withAdminOperationTimeout(parameterConfig.getLongVal(KEY_ADMIN_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) .withTable(parameterConfig.getStringVal(KEY_TABLE)) + .withFlushMode(parameterConfig.getStringVal(KEY_FLUSH_MODE)) .build(); } @@ -74,6 +78,7 @@ public DataStreamSink writeData(DataStream dataSet) { builder.setColumns(columns); builder.setKuduConfig(kuduConfig); builder.setWriteMode(writeMode); + builder.setBatchInterval(batchInterval); DtOutputFormatSinkFunction formatSinkFunction = new DtOutputFormatSinkFunction(builder.finish()); DataStreamSink dataStreamSink = dataSet.addSink(formatSinkFunction); From 171b53fa83cea6d958292606a5d5717b31a3931c Mon Sep 17 00:00:00 2001 From: tudou Date: Sun, 29 Sep 2019 13:49:24 +0800 Subject: [PATCH 13/62] =?UTF-8?q?=E9=98=B2=E6=AD=A2=E8=BF=87=E6=BB=A4?= =?UTF-8?q?=E6=9D=A1=E4=BB=B6NPE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index 6ac58227c7..14f7177d34 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -117,9 +117,11 @@ private static void addPredicates(KuduScanToken.KuduScanTokenBuilder builder, St String[] filters = filterString.split(FILTER_SPLIT_REGEX); for (String filter : filters) { - ExpressResult expressResult = parseExpress(filter, nameTypeMap); - KuduPredicate predicate = KuduPredicate.newComparisonPredicate(expressResult.getColumnSchema(), expressResult.getOp(), expressResult.getValue()); - builder.addPredicate(predicate); + if(StringUtils.isNotBlank(filter)){ + ExpressResult expressResult = parseExpress(filter, nameTypeMap); + KuduPredicate predicate = KuduPredicate.newComparisonPredicate(expressResult.getColumnSchema(), 
expressResult.getOp(), expressResult.getValue()); + builder.addPredicate(predicate); + } } } From 0e96f92035da420773ee78b84e3681df70b85dbe Mon Sep 17 00:00:00 2001 From: jiangbo Date: Sun, 29 Sep 2019 15:53:35 +0800 Subject: [PATCH 14/62] =?UTF-8?q?[=E6=95=B0=E6=8D=AE=E5=90=8C=E6=AD=A5perj?= =?UTF-8?q?ob=E6=A8=A1=E5=BC=8F=E8=BF=90=E8=A1=8C=E6=97=B6=EF=BC=8Capplica?= =?UTF-8?q?tionMaster=E5=A4=B1=E8=B4=A5=E9=87=8D=E8=AF=95=E5=90=8E?= =?UTF-8?q?=E6=B2=A1=E6=9C=89=E6=B8=85=E9=99=A4.data=E7=9B=AE=E5=BD=95][18?= =?UTF-8?q?574]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index 08bb2d7912..825d2c7ea5 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -157,7 +157,7 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { } private void beforeWrite(){ - if(numTasks > 0){ + if(taskNumber > 0){ waitBeforeWrite(); return; } From 1c313bd14a21f64f9d67e95bc897b46ed9a2fd4c Mon Sep 17 00:00:00 2001 From: jiangbo Date: Wed, 9 Oct 2019 10:41:22 +0800 Subject: [PATCH 15/62] =?UTF-8?q?[=E5=86=99=E5=85=A5hdfs=E5=89=8D=E6=B8=85?= =?UTF-8?q?=E9=99=A4.data=E7=9B=AE=E5=BD=95=E9=94=99=E8=AF=AF]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index 825d2c7ea5..8aadc9da85 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -170,7 +170,7 @@ private void beforeWrite(){ } try { - fs.create(new Path(tmpPath + SP + ACTION_FINISHED_TAG)); + fs.create(new Path(outputFilePath + SP + DATA_SUBDIR + SP + ACTION_FINISHED_TAG)); } catch (Exception e){ throw new RuntimeException("Clean .data dir error:", e); } @@ -178,7 +178,7 @@ private void beforeWrite(){ private void waitBeforeWrite(){ try { - Path path = new Path(tmpPath + SP + ACTION_FINISHED_TAG); + Path path = new Path(outputFilePath + SP + DATA_SUBDIR + SP + ACTION_FINISHED_TAG); boolean readyWrite = fs.exists(path); int n = 0; while (!readyWrite){ From 7c33571588312965737d81bf7c61602efe0880b3 Mon Sep 17 00:00:00 2001 From: tudou Date: Wed, 9 Oct 2019 11:40:23 +0800 Subject: [PATCH 16/62] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=90=84=E7=A7=8Dbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/db2/format/Db2InputFormat.java | 25 ++++++++++++ .../db2/{ => format}/Db2OutputFormat.java | 2 +- .../flinkx/gbase/format/GbaseInputFormat.java | 27 ++++++++++++- .../com/dtstack/flinkx/rdb/util/DBUtil.java | 40 ++++++++++--------- .../JdbcInputFormat.java | 6 +-- pom.xml | 30 +++++++------- 6 files changed, 89 insertions(+), 41 deletions(-) 
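Patches 14 and 15 above repair the .data-cleanup barrier in HdfsOutputFormat: the gate must key on taskNumber rather than numTasks, and the .actionFinished tag must live under the shared .data directory rather than each task's tmpPath. Pulling the fixed logic together, a sketch of the handshake — names mirror the patch, but the consolidated class is illustrative:

```
// Illustrative consolidation of the barrier after both fixes: subtask 0
// clears the work directories and drops a tag file; the other subtasks poll
// for the tag for up to 60 seconds before writing.
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteBarrierSketch {
    public static void enter(FileSystem fs, String outputFilePath, int taskNumber) throws Exception {
        Path tag = new Path(outputFilePath + "/.data/.actionFinished");
        if (taskNumber == 0) {
            fs.delete(new Path(outputFilePath + "/.finished"), true); // recursive delete
            fs.delete(new Path(outputFilePath + "/.data"), true);
            fs.create(tag).close();
        } else {
            int n = 0;
            while (!fs.exists(tag)) {
                if (n++ > 60) {
                    throw new RuntimeException("Wait action finished before write timeout");
                }
                Thread.sleep(1000);
            }
        }
    }
}
```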
rename flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/{ => format}/Db2OutputFormat.java (98%) diff --git a/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java index 7434e362b0..81672c3267 100644 --- a/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java +++ b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java @@ -18,6 +18,11 @@ package com.dtstack.flinkx.db2.format; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import org.apache.flink.types.Row; + +import java.io.IOException; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; /** * Date: 2019/09/20 @@ -26,4 +31,24 @@ * @author tudou */ public class Db2InputFormat extends JdbcInputFormat { + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } } diff --git a/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java similarity index 98% rename from flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java rename to flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java index ecb005635e..b2a5ff040b 100644 --- a/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/Db2OutputFormat.java +++ b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
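Db2InputFormat above and GbaseInputFormat in the next hunk gain the same CLOB-aware nextRecordInternal override. If that duplication spreads to more dialects, a shared helper could host the row mapping; a hypothetical sketch, not part of this patch:

```
// Hypothetical helper hosting the duplicated CLOB-to-String row mapping
// from Db2InputFormat and GbaseInputFormat.
import com.dtstack.flinkx.rdb.util.DBUtil;
import org.apache.flink.types.Row;

import java.io.IOException;
import java.sql.ResultSet;

public final class ClobRowMapper {
    private ClobRowMapper() {
    }

    public static Row read(ResultSet resultSet, int columnCount) throws IOException {
        Row row = new Row(columnCount);
        try {
            for (int pos = 0; pos < columnCount; pos++) {
                Object obj = resultSet.getObject(pos + 1);
                if (obj != null) {
                    obj = DBUtil.clobToString(obj); // from flinkx-rdb-core
                }
                row.setField(pos, obj);
            }
            return row;
        } catch (Exception e) {
            throw new IOException("Couldn't read data - " + e.getMessage(), e);
        }
    }
}
```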
*/ -package com.dtstack.flinkx.db2; +package com.dtstack.flinkx.db2.format; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; diff --git a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java index 02223df8f9..942332a675 100644 --- a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java +++ b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java @@ -23,12 +23,15 @@ import com.dtstack.flinkx.util.ClassUtil; import org.apache.commons.lang3.StringUtils; import org.apache.flink.core.io.InputSplit; +import org.apache.flink.types.Row; import java.io.IOException; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + /** * Date: 2019/09/20 * Company: www.dtstack.com @@ -52,7 +55,6 @@ public void openInternal(InputSplit inputSplit) throws IOException { if(!canReadData(inputSplit)){ LOG.warn("Not read data when the start location are equal to end location"); - hasNext = false; return; } @@ -61,8 +63,8 @@ public void openInternal(InputSplit inputSplit) throws IOException { // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 dbConn.setAutoCommit(false); - statement.setFetchSize(Integer.MIN_VALUE); Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); + statement.setFetchSize(Integer.MIN_VALUE); statement.setFetchSize(fetchSize); statement.setQueryTimeout(queryTimeOut); String querySql = buildQuerySql(inputSplit); @@ -91,4 +93,25 @@ public void openInternal(InputSplit inputSplit) throws IOException { LOG.info("JdbcInputFormat[{}]open: end", jobName); } + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } } diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java index b007642e5c..7e708ed040 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java @@ -124,27 +124,31 @@ private static Connection getConnectionInternal(String url, String username, Str * @throws SQLException */ public static Connection getConnection(String url, String username, String password) throws SQLException { - boolean failed = true; - Connection dbConn = null; - for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) { - try { - dbConn = getConnectionInternal(url, username, password); - dbConn.createStatement().execute("select 111"); - failed = false; - } catch (Exception e) { - if (dbConn != null) { - dbConn.close(); - } - - if (i == MAX_RETRY_TIMES - 1) { - throw e; - } else { - SysUtil.sleep(3000); + if (!url.startsWith("jdbc:mysql")) { + return getConnectionInternal(url, username, password); + } else { + boolean failed = true; + Connection dbConn = null; + for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) { + try { + 
dbConn = getConnectionInternal(url, username, password); + dbConn.createStatement().execute("select 111"); + failed = false; + } catch (Exception e) { + if (dbConn != null) { + dbConn.close(); + } + + if (i == MAX_RETRY_TIMES - 1) { + throw e; + } else { + SysUtil.sleep(3000); + } } } - } - return dbConn; + return dbConn; + } } /** diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 3ba932e01d..c2c3abfad2 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -26,10 +26,7 @@ import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; -import com.dtstack.flinkx.util.URLUtil; +import com.dtstack.flinkx.util.*; import com.google.gson.Gson; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.Accumulator; @@ -163,7 +160,6 @@ public void openInternal(InputSplit inputSplit) throws IOException { if(!canReadData(inputSplit)){ LOG.warn("Not read data when the start location are equal to end location"); - hasNext = false; return; } diff --git a/pom.xml b/pom.xml index be479144b1..1378ebe2b4 100644 --- a/pom.xml +++ b/pom.xml @@ -12,29 +12,29 @@ flinkx-core flinkx-rdb - - - - + flinkx-es + flinkx-ftp + flinkx-odps + flinkx-examples flinkx-mysql - - - + flinkx-hbase + flinkx-hdfs + flinkx-hive flinkx-oracle flinkx-sqlserver - - + flinkx-redis + flinkx-mongodb flinkx-postgresql flinkx-launcher - - + flinkx-stream + flinkx-carbondata flinkx-db2 flinkx-test flinkx-gbase - - - - + flinkx-binlog + flinkx-kafka09 + flinkx-kafka10 + flinkx-kafka11
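(A hedged aside, not part of the patch stream: the DBUtil.getConnection hunk above makes the probe-and-retry path MySQL-only. Below is a minimal self-contained sketch of that pattern; the class name, the MAX_RETRY_TIMES value of 3, and the plain DriverManager calls are illustrative stand-ins for FlinkX's internal helpers, while the "select 111" probe and the 3-second backoff are taken from the hunk itself.)

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

// Sketch of the MySQL-only probe-and-retry connection pattern from the hunk above.
public final class RetryingConnect {

    private static final int MAX_RETRY_TIMES = 3; // illustrative value

    public static Connection getConnection(String url, String user, String pass) throws SQLException {
        if (!url.startsWith("jdbc:mysql")) {
            // Non-MySQL databases: connect once, no validation round-trip.
            return DriverManager.getConnection(url, user, pass);
        }

        SQLException last = null;
        for (int i = 0; i < MAX_RETRY_TIMES; i++) {
            Connection conn = null;
            try {
                conn = DriverManager.getConnection(url, user, pass);
                // Validate with a trivial query before handing the connection out.
                conn.createStatement().execute("select 111");
                return conn;
            } catch (SQLException e) {
                last = e;
                if (conn != null) {
                    conn.close(); // discard the half-open connection
                }
                try {
                    Thread.sleep(3000L); // back off before the next attempt
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        throw last; // retries exhausted: surface the last failure
    }
}

Probing with a throwaway query surfaces broken connections at acquisition time rather than mid-read, which is why the retry loop is worth its cost on flaky MySQL endpoints.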
From b755387d05b07e7f943f1e1e70361c2f62f78761 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Thu, 10 Oct 2019 10:23:26 +0800 Subject: [PATCH 17/62] Remove openInternal from PostgreSqlInputFormat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../format/PostgresqlInputFormat.java | 57 ------------------- 1 file changed, 57 deletions(-) diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java index 63abdd400e..7bf705bbeb 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java @@ -42,63 +42,6 @@ */ public class PostgresqlInputFormat extends JdbcInputFormat { - @Override - public void openInternal(InputSplit inputSplit) throws IOException { - try { - LOG.info(inputSplit.toString()); - - ClassUtil.forName(drivername, getClass().getClassLoader()); - - if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){ - getMaxValue(inputSplit); - } - - initMetric(inputSplit); - - if(!canReadData(inputSplit)){ - LOG.warn("Not read data when the start location are equal to end location"); - - hasNext = false; - return; - } - - dbConn = DBUtil.getConnection(dbURL, username, password); - - // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 - dbConn.setAutoCommit(false); - - // 读取前先提交事务,确保程序异常退出时,下次再读取PG时的顺序不变 - dbConn.commit(); - Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); - statement.setFetchSize(fetchSize); - statement.setQueryTimeout(queryTimeOut); - String querySql = buildQuerySql(inputSplit); - resultSet = statement.executeQuery(querySql); - columnCount = resultSet.getMetaData().getColumnCount(); - - boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); - if(splitWithRowCol){ - columnCount = columnCount-1; - } - - hasNext = resultSet.next(); - - if (StringUtils.isEmpty(customSql)){ - descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns); - } else { - descColumnTypeList = new ArrayList<>(); - for (MetaColumn metaColumn : metaColumns) { - descColumnTypeList.add(metaColumn.getName()); - } - } - - } catch (SQLException se) { - throw new IllegalArgumentException("open() failed." + se.getMessage(), se); - } - - LOG.info("JdbcInputFormat[{}]open: end", jobName); - } - @Override public Row nextRecordInternal(Row row) throws IOException { if (!hasNext) { return null; } From 6f4189d0538d540d8e8def22f8c6ee564d00d173 Mon Sep 17 00:00:00 2001 From: tudou Date: Thu, 10 Oct 2019 14:00:48 +0800 Subject: [PATCH 18/62] [Offline computing] Use CheckpointListener to ensure checkpoint completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api/functions/sink/DtOutputFormatSinkFunction.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java index 72c7f22bcc..8f67fe20b9 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java @@ -32,6 +32,7 @@ import org.apache.flink.api.java.typeutils.InputTypeConfigurable; import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.CheckpointListener; import org.apache.flink.runtime.state.FunctionInitializationContext; import org.apache.flink.runtime.state.FunctionSnapshotContext; import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; @@ -52,7 +53,7 @@ */ @PublicEvolving @Deprecated -public class DtOutputFormatSinkFunction extends OutputFormatSinkFunction implements CheckpointedFunction { +public class DtOutputFormatSinkFunction extends OutputFormatSinkFunction implements CheckpointedFunction, CheckpointListener { private static final long serialVersionUID = 1L; @@ -135,6 +136,12 @@ private void cleanup() { @Override public void snapshotState(FunctionSnapshotContext context) throws Exception { + + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + LOG.info("notifyCheckpointComplete checkpointId = {}", checkpointId); FormatState formatState = ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).getFormatState(); if (formatState != null){ LOG.info("OutputFormat format state:{}", formatState.toString());
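(A hedged aside, not part of the patch stream: patch 18's point is to defer sink-state publication from snapshotState to notifyCheckpointComplete, so state is recorded only once the checkpoint is globally confirmed. The sketch below shows that pattern in isolation; CheckpointedFunction, CheckpointListener, and their signatures are the real Flink interfaces used in the hunk above, while the class and its staging/commit helpers are illustrative.)

import org.apache.flink.runtime.state.CheckpointListener;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

// Illustrative sink: stage writes at snapshot time, but treat them as durable
// only after notifyCheckpointComplete confirms the checkpoint succeeded.
public class CompletionAwareSink<T> extends RichSinkFunction<T>
        implements CheckpointedFunction, CheckpointListener {

    private transient long pendingCheckpointId = -1L;

    @Override
    public void invoke(T value) throws Exception {
        // write value to the external system's staging area
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // Remember which checkpoint the staged data belongs to, but do NOT
        // commit yet: the checkpoint may still fail on another task.
        pendingCheckpointId = context.getCheckpointId();
    }

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        // restore any staged-but-uncommitted state here
    }

    @Override
    public void notifyCheckpointComplete(long checkpointId) throws Exception {
        if (checkpointId >= pendingCheckpointId) {
            // The checkpoint is globally complete; committing is now safe.
            commitStagedData();
        }
    }

    private void commitStagedData() {
        // illustrative: flush/commit the staged batch to the external system
    }
}

This is the same two-phase shape the hunk gives DtOutputFormatSinkFunction, and why patch 19's revert (next message) restores the old single-phase snapshotState behavior.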
From 5bdd1ec6d2b197621b2480f3b8786f9271a3cf27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B1=9F=E5=8D=9A=5F=E6=B1=9F=E5=8D=9A?= Date: Thu, 10 Oct 2019 15:19:21 +0800 Subject: [PATCH 19/62] Revert "[Offline computing] Use CheckpointListener to ensure checkpoint completion" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6f4189d0538d540d8e8def22f8c6ee564d00d173 --- .../api/functions/sink/DtOutputFormatSinkFunction.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java index 8f67fe20b9..72c7f22bcc 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java @@ -32,7 +32,6 @@ import org.apache.flink.api.java.typeutils.InputTypeConfigurable; import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.state.CheckpointListener; import org.apache.flink.runtime.state.FunctionInitializationContext; import org.apache.flink.runtime.state.FunctionSnapshotContext; import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; @@ -53,7 +52,7 @@ */ @PublicEvolving @Deprecated -public class DtOutputFormatSinkFunction extends OutputFormatSinkFunction implements CheckpointedFunction, CheckpointListener { +public class DtOutputFormatSinkFunction extends OutputFormatSinkFunction implements CheckpointedFunction { private static final long serialVersionUID = 1L; @@ -136,12 +135,6 @@ private void cleanup() { @Override public void snapshotState(FunctionSnapshotContext context) throws Exception { - - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - LOG.info("notifyCheckpointComplete checkpointId = {}", checkpointId); FormatState formatState = ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).getFormatState(); if (formatState != null){ LOG.info("OutputFormat format state:{}", formatState.toString()); From 82eb7531a177bd8c1dbc50738bcd70cfcaedd814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B1=9F=E5=8D=9A=5F=E6=B1=9F=E5=8D=9A?= Date: Thu, 10 Oct 2019 15:22:20 +0800 Subject: [PATCH 20/62] Revert "Merge branch 'feature_1.5_dev_codeOptimization' into '1.5_dev' " This reverts merge request !104 --- .../flinkx/inputformat/RichInputFormat.java | 7 +- .../com/dtstack/flinkx/util/ClassUtil.java | 2 +- .../dtstack/flinkx/util/ExceptionUtil.java | 23 +- .../dtstack/flinkx/db2/Db2DatabaseMeta.java | 2 + .../flinkx/db2/format/Db2InputFormat.java | 54 --- .../flinkx/db2/format/Db2OutputFormat.java | 59 --- .../flinkx/gbase/format/GbaseInputFormat.java | 117 ----- .../gbase/format/GbaseOutputFormat.java | 29 -- .../flinkx/gbase/writer/GbaseWriter.java | 2 - .../mysqld/format/MysqldInputFormat.java | 135 ------ .../flinkx/mysqld/reader/MysqldReader.java | 47 -- .../flinkx/mysql/format/MysqlInputFormat.java | 136 ------ .../flinkx/mysql/reader/MysqlReader.java | 5 +- .../mysql/format/MysqlOutputFormat.java | 29 -- .../flinkx/mysql/writer/MysqlWriter.java | 5 - .../oracle/format/OracleInputFormat.java | 87 ---- .../oracle/format/OracleOutputFormat.java | 102 -----
.../format/PostgresqlInputFormat.java | 69 --- .../reader/PostgresqlQuerySqlBuilder.java | 78 ---- .../postgresql/reader/PostgresqlReader.java | 35 -- .../PostgresqlOutputFormat.java | 38 +- .../postgresql/writer/PostgresqlWriter.java | 1 - .../com/dtstack/flinkx/rdb/DataSource.java | 17 - .../flinkx/rdb/ParameterValuesProvider.java | 17 - .../flinkx/rdb/loader/JdbcFormatLoader.java | 89 ---- .../rdb/type/TypeConverterInterface.java | 6 - .../com/dtstack/flinkx/rdb/util/DBUtil.java | 403 +++++++++++------- .../DistributedJdbcDataReader.java | 17 +- .../IncrementConfig.java | 12 - .../JdbcDataReader.java | 8 +- .../QuerySqlBuilder.java | 52 ++- .../DistributedJdbcInputFormat.java | 34 +- .../DistributedJdbcInputFormatBuilder.java | 7 +- .../JdbcInputFormat.java | 281 +++--------- .../JdbcInputFormatBuilder.java | 6 +- .../JdbcDataWriter.java | 6 +- .../rdb/outputformat/JdbcOutputFormat.java | 83 +++- .../outputformat/JdbcOutputFormatBuilder.java | 6 +- .../format/SqlserverInputFormat.java | 112 ----- .../format/SqlserverOutputFormat.java | 29 -- 40 files changed, 468 insertions(+), 1779 deletions(-) delete mode 100644 flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java delete mode 100644 flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java delete mode 100644 flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java delete mode 100644 flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java delete mode 100644 flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java delete mode 100644 flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java delete mode 100644 flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java delete mode 100644 flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java delete mode 100644 flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java delete mode 100644 flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java delete mode 100644 flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java rename flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/{format => writer}/PostgresqlOutputFormat.java (75%) delete mode 100644 flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java delete mode 100644 flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java delete mode 100644 flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java index ac407f8730..973f0d61ea 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -24,6 +24,7 @@ import com.dtstack.flinkx.metrics.BaseMetric; import com.dtstack.flinkx.reader.ByteRateLimiter; import com.dtstack.flinkx.restore.FormatState; +import org.apache.commons.lang.StringUtils; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.api.common.io.DefaultInputSplitAssigner; import org.apache.flink.api.common.io.statistics.BaseStatistics; @@ -32,7 +33,6 @@ import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import java.io.IOException; import java.util.Arrays; import java.util.Map; @@ -57,9 +57,6 @@ public abstract class RichInputFormat extends org.apache.flink.api.common.io.Ric protected long bytes; protected ByteRateLimiter byteRateLimiter; - /** - * 断点续传配置 - */ protected RestoreConfig restoreConfig; protected FormatState formatState; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java index 856b02f816..487c9b6ff5 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java index e6f91a66c7..179c39b8f6 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java @@ -32,11 +32,6 @@ public class ExceptionUtil { private static Logger logger = LoggerFactory.getLogger(ExceptionUtil.class); - /** - * 获取错误的堆栈信息 - * @param e throwable - * @return 堆栈信息 - */ public static String getErrorMessage(Throwable e) { StringWriter stringWriter = null; PrintWriter writer = null; @@ -52,16 +47,16 @@ public static String getErrorMessage(Throwable e) { logger.error("",ee); }finally { - if(writer!=null){ - writer.close(); - } - if(stringWriter!=null){ - try{ - stringWriter.close(); - }catch (Throwable ee){ - logger.error("",ee); + if(writer!=null){ + writer.close(); + } + if(stringWriter!=null){ + try{ + stringWriter.close(); + }catch (Throwable ee){ + logger.error("",ee); + } } - } } return null; } diff --git a/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java b/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java index 2db50637ce..a549f7fdea 100644 --- a/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java +++ b/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java @@ -20,7 +20,9 @@ import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.BaseDatabaseMeta; +import org.apache.commons.lang3.StringUtils; +import java.util.ArrayList; import java.util.List; /** diff --git a/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java deleted file mode 100644 index 81672c3267..0000000000 --- a/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.dtstack.flinkx.db2.format; - -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; -import org.apache.flink.types.Row; - -import java.io.IOException; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class Db2InputFormat extends JdbcInputFormat { - @Override - public Row nextRecordInternal(Row row) throws IOException { - if (!hasNext) { - return null; - } - row = new Row(columnCount); - try { - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - return super.nextRecordInternal(row); - }catch (Exception e) { - throw new IOException("Couldn't read data - " + e.getMessage(), e); - } - } -} diff --git a/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java deleted file mode 100644 index b2a5ff040b..0000000000 --- a/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.db2.format; - -import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; - -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class Db2OutputFormat extends JdbcOutputFormat { - - @Override - protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { - Map> map = new HashMap<>(); - ResultSet rs = dbConn.getMetaData().getIndexInfo(null, null, table.toUpperCase(), true, false); - while(rs.next()) { - String indexName = rs.getString("INDEX_NAME"); - if(!map.containsKey(indexName)) { - map.put(indexName,new ArrayList<>()); - } - map.get(indexName).add(rs.getString("COLUMN_NAME")); - } - Map> retMap = new HashMap<>(); - for(Map.Entry> entry: map.entrySet()) { - String k = entry.getKey(); - List v = entry.getValue(); - if(v!=null && v.size() != 0 && v.get(0) != null) { - retMap.put(k, v); - } - } - return retMap; - } -} diff --git a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java deleted file mode 100644 index 942332a675..0000000000 --- a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.gbase.format; - -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; -import com.dtstack.flinkx.rdb.util.DBUtil; -import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.core.io.InputSplit; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class GbaseInputFormat extends JdbcInputFormat { - - @Override - public void openInternal(InputSplit inputSplit) throws IOException { - try { - LOG.info(inputSplit.toString()); - - ClassUtil.forName(drivername, getClass().getClassLoader()); - - if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){ - getMaxValue(inputSplit); - } - - initMetric(inputSplit); - - if(!canReadData(inputSplit)){ - LOG.warn("Not read data when the start location are equal to end location"); - hasNext = false; - return; - } - - dbConn = DBUtil.getConnection(dbURL, username, password); - - // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 - dbConn.setAutoCommit(false); - Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); - statement.setFetchSize(Integer.MIN_VALUE); - statement.setFetchSize(fetchSize); - statement.setQueryTimeout(queryTimeOut); - String querySql = buildQuerySql(inputSplit); - resultSet = statement.executeQuery(querySql); - columnCount = resultSet.getMetaData().getColumnCount(); - - boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); - if(splitWithRowCol){ - columnCount = columnCount-1; - } - - hasNext = resultSet.next(); - - if (StringUtils.isEmpty(customSql)){ - descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns); - } else { - descColumnTypeList = new ArrayList<>(); - for (MetaColumn metaColumn : metaColumns) { - descColumnTypeList.add(metaColumn.getName()); - } - } - - } catch (SQLException se) { - throw new IllegalArgumentException("open() failed." + se.getMessage(), se); - } - - LOG.info("JdbcInputFormat[{}]open: end", jobName); - } - - @Override - public Row nextRecordInternal(Row row) throws IOException { - if (!hasNext) { - return null; - } - row = new Row(columnCount); - try { - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - return super.nextRecordInternal(row); - }catch (Exception e) { - throw new IOException("Couldn't read data - " + e.getMessage(), e); - } - } -} diff --git a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java deleted file mode 100644 index 3ac78d0036..0000000000 --- a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.dtstack.flinkx.gbase.format; - -import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class GbaseOutputFormat extends JdbcOutputFormat { -} diff --git a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java index 553179c507..2ba9a9e30e 100644 --- a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java +++ b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java @@ -22,7 +22,6 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.gbase.GbaseDatabaseMeta; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; -import com.dtstack.flinkx.rdb.util.DBUtil; /** * @author jiangbo @@ -33,6 +32,5 @@ public class GbaseWriter extends JdbcDataWriter { public GbaseWriter(DataTransferConfig config) { super(config); setDatabaseInterface(new GbaseDatabaseMeta()); - dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); } } diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java deleted file mode 100644 index beda91cc76..0000000000 --- a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.mysqld.format; - -import com.dtstack.flinkx.rdb.DataSource; -import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; -import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormat; -import com.dtstack.flinkx.rdb.util.DBUtil; -import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; -import org.apache.commons.collections.CollectionUtils; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.sql.SQLException; -import java.util.Arrays; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class MysqldInputFormat extends DistributedJdbcInputFormat { - - @Override - protected void openNextSource() throws SQLException { - DataSource currentSource = sourceList.get(sourceIndex); - currentConn = DBUtil.getConnection(currentSource.getJdbcUrl(), currentSource.getUserName(), currentSource.getPassword()); - currentConn.setAutoCommit(false); - String queryTemplate = new QuerySqlBuilder(databaseInterface, currentSource.getTable(),metaColumns,splitKey, - where, currentSource.isSplitByKey(), false, false).buildSql(); - currentStatement = currentConn.createStatement(resultSetType, resultSetConcurrency); - - if (currentSource.isSplitByKey()){ - String n = currentSource.getParameterValues()[0].toString(); - String m = currentSource.getParameterValues()[1].toString(); - queryTemplate = queryTemplate.replace("${N}",n).replace("${M}",m); - - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("Executing '%s' with parameters %s", queryTemplate, - Arrays.deepToString(currentSource.getParameterValues()))); - } - } - - currentStatement.setFetchSize(Integer.MIN_VALUE); - currentStatement.setQueryTimeout(queryTimeOut); - currentResultSet = currentStatement.executeQuery(queryTemplate); - columnCount = currentResultSet.getMetaData().getColumnCount(); - - if(descColumnTypeList == null) { - descColumnTypeList = DBUtil.analyzeTable(currentSource.getJdbcUrl(), currentSource.getUserName(), - currentSource.getPassword(),databaseInterface, currentSource.getTable(),metaColumns); - } - - LOG.info("open source: {} ,table: {}", currentSource.getJdbcUrl(), currentSource.getTable()); - } - - @Override - protected boolean readNextRecord() throws IOException { - try{ - if(currentConn == null){ - openNextSource(); - } - - hasNext = currentResultSet.next(); - if (hasNext){ - currentRecord = new Row(columnCount); - - for (int pos = 0; pos < currentRecord.getArity(); pos++) { - Object obj = currentResultSet.getObject(pos + 1); - if(obj != null) { - if(CollectionUtils.isNotEmpty(descColumnTypeList)) { - String columnType = descColumnTypeList.get(pos); - if("year".equalsIgnoreCase(columnType)) { - java.util.Date date = (java.util.Date) obj; - obj = DateUtil.dateToYearString(date); - } else if("tinyint".equalsIgnoreCase(columnType) - || "bit".equalsIgnoreCase(columnType)) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 
1 : 0); - } - } - } - obj = clobToString(obj); - } - currentRecord.setField(pos, obj); - } - - if(!"*".equals(metaColumns.get(0).getName())){ - for (int i = 0; i < columnCount; i++) { - Object val = currentRecord.getField(i); - if(val == null && metaColumns.get(i).getValue() != null){ - val = metaColumns.get(i).getValue(); - } - - if (val instanceof String){ - val = StringUtil.string2col(String.valueOf(val),metaColumns.get(i).getType(),metaColumns.get(i).getTimeFormat()); - currentRecord.setField(i,val); - } - } - } - } else { - if(sourceIndex + 1 < sourceList.size()){ - closeCurrentSource(); - sourceIndex++; - return readNextRecord(); - } - } - - return !hasNext; - }catch (SQLException se) { - throw new IOException("Couldn't read data - " + se.getMessage(), se); - } catch (Exception npe) { - throw new IOException("Couldn't access resultSet", npe); - } - } -} diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java index 4da26aab5c..56dbb38299 100644 --- a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java +++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java @@ -1,61 +1,14 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package com.dtstack.flinkx.mysqld.reader; import com.dtstack.flinkx.config.DataTransferConfig; -import com.dtstack.flinkx.config.ReaderConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; -import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.datareader.DistributedJdbcDataReader; -import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - public class MysqldReader extends DistributedJdbcDataReader { public MysqldReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); setDatabaseInterface(new MySqlDatabaseMeta()); } - - @Override - protected List buildConnections(){ - List sourceList = new ArrayList<>(connectionConfigs.size()); - for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { - String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) - ? username : connectionConfig.getUsername(); - String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0) - ? 
password : connectionConfig.getPassword(); - String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); - for (String table : connectionConfig.getTable()) { - DataSource source = new DataSource(); - source.setTable(table); - source.setUserName(curUsername); - source.setPassword(curPassword); - source.setJdbcUrl(curJdbcUrl); - - sourceList.add(source); - } - } - - return sourceList; - } } diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java deleted file mode 100644 index 6db68313ef..0000000000 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.dtstack.flinkx.mysql.format; - -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; -import com.dtstack.flinkx.rdb.util.DBUtil; -import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import com.dtstack.flinkx.util.DateUtil; -import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.core.io.InputSplit; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/19 - * Company: www.dtstack.com - * - * @author tudou - */ -public class MysqlInputFormat extends JdbcInputFormat { - - @Override - public void openInternal(InputSplit inputSplit) throws IOException { - try { - LOG.info(inputSplit.toString()); - - ClassUtil.forName(drivername, getClass().getClassLoader()); - - if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){ - getMaxValue(inputSplit); - } - - initMetric(inputSplit); - - if(!canReadData(inputSplit)){ - LOG.warn("Not read data when the start location are equal to end location"); - - hasNext = false; - return; - } - - dbConn = DBUtil.getConnection(dbURL, username, password); - - // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 - dbConn.setAutoCommit(false); - - Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); - - statement.setFetchSize(Integer.MIN_VALUE); - - statement.setQueryTimeout(queryTimeOut); - String querySql = buildQuerySql(inputSplit); - resultSet = statement.executeQuery(querySql); - columnCount = resultSet.getMetaData().getColumnCount(); - - boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); - 
if(splitWithRowCol){ - columnCount = columnCount-1; - } - - hasNext = resultSet.next(); - - if (StringUtils.isEmpty(customSql)){ - descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns); - } else { - descColumnTypeList = new ArrayList<>(); - for (MetaColumn metaColumn : metaColumns) { - descColumnTypeList.add(metaColumn.getName()); - } - } - - } catch (SQLException se) { - throw new IllegalArgumentException("open() failed. " + se.getMessage(), se); - } - - LOG.info("JdbcInputFormat[{}]open: end", jobName); - } - - @Override - public Row nextRecordInternal(Row row) throws IOException { - if (!hasNext) { - return null; - } - row = new Row(columnCount); - - try { - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - if(CollectionUtils.isNotEmpty(descColumnTypeList)) { - String columnType = descColumnTypeList.get(pos); - if("year".equalsIgnoreCase(columnType)) { - java.util.Date date = (java.util.Date) obj; - obj = DateUtil.dateToYearString(date); - } else if("tinyint".equalsIgnoreCase(columnType) - || "bit".equalsIgnoreCase(columnType)) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 1 : 0); - } - } - } - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - return super.nextRecordInternal(row); - }catch (Exception e) { - throw new IOException("Couldn't read data - " + e.getMessage(), e); - } - } - -} diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java index 27c30a6692..cd396e4454 100644 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java @@ -21,11 +21,8 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; -import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import java.util.Collections; - /** * MySQL reader plugin * @@ -37,6 +34,6 @@ public class MysqlReader extends JdbcDataReader { public MysqlReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); setDatabaseInterface(new MySqlDatabaseMeta()); - dbUrl = DBUtil.formatJdbcUrl(dbUrl, Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); } + } diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java deleted file mode 100644 index 41a2b9df75..0000000000 --- a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.dtstack.flinkx.mysql.format; - -import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; - -/** - * Date: 2019/09/19 - * Company: www.dtstack.com - * - * @author tudou - */ -public class MysqlOutputFormat extends JdbcOutputFormat { -} diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java index 52b3b28c85..15a5294592 100644 --- a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java +++ b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java @@ -21,9 +21,6 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; -import com.dtstack.flinkx.rdb.util.DBUtil; - -import java.util.Collections; /** * MySQL writer plugin @@ -36,8 +33,6 @@ public class MysqlWriter extends JdbcDataWriter { public MysqlWriter(DataTransferConfig config) { super(config); setDatabaseInterface(new MySqlDatabaseMeta()); - dbUrl = DBUtil.formatJdbcUrl(dbUrl, Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); - } } diff --git a/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java deleted file mode 100644 index 2821d45160..0000000000 --- a/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.oracle.format; - -import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; -import com.dtstack.flinkx.rdb.util.DBUtil; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.sql.Timestamp; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/19 - * Company: www.dtstack.com - * - * @author tudou - */ -public class OracleInputFormat extends JdbcInputFormat { - - @Override - public Row nextRecordInternal(Row row) throws IOException { - if (!hasNext) { - return null; - } - row = new Row(columnCount); - - try { - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - if((obj instanceof java.util.Date - || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { - obj = resultSet.getTimestamp(pos + 1); - } - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - return super.nextRecordInternal(row); - }catch (Exception e) { - throw new IOException("Couldn't read data - " + e.getMessage(), e); - } - } - - /** - * 构建时间边界字符串 - * @param location 边界位置(起始/结束) - * @param incrementColType 增量字段类型 - * @return - */ - @Override - protected String getTimeStr(Long location, String incrementColType){ - String timeStr; - Timestamp ts = new Timestamp(DBUtil.getMillis(location)); - ts.setNanos(DBUtil.getNanos(location)); - timeStr = DBUtil.getNanosTimeStr(ts.toString()); - - if(ColumnType.TIMESTAMP.name().equals(incrementColType)){ - timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr); - } else { - timeStr = timeStr.substring(0, 19); - timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr); - } - timeStr = String.format("'%s'",timeStr); - - return timeStr; - } -} diff --git a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java deleted file mode 100644 index 77f9d65502..0000000000 --- a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.oracle.format; - -import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; -import com.dtstack.flinkx.util.DateUtil; -import org.apache.flink.types.Row; - -import java.sql.*; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class OracleOutputFormat extends JdbcOutputFormat { - - @Override - protected Object getField(Row row, int index) { - Object field = super.getField(row, index); - String type = columnType.get(index); - - //oracle timestamp to oracle varchar or varchar2 or long field format - if (!(field instanceof Timestamp)){ - return field; - } - - if (type.equalsIgnoreCase(ColumnType.VARCHAR.name()) || type.equalsIgnoreCase(ColumnType.VARCHAR2.name())){ - SimpleDateFormat format = DateUtil.getDateTimeFormatter(); - field= format.format(field); - } - - if (type.equalsIgnoreCase(ColumnType.LONG.name()) ){ - field = ((Timestamp) field).getTime(); - } - return field; - } - - @Override - protected List probeFullColumns(String table, Connection dbConn) throws SQLException { - String schema =null; - - String[] parts = table.split("\\."); - if(parts.length == 2) { - schema = parts[0].toUpperCase(); - table = parts[1]; - } - - List ret = new ArrayList<>(); - ResultSet rs = dbConn.getMetaData().getColumns(null, schema, table, null); - while(rs.next()) { - ret.add(rs.getString("COLUMN_NAME")); - } - return ret; - } - - @Override - protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { - Map> map = new HashMap<>(); - PreparedStatement ps = dbConn.prepareStatement(String.format(GET_ORACLE_INDEX_SQL,table)); - ResultSet rs = ps.executeQuery(); - - while(rs.next()) { - String indexName = rs.getString("INDEX_NAME"); - if(!map.containsKey(indexName)) { - map.put(indexName,new ArrayList<>()); - } - map.get(indexName).add(rs.getString("COLUMN_NAME")); - } - Map> retMap = new HashMap<>(); - for(Map.Entry> entry: map.entrySet()) { - String k = entry.getKey(); - List v = entry.getValue(); - if(v!=null && v.size() != 0 && v.get(0) != null) { - retMap.put(k, v); - } - } - return retMap; - } -} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java deleted file mode 100644 index 7bf705bbeb..0000000000 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.dtstack.flinkx.postgresql.format; - -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; -import com.dtstack.flinkx.rdb.util.DBUtil; -import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.core.io.InputSplit; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class PostgresqlInputFormat extends JdbcInputFormat { - - @Override - public Row nextRecordInternal(Row row) throws IOException { - if (!hasNext) { - return null; - } - row = new Row(columnCount); - - try { - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - if(CollectionUtils.isNotEmpty(descColumnTypeList)) { - obj = typeConverter.convert(obj,descColumnTypeList.get(pos)); - } - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - return super.nextRecordInternal(row); - }catch (Exception e) { - throw new IOException("Couldn't read data - " + e.getMessage(), e); - } - } -} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java deleted file mode 100644 index 6868aeb8e4..0000000000 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.postgresql.reader; - -import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; -import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; -import org.apache.commons.lang3.StringUtils; - -import java.util.List; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class PostgresqlQuerySqlBuilder extends QuerySqlBuilder { - - public PostgresqlQuerySqlBuilder(JdbcDataReader reader){ - super(reader); - } - - @Override - protected String buildQuerySql(){ - List selectColumns = buildSelectColumns(databaseInterface, metaColumns); - boolean splitWithRowNum = addRowNumColumn(databaseInterface, selectColumns, isSplitByKey, splitKey); - - StringBuilder sb = new StringBuilder(); - sb.append("SELECT ").append(StringUtils.join(selectColumns,",")).append(" FROM "); - sb.append(databaseInterface.quoteTable(table)); - sb.append(" WHERE 1=1 "); - - StringBuilder filter = new StringBuilder(); - - if(isSplitByKey && !splitWithRowNum) { - filter.append(" AND ").append(databaseInterface.getSplitFilter(splitKey)); - } - - if (customFilter != null){ - customFilter = customFilter.trim(); - if (customFilter.length() > 0){ - filter.append(" AND ").append(customFilter); - } - } - - if(isIncrement){ - filter.append(" ").append(INCREMENT_FILTER_PLACEHOLDER); - } - - if(isRestore){ - filter.append(" ").append(RESTORE_FILTER_PLACEHOLDER); - } - - sb.append(filter); - sb.append(buildOrderSql()); - - if(isSplitByKey && splitWithRowNum){ - return String.format(SQL_SPLIT_WITH_ROW_NUM, sb.toString(), databaseInterface.getSplitFilter(ROW_NUM_COLUMN_ALIAS)); - } else { - return sb.toString(); - } - } -} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java index 591e31abd5..d333c77964 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java @@ -19,16 +19,11 @@ package com.dtstack.flinkx.postgresql.reader; import com.dtstack.flinkx.config.DataTransferConfig; -import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; -import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder; import com.dtstack.flinkx.rdb.util.DBUtil; -import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.types.Row; /** * The reader plugin for PostgreSQL database @@ -42,35 +37,5 @@ public PostgresqlReader(DataTransferConfig config, StreamExecutionEnvironment en super(config, env); setDatabaseInterface(new PostgresqlDatabaseMeta()); setTypeConverterInterface(new PostgresqlTypeConverter()); - dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); - } - - @Override - public DataStream readData() { - JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); - builder.setDrivername(databaseInterface.getDriverClass()); - builder.setDBUrl(dbUrl); - builder.setUsername(username); - builder.setPassword(password); - builder.setBytes(bytes); - 
builder.setMonitorUrls(monitorUrls); - builder.setTable(table); - builder.setDatabaseInterface(databaseInterface); - builder.setTypeConverter(typeConverter); - builder.setMetaColumn(metaColumns); - builder.setFetchSize(fetchSize == 0 ? databaseInterface.getFetchSize() : fetchSize); - builder.setQueryTimeOut(queryTimeOut == 0 ? databaseInterface.getQueryTimeout() : queryTimeOut); - builder.setIncrementConfig(incrementConfig); - builder.setSplitKey(splitKey); - builder.setNumPartitions(numPartitions); - builder.setCustomSql(customSql); - builder.setRestoreConfig(restoreConfig); - builder.setHadoopConfig(hadoopConfig); - - QuerySqlBuilder sqlBuilder = new PostgresqlQuerySqlBuilder(this); - builder.setQuery(sqlBuilder.buildSql()); - - RichInputFormat format = builder.finish(); - return createInput(format, (databaseInterface.getDatabaseType() + "reader").toLowerCase()); } } diff --git a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java similarity index 75% rename from flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java rename to flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java index ce3653c985..4af60e797e 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java +++ b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java @@ -1,21 +1,4 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.postgresql.format; +package com.dtstack.flinkx.postgresql.writer; import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; @@ -24,6 +7,8 @@ import org.apache.flink.types.Row; import org.postgresql.copy.CopyManager; import org.postgresql.core.BaseConnection; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.sql.PreparedStatement; @@ -38,15 +23,15 @@ public class PostgresqlOutputFormat extends JdbcOutputFormat { + private static final Logger LOG = LoggerFactory.getLogger(PostgresqlOutputFormat.class); + private static final String COPY_SQL_TEMPL = "copy %s(%s) from stdin DELIMITER '%s'"; private static final String DEFAULT_FIELD_DELIM = "\001"; private static final String LINE_DELIMITER = "\n"; - /** - * now just add ext insert mode:copy - */ + /**now just add ext insert mode:copy*/ private static final String INSERT_SQL_MODE_TYPE = "copy"; private String copySql = ""; @@ -102,7 +87,7 @@ protected void writeMultipleRecordsInternal() throws Exception { return; } - StringBuilder sb = new StringBuilder(128); + StringBuilder sb = new StringBuilder(); for (Row row : rows) { int lastIndex = row.getArity() - 1; for (int index =0; index < row.getArity(); index++) { @@ -125,15 +110,6 @@ protected void writeMultipleRecordsInternal() throws Exception { } } - @Override - protected Object getField(Row row, int index) { - Object field = super.getField(row, index); - String type = columnType.get(index); - field = typeConverter.convert(field,type); - - return field; - } - private boolean checkIsCopyMode(String insertMode){ if(Strings.isNullOrEmpty(insertMode)){ return false; diff --git a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java index 867fd909bb..a81de66aed 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java +++ b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java @@ -21,7 +21,6 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; -import com.dtstack.flinkx.postgresql.format.PostgresqlOutputFormat; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; import org.apache.flink.streaming.api.datastream.DataStream; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java index d9f3508798..1840f866dc 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
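// The copy-mode write path kept above serialises fields with the \001 delimiter
// and rows with \n, then streams the buffer through PostgreSQL's CopyManager,
// mirroring COPY_SQL_TEMPL. A condensed sketch under assumed table/column names;
// connection setup and error handling are elided:
static void copyRows(java.sql.Connection conn, java.util.List<org.apache.flink.types.Row> rows) throws Exception {
    String copySql = String.format("copy %s(%s) from stdin DELIMITER '%s'",
            "my_table", "id,name", "\001");             // hypothetical table and columns
    StringBuilder sb = new StringBuilder();
    for (org.apache.flink.types.Row row : rows) {
        int last = row.getArity() - 1;
        for (int i = 0; i <= last; i++) {
            sb.append(row.getField(i)).append(i == last ? "\n" : "\001");
        }
    }
    org.postgresql.copy.CopyManager mgr =
            new org.postgresql.copy.CopyManager((org.postgresql.core.BaseConnection) conn);
    mgr.copyIn(copySql, new java.io.ByteArrayInputStream(sb.toString().getBytes()));
}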
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package com.dtstack.flinkx.rdb; import java.io.Serializable; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java index 79cac26365..48faab016e 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java @@ -1,20 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package com.dtstack.flinkx.rdb; import java.io.Serializable; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java deleted file mode 100644 index 94c846429c..0000000000 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.dtstack.flinkx.rdb.loader; - -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.util.Preconditions; - -/** - * FlinkX jdbc format loader - * - * Date: 2019/09/19 - * Company: www.dtstack.com - * - * @author tudou - */ -public class JdbcFormatLoader { - - /** - * 类型名称 - */ - private String formatName; - - /** - * format全限定名 - */ - private String formatClassName; - - public static final int INPUT_FORMAT = 0; - public static final int OUTPUT_FORMAT = 1; - - private final String pkgPrefixFormat = "com.dtstack.flinkx.%s.format.%s"; - - private final String INPUT_FORMAT_SUFFIX = "InputFormat"; - - private final String OUTPUT_FORMAT_SUFFIX = "OutputFormat"; - - /** - * JdbcFormatLoader构造器 - * @param dataType jdbc数据源类型 - * @param formatType format类型:INPUT_FORMAT,OUTPUT_FORMAT - */ - public JdbcFormatLoader(String dataType, int formatType){ - - Preconditions.checkArgument(StringUtils.isNotBlank(dataType)); - Preconditions.checkArgument(formatType == INPUT_FORMAT || formatType == OUTPUT_FORMAT); - - dataType = dataType.toLowerCase(); - if(formatType == INPUT_FORMAT){ - this.formatName = dataType + INPUT_FORMAT_SUFFIX; - }else{ - this.formatName = dataType + OUTPUT_FORMAT_SUFFIX; - } - this.formatClassName = String.format(pkgPrefixFormat, dataType, this.formatName.substring(0, 1).toUpperCase() + this.formatName.substring(1)); - } - - /** - * 获取format实例对象 - * @return - */ - public Object getFormatInstance() { - Object format = null; - try { - Class clz = Class.forName(formatClassName); - format = clz.newInstance(); - } catch (ClassNotFoundException e) { - throw new RuntimeException("error to load " + formatClassName, e); - } catch (Exception e) { - throw new RuntimeException(formatClassName + "don't have no parameter constructor", e); - } - - return format; - } - -} diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java index 35088110d0..8ac4ca6fda 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java @@ -28,12 +28,6 @@ */ public interface TypeConverterInterface extends Serializable { - /** - * 类型转换,将数据库数据某类型的对象转换为对应的Java基本数据对象实例 - * @param data 数据记录 - * @param typeName 数据类型 - * @return - */ Object convert(Object data,String typeName); } diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java index 7e708ed040..6108137732 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java @@ -17,26 +17,27 @@ */ package com.dtstack.flinkx.rdb.util; +import com.dtstack.flinkx.constants.PluginNameConstrant; +import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.ParameterValuesProvider; +import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import com.dtstack.flinkx.util.ExceptionUtil; -import com.dtstack.flinkx.util.SysUtil; -import com.dtstack.flinkx.util.TelnetUtil; +import com.dtstack.flinkx.util.*; import 
org.apache.commons.lang.StringUtils; -import org.apache.flink.util.CollectionUtil; +import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; +import java.io.Serializable; import java.math.BigDecimal; import java.sql.*; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.regex.Pattern; /** * @@ -48,55 +49,21 @@ public class DBUtil { private static final Logger LOG = LoggerFactory.getLogger(DBUtil.class); - /** - * 数据库连接的最大重试次数 - */ private static int MAX_RETRY_TIMES = 3; - /** - * 秒级时间戳的长度为10位 - */ private static int SECOND_LENGTH = 10; - /** - * 毫秒级时间戳的长度为13位 - */ private static int MILLIS_LENGTH = 13; - /** - * 微秒级时间戳的长度为16位 - */ private static int MICRO_LENGTH = 16; - /** - * 纳秒级时间戳的长度为19位 - */ private static int NANOS_LENGTH = 19; - /** - * jdbc连接URL的分割正则,用于获取URL?后的连接参数 - */ - public static final Pattern DB_PATTERN = Pattern.compile("\\?"); - - /** - * 增量任务过滤条件占位符 - */ public static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; - /** - * 断点续传过滤条件占位符 - */ public static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; public static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; public static final String NULL_STRING = "null"; - /** - * 获取jdbc连接(超时10S) - * @param url url - * @param username 账号 - * @param password 密码 - * @return - * @throws SQLException - */ private static Connection getConnectionInternal(String url, String username, String password) throws SQLException { Connection dbConn; synchronized (ClassUtil.lock_str){ @@ -115,14 +82,6 @@ private static Connection getConnectionInternal(String url, String username, Str return dbConn; } - /** - * 获取jdbc连接(重试3次) - * @param url url - * @param username 账号 - * @param password 密码 - * @return - * @throws SQLException - */ public static Connection getConnection(String url, String username, String password) throws SQLException { if (!url.startsWith("jdbc:mysql")) { return getConnectionInternal(url, username, password); @@ -151,21 +110,45 @@ public static Connection getConnection(String url, String username, String passw } } - /** - * 关闭连接资源 - * @param rs ResultSet - * @param stmt Statement - * @param conn Connection - * @param commit - */ - public static void closeDBResources(ResultSet rs, Statement stmt, Connection conn, boolean commit) { + + public static List> executeQuery(Connection connection, String sql) { + List> result = com.google.common.collect.Lists.newArrayList(); + ResultSet res = null; + Statement statement = null; + try{ + statement = connection.createStatement(); + res = statement.executeQuery(sql); + int columns = res.getMetaData().getColumnCount(); + List columnName = com.google.common.collect.Lists.newArrayList(); + for(int i = 0; i < columns; i++){ + columnName.add(res.getMetaData().getColumnName(i + 1)); + } + + while(res.next()){ + Map row = com.google.common.collect.Maps.newHashMap(); + for(int i = 0;i < columns; i++){ + row.put(columnName.get(i), res.getObject(i + 1)); + } + result.add(row); + } + }catch(Exception e){ + throw new RuntimeException(e); + } + finally{ + DBUtil.closeDBResources(res, statement, null, false); + } + return result; + } + + public static void closeDBResources(ResultSet rs, Statement stmt, + Connection conn, boolean commit) { if (null != rs) { try { LOG.info("Start close resultSet"); rs.close(); LOG.info("Close resultSet successful"); } catch (SQLException e) { - LOG.warn("Close resultSet error: {}", 
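// The SECOND_LENGTH(10)/MILLIS_LENGTH(13)/MICRO_LENGTH(16)/NANOS_LENGTH(19)
// constants kept above encode a digit-length dispatch used when a numeric
// location is normalised to epoch milliseconds. A compact sketch of that rule
// (hypothetical helper, not the actual DBUtil signature):
static long toEpochMillis(long location) {
    switch (String.valueOf(location).length()) {
        case 10: return location * 1000L;       // seconds
        case 13: return location;               // milliseconds
        case 16: return location / 1000L;       // microseconds
        case 19: return location / 1000000L;    // nanoseconds
        default: throw new IllegalArgumentException("Unsupported location: " + location);
    }
}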
ExceptionUtil.getErrorMessage(e)); + LOG.warn("Close resultSet error:{}",e); } } @@ -175,7 +158,7 @@ public static void closeDBResources(ResultSet rs, Statement stmt, Connection con stmt.close(); LOG.info("Close statement successful"); } catch (SQLException e) { - LOG.warn("Close statement error:{}", ExceptionUtil.getErrorMessage(e)); + LOG.warn("Close statement error:{}",e); } } @@ -189,15 +172,11 @@ public static void closeDBResources(ResultSet rs, Statement stmt, Connection con conn.close(); LOG.info("Close connection successful"); } catch (SQLException e) { - LOG.warn("Close connection error:{}", ExceptionUtil.getErrorMessage(e)); + LOG.warn("Close connection error:{}",e); } } } - /** - * 手动提交事物 - * @param conn Connection - */ public static void commit(Connection conn){ try { if (!conn.isClosed() && !conn.getAutoCommit()){ @@ -206,15 +185,10 @@ public static void commit(Connection conn){ LOG.info("Commit connection successful"); } } catch (SQLException e){ - LOG.warn("commit error:{}", ExceptionUtil.getErrorMessage(e)); + LOG.warn("commit error:{}",e); } } - /** - * 批量执行sql - * @param dbConn Connection - * @param sqls sql列表 - */ public static void executeBatch(Connection dbConn, List sqls) { if(sqls == null || sqls.size() == 0) { return; @@ -233,13 +207,6 @@ public static void executeBatch(Connection dbConn, List sqls) { } } - /** - * 获取某数据库某表的主键和唯一索引 - * @param table 表名 - * @param dbConn 数据库连接 - * @return - * @throws SQLException - */ public static Map> getPrimaryOrUniqueKeys(String table, Connection dbConn) throws SQLException { Map> keyMap = new HashMap<>(); DatabaseMetaData meta = dbConn.getMetaData(); @@ -255,38 +222,26 @@ public static Map> getPrimaryOrUniqueKeys(String table, Conn return keyMap; } - /** - * 封装channel通道顺序 - * @param channels - * @return - */ public static Object[][] getParameterValues(final int channels){ - ParameterValuesProvider provider = () -> { - Integer[][] parameters = new Integer[channels][]; - for(int i = 0; i < channels; ++i) { - parameters[i] = new Integer[2]; - parameters[i][0] = channels; - parameters[i][1] = i; + ParameterValuesProvider provider = new ParameterValuesProvider() { + @Override + public Serializable[][] getParameterValues() { + Integer[][] parameters = new Integer[channels][]; + for(int i = 0; i < channels; ++i) { + parameters[i] = new Integer[2]; + parameters[i][0] = channels; + parameters[i][1] = i; + } + return parameters; } - return parameters; }; return provider.getParameterValues(); } - /** - * 获取表列名类型列表 - * @param dbURL jdbc url - * @param username 数据库账号 - * @param password 数据库密码 - * @param databaseInterface DatabaseInterface - * @param table 表名 - * @param metaColumns MetaColumn列表 - * @return - */ - public static List analyzeTable(String dbURL, String username, String password, DatabaseInterface databaseInterface, - String table, List metaColumns) { - List ret = new ArrayList<>(metaColumns.size()); + public static List analyzeTable(String dbURL,String username,String password,DatabaseInterface databaseInterface, + String table,List metaColumns) { + List ret = new ArrayList<>(); Connection dbConn = null; Statement stmt = null; ResultSet rs = null; @@ -296,7 +251,7 @@ public static List analyzeTable(String dbURL, String username, String pa rs = stmt.executeQuery(databaseInterface.getSQLQueryFields(databaseInterface.quoteTable(table))); ResultSetMetaData rd = rs.getMetaData(); - Map nameTypeMap = new HashMap<>((rd.getColumnCount() << 2) / 3); + Map nameTypeMap = new HashMap<>(); for(int i = 0; i < rd.getColumnCount(); ++i) { 
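// closeDBResources above releases resources in ResultSet -> Statement ->
// Connection order, committing first when requested, and logs failures instead
// of rethrowing so that one broken close does not leak the rest. A generic
// sketch of that pattern (simplified; the real method also handles the commit):
static void closeQuietly(AutoCloseable... resources) {
    for (AutoCloseable r : resources) {         // pass rs, stmt, conn in that order
        if (r == null) { continue; }
        try {
            r.close();
        } catch (Exception e) {
            org.slf4j.LoggerFactory.getLogger("DBUtil").warn("close failed", e);
        }
    }
}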
nameTypeMap.put(rd.getColumnName(i+1),rd.getColumnTypeName(i+1)); } @@ -317,13 +272,6 @@ public static List analyzeTable(String dbURL, String username, String pa return ret; } - /** - * Set a placeholder value - * @param param the parameter value - * @param statement PreparedStatement - * @param i the placeholder position - * @throws SQLException - */ public static void setParameterValue(Object param,PreparedStatement statement,int i) throws SQLException{ if (param instanceof String) { statement.setString(i + 1, (String) param); @@ -357,12 +305,52 @@ public static void setParameterValue(Object param,PreparedStatement statement,in } } - /** - * Convert a Clob to a String - * @param obj the Clob value - * @return - * @throws Exception - */ + public static void getRow(EDatabaseType dbType, Row row, List descColumnTypeList, ResultSet resultSet, + TypeConverterInterface typeConverter) throws Exception{ + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if (EDatabaseType.Oracle == dbType) { + if((obj instanceof java.util.Date || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { + obj = resultSet.getTimestamp(pos + 1); + } + } else if(EDatabaseType.MySQL == dbType) { + if(descColumnTypeList != null && descColumnTypeList.size() != 0) { + if(descColumnTypeList.get(pos).equalsIgnoreCase("year")) { + java.util.Date date = (java.util.Date) obj; + String year = DateUtil.dateToYearString(date); + obj = year; + } else if(descColumnTypeList.get(pos).equalsIgnoreCase("tinyint")) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 1 : 0); + } + } else if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ?
1 : 0); + } + } + } + } else if(EDatabaseType.PostgreSQL == dbType){ + if(descColumnTypeList != null && descColumnTypeList.size() != 0) { + obj = typeConverter.convert(obj,descColumnTypeList.get(pos)); + } + } + + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + } + public static Object clobToString(Object obj) throws Exception{ String dataStr; if(obj instanceof Clob){ @@ -381,24 +370,110 @@ public static Object clobToString(Object obj) throws Exception{ return dataStr; } - /** - * 获取纳秒字符串 - * @param timeStr - * @return - */ - public static String getNanosTimeStr(String timeStr){ + public static String buildIncrementFilter(DatabaseInterface databaseInterface,String incrementColType,String incrementCol, + String startLocation,String endLocation, String customSql, boolean useMaxFunc){ + StringBuilder filter = new StringBuilder(); + + if (StringUtils.isNotEmpty(customSql)){ + incrementCol = String.format("%s.%s", TEMPORARY_TABLE_NAME, databaseInterface.quoteColumn(incrementCol)); + } else { + incrementCol = databaseInterface.quoteColumn(incrementCol); + } + + String startFilter = buildStartLocationSql(databaseInterface, incrementColType, incrementCol, startLocation, useMaxFunc); + if (StringUtils.isNotEmpty(startFilter)){ + filter.append(startFilter); + } + + String endFilter = buildEndLocationSql(databaseInterface, incrementColType, incrementCol, endLocation); + if (StringUtils.isNotEmpty(endFilter)){ + if (filter.length() > 0){ + filter.append(" and ").append(endFilter); + } else { + filter.append(endFilter); + } + } + + return filter.toString(); + } + + public static String buildStartLocationSql(DatabaseInterface databaseInterface,String incrementColType, + String incrementCol,String startLocation,boolean useMaxFunc){ + if(StringUtils.isEmpty(startLocation) || NULL_STRING.equalsIgnoreCase(startLocation)){ + return null; + } + + String operator = " >= "; + if(!useMaxFunc){ + operator = " > "; + } + + return getLocationSql(databaseInterface, incrementColType, incrementCol, startLocation, operator); + } + + public static String buildEndLocationSql(DatabaseInterface databaseInterface,String incrementColType,String incrementCol, + String endLocation){ + if(StringUtils.isEmpty(endLocation) || NULL_STRING.equalsIgnoreCase(endLocation)){ + return null; + } + + return getLocationSql(databaseInterface, incrementColType, incrementCol, endLocation, " < "); + } + + private static String getLocationSql(DatabaseInterface databaseInterface, String incrementColType, String incrementCol, + String location, String operator) { + String endTimeStr; + String endLocationSql; + boolean isTimeType = ColumnType.isTimeType(incrementColType) + || (databaseInterface.getDatabaseType() == EDatabaseType.SQLServer && ColumnType.NVARCHAR.name().equals(incrementColType)); + if(isTimeType){ + endTimeStr = getTimeStr(databaseInterface.getDatabaseType(), Long.parseLong(location), incrementColType); + endLocationSql = incrementCol + operator + endTimeStr; + } else if(ColumnType.isNumberType(incrementColType)){ + endLocationSql = incrementCol + operator + location; + } else { + endTimeStr = String.format("'%s'",location); + endLocationSql = incrementCol + operator + endTimeStr; + } + + return endLocationSql; + } + + private static String getTimeStr(EDatabaseType databaseType,Long startLocation,String incrementColType){ + String timeStr; + Timestamp ts = new Timestamp(getMillis(startLocation)); + ts.setNanos(getNanos(startLocation)); + timeStr = getNanosTimeStr(ts.toString()); + + if(databaseType == 
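// buildIncrementFilter above composes the incremental read window: a lower
// bound using ">=" when useMaxFunc is set (or ">" otherwise) joined by "and"
// with a "<" upper bound. A minimal sketch over plain numeric locations; the
// helper name is an assumption:
static String incrementFilter(String col, String start, String end, boolean useMaxFunc) {
    StringBuilder filter = new StringBuilder();
    if (start != null && !"null".equalsIgnoreCase(start)) {
        filter.append(col).append(useMaxFunc ? " >= " : " > ").append(start);
    }
    if (end != null && !"null".equalsIgnoreCase(end)) {
        if (filter.length() > 0) { filter.append(" and "); }
        filter.append(col).append(" < ").append(end);
    }
    return filter.toString();
}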
EDatabaseType.SQLServer){ + timeStr = timeStr.substring(0,23); + } else { + timeStr = timeStr.substring(0,26); + } + + if (databaseType == EDatabaseType.Oracle){ + if(ColumnType.TIMESTAMP.name().equals(incrementColType)){ + timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr); + } else { + timeStr = timeStr.substring(0, 19); + timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr); + } + } else { + timeStr = String.format("'%s'",timeStr); + } + + return timeStr; + } + + private static String getNanosTimeStr(String timeStr){ if(timeStr.length() < 29){ timeStr += StringUtils.repeat("0",29 - timeStr.length()); } + return timeStr; } - /** - * 将边界位置时间转换成对应饿的纳秒时间 - * @param startLocation 边界位置(起始/结束) - * @return - */ - public static int getNanos(long startLocation){ + private static int getNanos(long startLocation){ String timeStr = String.valueOf(startLocation); int nanos; if (timeStr.length() == SECOND_LENGTH){ @@ -416,12 +491,7 @@ public static int getNanos(long startLocation){ return nanos; } - /** - * 将边界位置时间转换成对应饿的毫秒时间 - * @param startLocation 边界位置(起始/结束) - * @return - */ - public static long getMillis(long startLocation){ + private static long getMillis(long startLocation){ String timeStr = String.valueOf(startLocation); long millisSecond; if (timeStr.length() == SECOND_LENGTH){ @@ -439,41 +509,46 @@ public static long getMillis(long startLocation){ return millisSecond; } - /** - * 格式化jdbc连接 - * @param dbUrl 原jdbc连接 - * @param extParamMap 需要额外添加的参数 - * @return 格式化后jdbc连接URL字符串 - */ - public static String formatJdbcUrl(String dbUrl, Map extParamMap){ - String[] splits = DB_PATTERN.split(dbUrl); - - Map paramMap = new HashMap(); - if(splits.length > 1) { - String[] pairs = splits[1].split("&"); - for(String pair : pairs) { - String[] leftRight = pair.split("="); - paramMap.put(leftRight[0], leftRight[1]); + public static String formatJdbcUrl(String pluginName,String dbUrl){ + if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_READER) + || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER) + || pluginName.equalsIgnoreCase(PluginNameConstrant.POSTGRESQL_READER) + || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER) + || pluginName.equalsIgnoreCase(PluginNameConstrant.GBASE_WRITER) ){ + String[] splits = dbUrl.split("\\?"); + + Map paramMap = new HashMap(); + if(splits.length > 1) { + String[] pairs = splits[1].split("&"); + for(String pair : pairs) { + String[] leftRight = pair.split("="); + paramMap.put(leftRight[0], leftRight[1]); + } } - } - if(!CollectionUtil.isNullOrEmpty(extParamMap)){ - paramMap.putAll(extParamMap); - } - paramMap.put("useCursorFetch", "true"); - paramMap.put("rewriteBatchedStatements", "true"); - - StringBuffer sb = new StringBuffer(dbUrl.length() + 128); - sb.append(splits[0]).append("?"); - int index = 0; - for(Map.Entry entry : paramMap.entrySet()) { - if(index != 0) { - sb.append("&"); + paramMap.put("useCursorFetch", "true"); + paramMap.put("rewriteBatchedStatements", "true"); + if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_READER) + || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)){ + paramMap.put("zeroDateTimeBehavior","convertToNull"); + } + + StringBuffer sb = new StringBuffer(splits[0]); + if(paramMap.size() != 0) { + sb.append("?"); + int index = 0; + for(Map.Entry entry : paramMap.entrySet()) { + if(index != 0) { + sb.append("&"); + } + sb.append(entry.getKey() + "=" + entry.getValue()); + index++; + } } - 
sb.append(entry.getKey()).append("=").append(entry.getValue()); - index++; + + dbUrl = sb.toString(); } - return sb.toString(); + return dbUrl; } } diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java index 5c265935df..a3e9da992e 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java @@ -28,7 +28,6 @@ import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.DataReader; import com.dtstack.flinkx.reader.MetaColumn; -import org.apache.commons.lang.StringUtils; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; @@ -64,7 +63,7 @@ public class DistributedJdbcDataReader extends DataReader { protected int queryTimeOut; - protected List connectionConfigs; + private List connectionConfigs; private static String DISTRIBUTED_TAG = "d"; @@ -85,7 +84,7 @@ protected DistributedJdbcDataReader(DataTransferConfig config, StreamExecutionEn @Override public DataStream readData() { - DistributedJdbcInputFormatBuilder builder = new DistributedJdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); + DistributedJdbcInputFormatBuilder builder = new DistributedJdbcInputFormatBuilder(); builder.setDrivername(databaseInterface.getDriverClass()); builder.setUsername(username); builder.setPassword(password); @@ -105,12 +104,14 @@ public DataStream readData() { return createInput(format, (databaseInterface.getDatabaseType() + DISTRIBUTED_TAG + "reader").toLowerCase()); } - protected List buildConnections(){ - List sourceList = new ArrayList<>(connectionConfigs.size()); + private List buildConnections(){ + List sourceList = new ArrayList<>(); for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { - String curUsername = (StringUtils.isBlank(connectionConfig.getUsername())) ? username : connectionConfig.getUsername(); - String curPassword = (StringUtils.isBlank(connectionConfig.getPassword())) ? password : connectionConfig.getPassword(); - String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), null); + String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) + ? username : connectionConfig.getUsername(); + String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0) + ? 
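// The new formatJdbcUrl above splits the URL on "?", overlays the forced
// options (useCursorFetch, rewriteBatchedStatements, plus zeroDateTimeBehavior
// for the MySQL readers) and rebuilds the query string. A standalone sketch of
// that merge; the method name is an assumption:
static String withParams(String dbUrl, java.util.Map<String, String> forced) {
    String[] parts = dbUrl.split("\\?", 2);
    java.util.Map<String, String> params = new java.util.LinkedHashMap<>();
    if (parts.length > 1) {
        for (String pair : parts[1].split("&")) {
            String[] kv = pair.split("=", 2);
            params.put(kv[0], kv.length > 1 ? kv[1] : "");
        }
    }
    params.putAll(forced);                      // forced values win over URL values
    StringBuilder sb = new StringBuilder(parts[0]);
    String sep = "?";
    for (java.util.Map.Entry<String, String> e : params.entrySet()) {
        sb.append(sep).append(e.getKey()).append('=').append(e.getValue());
        sep = "&";
    }
    return sb.toString();
}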
password : connectionConfig.getPassword(); + String curJdbcUrl = DBUtil.formatJdbcUrl(pluginName,connectionConfig.getJdbcUrl().get(0)); for (String table : connectionConfig.getTable()) { DataSource source = new DataSource(); source.setTable(table); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java index 5ae1fc6a6a..103283a9f0 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java @@ -27,17 +27,8 @@ */ public class IncrementConfig implements Serializable { - /** - * 是否为增量任务 - */ private boolean increment; - /** - * 用于标记是否保存endLocation位置的一条或多条数据 - * true:不保存 - * false(默认):保存 - * 某些情况下可能出现最后几条数据被重复记录的情况,可以将此参数配置为true - */ private boolean useMaxFunc; private int columnIndex; @@ -48,9 +39,6 @@ public class IncrementConfig implements Serializable { private String startLocation; - /** - * 发送查询累加器请求的间隔时间 - */ private int requestAccumulatorInterval; public int getRequestAccumulatorInterval() { diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java index 0a1d19b5da..dc016efb52 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java @@ -20,10 +20,11 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.config.ReaderConfig; -import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder; +import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; +import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.DataReader; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang3.StringUtils; @@ -83,6 +84,7 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env) ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader(); dbUrl = readerConfig.getParameter().getConnection().get(0).getJdbcUrl().get(0); + dbUrl = DBUtil.formatJdbcUrl(readerConfig.getName(),dbUrl); username = readerConfig.getParameter().getStringVal(JdbcConfigKeys.KEY_USER_NAME); password = readerConfig.getParameter().getStringVal(JdbcConfigKeys.KEY_PASSWORD); table = readerConfig.getParameter().getConnection().get(0).getTable().get(0); @@ -99,7 +101,7 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env) @Override public DataStream readData() { - JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); + JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(); builder.setDrivername(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); @@ -140,7 +142,7 @@ private void buildIncrementConfig(ReaderConfig readerConfig){ String incrementColStr = String.valueOf(incrementColumn); if(NumberUtils.isNumber(incrementColStr)){ - MetaColumn metaColumn = metaColumns.get(Integer.parseInt(incrementColStr)); + MetaColumn metaColumn = 
metaColumns.get(Integer.valueOf(incrementColStr)); type = metaColumn.getType(); name = metaColumn.getName(); index = metaColumn.getIndex(); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java index 8b365ebe72..3f585c6f0a 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java @@ -19,8 +19,10 @@ package com.dtstack.flinkx.rdb.datareader; +import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.reader.MetaColumn; +import com.dtstack.flinkx.util.StringUtil; import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; @@ -32,25 +34,25 @@ */ public class QuerySqlBuilder { - protected static final String CUSTOM_SQL_TEMPLATE = "select * from (%s) %s"; - protected static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; - protected static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; - protected static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; - protected static final String SQL_SPLIT_WITH_ROW_NUM = "SELECT * FROM (%s) tmp WHERE %s"; - protected static final String ROW_NUM_COLUMN_ALIAS = "FLINKX_ROWNUM"; - - protected DatabaseInterface databaseInterface; - protected String table; - protected List metaColumns; - protected String splitKey; - protected String customFilter; - protected String customSql; - protected boolean isSplitByKey; - protected boolean isIncrement; - protected String incrementColumn; - protected String restoreColumn; - protected boolean isRestore; - protected String orderByColumn; + private static final String CUSTOM_SQL_TEMPLATE = "select * from (%s) %s"; + private static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; + private static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; + private static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; + private static final String SQL_SPLIT_WITH_ROW_NUM = "SELECT * FROM (%s) tmp WHERE %s"; + private static final String ROW_NUM_COLUMN_ALIAS = "FLINKX_ROWNUM"; + + private DatabaseInterface databaseInterface; + private String table; + private List metaColumns; + private String splitKey; + private String customFilter; + private String customSql; + private boolean isSplitByKey; + private boolean isIncrement; + private String incrementColumn; + private String restoreColumn; + private boolean isRestore; + private String orderByColumn; public QuerySqlBuilder(JdbcDataReader reader) { databaseInterface = reader.databaseInterface; @@ -90,7 +92,7 @@ public String buildSql(){ return query; } - protected String buildQuerySql(){ + private String buildQuerySql(){ List selectColumns = buildSelectColumns(databaseInterface, metaColumns); boolean splitWithRowNum = addRowNumColumn(databaseInterface, selectColumns, isSplitByKey, splitKey); @@ -122,6 +124,10 @@ protected String buildQuerySql(){ sb.append(filter); + if(EDatabaseType.PostgreSQL.equals(databaseInterface.getDatabaseType())){ + sb.append(buildOrderSql()); + } + if(isSplitByKey && splitWithRowNum){ return String.format(SQL_SPLIT_WITH_ROW_NUM, sb.toString(), databaseInterface.getSplitFilter(ROW_NUM_COLUMN_ALIAS)); } else { @@ -129,7 +135,7 @@ protected String buildQuerySql(){ } } - protected String buildOrderSql(){ + private String 
buildOrderSql(){ String column; if(isIncrement){ column = incrementColumn; @@ -162,7 +168,7 @@ private String buildQuerySqlWithCustomSql(){ return querySql.toString(); } - protected static List buildSelectColumns(DatabaseInterface databaseInterface, List metaColumns){ + private static List buildSelectColumns(DatabaseInterface databaseInterface, List metaColumns){ List selectColumns = new ArrayList<>(); if(metaColumns.size() == 1 && "*".equals(metaColumns.get(0).getName())){ selectColumns.add("*"); @@ -179,7 +185,7 @@ protected static List buildSelectColumns(DatabaseInterface databaseInter return selectColumns; } - protected static boolean addRowNumColumn(DatabaseInterface databaseInterface, List selectColumns, boolean isSplitByKey,String splitKey){ + private static boolean addRowNumColumn(DatabaseInterface databaseInterface, List selectColumns, boolean isSplitByKey,String splitKey){ if(!isSplitByKey || !splitKey.contains("(")){ return false; } diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java index 8c4e41df64..a48ff5489b 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.rdb.inputformat; +import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.DatabaseInterface; @@ -32,10 +33,7 @@ import org.apache.flink.types.Row; import java.io.*; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; +import java.sql.*; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -68,15 +66,15 @@ public class DistributedJdbcInputFormat extends RichInputFormat { protected List sourceList; - protected transient int sourceIndex; + private transient int sourceIndex; - protected transient Connection currentConn; + private transient Connection currentConn; - protected transient Statement currentStatement; + private transient Statement currentStatement; - protected transient ResultSet currentResultSet; + private transient ResultSet currentResultSet; - protected transient Row currentRecord; + private transient Row currentRecord; protected String username; @@ -113,10 +111,10 @@ protected void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed." 
+ e.getMessage(), e); } - LOG.info("JdbcInputFormat[{}}]open: end", jobName); + LOG.info("JdbcInputFormat[" + jobName + "]open: end"); } - protected void openNextSource() throws SQLException{ + private void openNextSource() throws SQLException{ DataSource currentSource = sourceList.get(sourceIndex); currentConn = DBUtil.getConnection(currentSource.getJdbcUrl(), currentSource.getUserName(), currentSource.getPassword()); currentConn.setAutoCommit(false); @@ -135,7 +133,12 @@ protected void openNextSource() throws SQLException{ } } - currentStatement.setFetchSize(fetchSize); + if(databaseInterface.getDatabaseType() == EDatabaseType.MySQL){ + currentStatement.setFetchSize(Integer.MIN_VALUE); + } else { + currentStatement.setFetchSize(fetchSize); + } + currentStatement.setQueryTimeout(queryTimeOut); currentResultSet = currentStatement.executeQuery(queryTemplate); columnCount = currentResultSet.getMetaData().getColumnCount(); @@ -145,10 +148,10 @@ protected void openNextSource() throws SQLException{ currentSource.getPassword(),databaseInterface, currentSource.getTable(),metaColumns); } - LOG.info("open source: {} ,table: {}", currentSource.getJdbcUrl(), currentSource.getTable()); + LOG.info("open source:" + currentSource.getJdbcUrl() + ",table:" + currentSource.getTable()); } - protected boolean readNextRecord() throws IOException{ + private boolean readNextRecord() throws IOException{ try{ if(currentConn == null){ openNextSource(); @@ -157,6 +160,7 @@ protected boolean readNextRecord() throws IOException{ hasNext = currentResultSet.next(); if (hasNext){ currentRecord = new Row(columnCount); + DBUtil.getRow(databaseInterface.getDatabaseType(),currentRecord,descColumnTypeList,currentResultSet,typeConverter); if(!"*".equals(metaColumns.get(0).getName())){ for (int i = 0; i < columnCount; i++) { Object val = currentRecord.getField(i); @@ -191,7 +195,7 @@ protected Row nextRecordInternal(Row row) throws IOException { return currentRecord; } - protected void closeCurrentSource(){ + private void closeCurrentSource(){ try { DBUtil.closeDBResources(currentResultSet,currentStatement,currentConn, true); currentConn = null; diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java index f8b4bb04e0..1c1ad058e7 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java @@ -21,7 +21,6 @@ import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.DatabaseInterface; -import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang.StringUtils; @@ -36,12 +35,10 @@ */ public class DistributedJdbcInputFormatBuilder extends RichInputFormatBuilder { - private static String DISTRIBUTED_TAG = "d"; private DistributedJdbcInputFormat format; - public DistributedJdbcInputFormatBuilder(String name) { - JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(name + DISTRIBUTED_TAG, JdbcFormatLoader.INPUT_FORMAT); - super.format = format = (DistributedJdbcInputFormat) jdbcFormatLoader.getFormatInstance(); + public DistributedJdbcInputFormatBuilder() 
{ + super.format = this.format = new DistributedJdbcInputFormat(); } public void setDrivername(String driverName) { diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index c2c3abfad2..9a40ccfb23 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -18,15 +18,18 @@ package com.dtstack.flinkx.rdb.inputformat; -import com.dtstack.flinkx.constants.Metrics; import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.inputformat.RichInputFormat; +import com.dtstack.flinkx.constants.Metrics; +import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.*; +import com.dtstack.flinkx.util.ClassUtil; +import com.dtstack.flinkx.util.DateUtil; +import com.dtstack.flinkx.util.StringUtil; +import com.dtstack.flinkx.util.URLUtil; import com.google.gson.Gson; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.Accumulator; @@ -38,6 +41,12 @@ import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.CloseableHttpClient; import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.HttpClientBuilder; import org.apache.flink.types.Row; +import java.io.IOException; +import java.sql.*; +import java.util.*; +import java.util.Date; + +import com.dtstack.flinkx.inputformat.RichInputFormat; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -45,11 +54,6 @@ import org.apache.hadoop.io.IOUtils; import org.codehaus.jackson.map.ObjectMapper; -import java.io.IOException; -import java.sql.*; -import java.util.Date; -import java.util.*; - /** * InputFormat for reading data from a database and generate Rows. 
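// Both input formats in this patch special-case the fetch size: the MySQL
// driver (and GBase in JdbcInputFormat) streams rows only when fetchSize is
// Integer.MIN_VALUE with auto-commit off, while other databases accept a
// normal positive batch size. A sketch of that branch:
static void applyFetchSize(java.sql.Statement stmt, boolean streamingDriver, int fetchSize)
        throws java.sql.SQLException {
    if (streamingDriver) {
        stmt.setFetchSize(Integer.MIN_VALUE);   // row-by-row streaming
    } else {
        stmt.setFetchSize(fetchSize);           // cursor-based batching
    }
}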
* @@ -98,18 +102,12 @@ public class JdbcInputFormat extends RichInputFormat { protected int fetchSize; - /** - * Timeout returned by each DatabaseMeta.getQueryTimeout(), default 1000ms - */ protected int queryTimeOut; protected int numPartitions; protected String customSql; - /** - * Incremental task configuration - */ protected IncrementConfig incrementConfig; protected StringAccumulator tableColAccumulator; @@ -160,17 +158,33 @@ public void openInternal(InputSplit inputSplit) throws IOException { if(!canReadData(inputSplit)){ LOG.warn("Not read data when the start location are equal to end location"); + hasNext = false; return; } dbConn = DBUtil.getConnection(dbURL, username, password); - // Some drivers need auto-commit disabled before fetchSize takes effect + // Some drivers need auto-commit disabled before fetchSize takes effect dbConn.setAutoCommit(false); + + // Commit before reading so that, if the job exits abnormally, the next PostgreSQL read keeps the same order + if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){ + dbConn.commit(); + } + Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); - statement.setFetchSize(fetchSize); - statement.setQueryTimeout(queryTimeOut); + if(EDatabaseType.MySQL == databaseInterface.getDatabaseType() + || EDatabaseType.GBase == databaseInterface.getDatabaseType()){ + statement.setFetchSize(Integer.MIN_VALUE); + } else { + statement.setFetchSize(fetchSize); + } + + if(EDatabaseType.Carbondata != databaseInterface.getDatabaseType()) { + statement.setQueryTimeout(queryTimeOut); + } + String querySql = buildQuerySql(inputSplit); resultSet = statement.executeQuery(querySql); columnCount = resultSet.getMetaData().getColumnCount(); @@ -195,7 +209,7 @@ public void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed." + se.getMessage(), se); } - LOG.info("JdbcInputFormat[{}]open: end", jobName); + LOG.info("JdbcInputFormat[" + jobName + "]open: end"); } @@ -226,7 +240,13 @@ public boolean reachedEnd() throws IOException { @Override public Row nextRecordInternal(Row row) throws IOException { + row = new Row(columnCount); try { + if (!hasNext) { + return null; + } + + DBUtil.getRow(databaseInterface.getDatabaseType(),row,descColumnTypeList,resultSet,typeConverter); if(!"*".equals(metaColumns.get(0).getName())){ for (int i = 0; i < columnCount; i++) { Object val = row.getField(i); @@ -249,45 +269,31 @@ public Row nextRecordInternal(Row row) throws IOException { //update hasNext after we've read the record hasNext = resultSet.next(); return row; - } catch (SQLException e) { - throw new IOException("Couldn't access resultSet", e); - } - } - - @Override - public void closeInternal() throws IOException { - if(incrementConfig.isIncrement() && hadoopConfig != null) { - uploadMetricData(); + } catch (SQLException se) { + throw new IOException("Couldn't read data - " + se.getMessage(), se); + } catch (Exception npe) { + throw new IOException("Couldn't access resultSet", npe); } - DBUtil.closeDBResources(resultSet,statement,dbConn, true); } - /** - * Initialize the metrics for an incremental task - * @param split the input split - */ - protected void initMetric(InputSplit split){ + private void initMetric(InputSplit split){ if (!incrementConfig.isIncrement()){ return; } - // fetch all registered accumulators Map> accumulatorMap = getRuntimeContext().getAllAccumulators(); - // if no tableCol accumulator exists yet, create one recording "table-incrementColumn" and register it on the context if(!accumulatorMap.containsKey(Metrics.TABLE_COL)){ tableColAccumulator = new StringAccumulator(); tableColAccumulator.add(table + "-" + incrementConfig.getColumnName()); getRuntimeContext().addAccumulator(Metrics.TABLE_COL,tableColAccumulator); } - // create an accumulator recording the start location startLocationAccumulator = new
StringAccumulator(); if (incrementConfig.getStartLocation() != null){ startLocationAccumulator.add(incrementConfig.getStartLocation()); } getRuntimeContext().addAccumulator(Metrics.START_LOCATION,startLocationAccumulator); - //创建一个记录结束位置的累加器 endLocationAccumulator = new MaximumAccumulator(); String endLocation = ((JdbcInputSplit)split).getEndLocation(); if(endLocation != null && incrementConfig.isUseMaxFunc()){ @@ -298,15 +304,9 @@ protected void initMetric(InputSplit split){ getRuntimeContext().addAccumulator(Metrics.END_LOCATION,endLocationAccumulator); } - /** - * 将增量任务的数据最大值设置到累加器中 - * @param inputSplit 数据分片 - */ - protected void getMaxValue(InputSplit inputSplit){ + private void getMaxValue(InputSplit inputSplit){ String maxValue = null; - //第0个通道新建累加器并保存最大值,多通道下其他通道从historyServer中获取最大值 if (inputSplit.getSplitNumber() == 0){ - //从数据库中获取当前增量字段的最大值 maxValue = getMaxValueFromDb(); maxValueAccumulator = new StringAccumulator(); maxValueAccumulator.add(maxValue); @@ -336,7 +336,6 @@ protected void getMaxValue(InputSplit inputSplit){ */ int maxAcquireTimes = (queryTimeOut / incrementConfig.getRequestAccumulatorInterval()) + 10; - //当前重试次数 int acquireTimes = 0; while (StringUtils.isEmpty(maxValue) && acquireTimes < maxAcquireTimes){ try { @@ -359,13 +358,6 @@ protected void getMaxValue(InputSplit inputSplit){ ((JdbcInputSplit) inputSplit).setEndLocation(maxValue); } - /** - * 从historyServer中获取增量最大值 - * @param httpClient httpClient - * @param monitors 请求的URL数组 - * @return - */ - @SuppressWarnings("unchecked") private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[] monitors){ String maxValue = null; Gson gson = new Gson(); @@ -396,13 +388,7 @@ private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[ return maxValue; } - /** - * 判断增量任务是否还能继续读取数据 - * 增量任务,startLocation = endLocation且两者都不为null,返回false,其余情况返回true - * @param split 数据分片 - * @return - */ - protected boolean canReadData(InputSplit split){ + private boolean canReadData(InputSplit split){ if (!incrementConfig.isIncrement()){ return true; } @@ -415,27 +401,21 @@ protected boolean canReadData(InputSplit split){ return !StringUtils.equals(jdbcInputSplit.getStartLocation(), jdbcInputSplit.getEndLocation()); } - /** - * 构造查询sql - * @param inputSplit 数据切片 - * @return 构建的sql字符串 - */ - protected String buildQuerySql(InputSplit inputSplit){ - //QuerySqlBuilder中构建的queryTemplate + private String buildQuerySql(InputSplit inputSplit){ String querySql = queryTemplate; if (inputSplit == null){ - LOG.warn("Executing sql is: '{}'", querySql); + LOG.warn(String.format("Executing sql is: '%s'", querySql)); return querySql; } JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit; if (StringUtils.isNotEmpty(splitKey)){ - querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions)) .replace("${M}", String.valueOf(indexOfSubtask)); + querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions)) + .replace("${M}", String.valueOf(indexOfSubtask)); } - //是否开启断点续传 if (restoreConfig.isRestore()){ if(formatState == null){ querySql = querySql.replace(DBUtil.RESTORE_FILTER_PLACEHOLDER, StringUtils.EMPTY); @@ -445,12 +425,8 @@ protected String buildQuerySql(InputSplit inputSplit){ } } else { String startLocation = getLocation(restoreColumn.getType(), formatState.getState()); - String restoreFilter = buildIncrementFilter(restoreColumn.getType(), - restoreColumn.getName(), - startLocation, - jdbcInputSplit.getEndLocation(), - customSql, - incrementConfig.isUseMaxFunc()); + String restoreFilter 
= DBUtil.buildIncrementFilter(databaseInterface, restoreColumn.getType(), + restoreColumn.getName(), startLocation, jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(restoreFilter)){ restoreFilter = " and " + restoreFilter; @@ -464,24 +440,15 @@ protected String buildQuerySql(InputSplit inputSplit){ querySql = buildIncrementSql(jdbcInputSplit, querySql); } - LOG.warn("Executing sql is: '{}}'", querySql); + LOG.warn(String.format("Executing sql is: '%s'", querySql)); return querySql; } - /** - * 构造增量任务查询sql - * @param jdbcInputSplit 数据切片 - * @param querySql 已经创建的查询sql - * @return - */ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql){ - String incrementFilter = buildIncrementFilter(incrementConfig.getColumnType(), - incrementConfig.getColumnName(), - jdbcInputSplit.getStartLocation(), - jdbcInputSplit.getEndLocation(), - customSql, - incrementConfig.isUseMaxFunc()); + String incrementFilter = DBUtil.buildIncrementFilter(databaseInterface, incrementConfig.getColumnType(), + incrementConfig.getColumnName(), jdbcInputSplit.getStartLocation(), + jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(incrementFilter)){ incrementFilter = " and " + incrementFilter; @@ -490,120 +457,6 @@ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql) return querySql.replace(DBUtil.INCREMENT_FILTER_PLACEHOLDER, incrementFilter); } - /** - * 构建增量任务查询sql的过滤条件 - * @param incrementColType 增量字段类型 - * @param incrementCol 增量字段名称 - * @param startLocation 开始位置 - * @param endLocation 结束位置 - * @param customSql 用户自定义sql - * @param useMaxFunc 是否保存结束位置数据 - * @return - */ - protected String buildIncrementFilter(String incrementColType,String incrementCol, String startLocation,String endLocation, String customSql, boolean useMaxFunc){ - StringBuilder filter = new StringBuilder(128); - - if (org.apache.commons.lang.StringUtils.isNotEmpty(customSql)){ - incrementCol = String.format("%s.%s", DBUtil.TEMPORARY_TABLE_NAME, databaseInterface.quoteColumn(incrementCol)); - } else { - incrementCol = databaseInterface.quoteColumn(incrementCol); - } - - String startFilter = buildStartLocationSql(incrementColType, incrementCol, startLocation, useMaxFunc); - if (org.apache.commons.lang.StringUtils.isNotEmpty(startFilter)){ - filter.append(startFilter); - } - - String endFilter = buildEndLocationSql(incrementColType, incrementCol, endLocation); - if (org.apache.commons.lang.StringUtils.isNotEmpty(endFilter)){ - if (filter.length() > 0){ - filter.append(" and ").append(endFilter); - } else { - filter.append(endFilter); - } - } - - return filter.toString(); - } - - /** - * 构建起始位置sql - * @param incrementColType 增量字段类型 - * @param incrementCol 增量字段名称 - * @param startLocation 开始位置 - * @param useMaxFunc 是否保存结束位置数据 - * @return - */ - protected String buildStartLocationSql(String incrementColType, String incrementCol, String startLocation, boolean useMaxFunc){ - if(org.apache.commons.lang.StringUtils.isEmpty(startLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(startLocation)){ - return null; - } - - String operator = useMaxFunc?" 
>= ":" > "; - - return getLocationSql(incrementColType, incrementCol, startLocation, operator); - } - - /** - * 构建结束位置sql - * @param incrementColType 增量字段类型 - * @param incrementCol 增量字段名称 - * @param endLocation 结束位置 - * @return - */ - public String buildEndLocationSql(String incrementColType, String incrementCol, String endLocation){ - if(org.apache.commons.lang.StringUtils.isEmpty(endLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(endLocation)){ - return null; - } - - return getLocationSql(incrementColType, incrementCol, endLocation, " < "); - } - - /** - * 构建边界位置sql - * @param incrementColType 增量字段类型 - * @param incrementCol 增量字段名称 - * @param location 边界位置(起始/结束) - * @param operator 判断符( >, >=, <) - * @return - */ - protected String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { - String endTimeStr; - String endLocationSql; - if(ColumnType.isTimeType(incrementColType)){ - endTimeStr = getTimeStr(Long.parseLong(location), incrementColType); - endLocationSql = incrementCol + operator + endTimeStr; - } else if(ColumnType.isNumberType(incrementColType)){ - endLocationSql = incrementCol + operator + location; - } else { - endTimeStr = String.format("'%s'",location); - endLocationSql = incrementCol + operator + endTimeStr; - } - - return endLocationSql; - } - - /** - * 构建时间边界字符串 - * @param location 边界位置(起始/结束) - * @param incrementColType 增量字段类型 - * @return - */ - protected String getTimeStr(Long location, String incrementColType){ - String timeStr; - Timestamp ts = new Timestamp(DBUtil.getMillis(location)); - ts.setNanos(DBUtil.getNanos(location)); - timeStr = DBUtil.getNanosTimeStr(ts.toString()); - timeStr = timeStr.substring(0,26); - timeStr = String.format("'%s'",timeStr); - - return timeStr; - } - - /** - * 从数据库中查询增量字段的最大值 - * @return - */ private String getMaxValueFromDb() { String maxValue = null; Connection conn = null; @@ -621,10 +474,8 @@ private String getMaxValueFromDb() { databaseInterface.quoteColumn(incrementConfig.getColumnName()), databaseInterface.quoteTable(table)); } - String startSql = buildStartLocationSql(incrementConfig.getColumnType(), - databaseInterface.quoteColumn(incrementConfig.getColumnName()), - incrementConfig.getStartLocation(), - incrementConfig.isUseMaxFunc()); + String startSql = DBUtil.buildStartLocationSql(databaseInterface, incrementConfig.getColumnType(), + databaseInterface.quoteColumn(incrementConfig.getColumnName()), incrementConfig.getStartLocation(), incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(startSql)){ queryMaxValueSql += " where " + startSql; } @@ -648,12 +499,6 @@ private String getMaxValueFromDb() { } } - /** - * 边界位置值转字符串 - * @param columnType 边界字段类型 - * @param columnVal 边界值 - * @return - */ private String getLocation(String columnType, Object columnVal){ String location; if (columnVal == null){ @@ -688,10 +533,6 @@ private String getLocation(String columnType, Object columnVal){ return location; } - /** - * 上传累加器数据 - * @throws IOException - */ private void uploadMetricData() throws IOException { FSDataOutputStream out = null; try { @@ -726,4 +567,12 @@ private void uploadMetricData() throws IOException { } } + @Override + public void closeInternal() throws IOException { + if(incrementConfig.isIncrement() && hadoopConfig != null) { + uploadMetricData(); + } + DBUtil.closeDBResources(resultSet,statement,dbConn, true); + } + } \ No newline at end of file diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java 
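The helpers deleted above composed the incremental range filter: buildStartLocationSql picks ">=" or ">" depending on useMaxFunc, buildEndLocationSql always uses "<", and getLocationSql renders the literal by column type (time columns go through getTimeStr, numeric columns are inlined, anything else is single-quoted). A minimal standalone sketch of that assembly, assuming a pre-quoted column name and a non-time column type:

```
// Sketch of the removed range-filter assembly (column assumed pre-quoted, non-time type).
public class IncrementFilterSketch {

    // startLocation uses ">=" when useMaxFunc is set, ">" otherwise; endLocation always uses "<".
    static String buildFilter(String column, String startLocation, String endLocation, boolean useMaxFunc) {
        StringBuilder filter = new StringBuilder(128);
        if (startLocation != null) {
            filter.append(column).append(useMaxFunc ? " >= " : " > ").append(startLocation);
        }
        if (endLocation != null) {
            if (filter.length() > 0) {
                filter.append(" and ");
            }
            filter.append(column).append(" < ").append(endLocation);
        }
        return filter.toString();
    }

    public static void main(String[] args) {
        // prints: id >= 100 and id < 200
        System.out.println(buildFilter("id", "100", "200", true));
    }
}
```

As the replacement lines above show, this logic now lives in DBUtil.buildIncrementFilter and DBUtil.buildStartLocationSql rather than in the input format itself.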
b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java index d0976b6a13..658c3cf0b0 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java @@ -21,7 +21,6 @@ import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; -import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang.StringUtils; @@ -39,9 +38,8 @@ public class JdbcInputFormatBuilder extends RichInputFormatBuilder { private JdbcInputFormat format; - public JdbcInputFormatBuilder(String dataType) { - JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(dataType, JdbcFormatLoader.INPUT_FORMAT); - super.format = format = (JdbcInputFormat) jdbcFormatLoader.getFormatInstance(); + public JdbcInputFormatBuilder() { + super.format = format = new JdbcInputFormat(); } public void setDrivername(String drivername) { diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java index 0d70d2361a..595baf6ed7 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java @@ -23,6 +23,7 @@ import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; +import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.writer.DataWriter; import org.apache.flink.streaming.api.datastream.DataStream; @@ -68,7 +69,6 @@ public void setDatabaseInterface(DatabaseInterface databaseInterface) { this.databaseInterface = databaseInterface; } - @SuppressWarnings("unchecked") public JdbcDataWriter(DataTransferConfig config) { super(config); @@ -76,6 +76,8 @@ public JdbcDataWriter(DataTransferConfig config) { WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter(); dbUrl = writerConfig.getParameter().getConnection().get(0).getJdbcUrl(); + dbUrl = DBUtil.formatJdbcUrl(writerConfig.getName(), dbUrl); + username = writerConfig.getParameter().getStringVal(KEY_USERNAME); password = writerConfig.getParameter().getStringVal(KEY_PASSWORD); table = writerConfig.getParameter().getConnection().get(0).getTable().get(0); @@ -93,7 +95,7 @@ public JdbcDataWriter(DataTransferConfig config) { @Override public DataStreamSink writeData(DataStream dataSet) { - JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(databaseInterface.getDatabaseType().name()); + JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(); builder.setDriverName(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index bc60fc02ec..83b5acaa62 100644 --- 
a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.rdb.outputformat; import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.outputformat.RichOutputFormat; @@ -27,13 +28,14 @@ import com.dtstack.flinkx.restore.FormatState; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.DateUtil; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ObjectUtils; import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.sql.*; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -84,7 +86,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected List fullColumnType; - protected List columnType = new ArrayList<>(); + private List columnType = new ArrayList<>(); protected TypeConverterInterface typeConverter; @@ -94,7 +96,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected long rowsOfCurrentTransaction; - protected final static String GET_ORACLE_INDEX_SQL = "SELECT " + + private final static String GET_ORACLE_INDEX_SQL = "SELECT " + "t.INDEX_NAME," + "t.COLUMN_NAME " + "FROM " + @@ -108,7 +110,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected final static String CONN_CLOSE_ERROR_MSG = "No operations allowed"; protected PreparedStatement prepareTemplates() throws SQLException { - if(CollectionUtils.isEmpty(fullColumn)) { + if(fullColumn == null || fullColumn.size() == 0) { fullColumn = column; } @@ -129,7 +131,7 @@ protected PreparedStatement prepareTemplates() throws SQLException { } @Override - protected void openInternal(int taskNumber, int numTasks){ + protected void openInternal(int taskNumber, int numTasks) throws IOException { try { ClassUtil.forName(driverName, getClass().getClassLoader()); dbConn = DBUtil.getConnection(dbURL, username, password); @@ -138,7 +140,7 @@ protected void openInternal(int taskNumber, int numTasks){ dbConn.setAutoCommit(false); } - if(CollectionUtils.isEmpty(fullColumn)) { + if(fullColumn == null || fullColumn.size() == 0) { fullColumn = probeFullColumns(table, dbConn); } @@ -164,7 +166,7 @@ protected void openInternal(int taskNumber, int numTasks){ preparedStatement = prepareTemplates(); readyCheckpoint = false; - LOG.info("subTask[{}}] wait finished", taskNumber); + LOG.info("subtask[" + taskNumber + "] wait finished"); } catch (SQLException sqe) { throw new IllegalArgumentException("open() failed.", sqe); } @@ -182,7 +184,7 @@ private List analyzeTable() { ret.add(rd.getColumnTypeName(i+1)); } - if(CollectionUtils.isEmpty(fullColumn)){ + if(fullColumn == null || fullColumn.size() == 0){ for(int i = 0; i < rd.getColumnCount(); ++i) { fullColumn.add(rd.getColumnName(i+1)); } @@ -271,7 +273,8 @@ public FormatState getFormatState(){ } try { - LOG.info("readyCheckpoint: {}, rowsOfCurrentTransaction: {}", readyCheckpoint, rowsOfCurrentTransaction); + LOG.info("readyCheckpoint:" + readyCheckpoint); + LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); if (readyCheckpoint || rowsOfCurrentTransaction > restoreConfig.getMaxRowNumForCheckpoint()){ @@ -318,21 
+321,75 @@ protected Object getField(Row row, int index) { field = ((java.util.Date) field).getTime(); } + field=dealOracleTimestampToVarcharOrLong(databaseInterface.getDatabaseType(),field,type); + + + if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){ + field = typeConverter.convert(field,type); + } + + return field; + } + + /** + * oracle timestamp to oracle varchar or varchar2 or long field format + * @param databaseType + * @param field + * @param type + * @return + */ + private Object dealOracleTimestampToVarcharOrLong(EDatabaseType databaseType, Object field, String type) { + if (EDatabaseType.Oracle!=databaseInterface.getDatabaseType()){ + return field; + } + + if (!(field instanceof Timestamp)){ + return field; + } + + if (type.equalsIgnoreCase(ColumnType.VARCHAR.name()) || type.equalsIgnoreCase(ColumnType.VARCHAR2.name())){ + SimpleDateFormat format = DateUtil.getDateTimeFormatter(); + field= format.format(field); + } + + if (type.equalsIgnoreCase(ColumnType.LONG.name()) ){ + field = ((Timestamp) field).getTime(); + } return field; } protected List probeFullColumns(String table, Connection dbConn) throws SQLException { + String schema =null; + if(EDatabaseType.Oracle == databaseInterface.getDatabaseType()) { + String[] parts = table.split("\\."); + if(parts.length == 2) { + schema = parts[0].toUpperCase(); + table = parts[1]; + } + } + List ret = new ArrayList<>(); - ResultSet rs = dbConn.getMetaData().getColumns(null, null, table, null); + ResultSet rs = dbConn.getMetaData().getColumns(null, schema, table, null); while(rs.next()) { ret.add(rs.getString("COLUMN_NAME")); } return ret; } + + protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { Map> map = new HashMap<>(); - ResultSet rs = dbConn.getMetaData().getIndexInfo(null, null, table, true, false); + ResultSet rs; + if(EDatabaseType.Oracle == databaseInterface.getDatabaseType()){ + PreparedStatement ps = dbConn.prepareStatement(String.format(GET_ORACLE_INDEX_SQL,table)); + rs = ps.executeQuery(); + } else if(EDatabaseType.DB2 == databaseInterface.getDatabaseType()){ + rs = dbConn.getMetaData().getIndexInfo(null, null, table.toUpperCase(), true, false); + } else { + rs = dbConn.getMetaData().getIndexInfo(null, null, table, true, false); + } + while(rs.next()) { String indexName = rs.getString("INDEX_NAME"); if(!map.containsKey(indexName)) { @@ -371,7 +428,7 @@ public void closeInternal() { @Override protected boolean needWaitBeforeWriteRecords() { - return CollectionUtils.isNotEmpty(preSql); + return preSql != null && preSql.size() != 0; } @Override @@ -383,7 +440,7 @@ protected void beforeWriteRecords() { @Override protected boolean needWaitBeforeCloseInternal() { - return CollectionUtils.isNotEmpty(postSql); + return postSql != null && postSql.size() != 0; } @Override diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java index 0cc8516216..0bd3adff21 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java @@ -19,7 +19,6 @@ import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.outputformat.RichOutputFormatBuilder; -import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import 
com.dtstack.flinkx.rdb.type.TypeConverterInterface; import java.util.List; @@ -33,9 +32,8 @@ public class JdbcOutputFormatBuilder extends RichOutputFormatBuilder { private JdbcOutputFormat format; - public JdbcOutputFormatBuilder(String dataType) { - JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(dataType, JdbcFormatLoader.OUTPUT_FORMAT); - super.format = format = (JdbcOutputFormat) jdbcFormatLoader.getFormatInstance(); + public JdbcOutputFormatBuilder() { + super.format = format = new JdbcOutputFormat(); } public JdbcOutputFormatBuilder(JdbcOutputFormat format) { diff --git a/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java deleted file mode 100644 index fc95e68534..0000000000 --- a/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.dtstack.flinkx.sqlserver.format; - -import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; -import com.dtstack.flinkx.rdb.util.DBUtil; -import org.apache.commons.collections.CollectionUtils; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.sql.Timestamp; - -import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; - -/** - * Date: 2019/09/19 - * Company: www.dtstack.com - * - * @author tudou - */ -public class SqlserverInputFormat extends JdbcInputFormat { - - @Override - public Row nextRecordInternal(Row row) throws IOException { - if (!hasNext) { - return null; - } - row = new Row(columnCount); - - try { - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - if(CollectionUtils.isNotEmpty(descColumnTypeList)) { - if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 
1 : 0); - } - } - } - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - return super.nextRecordInternal(row); - }catch (Exception e) { - throw new IOException("Couldn't read data - " + e.getMessage(), e); - } - } - - /** - * 构建边界位置sql - * @param incrementColType 增量字段类型 - * @param incrementCol 增量字段名称 - * @param location 边界位置(起始/结束) - * @param operator 判断符( >, >=, <) - * @return - */ - @Override - protected String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { - String endTimeStr; - String endLocationSql; - boolean isTimeType = ColumnType.isTimeType(incrementColType) - || ColumnType.NVARCHAR.name().equals(incrementColType); - if(isTimeType){ - endTimeStr = getTimeStr(Long.parseLong(location), incrementColType); - endLocationSql = incrementCol + operator + endTimeStr; - } else if(ColumnType.isNumberType(incrementColType)){ - endLocationSql = incrementCol + operator + location; - } else { - endTimeStr = String.format("'%s'",location); - endLocationSql = incrementCol + operator + endTimeStr; - } - - return endLocationSql; - } - - /** - * 构建时间边界字符串 - * @param location 边界位置(起始/结束) - * @param incrementColType 增量字段类型 - * @return - */ - @Override - protected String getTimeStr(Long location, String incrementColType){ - String timeStr; - Timestamp ts = new Timestamp(DBUtil.getMillis(location)); - ts.setNanos(DBUtil.getNanos(location)); - timeStr = DBUtil.getNanosTimeStr(ts.toString()); - timeStr = timeStr.substring(0,23); - timeStr = String.format("'%s'",timeStr); - - return timeStr; - } -} diff --git a/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java b/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java deleted file mode 100644 index d574881b56..0000000000 --- a/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
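The SqlserverInputFormat override above truncates the rendered timestamp literal to 23 characters (yyyy-MM-dd HH:mm:ss.SSS) instead of the 26 the base JdbcInputFormat keeps, presumably because SQL Server's DATETIME resolution stops at milliseconds; it also treats NVARCHAR increment columns as time-typed. A small illustration of the two truncations (DBUtil's millis/nanos reconstruction is skipped here):

```
import java.sql.Timestamp;

public class TimeStrSketch {
    public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2019-08-12 13:10:12.123456789");
        String full = ts.toString();                   // 2019-08-12 13:10:12.123456789
        String baseStyle = full.substring(0, 26);      // microseconds kept: ...13:10:12.123456
        String sqlServerStyle = full.substring(0, 23); // milliseconds only: ...13:10:12.123
        System.out.println("'" + baseStyle + "'");
        System.out.println("'" + sqlServerStyle + "'");
    }
}
```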
- */ -package com.dtstack.flinkx.sqlserver.format; - -import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; - -/** - * Date: 2019/09/20 - * Company: www.dtstack.com - * - * @author tudou - */ -public class SqlserverOutputFormat extends JdbcOutputFormat { -} From dd86c8937dbdbd61440612c6203131be0b1fe983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B1=9F=E5=8D=9A=5F=E6=B1=9F=E5=8D=9A?= Date: Thu, 10 Oct 2019 15:23:37 +0800 Subject: [PATCH 21/62] Revert "Merge branch 'feature_1.5_kudu' into '1.5_dev' " This reverts merge request !105 --- README.md | 4 - docs/kudureader.md | 183 ------------ docs/kuduwriter.md | 187 ------------- .../flinkx/constants/PluginNameConstrant.java | 2 - .../com/dtstack/flinkx/enums/EWriteMode.java | 4 +- flinkx-kudu/flinkx-kudu-core/pom.xml | 34 --- .../dtstack/flinkx/kudu/core/KuduConfig.java | 226 --------------- .../flinkx/kudu/core/KuduConfigBuilder.java | 148 ---------- .../flinkx/kudu/core/KuduConfigKeys.java | 42 --- .../dtstack/flinkx/kudu/core/KuduUtil.java | 261 ------------------ .../flinkx/kudu/core/test/KuduUtilTest.java | 65 ----- flinkx-kudu/flinkx-kudu-reader/pom.xml | 79 ------ .../flinkx/kudu/reader/KuduInputFormat.java | 173 ------------ .../kudu/reader/KuduInputFormatBuilder.java | 54 ---- .../flinkx/kudu/reader/KuduReader.java | 81 ------ .../flinkx/kudu/reader/KuduTableSplit.java | 48 ---- flinkx-kudu/flinkx-kudu-writer/pom.xml | 79 ------ .../flinkx/kudu/writer/KuduOutputFormat.java | 142 ---------- .../kudu/writer/KuduOutputFormatBuilder.java | 58 ---- .../flinkx/kudu/writer/KuduWriter.java | 88 ------ flinkx-kudu/pom.xml | 28 -- flinkx-test/pom.xml | 11 - .../dev_test_job/kudu_reader_template.json | 46 --- .../dev_test_job/kudu_writer_template.json | 59 ---- .../resources/dev_test_job/stream_hdfs.json | 64 ----- pom.xml | 1 - 26 files changed, 1 insertion(+), 2166 deletions(-) delete mode 100644 docs/kudureader.md delete mode 100644 docs/kuduwriter.md delete mode 100644 flinkx-kudu/flinkx-kudu-core/pom.xml delete mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java delete mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java delete mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java delete mode 100644 flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java delete mode 100644 flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java delete mode 100644 flinkx-kudu/flinkx-kudu-reader/pom.xml delete mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java delete mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java delete mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java delete mode 100644 flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java delete mode 100644 flinkx-kudu/flinkx-kudu-writer/pom.xml delete mode 100644 flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java delete mode 100644 flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java delete mode 100644 flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java delete mode 100644 flinkx-kudu/pom.xml delete mode 100644 
flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json delete mode 100644 flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json delete mode 100644 flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json diff --git a/README.md b/README.md index 64daf68da2..a503809857 100644 --- a/README.md +++ b/README.md @@ -233,8 +233,6 @@ reader和writer包括name和parameter,分别表示插件名称和插件参数 * [Carbondata读取插件](docs/carbondatareader.md) * [MySQL binlog读取插件](docs/binlog.md) * [KafKa读取插件](docs/kafkareader.md) -* [Kudu读取插件](docs/kudureader.md) - ### 5.2 写入插件 @@ -252,8 +250,6 @@ reader和writer包括name和parameter,分别表示插件名称和插件参数 * [Hive写入插件](docs/hivewriter.md) [断点续传和实时采集功能介绍](docs/restore.md) -* [Kudu写入插件](docs/kuduwriter.md) - ## 6.版本说明 diff --git a/docs/kudureader.md b/docs/kudureader.md deleted file mode 100644 index 6de48bcd1c..0000000000 --- a/docs/kudureader.md +++ /dev/null @@ -1,183 +0,0 @@ -# Kudu读取插件(kudureader) - -## 1. 配置样例 - -``` -{ - "job": { - "content": [ - { - "reader": { - "name": "kudureader", - "parameter": { - "column": [ - { - "name": "id", - "type": "long" - } - ], - "masterAddresses": "kudu1:7051,kudu2:7051,kudu3:7051", - "table": "kudu", - "readMode": "read_latest", - "authentication": "", - "principal": "", - "keytabFile": "", - "workerCount": 2, - "bossCount": 1, - "operationTimeout": 30000, - "adminOperationTimeout": 30000, - "queryTimeout": 30000, - "where": " id >= 1 ", - "batchSizeBytes": 1048576 - } - }, - "writer": {} - } - ], - "setting": {} - } -} -``` - -## 2. 参数说明 - -* **name** - - * 描述:插件名,此处填写插件名称,kudureader。 - - * 必选:是 - - * 默认值:无 - -* **column** - - * 描述:需要生成的字段。 - - * 属性说明: - - * name:字段名称; - - * type:字段类型; - - * 必选:是 - - * 默认值:无 - -* **masterAddresses** - - * 描述: master节点地址:端口,多个以,隔开。 - - * 必选:是 - - * 默认值:无 - -* **table** - - * 描述: kudu表名。 - - * 必选:是 - - * 默认值:无 - -* **readMode** - - * 描述: kudu读取模式: - - * 1、read_latest - 默认的读取模式。 - 该模式下,服务器将始终在收到请求时返回已提交的写操作。 - 这种类型的读取不会返回快照时间戳,并且不可重复。 - 用ACID术语表示,它对应于隔离模式:“读已提交”。 - - * 2、read_at_snapshot - 该模式下,服务器将尝试在提供的时间戳上执行读取。 - 如果未提供时间戳,则服务器将当前时间作为快照时间戳。 - 在这种模式下,读取是可重复的,即将来所有在相同时间戳记下的读取将产生相同的数据。 - 执行此操作的代价是等待时间戳小于快照的时间戳的正在进行的正在进行的事务,因此可能会导致延迟损失。用ACID术语,这本身就相当于隔离模式“可重复读取”。 - 如果对已扫描tablet的所有写入均在外部保持一致,则这对应于隔离模式“严格可序列化”。 - 注意:当前存在“空洞”,在罕见的边缘条件下会发生,通过这种空洞有时即使在采取措施使写入如此时,它们在外部也不一致。 - 在这些情况下,隔离可能会退化为“读取已提交”模式。 - - * 必选:是 - - * 默认值:无 - -* **authentication** - - * 描述: 认证方式,如:Kerberos。 - - * 必选:否 - - * 默认值:无 - -* **principal** - - * 描述: 用户名。 - - * 必选:否 - - * 默认值:无 - -* **keytabFile** - - * 描述: keytab文件路径。 - - * 必选:否 - - * 默认值:无 - -* **workerCount** - - * 描述: worker线程数。 - - * 必选:否 - - * 默认值:默认为cpu*2 - -* **bossCount** - - * 描述: boss线程数。 - - * 必选:否 - - * 默认值:1 - -* **operationTimeout** - - * 描述: 普通操作超时时间。 - - * 必选:否 - - * 默认值:30000 - -* **adminOperationTimeout** - - * 描述: 管理员操作(建表,删表)超时时间。 - - * 必选:否 - - * 默认值:30000 - -* **queryTimeout** - - * 描述: 连接scan token的超时时间。 - - * 必选:否 - - * 默认值:与operationTimeout一致 - -* **where** - - * 描述: 过滤条件字符串,多个以and连接。 - - * 必选:否 - - * 默认值:无 - -* **batchSizeBytes** - - * 描述: kudu scan一次性最大读取字节数。 - - * 必选:否 - - * 默认值:1048576 diff --git a/docs/kuduwriter.md b/docs/kuduwriter.md deleted file mode 100644 index 0bbb3b0850..0000000000 --- a/docs/kuduwriter.md +++ /dev/null @@ -1,187 +0,0 @@ -# Kudu写入插件(kuduwriter) - -## 1. 
配置样例 - -``` -{ - "job": { - "content": [ - { - "writer": { - "parameter": { - "column": [ - { - "name": "id", - "type": "long" - } - ], - "masterAddresses": "kudu1:7051,kudu2:7051,kudu3:7051", - "table": "kudu", - "writeMode": "insert", - "flushMode": "manual_flush", - "batchInterval": 10000, - "authentication": "", - "principal": "", - "keytabFile": "", - "workerCount": 2, - "bossCount": 1, - "operationTimeout": 30000, - "adminOperationTimeout": 30000, - "queryTimeout": 30000, - "batchSizeBytes": 1048576 - }, - "reader": {} - } - ], - "setting": {} - } -} -``` - -## 2. 参数说明 - -* **name** - - * 描述:插件名,此处填写插件名称,kuduwriter。 - - * 必选:是 - - * 默认值:无 - -* **column** - - * 描述:需要生成的字段。 - - * 属性说明: - - * name:字段名称; - - * type:字段类型; - - * 必选:是 - - * 默认值:无 - -* **masterAddresses** - - * 描述: master节点地址:端口,多个以,隔开。 - - * 必选:是 - - * 默认值:无 - -* **table** - - * 描述: kudu表名。 - - * 必选:是 - - * 默认值:无 - -* **writeMode** - - * 描述: kudu数据写入模式: - - * 1、insert - - * 2、update - - * 3、upsert - - * 必选:是 - - * 默认值:无 - -* **flushMode** - - * 描述: kudu session刷新模式: - - * 1、auto_flush_sync - - * 2、auto_flush_background - - * 3、manual_flush - - * 必选:否 - - * 默认值:auto_flush_sync - -* **batchInterval** - - * 描述: 单次批量写入数据条数 - - * 必选:否 - - * 默认值:1 - -* **authentication** - - * 描述: 认证方式,如:Kerberos。 - - * 必选:否 - - * 默认值:无 - -* **principal** - - * 描述: 用户名。 - - * 必选:否 - - * 默认值:无 - -* **keytabFile** - - * 描述: keytab文件路径。 - - * 必选:否 - - * 默认值:无 - -* **workerCount** - - * 描述: worker线程数。 - - * 必选:否 - - * 默认值:默认为cpu*2 - -* **bossCount** - - * 描述: boss线程数。 - - * 必选:否 - - * 默认值:1 - -* **operationTimeout** - - * 描述: 普通操作超时时间。 - - * 必选:否 - - * 默认值:30000 - -* **adminOperationTimeout** - - * 描述: 管理员操作(建表,删表)超时时间。 - - * 必选:否 - - * 默认值:30000 - -* **queryTimeout** - - * 描述: 连接scan token的超时时间。 - - * 必选:否 - - * 默认值:与operationTimeout一致 - -* **batchSizeBytes** - - * 描述: kudu scan一次性最大读取字节数。 - - * 必选:否 - - * 默认值:1048576 diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java index feb35ef964..f589ea4110 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/PluginNameConstrant.java @@ -38,7 +38,6 @@ public class PluginNameConstrant { public static final String POSTGRESQL_READER = "postgresqlreader"; public static final String SQLSERVER_READER = "sqlserverreader"; public static final String GBASE_READER = "gbasereader"; - public static final String KUDU_READER = "kudureader"; public static final String BINLOG_READER = "binlogreader"; public static final String KAFKA09_READER = "kafka09reader"; public static final String KAFKA10_READER = "kafka10reader"; @@ -59,7 +58,6 @@ public class PluginNameConstrant { public static final String REDIS_WRITER = "rediswriter"; public static final String SQLSERVER_WRITER = "sqlserverwriter"; public static final String GBASE_WRITER = "gbasewriter"; - public static final String KUDU_WRITER = "kuduwriter"; public static final String HIVE_WRITER = "hivewriter"; public static final String KAFKA09_WRITER = "kafka09writer"; public static final String KAFKA10_WRITER = "kafka10writer"; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java b/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java index e04688f2f6..ac7fcd257b 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EWriteMode.java @@ 
-39,7 +39,5 @@ public enum EWriteMode { /** * replace into */ - REPLACE, - - UPSERT + REPLACE } diff --git a/flinkx-kudu/flinkx-kudu-core/pom.xml b/flinkx-kudu/flinkx-kudu-core/pom.xml deleted file mode 100644 index abafab1604..0000000000 --- a/flinkx-kudu/flinkx-kudu-core/pom.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - flinkx-kudu - com.dtstack.flinkx - 1.6 - - 4.0.0 - - flinkx-kudu-core - - - - org.apache.kudu - kudu-client - 1.10.0 - - - - com.dtstack.flinkx - flinkx-core - 1.6 - provided - - - junit - junit - 4.12 - test - - - \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java deleted file mode 100644 index ea65d6c90c..0000000000 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfig.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.kudu.core; - -import java.io.Serializable; - -/** - * @author jiangbo - * @date 2019/8/2 - */ -public class KuduConfig implements Serializable { - - /** - * master节点地址:端口,多个以,隔开 - */ - private String masterAddresses; - - /** - * 认证方式,如:Kerberos - */ - private String authentication; - - /** - * 用户名 - */ - private String principal; - - /** - * keytab文件路径 - */ - private String keytabFile; - - /** - * worker线程数,默认为cpu*2 - */ - private Integer workerCount; - - /** - * boss线程数,默认为1 - */ - private Integer bossCount; - - /** - * 设置普通操作超时时间,默认30S - */ - private Long operationTimeout; - - /** - * 设置管理员操作(建表,删表)超时时间,默认30S - */ - private Long adminOperationTimeout; - - /** - * 连接scan token的超时时间,如果不设置,则与operationTimeout一致 - */ - private Long queryTimeout; - - /** - * kudu表名 - */ - private String table; - - /** - * kudu读取模式: - * 1、READ_LATEST 默认的读取模式 - * 该模式下,服务器将始终在收到请求时返回已提交的写操作。这种类型的读取不会返回快照时间戳,并且不可重复。 - * 用ACID术语表示,它对应于隔离模式:“读已提交” - * - * 2、READ_AT_SNAPSHOT - * 该模式下,服务器将尝试在提供的时间戳上执行读取。如果未提供时间戳,则服务器将当前时间作为快照时间戳。 - * 在这种模式下,读取是可重复的,即将来所有在相同时间戳记下的读取将产生相同的数据。 - * 执行此操作的代价是等待时间戳小于快照的时间戳的正在进行的正在进行的事务,因此可能会导致延迟损失。用ACID术语,这本身就相当于隔离模式“可重复读取”。 - * 如果对已扫描tablet的所有写入均在外部保持一致,则这对应于隔离模式“严格可序列化”。 - * 注意:当前存在“空洞”,在罕见的边缘条件下会发生,通过这种空洞有时即使在采取措施使写入如此时,它们在外部也不一致。 - * 在这些情况下,隔离可能会退化为“读取已提交”模式。 - * 3、READ_YOUR_WRITES 不支持该模式 - */ - private String readMode; - - /** - * 过滤条件字符串,如:id >= 1 and time > 1565586665372 - */ - private String filterString; - - /** - * kudu scan一次性最大读取字节数,默认为1MB - */ - private int batchSizeBytes; - - /** - * writer写入时session刷新模式 - * auto_flush_sync(默认) - * auto_flush_background - * manual_flush - */ - private String flushMode; - - public String getFilterString() { - return filterString; - } - - public void setFilterString(String filterString) { - 
this.filterString = filterString; - } - - public int getBatchSizeBytes() { - return batchSizeBytes; - } - - public void setBatchSizeBytes(int batchSizeBytes) { - this.batchSizeBytes = batchSizeBytes; - } - - public String getTable() { - return table; - } - - public void setTable(String table) { - this.table = table; - } - - public String getReadMode() { - return readMode; - } - - public void setReadMode(String readMode) { - this.readMode = readMode; - } - - public Long getQueryTimeout() { - return queryTimeout; - } - - public void setQueryTimeout(Long queryTimeout) { - this.queryTimeout = queryTimeout; - } - - public String getAuthentication() { - return authentication; - } - - public void setAuthentication(String authentication) { - this.authentication = authentication; - } - - public String getPrincipal() { - return principal; - } - - public void setPrincipal(String principal) { - this.principal = principal; - } - - public String getKeytabFile() { - return keytabFile; - } - - public void setKeytabFile(String keytabFile) { - this.keytabFile = keytabFile; - } - - public Integer getBossCount() { - return bossCount; - } - - public void setBossCount(Integer bossCount) { - this.bossCount = bossCount; - } - - public String getMasterAddresses() { - return masterAddresses; - } - - public void setMasterAddresses(String masterAddresses) { - this.masterAddresses = masterAddresses; - } - - public Integer getWorkerCount() { - return workerCount; - } - - public void setWorkerCount(Integer workerCount) { - this.workerCount = workerCount; - } - - public Long getOperationTimeout() { - return operationTimeout; - } - - public void setOperationTimeout(Long operationTimeout) { - this.operationTimeout = operationTimeout; - } - - public Long getAdminOperationTimeout() { - return adminOperationTimeout; - } - - public void setAdminOperationTimeout(Long adminOperationTimeout) { - this.adminOperationTimeout = adminOperationTimeout; - } - - public String getFlushMode() { - return flushMode; - } - - public void setFlushMode(String flushMode) { - this.flushMode = flushMode; - } -} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java deleted file mode 100644 index 999434ac2a..0000000000 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigBuilder.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
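The flushMode field documented above accepts auto_flush_sync (the default), auto_flush_background and manual_flush. The KuduWriter that consumed it is deleted in a later hunk of this revert, so the mapping below onto the Kudu client session API is a plausible sketch, not the recovered implementation:

```
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.SessionConfiguration;

public class FlushModeSketch {
    // Maps the config string onto Kudu's session flush modes; falls back to
    // AUTO_FLUSH_SYNC, matching the default described in the KuduConfig javadoc above.
    static KuduSession newSession(KuduClient client, String flushMode) {
        KuduSession session = client.newSession();
        if ("manual_flush".equalsIgnoreCase(flushMode)) {
            session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
        } else if ("auto_flush_background".equalsIgnoreCase(flushMode)) {
            session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND);
        } else {
            session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC);
        }
        return session;
    }
}
```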
- */ - - -package com.dtstack.flinkx.kudu.core; - -import org.apache.commons.lang.StringUtils; -import org.apache.flink.util.Preconditions; - - -/** - * @author jiangbo - * @date 2019/8/2 - */ -public final class KuduConfigBuilder { - private String masterAddresses; - private String authentication; - private String principal; - private String keytabFile; - private Integer workerCount; - private Integer bossCount; - private Long operationTimeout; - private Long adminOperationTimeout; - private Long queryTimeout; - private String table; - private String readMode; - private String flushMode; - private String filterString; - private int batchSizeBytes; - - private KuduConfigBuilder() { - } - - public static KuduConfigBuilder getInstance() { - return new KuduConfigBuilder(); - } - - public KuduConfigBuilder withMasterAddresses(String masterAddresses) { - Preconditions.checkArgument(StringUtils.isNotEmpty(masterAddresses), "Parameter [masterAddresses] can not be null or empty"); - this.masterAddresses = masterAddresses; - return this; - } - - public KuduConfigBuilder withAuthentication(String authentication) { - this.authentication = authentication; - return this; - } - - public KuduConfigBuilder withprincipal(String principal) { - this.principal = principal; - return this; - } - - public KuduConfigBuilder withKeytabFile(String keytabFile) { - this.keytabFile = keytabFile; - return this; - } - - public KuduConfigBuilder withWorkerCount(Integer workerCount) { - Preconditions.checkArgument(workerCount > 0, "Parameter [workerCount] should be greater than 0"); - this.workerCount = workerCount; - return this; - } - - public KuduConfigBuilder withBossCount(Integer bossCount) { - Preconditions.checkArgument(bossCount > 0, "Parameter [bossCount] should be greater than 0"); - this.bossCount = bossCount; - return this; - } - - public KuduConfigBuilder withOperationTimeout(Long operationTimeout) { - Preconditions.checkArgument(operationTimeout > 0, "Parameter [operationTimeout] should be greater than 0"); - this.operationTimeout = operationTimeout; - return this; - } - - public KuduConfigBuilder withAdminOperationTimeout(Long adminOperationTimeout) { - Preconditions.checkArgument(adminOperationTimeout > 0, "Parameter [adminOperationTimeout] should be greater than 0"); - this.adminOperationTimeout = adminOperationTimeout; - return this; - } - - public KuduConfigBuilder withTable(String table){ - Preconditions.checkArgument(StringUtils.isNotEmpty(table), "Parameter [table] can not be null or empty"); - this.table = table; - return this; - } - - public KuduConfigBuilder withReadMode(String readMode){ - Preconditions.checkArgument(StringUtils.isNotEmpty(readMode), "Parameter [readMode] can not be null or empty"); - this.readMode = readMode; - return this; - } - - public KuduConfigBuilder withFlushMode(String flushMode){ - this.flushMode = flushMode; - return this; - } - - public KuduConfigBuilder withFilter(String filter){ - this.filterString = filter; - return this; - } - - public KuduConfigBuilder withQueryTimeout(Long queryTimeout){ - this.queryTimeout = queryTimeout; - return this; - } - - public KuduConfigBuilder withBatchSizeBytes(Integer batchSizeBytes){ - this.batchSizeBytes = batchSizeBytes; - return this; - } - - public KuduConfig build() { - KuduConfig kuduConfig = new KuduConfig(); - kuduConfig.setMasterAddresses(masterAddresses); - kuduConfig.setAuthentication(authentication); - kuduConfig.setPrincipal(principal); - kuduConfig.setKeytabFile(keytabFile); - kuduConfig.setWorkerCount(workerCount); - 
kuduConfig.setBossCount(bossCount); - kuduConfig.setOperationTimeout(operationTimeout); - kuduConfig.setAdminOperationTimeout(adminOperationTimeout); - kuduConfig.setQueryTimeout(queryTimeout); - kuduConfig.setTable(table); - kuduConfig.setReadMode(readMode); - kuduConfig.setFlushMode(flushMode); - kuduConfig.setFilterString(filterString); - kuduConfig.setBatchSizeBytes(batchSizeBytes); - return kuduConfig; - } -} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java deleted file mode 100644 index ec89e847be..0000000000 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduConfigKeys.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.kudu.core; - -/** - * @author jiangbo - * @date 2019/8/12 - */ -public class KuduConfigKeys { - - public final static String KEY_MASTER_ADDRESSES = "masterAddresses"; - public final static String KEY_AUTHENTICATION = "authentication"; - public final static String KEY_PRINCIPAL = "principal"; - public final static String KEY_KEYTABFILE = "keytabFile"; - public final static String KEY_WORKER_COUNT = "workerCount"; - public final static String KEY_BOSS_COUNT = "bossCount"; - public final static String KEY_OPERATION_TIMEOUT = "operationTimeout"; - public final static String KEY_QUERY_TIMEOUT = "queryTimeout"; - public final static String KEY_ADMIN_OPERATION_TIMEOUT = "adminOperationTimeout"; - public final static String KEY_TABLE = "table"; - public final static String KEY_READ_MODE = "readMode"; - public final static String KEY_FLUSH_MODE = "flushMode"; - public final static String KEY_FILTER = "where"; - public final static String KEY_BATCH_SIZE_BYTES = "batchSizeBytes"; -} diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java deleted file mode 100644 index 14f7177d34..0000000000 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
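Taken together, the builder deleted above is a plain fluent validator: each withXxx method guards its argument with Preconditions and build() copies the fields into a KuduConfig. A usage sketch with hypothetical values:

```
// Hypothetical values; the methods are exactly those deleted above.
KuduConfig config = KuduConfigBuilder.getInstance()
        .withMasterAddresses("kudu1:7051,kudu2:7051,kudu3:7051")
        .withTable("example_table")
        .withReadMode("read_latest")
        .withWorkerCount(2)
        .withBossCount(1)
        .withOperationTimeout(30000L)
        .withAdminOperationTimeout(30000L)
        .withQueryTimeout(30000L)
        .withBatchSizeBytes(1048576)
        .build();
```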
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.kudu.core; - -import com.dtstack.flinkx.reader.MetaColumn; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.math.NumberUtils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.kudu.ColumnSchema; -import org.apache.kudu.Type; -import org.apache.kudu.client.*; - -import java.io.IOException; -import java.math.BigDecimal; -import java.security.PrivilegedExceptionAction; -import java.sql.Timestamp; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduUtil { - - private static String FILTER_SPLIT_REGEX = "(?i)\\s+and\\s+"; - private static String EXPRESS_REGEX = "(?[^\\=|\\s]+)+\\s*(?[\\>|\\<|\\=]+)\\s*(?.*)"; - private static Pattern EXPRESS_PATTERN = Pattern.compile(EXPRESS_REGEX); - - public final static String AUTHENTICATION_TYPE = "Kerberos"; - - public static KuduClient getKuduClient(KuduConfig config) throws IOException,InterruptedException { - if(AUTHENTICATION_TYPE.equals(config.getAuthentication())){ - UserGroupInformation.loginUserFromKeytab(config.getPrincipal(), config.getKeytabFile()); - return UserGroupInformation.getLoginUser().doAs(new PrivilegedExceptionAction() { - @Override - public KuduClient run() throws Exception { - return getKuduClientInternal(config); - } - }); - } else { - return getKuduClientInternal(config); - } - } - - private static KuduClient getKuduClientInternal(KuduConfig config) { - return new AsyncKuduClient.AsyncKuduClientBuilder(Arrays.asList(config.getMasterAddresses().split(","))) - .workerCount(config.getWorkerCount()) - .bossCount(config.getBossCount()) - .defaultAdminOperationTimeoutMs(config.getAdminOperationTimeout()) - .defaultOperationTimeoutMs(config.getOperationTimeout()) - .build() - .syncClient(); - } - - public static List getKuduScanToken(KuduConfig config, List columns, String filterString) throws IOException{ - try ( - KuduClient client = getKuduClient(config) - ) { - KuduTable kuduTable = client.openTable(config.getTable()); - - List columnNames = new ArrayList<>(columns.size()); - for (MetaColumn column : columns) { - columnNames.add(column.getName()); - } - - KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(kuduTable) - .readMode(getReadMode(config.getReadMode())) - .batchSizeBytes(config.getBatchSizeBytes()) - .setTimeout(config.getQueryTimeout()) - .setProjectedColumnNames(columnNames); - - //添加过滤条件 - addPredicates(builder, filterString, columns); - - return builder.build(); - } catch (Exception e) { - throw new IOException("Get ScanToken error", e); - } - } - - private static AsyncKuduScanner.ReadMode getReadMode(String readMode){ - if(AsyncKuduScanner.ReadMode.READ_LATEST.name().equalsIgnoreCase(readMode)){ - return AsyncKuduScanner.ReadMode.READ_LATEST; - } else { - return AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT; - } - } - - private static void addPredicates(KuduScanToken.KuduScanTokenBuilder builder, String filterString, List columns){ - if(StringUtils.isEmpty(filterString)){ - return; - } - - Map 
nameTypeMap = new HashMap<>(); - for (MetaColumn column : columns) { - nameTypeMap.put(column.getName(), getType(column.getType())); - } - - String[] filters = filterString.split(FILTER_SPLIT_REGEX); - for (String filter : filters) { - if(StringUtils.isNotBlank(filter)){ - ExpressResult expressResult = parseExpress(filter, nameTypeMap); - KuduPredicate predicate = KuduPredicate.newComparisonPredicate(expressResult.getColumnSchema(), expressResult.getOp(), expressResult.getValue()); - builder.addPredicate(predicate); - } - } - } - - public static Type getType(String columnType){ - switch (columnType.toLowerCase()){ - case "boolean" : - case "bool" : return Type.BOOL; - case "int8": - case "byte" : return Type.INT8; - case "int16": - case "short" : return Type.INT16; - case "int32": - case "integer": - case "int" : return Type.INT32; - case "int64": - case "bigint": - case "long" : return Type.INT64; - case "float" : return Type.FLOAT; - case "double" : return Type.DOUBLE; - case "decimal" : return Type.DECIMAL; - case "binary" : return Type.BINARY; - case "char": - case "varchar": - case "text": - case "string" : return Type.STRING; - case "timestamp" : return Type.UNIXTIME_MICROS; - default: - throw new IllegalArgumentException("Not support column type:" + columnType); - } - } - - public static ExpressResult parseExpress(String express, Map nameTypeMap){ - Matcher matcher = EXPRESS_PATTERN.matcher(express.trim()); - if (matcher.find()) { - String column = matcher.group("column"); - String op = matcher.group("op"); - String value = matcher.group("value"); - - Type type = nameTypeMap.get(column.trim()); - if(type == null){ - throw new IllegalArgumentException("Can not find column:" + column + " from column list"); - } - - ColumnSchema columnSchema = new ColumnSchema.ColumnSchemaBuilder(column, type).build(); - - ExpressResult result = new ExpressResult(); - result.setColumnSchema(columnSchema); - result.setOp(getOp(op)); - result.setValue(getValue(value, type)); - - return result; - } else { - throw new IllegalArgumentException("Illegal filter express:" + express); - } - } - - public static Object getValue(String value, Type type){ - if(value == null){ - return null; - } - - if(value.startsWith("\"") || value.endsWith("'")){ - value = value.substring(1, value.length() - 1); - } - - Object objValue; - if (Type.BOOL.equals(type)){ - objValue = Boolean.valueOf(value); - } else if(Type.INT8.equals(type)){ - objValue = Byte.valueOf(value); - } else if(Type.INT16.equals(type)){ - objValue = Short.valueOf(value); - } else if(Type.INT32.equals(type)){ - objValue = Integer.valueOf(value); - } else if(Type.INT64.equals(type)){ - objValue = Long.valueOf(value); - } else if(Type.FLOAT.equals(type)){ - objValue = Float.valueOf(value); - } else if(Type.DOUBLE.equals(type)){ - objValue = Double.valueOf(value); - } else if(Type.DECIMAL.equals(type)){ - objValue = new BigDecimal(value); - } else if(Type.UNIXTIME_MICROS.equals(type)){ - if(NumberUtils.isNumber(value)){ - objValue = Long.valueOf(value); - } else { - objValue = Timestamp.valueOf(value); - } - } else { - objValue = value; - } - - return objValue; - } - - private static KuduPredicate.ComparisonOp getOp(String opExpress){ - switch (opExpress){ - case "=" : return KuduPredicate.ComparisonOp.EQUAL; - case ">" : return KuduPredicate.ComparisonOp.GREATER; - case ">=" : return KuduPredicate.ComparisonOp.GREATER_EQUAL; - case "<" : return KuduPredicate.ComparisonOp.LESS; - case "<=" : return KuduPredicate.ComparisonOp.LESS_EQUAL; - default: - throw new 
IllegalArgumentException("Comparison express only support '=','>','>=','<','<='"); - } - } - - public static class ExpressResult{ - private ColumnSchema columnSchema; - private KuduPredicate.ComparisonOp op; - private Object value; - - public ColumnSchema getColumnSchema() { - return columnSchema; - } - - public void setColumnSchema(ColumnSchema columnSchema) { - this.columnSchema = columnSchema; - } - - public KuduPredicate.ComparisonOp getOp() { - return op; - } - - public void setOp(KuduPredicate.ComparisonOp op) { - this.op = op; - } - - public Object getValue() { - return value; - } - - public void setValue(Object value) { - this.value = value; - } - } -} diff --git a/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java b/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java deleted file mode 100644 index c6a564d50d..0000000000 --- a/flinkx-kudu/flinkx-kudu-core/src/test/java/com/dtstack/flinkx/kudu/core/test/KuduUtilTest.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package com.dtstack.flinkx.kudu.core.test; - -import com.dtstack.flinkx.kudu.core.KuduUtil; -import org.apache.kudu.Type; -import org.apache.kudu.client.KuduPredicate; -import org.junit.Assert; -import org.junit.Test; - -import java.sql.Timestamp; -import java.util.HashMap; -import java.util.Map; - -/** - * @author jiangbo - * @date 2019/8/12 - */ -public class KuduUtilTest { - - @Test - public void parseExpressTest(){ - Map nameTypeMap = new HashMap<>(); - nameTypeMap.put("id", Type.INT32); - nameTypeMap.put("name", Type.STRING); - nameTypeMap.put("time", Type.UNIXTIME_MICROS); - - KuduUtil.ExpressResult result = KuduUtil.parseExpress(" id >= 1", nameTypeMap); - Assert.assertEquals(result.getColumnSchema().getName(), "id"); - Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.GREATER_EQUAL); - Assert.assertTrue(result.getValue() instanceof Integer); - - result = KuduUtil.parseExpress("name = \"xxxxx\"", nameTypeMap); - Assert.assertEquals(result.getColumnSchema().getName(), "name"); - Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.EQUAL); - Assert.assertTrue(result.getValue() instanceof String); - - result = KuduUtil.parseExpress("time > 1565586665372 ", nameTypeMap); - Assert.assertEquals(result.getColumnSchema().getName(), "time"); - Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.GREATER); - Assert.assertTrue(result.getValue() instanceof Long); - - result = KuduUtil.parseExpress("time <= '2019-08-12 13:10:12'", nameTypeMap); - Assert.assertEquals(result.getColumnSchema().getName(), "time"); - Assert.assertEquals(result.getOp(), KuduPredicate.ComparisonOp.LESS_EQUAL); - Assert.assertTrue(result.getValue() instanceof Timestamp); - } -} diff --git a/flinkx-kudu/flinkx-kudu-reader/pom.xml b/flinkx-kudu/flinkx-kudu-reader/pom.xml deleted file mode 100644 index 524b6236df..0000000000 --- a/flinkx-kudu/flinkx-kudu-reader/pom.xml +++ /dev/null @@ -1,79 +0,0 @@ - - - - flinkx-kudu - com.dtstack.flinkx - 1.6 - - 4.0.0 - - flinkx-kudu-reader - - - - com.dtstack.flinkx - flinkx-kudu-core - 1.6 - - - - - - - org.apache.maven.plugins - maven-shade-plugin - 1.4 - - - package - - shade - - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - maven-antrun-plugin - 1.2 - - - copy-resources - - package - - run - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java deleted file mode 100644 index 5fc3009a05..0000000000 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
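The unit tests above pin down the deleted where-clause mini-grammar: the filter string is split on a case-insensitive "and", and each condition is matched against a named-group pattern. The group names column/op/value are restored here from the group(...) calls in KuduUtil.parseExpress, since this rendering of the regex dropped them. A self-contained tokenizer sketch:

```
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WhereClauseSketch {
    private static final String FILTER_SPLIT_REGEX = "(?i)\\s+and\\s+";
    private static final Pattern EXPRESS_PATTERN =
            Pattern.compile("(?<column>[^\\=|\\s]+)+\\s*(?<op>[\\>|\\<|\\=]+)\\s*(?<value>.*)");

    public static void main(String[] args) {
        String where = "id >= 1 and time <= '2019-08-12 13:10:12'";
        for (String expr : where.split(FILTER_SPLIT_REGEX)) {
            Matcher m = EXPRESS_PATTERN.matcher(expr.trim());
            if (m.find()) {
                System.out.printf("column=%s op=%s value=%s%n",
                        m.group("column"), m.group("op"), m.group("value"));
            }
        }
    }
}
```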
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.kudu.reader; - -import com.dtstack.flinkx.inputformat.RichInputFormat; -import com.dtstack.flinkx.kudu.core.KuduConfig; -import com.dtstack.flinkx.kudu.core.KuduUtil; -import com.dtstack.flinkx.reader.MetaColumn; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.core.io.InputSplit; -import org.apache.flink.types.Row; -import org.apache.kudu.Type; -import org.apache.kudu.client.*; - -import java.io.IOException; -import java.util.List; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduInputFormat extends RichInputFormat { - - protected List columns; - - protected KuduConfig kuduConfig; - - private transient KuduClient client; - - private transient KuduScanner scanner; - - private transient RowResultIterator iterator; - - @Override - public void openInputFormat() throws IOException { - LOG.info("execute openInputFormat"); - super.openInputFormat(); - - try { - client = KuduUtil.getKuduClient(kuduConfig); - } catch (IOException | InterruptedException e) { - throw new RuntimeException("Get KuduClient error", e); - } - } - - @Override - protected void openInternal(InputSplit inputSplit) throws IOException { - LOG.info("execute openInternal,splitNumber = {}, indexOfSubtask = {}", inputSplit.getSplitNumber(), indexOfSubtask); - KuduTableSplit kuduTableSplit = (KuduTableSplit) inputSplit; - scanner = KuduScanToken.deserializeIntoScanner(kuduTableSplit.getToken(), client); - } - - @Override - protected Row nextRecordInternal(Row row) throws IOException { - row = new Row(columns.size()); - RowResult rowResult = iterator.next(); - - for (int i = 0; i < columns.size(); i++) { - MetaColumn column = columns.get(i); - Type type = KuduUtil.getType(column.getType()); - if (column.getValue() != null) { - row.setField(i, KuduUtil.getValue(column.getValue(), type)); - } else { - row.setField(i, getValue(type, rowResult, column.getName())); - } - } - - LOG.info("nextRecordInternal, numReadCounter = {}", numReadCounter.getLocalValue()); - return row; - } - - private Object getValue(Type type, RowResult rowResult, String name) { - Object objValue; - - if (Type.BOOL.equals(type)) { - objValue = rowResult.getBoolean(name); - } else if (Type.INT8.equals(type)) { - objValue = rowResult.getByte(name); - } else if (Type.INT16.equals(type)) { - objValue = rowResult.getShort(name); - } else if (Type.INT32.equals(type)) { - objValue = rowResult.getInt(name); - } else if (Type.INT64.equals(type)) { - objValue = rowResult.getLong(name); - } else if (Type.FLOAT.equals(type)) { - objValue = rowResult.getFloat(name); - } else if (Type.DOUBLE.equals(type)) { - objValue = rowResult.getDouble(name); - } else if (Type.DECIMAL.equals(type)) { - objValue = rowResult.getDecimal(name); - } else if (Type.BINARY.equals(type)) { - objValue = rowResult.getBinary(name); - } else if (Type.UNIXTIME_MICROS.equals(type)) { - objValue = rowResult.getTimestamp(name); - } else { - objValue = rowResult.getString(name); - } - - return objValue; - } - - @Override - public InputSplit[] createInputSplits(int minNumSplits) throws IOException { - LOG.info("execute createInputSplits,minNumSplits:{}", minNumSplits); - List scanTokens = KuduUtil.getKuduScanToken(kuduConfig, columns, kuduConfig.getFilterString()); - KuduTableSplit[] inputSplits = new KuduTableSplit[scanTokens.size()]; - for (int i = 0; i < scanTokens.size(); i++) { - inputSplits[i] = new 
KuduTableSplit(scanTokens.get(i).serialize(), i); - } - - return inputSplits; - } - - @Override - public boolean reachedEnd() throws IOException { - LOG.info("execute reachedEnd, indexOfSubtask = {}", indexOfSubtask); - if (iterator == null || !iterator.hasNext()) { - return getNextRows(); - } - - return false; - } - - private boolean getNextRows() throws IOException { - LOG.info("execute getNextRows, scanner is closed : {}", scanner.isClosed()); - if (scanner.hasMoreRows()) { - iterator = scanner.nextRows(); - } - - return iterator == null || !iterator.hasNext(); - } - - @Override - protected void closeInternal() throws IOException { - LOG.info("execute closeInternal, indexOfSubtask = {}", indexOfSubtask); - if (scanner != null) { - scanner.close(); - scanner = null; - } - } - - @Override - public void closeInputFormat() throws IOException { - super.closeInputFormat(); - - if (client != null) { - client.close(); - client = null; - } - } - - @Override - public void configure(Configuration parameters) { - - } -} diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java deleted file mode 100644 index b9f9c919c8..0000000000 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
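For reference, the splits created above carry serialized Kudu scan tokens, one per scan range. A hedged sketch of the round trip, assuming a KuduClient named client and a KuduTable named table, with predicates omitted:

    // Producer side: build one token per tablet/scan range and serialize it into the split.
    List<KuduScanToken> tokens = client.newScanTokenBuilder(table).build();
    byte[] payload = tokens.get(0).serialize();

    // Consumer side, as in openInternal above: rebuild a scanner from the bytes.
    KuduScanner scanner = KuduScanToken.deserializeIntoScanner(payload, client);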
- */ - - -package com.dtstack.flinkx.kudu.reader; - -import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; -import com.dtstack.flinkx.kudu.core.KuduConfig; -import com.dtstack.flinkx.reader.MetaColumn; - -import java.util.List; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduInputFormatBuilder extends RichInputFormatBuilder { - - private KuduInputFormat format; - - public KuduInputFormatBuilder() { - super.format = format = new KuduInputFormat(); - } - - public void setColumns(List columns){ - format.columns = columns; - } - - public void setKuduConfig(KuduConfig kuduConfig){ - format.kuduConfig = kuduConfig; - } - - @Override - protected void checkFormat() { - if (format.columns == null || format.columns.size() == 0){ - throw new IllegalArgumentException("columns can not be empty"); - } - } -} diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java deleted file mode 100644 index b030013266..0000000000 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package com.dtstack.flinkx.kudu.reader; - -import com.dtstack.flinkx.config.DataTransferConfig; -import com.dtstack.flinkx.config.ReaderConfig; -import com.dtstack.flinkx.kudu.core.KuduConfig; -import com.dtstack.flinkx.kudu.core.KuduConfigBuilder; -import com.dtstack.flinkx.reader.DataReader; -import com.dtstack.flinkx.reader.MetaColumn; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.types.Row; -import org.apache.kudu.client.AsyncKuduClient; - -import java.util.List; - -import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduReader extends DataReader { - - private List columns; - - private KuduConfig kuduConfig; - - public KuduReader(DataTransferConfig config, StreamExecutionEnvironment env) { - super(config, env); - - ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader(); - ReaderConfig.ParameterConfig parameterConfig = readerConfig.getParameter(); - - columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); - kuduConfig = KuduConfigBuilder.getInstance() - .withMasterAddresses(parameterConfig.getStringVal(KEY_MASTER_ADDRESSES)) - .withAuthentication(parameterConfig.getStringVal(KEY_AUTHENTICATION)) - .withprincipal(parameterConfig.getStringVal(KEY_PRINCIPAL)) - .withKeytabFile(parameterConfig.getStringVal(KEY_KEYTABFILE)) - .withWorkerCount(parameterConfig.getIntVal(KEY_WORKER_COUNT, 2 * Runtime.getRuntime().availableProcessors())) - .withBossCount(parameterConfig.getIntVal(KEY_BOSS_COUNT, 1)) - .withOperationTimeout(parameterConfig.getLongVal(KEY_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) - .withQueryTimeout(parameterConfig.getLongVal(KEY_QUERY_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) - .withAdminOperationTimeout(parameterConfig.getLongVal(KEY_ADMIN_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) - .withTable(parameterConfig.getStringVal(KEY_TABLE)) - .withReadMode(parameterConfig.getStringVal(KEY_READ_MODE)) - .withBatchSizeBytes(parameterConfig.getIntVal(KEY_BATCH_SIZE_BYTES, 1024*1024)) - .withFilter(parameterConfig.getStringVal(KEY_FILTER)) - .build(); - } - - @Override - public DataStream readData() { - KuduInputFormatBuilder builder = new KuduInputFormatBuilder(); - builder.setColumns(columns); - builder.setMonitorUrls(monitorUrls); - builder.setBytes(bytes); - builder.setKuduConfig(kuduConfig); - - return createInput(builder.finish(), "kudureader"); - } -} diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java deleted file mode 100644 index ae79e26488..0000000000 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduTableSplit.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.kudu.reader; - -import org.apache.flink.core.io.InputSplit; - - -/** - * @author jiangbo - * @date 2019/8/2 - */ -public class KuduTableSplit implements InputSplit { - - private byte[] token; - - private int splitNumber; - - public KuduTableSplit(byte[] token, int splitNumber) { - this.token = token; - this.splitNumber = splitNumber; - } - - @Override - public int getSplitNumber() { - return splitNumber; - } - - public byte[] getToken() { - return token; - } -} diff --git a/flinkx-kudu/flinkx-kudu-writer/pom.xml b/flinkx-kudu/flinkx-kudu-writer/pom.xml deleted file mode 100644 index f82f24aecc..0000000000 --- a/flinkx-kudu/flinkx-kudu-writer/pom.xml +++ /dev/null @@ -1,79 +0,0 @@ - - - - flinkx-kudu - com.dtstack.flinkx - 1.6 - - 4.0.0 - - flinkx-kudu-writer - - - - com.dtstack.flinkx - flinkx-kudu-core - 1.6 - - - - - - - org.apache.maven.plugins - maven-shade-plugin - 1.4 - - - package - - shade - - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - maven-antrun-plugin - 1.2 - - - copy-resources - - package - - run - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java deleted file mode 100644 index 5cbbd82fb9..0000000000 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package com.dtstack.flinkx.kudu.writer; - -import com.dtstack.flinkx.enums.EWriteMode; -import com.dtstack.flinkx.exception.WriteRecordException; -import com.dtstack.flinkx.kudu.core.KuduConfig; -import com.dtstack.flinkx.kudu.core.KuduUtil; -import com.dtstack.flinkx.outputformat.RichOutputFormat; -import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ExceptionUtil; -import org.apache.flink.types.Row; -import org.apache.kudu.client.*; - -import java.io.IOException; -import java.util.List; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduOutputFormat extends RichOutputFormat { - - protected List columns; - - protected KuduConfig kuduConfig; - - protected String writeMode; - - private transient KuduClient client; - - private transient KuduSession session; - - private transient KuduTable kuduTable; - - @Override - protected void openInternal(int taskNumber, int numTasks) throws IOException { - try{ - client = KuduUtil.getKuduClient(kuduConfig); - } catch (Exception e){ - throw new RuntimeException("Get KuduClient error", e); - } - - session = client.newSession(); - session.setMutationBufferSpace(batchInterval); - kuduTable = client.openTable(kuduConfig.getTable()); - - switch (kuduConfig.getFlushMode().toLowerCase()){ - case "auto_flush_background": - session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); - break; - case "manual_flush": - session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); - break; - default: - session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); - } - } - - @Override - protected void writeSingleRecordInternal(Row row) throws WriteRecordException { - writeData(row); - - if(numWriteCounter.getLocalValue() % batchInterval == 0){ - LOG.info("writeSingleRecordInternal, numWriteCounter = {}", numWriteCounter.getLocalValue()); - try { - session.flush(); - } catch (KuduException e) { - throw new RuntimeException("Flush data error", e); - } - } - } - - private void writeData(Row row) throws WriteRecordException { - int index = 0; - try { - Operation operation = getOperation(); - for (int i = 0; i < columns.size(); i++) { - index = i; - MetaColumn column = columns.get(i); - operation.getRow().addObject(column.getName(), row.getField(i)); - } - - session.apply(operation); - } catch (Exception e){ - LOG.error("Write data error, index = {}, row = {}, e = {}", index, row, ExceptionUtil.getErrorMessage(e)); - throw new WriteRecordException("Write data error", e, index, row); - } - } - - private Operation getOperation(){ - if(EWriteMode.INSERT.name().equalsIgnoreCase(writeMode)){ - return kuduTable.newInsert(); - } else if(EWriteMode.UPDATE.name().equalsIgnoreCase(writeMode)){ - return kuduTable.newUpdate(); - } else if(EWriteMode.UPSERT.name().equalsIgnoreCase(writeMode)){ - return kuduTable.newUpsert(); - } else { - throw new IllegalArgumentException("Not support writeMode:" + writeMode); - } - } - - @Override - protected void writeMultipleRecordsInternal() throws Exception { - LOG.info("writeRecordInternal, row size = {}", rows.size()); - for (Row row : rows) { - writeData(row); - } - session.flush(); - } - - @Override - public void closeInternal() throws IOException { - super.closeInternal(); - - if(session != null){ - session.flush(); - session.close(); - } - - if(client != null){ - client.close(); - } - } -} diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java 
b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java deleted file mode 100644 index a18d17e689..0000000000 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormatBuilder.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.kudu.writer; - -import com.dtstack.flinkx.kudu.core.KuduConfig; -import com.dtstack.flinkx.outputformat.RichOutputFormatBuilder; -import com.dtstack.flinkx.reader.MetaColumn; - -import java.util.List; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduOutputFormatBuilder extends RichOutputFormatBuilder { - - private KuduOutputFormat format; - - public KuduOutputFormatBuilder() { - super.format = format = new KuduOutputFormat(); - } - - public void setColumns(List columns){ - format.columns = columns; - } - - public void setKuduConfig(KuduConfig kuduConfig){ - format.kuduConfig = kuduConfig; - } - - public void setWriteMode(String writeMode){ - format.writeMode = writeMode; - } - - @Override - protected void checkFormat() { - if (format.columns == null || format.columns.size() == 0){ - throw new IllegalArgumentException("columns can not be empty"); - } - } -} diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java deleted file mode 100644 index 2a4e626b3b..0000000000 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package com.dtstack.flinkx.kudu.writer; - -import com.dtstack.flinkx.config.DataTransferConfig; -import com.dtstack.flinkx.config.WriterConfig; -import com.dtstack.flinkx.kudu.core.KuduConfig; -import com.dtstack.flinkx.kudu.core.KuduConfigBuilder; -import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.writer.DataWriter; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSink; -import org.apache.flink.streaming.api.functions.sink.DtOutputFormatSinkFunction; -import org.apache.flink.types.Row; -import org.apache.kudu.client.AsyncKuduClient; - -import java.util.List; - -import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*; - -/** - * @author jiangbo - * @date 2019/7/31 - */ -public class KuduWriter extends DataWriter { - - private List columns; - - private KuduConfig kuduConfig; - - private String writeMode; - - private int batchInterval; - - public KuduWriter(DataTransferConfig config) { - super(config); - - WriterConfig.ParameterConfig parameterConfig = config.getJob().getContent().get(0).getWriter().getParameter(); - - columns = MetaColumn.getMetaColumns(parameterConfig.getColumn()); - writeMode = parameterConfig.getStringVal("writeMode"); - batchInterval = parameterConfig.getIntVal("batchInterval", 1); - kuduConfig = KuduConfigBuilder.getInstance() - .withMasterAddresses(parameterConfig.getStringVal(KEY_MASTER_ADDRESSES)) - .withAuthentication(parameterConfig.getStringVal(KEY_AUTHENTICATION)) - .withprincipal(parameterConfig.getStringVal(KEY_PRINCIPAL)) - .withKeytabFile(parameterConfig.getStringVal(KEY_KEYTABFILE)) - .withWorkerCount(parameterConfig.getIntVal(KEY_WORKER_COUNT, 2 * Runtime.getRuntime().availableProcessors())) - .withBossCount(parameterConfig.getIntVal(KEY_BOSS_COUNT, 1)) - .withOperationTimeout(parameterConfig.getLongVal(KEY_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_OPERATION_TIMEOUT_MS)) - .withAdminOperationTimeout(parameterConfig.getLongVal(KEY_ADMIN_OPERATION_TIMEOUT, AsyncKuduClient.DEFAULT_KEEP_ALIVE_PERIOD_MS)) - .withTable(parameterConfig.getStringVal(KEY_TABLE)) - .withFlushMode(parameterConfig.getStringVal(KEY_FLUSH_MODE)) - .build(); - } - - @Override - public DataStreamSink writeData(DataStream dataSet) { - KuduOutputFormatBuilder builder = new KuduOutputFormatBuilder(); - builder.setMonitorUrls(monitorUrls); - builder.setColumns(columns); - builder.setKuduConfig(kuduConfig); - builder.setWriteMode(writeMode); - builder.setBatchInterval(batchInterval); - - DtOutputFormatSinkFunction formatSinkFunction = new DtOutputFormatSinkFunction(builder.finish()); - DataStreamSink dataStreamSink = dataSet.addSink(formatSinkFunction); - dataStreamSink.name("kuduwriter"); - return dataStreamSink; - } -} diff --git a/flinkx-kudu/pom.xml b/flinkx-kudu/pom.xml deleted file mode 100644 index 01ce9e536c..0000000000 --- a/flinkx-kudu/pom.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - flinkx-all - com.dtstack.flinkx - 1.6 - - 4.0.0 - - flinkx-kudu - pom - - flinkx-kudu-core - flinkx-kudu-reader - flinkx-kudu-writer - - - - - com.dtstack.flinkx - flinkx-core - 1.6 - provided - - - \ No newline at end of file diff --git a/flinkx-test/pom.xml b/flinkx-test/pom.xml index f699eb3dea..4452e52497 100644 --- a/flinkx-test/pom.xml +++ b/flinkx-test/pom.xml @@ -291,17 +291,6 @@ 1.6 - - com.dtstack.flinkx - flinkx-kudu-reader - 1.6 - - - - com.dtstack.flinkx - flinkx-kudu-writer - 1.6 - diff --git a/flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json 
b/flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json deleted file mode 100644 index 9a2bd1ae10..0000000000 --- a/flinkx-test/src/main/resources/dev_test_job/kudu_reader_template.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "job": { - "content": [ - { - "reader": { - "parameter": { - "column": [ - { - "name": "id", - "type": "long" - }, - { - "name": "user_id", - "type": "long" - }, - { - "name": "name", - "type": "string" - } - ], - "masterAddresses": "impala1:7051,impala2:7051,impala3:7051", - "table": "kudu_range_table", - "readMode": "read_latest", - "filter": "" - }, - "name": "kudureader" - }, - "writer": { - "parameter": { - "print": false - }, - "name": "streamwriter" - } - } - ], - "setting": { - "errorLimit": { - "record": 100 - }, - "speed": { - "bytes": 1048576, - "channel": 1 - } - } - } -} \ No newline at end of file diff --git a/flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json b/flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json deleted file mode 100644 index 6960765f86..0000000000 --- a/flinkx-test/src/main/resources/dev_test_job/kudu_writer_template.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "job": { - "content": [ - { - "writer": { - "parameter": { - "column": [ - { - "name": "id", - "type": "long" - }, - { - "name": "user_id", - "type": "long" - }, - { - "name": "name", - "type": "string" - } - ], - "masterAddresses": "impala1:7051,impala2:7051,impala3:7051", - "table": "kudu_range_table_write", - "writeMode": "insert" - }, - "name": "kuduwriter" - }, - "reader": { - "parameter": { - "column": [ - { - "name": "id", - "type": "long" - }, - { - "name": "user_id", - "type": "long" - }, - { - "name": "name", - "type": "string" - } - ], - "sliceRecordCount": ["10000"] - }, - "name": "streamreader" - } - } - ], - "setting": { - "errorLimit": { - "record": 100 - }, - "speed": { - "bytes": 1048576, - "channel": 1 - } - } - } -} \ No newline at end of file diff --git a/flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json b/flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json deleted file mode 100644 index 8a1cac8f51..0000000000 --- a/flinkx-test/src/main/resources/dev_test_job/stream_hdfs.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "job": { - "content": [ - { - "reader": { - "name": "streamreader", - "parameter": { - "column": [ - { - "name": "id", - "type": "INT" - }, - { - "name": "name", - "index": 1, - "type": "string" - } - ], - "sliceRecordCount": ["100"] - } - }, - "writer": { - "name": "hdfswriter", - "parameter": { - "path": "hdfs://ns1/user/hive/warehouse/impala_test.db/impala_tb2", - "defaultFS": "hdfs://ns1", - "hadoopConfig": { - "dfs.ha.namenodes.ns1":"nn1,nn2", - "dfs.namenode.rpc-address.ns1.nn2":"impala2:9000", - "dfs.client.failover.proxy.provider.ns1":"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider", - "dfs.namenode.rpc-address.ns1.nn1":"impala1:9000", - "dfs.nameservices":"ns1" - }, - "column": [ - { - "name": "id", - "type": "INT" - }, - { - "name": "name", - "type": "string" - } - ], - "fileType": "parquet", - "fieldDelimiter": "\u0001", - "encoding": "utf-8", - "fileName": "pt=1", - "writeMode": "append", - "partition": "pt=1" - } - } - } - ], - "setting": { - "speed": { - "channel": 1, - "bytes": 1048576 - }, - "errorLimit": { - "record": 100 - } - } - } -} \ No newline at end of file diff --git a/pom.xml b/pom.xml index d70a7a7193..1378ebe2b4 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,6 @@ flinkx-kafka09 flinkx-kafka10 flinkx-kafka11 - flinkx-kudu From 
a69614c084a842796ba7bdda5c178aca9ed5558e Mon Sep 17 00:00:00 2001 From: tudou Date: Thu, 10 Oct 2019 16:40:22 +0800 Subject: [PATCH 22/62] merge --- .../dtstack/flinkx/db2/Db2DatabaseMeta.java | 2 - .../flinkx/db2/format/Db2InputFormat.java | 54 +++ .../flinkx/db2/format/Db2OutputFormat.java | 59 +++ .../flinkx/gbase/format/GbaseInputFormat.java | 117 +++++ .../gbase/format/GbaseOutputFormat.java | 29 ++ .../flinkx/gbase/writer/GbaseWriter.java | 2 + .../mysqld/format/MysqldInputFormat.java | 135 ++++++ .../flinkx/mysqld/reader/MysqldReader.java | 47 ++ .../flinkx/mysql/format/MysqlInputFormat.java | 136 ++++++ .../flinkx/mysql/reader/MysqlReader.java | 5 +- .../mysql/format/MysqlOutputFormat.java | 29 ++ .../flinkx/mysql/writer/MysqlWriter.java | 5 + .../oracle/format/OracleInputFormat.java | 87 ++++ .../oracle/format/OracleOutputFormat.java | 102 +++++ .../format/PostgresqlInputFormat.java | 126 ++++++ .../reader/PostgresqlQuerySqlBuilder.java | 78 ++++ .../postgresql/reader/PostgresqlReader.java | 35 ++ .../PostgresqlOutputFormat.java | 38 +- .../postgresql/writer/PostgresqlWriter.java | 1 + .../com/dtstack/flinkx/rdb/DataSource.java | 17 + .../flinkx/rdb/ParameterValuesProvider.java | 17 + .../flinkx/rdb/loader/JdbcFormatLoader.java | 89 ++++ .../rdb/type/TypeConverterInterface.java | 6 + .../com/dtstack/flinkx/rdb/util/DBUtil.java | 403 +++++++----------- .../DistributedJdbcDataReader.java | 17 +- .../IncrementConfig.java | 12 + .../JdbcDataReader.java | 8 +- .../QuerySqlBuilder.java | 52 +-- .../DistributedJdbcInputFormat.java | 34 +- .../DistributedJdbcInputFormatBuilder.java | 7 +- .../JdbcInputFormat.java | 281 +++++++++--- .../JdbcInputFormatBuilder.java | 6 +- .../JdbcDataWriter.java | 6 +- .../rdb/outputformat/JdbcOutputFormat.java | 83 +--- .../outputformat/JdbcOutputFormatBuilder.java | 6 +- .../format/SqlserverInputFormat.java | 112 +++++ .../format/SqlserverOutputFormat.java | 29 ++ 37 files changed, 1816 insertions(+), 456 deletions(-) create mode 100644 flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java create mode 100644 flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java create mode 100644 flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java create mode 100644 flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java create mode 100644 flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java create mode 100644 flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java create mode 100644 flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java create mode 100644 flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java create mode 100644 flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java create mode 100644 flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java create mode 100644 flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java rename flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/{writer => format}/PostgresqlOutputFormat.java (75%) create mode 100644 
flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java create mode 100644 flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java create mode 100644 flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java diff --git a/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java b/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java index a549f7fdea..2db50637ce 100644 --- a/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java +++ b/flinkx-db2/flinkx-db2-core/src/main/java/com/dtstack/flinkx/db2/Db2DatabaseMeta.java @@ -20,9 +20,7 @@ import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.BaseDatabaseMeta; -import org.apache.commons.lang3.StringUtils; -import java.util.ArrayList; import java.util.List; /** diff --git a/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java new file mode 100644 index 0000000000..81672c3267 --- /dev/null +++ b/flinkx-db2/flinkx-db2-reader/src/main/java/com/dtstack/flinkx/db2/format/Db2InputFormat.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.db2.format; + +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import org.apache.flink.types.Row; + +import java.io.IOException; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class Db2InputFormat extends JdbcInputFormat { + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } +} diff --git a/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java new file mode 100644 index 0000000000..b2a5ff040b --- /dev/null +++ b/flinkx-db2/flinkx-db2-writer/src/main/java/com/dtstack/flinkx/db2/format/Db2OutputFormat.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.db2.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class Db2OutputFormat extends JdbcOutputFormat { + + @Override + protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { + Map> map = new HashMap<>(); + ResultSet rs = dbConn.getMetaData().getIndexInfo(null, null, table.toUpperCase(), true, false); + while(rs.next()) { + String indexName = rs.getString("INDEX_NAME"); + if(!map.containsKey(indexName)) { + map.put(indexName,new ArrayList<>()); + } + map.get(indexName).add(rs.getString("COLUMN_NAME")); + } + Map> retMap = new HashMap<>(); + for(Map.Entry> entry: map.entrySet()) { + String k = entry.getKey(); + List v = entry.getValue(); + if(v!=null && v.size() != 0 && v.get(0) != null) { + retMap.put(k, v); + } + } + return retMap; + } +} diff --git a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java new file mode 100644 index 0000000000..942332a675 --- /dev/null +++ b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
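clobToString is statically imported from DBUtil in the new input formats but is not part of this diff; presumably it drains java.sql.Clob values into plain Strings and passes everything else through. A sketch of that assumed behavior:

    // Assumed shape of DBUtil.clobToString: CLOBs become Strings, other values pass through.
    public static Object clobToString(Object obj) throws SQLException, IOException {
        if (!(obj instanceof Clob)) {
            return obj;
        }
        StringBuilder sb = new StringBuilder();
        try (Reader reader = ((Clob) obj).getCharacterStream()) {
            char[] buffer = new char[1024];
            int len;
            while ((len = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, len);
            }
        }
        return sb.toString();
    }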
+ */
+package com.dtstack.flinkx.gbase.format;
+
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.util.DBUtil;
+import com.dtstack.flinkx.reader.MetaColumn;
+import com.dtstack.flinkx.util.ClassUtil;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.core.io.InputSplit;
+import org.apache.flink.types.Row;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+
+import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString;
+
+/**
+ * Date: 2019/09/20
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class GbaseInputFormat extends JdbcInputFormat {
+
+    @Override
+    public void openInternal(InputSplit inputSplit) throws IOException {
+        try {
+            LOG.info(inputSplit.toString());
+
+            ClassUtil.forName(drivername, getClass().getClassLoader());
+
+            if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){
+                getMaxValue(inputSplit);
+            }
+
+            initMetric(inputSplit);
+
+            if(!canReadData(inputSplit)){
+                LOG.warn("Will not read data because the start location equals the end location");
+                hasNext = false;
+                return;
+            }
+
+            dbConn = DBUtil.getConnection(dbURL, username, password);
+
+            // Some drivers need auto-commit disabled for the fetchSize parameter to take effect
+            dbConn.setAutoCommit(false);
+            Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+            statement.setFetchSize(Integer.MIN_VALUE);
+            statement.setFetchSize(fetchSize);
+            statement.setQueryTimeout(queryTimeOut);
+            String querySql = buildQuerySql(inputSplit);
+            resultSet = statement.executeQuery(querySql);
+            columnCount = resultSet.getMetaData().getColumnCount();
+
+            boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("(");
+            if(splitWithRowCol){
+                columnCount = columnCount-1;
+            }
+
+            hasNext = resultSet.next();
+
+            if (StringUtils.isEmpty(customSql)){
+                descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password, databaseInterface, table, metaColumns);
+            } else {
+                descColumnTypeList = new ArrayList<>();
+                for (MetaColumn metaColumn : metaColumns) {
+                    descColumnTypeList.add(metaColumn.getName());
+                }
+            }
+
+        } catch (SQLException se) {
+            throw new IllegalArgumentException("open() failed. " + se.getMessage(), se);
+        }
+
+        LOG.info("JdbcInputFormat[{}]open: end", jobName);
+    }
+
+    @Override
+    public Row nextRecordInternal(Row row) throws IOException {
+        if (!hasNext) {
+            return null;
+        }
+        row = new Row(columnCount);
+        try {
+            for (int pos = 0; pos < row.getArity(); pos++) {
+                Object obj = resultSet.getObject(pos + 1);
+                if(obj != null) {
+                    obj = clobToString(obj);
+                }
+
+                row.setField(pos, obj);
+            }
+            return super.nextRecordInternal(row);
+        } catch (Exception e) {
+            throw new IOException("Couldn't read data - " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java
new file mode 100644
index 0000000000..3ac78d0036
--- /dev/null
+++ b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/format/GbaseOutputFormat.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.gbase.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class GbaseOutputFormat extends JdbcOutputFormat { +} diff --git a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java index 2ba9a9e30e..553179c507 100644 --- a/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java +++ b/flinkx-gbase/flinkx-gbase-writer/src/main/java/com/dtstack/flinkx/gbase/writer/GbaseWriter.java @@ -22,6 +22,7 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.gbase.GbaseDatabaseMeta; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; +import com.dtstack.flinkx.rdb.util.DBUtil; /** * @author jiangbo @@ -32,5 +33,6 @@ public class GbaseWriter extends JdbcDataWriter { public GbaseWriter(DataTransferConfig config) { super(config); setDatabaseInterface(new GbaseDatabaseMeta()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); } } diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java new file mode 100644 index 0000000000..beda91cc76 --- /dev/null +++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/format/MysqldInputFormat.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
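A note on the fetch-size handling in GbaseInputFormat above: MySQL-protocol drivers (MySQL, GBase) only stream results row by row when the fetch size is the sentinel Integer.MIN_VALUE on a forward-only, read-only statement; PostgreSQL instead honors a positive fetchSize, but only with auto-commit disabled, which is what the in-code comment refers to. Because setFetchSize simply overwrites the previous hint, the later setFetchSize(fetchSize) call replaces the streaming sentinel there. The usual streaming idiom looks like:

    // Row-by-row streaming on a MySQL-protocol connection.
    connection.setAutoCommit(false);
    Statement stmt = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
    stmt.setFetchSize(Integer.MIN_VALUE);
    ResultSet rs = stmt.executeQuery(querySql);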
+ */ +package com.dtstack.flinkx.mysqld.format; + +import com.dtstack.flinkx.rdb.DataSource; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; +import com.dtstack.flinkx.util.DateUtil; +import com.dtstack.flinkx.util.StringUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.flink.types.Row; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class MysqldInputFormat extends DistributedJdbcInputFormat { + + @Override + protected void openNextSource() throws SQLException { + DataSource currentSource = sourceList.get(sourceIndex); + currentConn = DBUtil.getConnection(currentSource.getJdbcUrl(), currentSource.getUserName(), currentSource.getPassword()); + currentConn.setAutoCommit(false); + String queryTemplate = new QuerySqlBuilder(databaseInterface, currentSource.getTable(),metaColumns,splitKey, + where, currentSource.isSplitByKey(), false, false).buildSql(); + currentStatement = currentConn.createStatement(resultSetType, resultSetConcurrency); + + if (currentSource.isSplitByKey()){ + String n = currentSource.getParameterValues()[0].toString(); + String m = currentSource.getParameterValues()[1].toString(); + queryTemplate = queryTemplate.replace("${N}",n).replace("${M}",m); + + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Executing '%s' with parameters %s", queryTemplate, + Arrays.deepToString(currentSource.getParameterValues()))); + } + } + + currentStatement.setFetchSize(Integer.MIN_VALUE); + currentStatement.setQueryTimeout(queryTimeOut); + currentResultSet = currentStatement.executeQuery(queryTemplate); + columnCount = currentResultSet.getMetaData().getColumnCount(); + + if(descColumnTypeList == null) { + descColumnTypeList = DBUtil.analyzeTable(currentSource.getJdbcUrl(), currentSource.getUserName(), + currentSource.getPassword(),databaseInterface, currentSource.getTable(),metaColumns); + } + + LOG.info("open source: {} ,table: {}", currentSource.getJdbcUrl(), currentSource.getTable()); + } + + @Override + protected boolean readNextRecord() throws IOException { + try{ + if(currentConn == null){ + openNextSource(); + } + + hasNext = currentResultSet.next(); + if (hasNext){ + currentRecord = new Row(columnCount); + + for (int pos = 0; pos < currentRecord.getArity(); pos++) { + Object obj = currentResultSet.getObject(pos + 1); + if(obj != null) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + String columnType = descColumnTypeList.get(pos); + if("year".equalsIgnoreCase(columnType)) { + java.util.Date date = (java.util.Date) obj; + obj = DateUtil.dateToYearString(date); + } else if("tinyint".equalsIgnoreCase(columnType) + || "bit".equalsIgnoreCase(columnType)) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 
1 : 0); + } + } + } + obj = clobToString(obj); + } + currentRecord.setField(pos, obj); + } + + if(!"*".equals(metaColumns.get(0).getName())){ + for (int i = 0; i < columnCount; i++) { + Object val = currentRecord.getField(i); + if(val == null && metaColumns.get(i).getValue() != null){ + val = metaColumns.get(i).getValue(); + } + + if (val instanceof String){ + val = StringUtil.string2col(String.valueOf(val),metaColumns.get(i).getType(),metaColumns.get(i).getTimeFormat()); + currentRecord.setField(i,val); + } + } + } + } else { + if(sourceIndex + 1 < sourceList.size()){ + closeCurrentSource(); + sourceIndex++; + return readNextRecord(); + } + } + + return !hasNext; + }catch (SQLException se) { + throw new IOException("Couldn't read data - " + se.getMessage(), se); + } catch (Exception npe) { + throw new IOException("Couldn't access resultSet", npe); + } + } +} diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java index 56dbb38299..4da26aab5c 100644 --- a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java +++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java @@ -1,14 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.mysqld.reader; import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.config.ReaderConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; +import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.datareader.DistributedJdbcDataReader; +import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + public class MysqldReader extends DistributedJdbcDataReader { public MysqldReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); setDatabaseInterface(new MySqlDatabaseMeta()); } + + @Override + protected List buildConnections(){ + List sourceList = new ArrayList<>(connectionConfigs.size()); + for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { + String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) + ? username : connectionConfig.getUsername(); + String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0) + ? 
password : connectionConfig.getPassword();
+            String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), Collections.singletonMap("zeroDateTimeBehavior", "convertToNull"));
+            for (String table : connectionConfig.getTable()) {
+                DataSource source = new DataSource();
+                source.setTable(table);
+                source.setUserName(curUsername);
+                source.setPassword(curPassword);
+                source.setJdbcUrl(curJdbcUrl);
+
+                sourceList.add(source);
+            }
+        }
+
+        return sourceList;
+    }
 }
diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java
new file mode 100644
index 0000000000..6db68313ef
--- /dev/null
+++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.mysql.format;
+
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.util.DBUtil;
+import com.dtstack.flinkx.reader.MetaColumn;
+import com.dtstack.flinkx.util.ClassUtil;
+import com.dtstack.flinkx.util.DateUtil;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.core.io.InputSplit;
+import org.apache.flink.types.Row;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+
+import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString;
+
+/**
+ * Date: 2019/09/19
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class MysqlInputFormat extends JdbcInputFormat {
+
+    @Override
+    public void openInternal(InputSplit inputSplit) throws IOException {
+        try {
+            LOG.info(inputSplit.toString());
+
+            ClassUtil.forName(drivername, getClass().getClassLoader());
+
+            if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){
+                getMaxValue(inputSplit);
+            }
+
+            initMetric(inputSplit);
+
+            if(!canReadData(inputSplit)){
+                LOG.warn("Will not read data because the start location equals the end location");
+
+                hasNext = false;
+                return;
+            }
+
+            dbConn = DBUtil.getConnection(dbURL, username, password);
+
+            // Some drivers need auto-commit disabled for the fetchSize parameter to take effect
+            dbConn.setAutoCommit(false);
+
+            Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+
+            statement.setFetchSize(Integer.MIN_VALUE);
+
+            statement.setQueryTimeout(queryTimeOut);
+            String querySql = buildQuerySql(inputSplit);
+            resultSet = statement.executeQuery(querySql);
+            columnCount = resultSet.getMetaData().getColumnCount();
+
+            boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("(");
if(splitWithRowCol){ + columnCount = columnCount-1; + } + + hasNext = resultSet.next(); + + if (StringUtils.isEmpty(customSql)){ + descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password,databaseInterface,table,metaColumns); + } else { + descColumnTypeList = new ArrayList<>(); + for (MetaColumn metaColumn : metaColumns) { + descColumnTypeList.add(metaColumn.getName()); + } + } + + } catch (SQLException se) { + throw new IllegalArgumentException("open() failed. " + se.getMessage(), se); + } + + LOG.info("JdbcInputFormat[{}]open: end", jobName); + } + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + String columnType = descColumnTypeList.get(pos); + if("year".equalsIgnoreCase(columnType)) { + java.util.Date date = (java.util.Date) obj; + obj = DateUtil.dateToYearString(date); + } else if("tinyint".equalsIgnoreCase(columnType) + || "bit".equalsIgnoreCase(columnType)) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 1 : 0); + } + } + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } + +} diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java index cd396e4454..27c30a6692 100644 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/reader/MysqlReader.java @@ -21,8 +21,11 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.util.DBUtil; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import java.util.Collections; + /** * MySQL reader plugin * @@ -34,6 +37,6 @@ public class MysqlReader extends JdbcDataReader { public MysqlReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); setDatabaseInterface(new MySqlDatabaseMeta()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); } - } diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java new file mode 100644 index 0000000000..41a2b9df75 --- /dev/null +++ b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/format/MysqlOutputFormat.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.mysql.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +/** + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class MysqlOutputFormat extends JdbcOutputFormat { +} diff --git a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java index 15a5294592..52b3b28c85 100644 --- a/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java +++ b/flinkx-mysql/flinkx-mysql-writer/src/main/java/com/dtstack/flinkx/mysql/writer/MysqlWriter.java @@ -21,6 +21,9 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; +import com.dtstack.flinkx.rdb.util.DBUtil; + +import java.util.Collections; /** * MySQL writer plugin @@ -33,6 +36,8 @@ public class MysqlWriter extends JdbcDataWriter { public MysqlWriter(DataTransferConfig config) { super(config); setDatabaseInterface(new MySqlDatabaseMeta()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, Collections.singletonMap("zeroDateTimeBehavior", "convertToNull")); + } } diff --git a/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java new file mode 100644 index 0000000000..2821d45160 --- /dev/null +++ b/flinkx-oracle/flinkx-oracle-reader/src/main/java/com/dtstack/flinkx/oracle/format/OracleInputFormat.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
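The MySQL reader, writer, and distributed reader above all route their JDBC URL through DBUtil.formatJdbcUrl with zeroDateTimeBehavior=convertToNull, so zero dates such as '0000-00-00 00:00:00' come back as NULL instead of raising a driver error; GbaseWriter passes null for the extra parameters. formatJdbcUrl itself is not part of this diff; judging from the call sites, a plausible sketch:

    // Hypothetical sketch of DBUtil.formatJdbcUrl: append parameters to the
    // URL's query string, keeping whatever is already there.
    public static String formatJdbcUrl(String dbUrl, Map<String, String> extraParams) {
        if (extraParams == null || extraParams.isEmpty()) {
            return dbUrl;
        }
        StringBuilder sb = new StringBuilder(dbUrl);
        char sep = dbUrl.contains("?") ? '&' : '?';
        for (Map.Entry<String, String> entry : extraParams.entrySet()) {
            sb.append(sep).append(entry.getKey()).append('=').append(entry.getValue());
            sep = '&';
        }
        return sb.toString();
    }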
+ */
+package com.dtstack.flinkx.oracle.format;
+
+import com.dtstack.flinkx.enums.ColumnType;
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.util.DBUtil;
+import org.apache.flink.types.Row;
+
+import java.io.IOException;
+import java.sql.Timestamp;
+
+import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString;
+
+/**
+ * Date: 2019/09/19
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class OracleInputFormat extends JdbcInputFormat {
+
+    @Override
+    public Row nextRecordInternal(Row row) throws IOException {
+        if (!hasNext) {
+            return null;
+        }
+        row = new Row(columnCount);
+
+        try {
+            for (int pos = 0; pos < row.getArity(); pos++) {
+                Object obj = resultSet.getObject(pos + 1);
+                if(obj != null) {
+                    if((obj instanceof java.util.Date
+                            || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP"))) {
+                        obj = resultSet.getTimestamp(pos + 1);
+                    }
+                    obj = clobToString(obj);
+                }
+
+                row.setField(pos, obj);
+            }
+            return super.nextRecordInternal(row);
+        } catch (Exception e) {
+            throw new IOException("Couldn't read data - " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Build the time-boundary literal used in incremental reads
+     * @param location boundary position (start/end), as an epoch-based long
+     * @param incrementColType type of the increment column
+     * @return an Oracle TO_TIMESTAMP(...)/TO_DATE(...) expression
+     */
+    @Override
+    protected String getTimeStr(Long location, String incrementColType){
+        String timeStr;
+        Timestamp ts = new Timestamp(DBUtil.getMillis(location));
+        ts.setNanos(DBUtil.getNanos(location));
+        timeStr = DBUtil.getNanosTimeStr(ts.toString());
+        // keep microsecond precision only, matching the FF6 format mask
+        timeStr = timeStr.substring(0, 26);
+
+        if(ColumnType.TIMESTAMP.name().equals(incrementColType)){
+            timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')", timeStr);
+        } else {
+            timeStr = timeStr.substring(0, 19);
+            timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr);
+        }
+        // unlike the generic implementation, the TO_TIMESTAMP/TO_DATE expression
+        // must not be wrapped in another pair of quotes, or it would become a
+        // plain string literal in the generated SQL
+
+        return timeStr;
+    }
+}
diff --git a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java
new file mode 100644
index 0000000000..77f9d65502
--- /dev/null
+++ b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.oracle.format;
+
+import com.dtstack.flinkx.enums.ColumnType;
+import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat;
+import com.dtstack.flinkx.util.DateUtil;
+import org.apache.flink.types.Row;
+
+import java.sql.*;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Date: 2019/09/20
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class OracleOutputFormat extends JdbcOutputFormat {
+
+    @Override
+    protected Object getField(Row row, int index) {
+        Object field = super.getField(row, index);
+        String type = columnType.get(index);
+
+        // convert Oracle timestamp values when the target field is VARCHAR, VARCHAR2 or LONG
+        if (!(field instanceof Timestamp)){
+            return field;
+        }
+
+        if (type.equalsIgnoreCase(ColumnType.VARCHAR.name()) || type.equalsIgnoreCase(ColumnType.VARCHAR2.name())){
+            SimpleDateFormat format = DateUtil.getDateTimeFormatter();
+            field = format.format(field);
+        }
+
+        if (type.equalsIgnoreCase(ColumnType.LONG.name())){
+            field = ((Timestamp) field).getTime();
+        }
+        return field;
+    }
+
+    @Override
+    protected List<String> probeFullColumns(String table, Connection dbConn) throws SQLException {
+        String schema = null;
+
+        String[] parts = table.split("\\.");
+        if(parts.length == 2) {
+            schema = parts[0].toUpperCase();
+            table = parts[1];
+        }
+
+        List<String> ret = new ArrayList<>();
+        ResultSet rs = dbConn.getMetaData().getColumns(null, schema, table, null);
+        while(rs.next()) {
+            ret.add(rs.getString("COLUMN_NAME"));
+        }
+        return ret;
+    }
+
+    @Override
+    protected Map<String, List<String>> probePrimaryKeys(String table, Connection dbConn) throws SQLException {
+        Map<String, List<String>> map = new HashMap<>();
+        PreparedStatement ps = dbConn.prepareStatement(String.format(GET_ORACLE_INDEX_SQL,table));
+        ResultSet rs = ps.executeQuery();
+
+        while(rs.next()) {
+            String indexName = rs.getString("INDEX_NAME");
+            if(!map.containsKey(indexName)) {
+                map.put(indexName,new ArrayList<>());
+            }
+            map.get(indexName).add(rs.getString("COLUMN_NAME"));
+        }
+        Map<String, List<String>> retMap = new HashMap<>();
+        for(Map.Entry<String, List<String>> entry: map.entrySet()) {
+            String k = entry.getKey();
+            List<String> v = entry.getValue();
+            if(v != null && v.size() != 0 && v.get(0) != null) {
+                retMap.put(k, v);
+            }
+        }
+        return retMap;
+    }
+}
diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java
new file mode 100644
index 0000000000..63abdd400e
--- /dev/null
+++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.postgresql.format;
+
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.util.DBUtil;
+import com.dtstack.flinkx.reader.MetaColumn;
+import com.dtstack.flinkx.util.ClassUtil;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.core.io.InputSplit;
+import org.apache.flink.types.Row;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+
+import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString;
+
+/**
+ * Date: 2019/09/20
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class PostgresqlInputFormat extends JdbcInputFormat {
+
+    @Override
+    public void openInternal(InputSplit inputSplit) throws IOException {
+        try {
+            LOG.info(inputSplit.toString());
+
+            ClassUtil.forName(drivername, getClass().getClassLoader());
+
+            if (incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc()){
+                getMaxValue(inputSplit);
+            }
+
+            initMetric(inputSplit);
+
+            if(!canReadData(inputSplit)){
+                LOG.warn("Skip reading: the start location equals the end location");
+
+                hasNext = false;
+                return;
+            }
+
+            dbConn = DBUtil.getConnection(dbURL, username, password);
+
+            // some drivers only honor fetchSize once auto-commit is disabled
+            dbConn.setAutoCommit(false);
+
+            // commit before reading so that, if the job exits abnormally, the
+            // next read from PostgreSQL still sees the same ordering
+            dbConn.commit();
+            Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+            statement.setFetchSize(fetchSize);
+            statement.setQueryTimeout(queryTimeOut);
+            String querySql = buildQuerySql(inputSplit);
+            resultSet = statement.executeQuery(querySql);
+            columnCount = resultSet.getMetaData().getColumnCount();
+
+            boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("(");
+            if(splitWithRowCol){
+                columnCount = columnCount - 1;
+            }
+
+            hasNext = resultSet.next();
+
+            if (StringUtils.isEmpty(customSql)){
+                descColumnTypeList = DBUtil.analyzeTable(dbURL, username, password, databaseInterface, table, metaColumns);
+            } else {
+                descColumnTypeList = new ArrayList<>();
+                for (MetaColumn metaColumn : metaColumns) {
+                    descColumnTypeList.add(metaColumn.getName());
+                }
+            }
+
+        } catch (SQLException se) {
+            throw new IllegalArgumentException("open() failed. " 
+ se.getMessage(), se); + } + + LOG.info("JdbcInputFormat[{}]open: end", jobName); + } + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + obj = typeConverter.convert(obj,descColumnTypeList.get(pos)); + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + }catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } +} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java new file mode 100644 index 0000000000..6868aeb8e4 --- /dev/null +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlQuerySqlBuilder.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.postgresql.reader; + +import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class PostgresqlQuerySqlBuilder extends QuerySqlBuilder { + + public PostgresqlQuerySqlBuilder(JdbcDataReader reader){ + super(reader); + } + + @Override + protected String buildQuerySql(){ + List selectColumns = buildSelectColumns(databaseInterface, metaColumns); + boolean splitWithRowNum = addRowNumColumn(databaseInterface, selectColumns, isSplitByKey, splitKey); + + StringBuilder sb = new StringBuilder(); + sb.append("SELECT ").append(StringUtils.join(selectColumns,",")).append(" FROM "); + sb.append(databaseInterface.quoteTable(table)); + sb.append(" WHERE 1=1 "); + + StringBuilder filter = new StringBuilder(); + + if(isSplitByKey && !splitWithRowNum) { + filter.append(" AND ").append(databaseInterface.getSplitFilter(splitKey)); + } + + if (customFilter != null){ + customFilter = customFilter.trim(); + if (customFilter.length() > 0){ + filter.append(" AND ").append(customFilter); + } + } + + if(isIncrement){ + filter.append(" ").append(INCREMENT_FILTER_PLACEHOLDER); + } + + if(isRestore){ + filter.append(" ").append(RESTORE_FILTER_PLACEHOLDER); + } + + sb.append(filter); + sb.append(buildOrderSql()); + + if(isSplitByKey && splitWithRowNum){ + return String.format(SQL_SPLIT_WITH_ROW_NUM, sb.toString(), databaseInterface.getSplitFilter(ROW_NUM_COLUMN_ALIAS)); + } else { + return sb.toString(); + } + } +} diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java index d333c77964..591e31abd5 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/reader/PostgresqlReader.java @@ -19,11 +19,16 @@ package com.dtstack.flinkx.postgresql.reader; import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder; import com.dtstack.flinkx.rdb.util.DBUtil; +import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.types.Row; /** * The reader plugin for PostgreSQL database @@ -37,5 +42,35 @@ public PostgresqlReader(DataTransferConfig config, StreamExecutionEnvironment en super(config, env); setDatabaseInterface(new PostgresqlDatabaseMeta()); setTypeConverterInterface(new PostgresqlTypeConverter()); + dbUrl = DBUtil.formatJdbcUrl(dbUrl, null); + } + + @Override + public DataStream readData() { + JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); + builder.setDrivername(databaseInterface.getDriverClass()); + builder.setDBUrl(dbUrl); + builder.setUsername(username); + builder.setPassword(password); + builder.setBytes(bytes); + 
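+
+        // Editor's note (illustrative, not from the original patch): this
+        // override mirrors JdbcDataReader.readData() almost line for line; it
+        // exists so the Postgresql-specific QuerySqlBuilder further down can
+        // be plugged in. A value of 0 for fetchSize/queryTimeOut below means
+        // "fall back to the DatabaseMeta default", as the ternaries show.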
builder.setMonitorUrls(monitorUrls); + builder.setTable(table); + builder.setDatabaseInterface(databaseInterface); + builder.setTypeConverter(typeConverter); + builder.setMetaColumn(metaColumns); + builder.setFetchSize(fetchSize == 0 ? databaseInterface.getFetchSize() : fetchSize); + builder.setQueryTimeOut(queryTimeOut == 0 ? databaseInterface.getQueryTimeout() : queryTimeOut); + builder.setIncrementConfig(incrementConfig); + builder.setSplitKey(splitKey); + builder.setNumPartitions(numPartitions); + builder.setCustomSql(customSql); + builder.setRestoreConfig(restoreConfig); + builder.setHadoopConfig(hadoopConfig); + + QuerySqlBuilder sqlBuilder = new PostgresqlQuerySqlBuilder(this); + builder.setQuery(sqlBuilder.buildSql()); + + RichInputFormat format = builder.finish(); + return createInput(format, (databaseInterface.getDatabaseType() + "reader").toLowerCase()); } } diff --git a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java similarity index 75% rename from flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java rename to flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java index 4af60e797e..ce3653c985 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlOutputFormat.java +++ b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlOutputFormat.java @@ -1,4 +1,21 @@ -package com.dtstack.flinkx.postgresql.writer; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.postgresql.format; import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; @@ -7,8 +24,6 @@ import org.apache.flink.types.Row; import org.postgresql.copy.CopyManager; import org.postgresql.core.BaseConnection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.sql.PreparedStatement; @@ -23,15 +38,15 @@ public class PostgresqlOutputFormat extends JdbcOutputFormat { - private static final Logger LOG = LoggerFactory.getLogger(PostgresqlOutputFormat.class); - private static final String COPY_SQL_TEMPL = "copy %s(%s) from stdin DELIMITER '%s'"; private static final String DEFAULT_FIELD_DELIM = "\001"; private static final String LINE_DELIMITER = "\n"; - /**now just add ext insert mode:copy*/ + /** + * now just add ext insert mode:copy + */ private static final String INSERT_SQL_MODE_TYPE = "copy"; private String copySql = ""; @@ -87,7 +102,7 @@ protected void writeMultipleRecordsInternal() throws Exception { return; } - StringBuilder sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(128); for (Row row : rows) { int lastIndex = row.getArity() - 1; for (int index =0; index < row.getArity(); index++) { @@ -110,6 +125,15 @@ protected void writeMultipleRecordsInternal() throws Exception { } } + @Override + protected Object getField(Row row, int index) { + Object field = super.getField(row, index); + String type = columnType.get(index); + field = typeConverter.convert(field,type); + + return field; + } + private boolean checkIsCopyMode(String insertMode){ if(Strings.isNullOrEmpty(insertMode)){ return false; diff --git a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java index a81de66aed..867fd909bb 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java +++ b/flinkx-postgresql/flinkx-postgresql-writer/src/main/java/com/dtstack/flinkx/postgresql/writer/PostgresqlWriter.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; +import com.dtstack.flinkx.postgresql.format.PostgresqlOutputFormat; import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; import org.apache.flink.streaming.api.datastream.DataStream; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java index 1840f866dc..d9f3508798 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/DataSource.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.rdb; import java.io.Serializable; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java index 48faab016e..79cac26365 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/ParameterValuesProvider.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.rdb; import java.io.Serializable; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java new file mode 100644 index 0000000000..94c846429c --- /dev/null +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/loader/JdbcFormatLoader.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.dtstack.flinkx.rdb.loader;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.util.Preconditions;
+
+/**
+ * FlinkX jdbc format loader: resolves and instantiates the plugin-specific
+ * InputFormat/OutputFormat class by naming convention
+ *
+ * Date: 2019/09/19
+ * Company: www.dtstack.com
+ *
+ * @author tudou
+ */
+public class JdbcFormatLoader {
+
+    /**
+     * Format name, e.g. "mysqlInputFormat"
+     */
+    private String formatName;
+
+    /**
+     * Fully qualified class name of the format
+     */
+    private String formatClassName;
+
+    public static final int INPUT_FORMAT = 0;
+    public static final int OUTPUT_FORMAT = 1;
+
+    private final String pkgPrefixFormat = "com.dtstack.flinkx.%s.format.%s";
+
+    private final String INPUT_FORMAT_SUFFIX = "InputFormat";
+
+    private final String OUTPUT_FORMAT_SUFFIX = "OutputFormat";
+
+    /**
+     * JdbcFormatLoader constructor
+     * @param dataType jdbc data source type, e.g. "mysql"
+     * @param formatType format type: INPUT_FORMAT or OUTPUT_FORMAT
+     */
+    public JdbcFormatLoader(String dataType, int formatType){
+
+        Preconditions.checkArgument(StringUtils.isNotBlank(dataType));
+        Preconditions.checkArgument(formatType == INPUT_FORMAT || formatType == OUTPUT_FORMAT);
+
+        dataType = dataType.toLowerCase();
+        if(formatType == INPUT_FORMAT){
+            this.formatName = dataType + INPUT_FORMAT_SUFFIX;
+        }else{
+            this.formatName = dataType + OUTPUT_FORMAT_SUFFIX;
+        }
+        // e.g. ("mysql", INPUT_FORMAT) -> com.dtstack.flinkx.mysql.format.MysqlInputFormat
+        this.formatClassName = String.format(pkgPrefixFormat, dataType, this.formatName.substring(0, 1).toUpperCase() + this.formatName.substring(1));
+    }
+
+    /**
+     * Instantiate the format object via reflection
+     * @return the InputFormat/OutputFormat instance
+     */
+    public Object getFormatInstance() {
+        Object format = null;
+        try {
+            Class<?> clz = Class.forName(formatClassName);
+            format = clz.newInstance();
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException("error to load " + formatClassName, e);
+        } catch (Exception e) {
+            throw new RuntimeException(formatClassName + " does not have a no-argument constructor", e);
+        }
+
+        return format;
+    }
+
+}
diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java
index 8ac4ca6fda..35088110d0 100644
--- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java
+++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/type/TypeConverterInterface.java
@@ -28,6 +28,12 @@
 */
public interface TypeConverterInterface extends Serializable {

+    /**
+     * Type conversion: converts a database value of some type into the
+     * corresponding Java object instance
+     * @param data the data record
+     * @param typeName the database type name
+     * @return the converted value
+     */
    Object convert(Object data,String typeName);

}
diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java
index 6108137732..7e708ed040 100644
--- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java
+++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DBUtil.java
@@ -17,27 +17,26 @@
 */
package com.dtstack.flinkx.rdb.util;

-import com.dtstack.flinkx.constants.PluginNameConstrant;
-import com.dtstack.flinkx.enums.ColumnType;
-import com.dtstack.flinkx.enums.EDatabaseType;
import com.dtstack.flinkx.rdb.DatabaseInterface;
import com.dtstack.flinkx.rdb.ParameterValuesProvider;
-import com.dtstack.flinkx.rdb.type.TypeConverterInterface;
import com.dtstack.flinkx.reader.MetaColumn;
-import com.dtstack.flinkx.util.*;
+import com.dtstack.flinkx.util.ClassUtil;
+import com.dtstack.flinkx.util.ExceptionUtil;
+import com.dtstack.flinkx.util.SysUtil;
+import com.dtstack.flinkx.util.TelnetUtil;
import 
org.apache.commons.lang.StringUtils; -import org.apache.flink.types.Row; +import org.apache.flink.util.CollectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.io.Serializable; import java.math.BigDecimal; import java.sql.*; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Pattern; /** * @@ -49,21 +48,55 @@ public class DBUtil { private static final Logger LOG = LoggerFactory.getLogger(DBUtil.class); + /** + * 数据库连接的最大重试次数 + */ private static int MAX_RETRY_TIMES = 3; + /** + * 秒级时间戳的长度为10位 + */ private static int SECOND_LENGTH = 10; + /** + * 毫秒级时间戳的长度为13位 + */ private static int MILLIS_LENGTH = 13; + /** + * 微秒级时间戳的长度为16位 + */ private static int MICRO_LENGTH = 16; + /** + * 纳秒级时间戳的长度为19位 + */ private static int NANOS_LENGTH = 19; + /** + * jdbc连接URL的分割正则,用于获取URL?后的连接参数 + */ + public static final Pattern DB_PATTERN = Pattern.compile("\\?"); + + /** + * 增量任务过滤条件占位符 + */ public static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; + /** + * 断点续传过滤条件占位符 + */ public static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; public static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; public static final String NULL_STRING = "null"; + /** + * 获取jdbc连接(超时10S) + * @param url url + * @param username 账号 + * @param password 密码 + * @return + * @throws SQLException + */ private static Connection getConnectionInternal(String url, String username, String password) throws SQLException { Connection dbConn; synchronized (ClassUtil.lock_str){ @@ -82,6 +115,14 @@ private static Connection getConnectionInternal(String url, String username, Str return dbConn; } + /** + * 获取jdbc连接(重试3次) + * @param url url + * @param username 账号 + * @param password 密码 + * @return + * @throws SQLException + */ public static Connection getConnection(String url, String username, String password) throws SQLException { if (!url.startsWith("jdbc:mysql")) { return getConnectionInternal(url, username, password); @@ -110,45 +151,21 @@ public static Connection getConnection(String url, String username, String passw } } - - public static List> executeQuery(Connection connection, String sql) { - List> result = com.google.common.collect.Lists.newArrayList(); - ResultSet res = null; - Statement statement = null; - try{ - statement = connection.createStatement(); - res = statement.executeQuery(sql); - int columns = res.getMetaData().getColumnCount(); - List columnName = com.google.common.collect.Lists.newArrayList(); - for(int i = 0; i < columns; i++){ - columnName.add(res.getMetaData().getColumnName(i + 1)); - } - - while(res.next()){ - Map row = com.google.common.collect.Maps.newHashMap(); - for(int i = 0;i < columns; i++){ - row.put(columnName.get(i), res.getObject(i + 1)); - } - result.add(row); - } - }catch(Exception e){ - throw new RuntimeException(e); - } - finally{ - DBUtil.closeDBResources(res, statement, null, false); - } - return result; - } - - public static void closeDBResources(ResultSet rs, Statement stmt, - Connection conn, boolean commit) { + /** + * 关闭连接资源 + * @param rs ResultSet + * @param stmt Statement + * @param conn Connection + * @param commit + */ + public static void closeDBResources(ResultSet rs, Statement stmt, Connection conn, boolean commit) { if (null != rs) { try { LOG.info("Start close resultSet"); rs.close(); LOG.info("Close resultSet successful"); } catch (SQLException e) { - LOG.warn("Close resultSet error:{}",e); + LOG.warn("Close resultSet 
error: {}", ExceptionUtil.getErrorMessage(e)); } } @@ -158,7 +175,7 @@ public static void closeDBResources(ResultSet rs, Statement stmt, stmt.close(); LOG.info("Close statement successful"); } catch (SQLException e) { - LOG.warn("Close statement error:{}",e); + LOG.warn("Close statement error:{}", ExceptionUtil.getErrorMessage(e)); } } @@ -172,11 +189,15 @@ public static void closeDBResources(ResultSet rs, Statement stmt, conn.close(); LOG.info("Close connection successful"); } catch (SQLException e) { - LOG.warn("Close connection error:{}",e); + LOG.warn("Close connection error:{}", ExceptionUtil.getErrorMessage(e)); } } } + /** + * 手动提交事物 + * @param conn Connection + */ public static void commit(Connection conn){ try { if (!conn.isClosed() && !conn.getAutoCommit()){ @@ -185,10 +206,15 @@ public static void commit(Connection conn){ LOG.info("Commit connection successful"); } } catch (SQLException e){ - LOG.warn("commit error:{}",e); + LOG.warn("commit error:{}", ExceptionUtil.getErrorMessage(e)); } } + /** + * 批量执行sql + * @param dbConn Connection + * @param sqls sql列表 + */ public static void executeBatch(Connection dbConn, List sqls) { if(sqls == null || sqls.size() == 0) { return; @@ -207,6 +233,13 @@ public static void executeBatch(Connection dbConn, List sqls) { } } + /** + * 获取某数据库某表的主键和唯一索引 + * @param table 表名 + * @param dbConn 数据库连接 + * @return + * @throws SQLException + */ public static Map> getPrimaryOrUniqueKeys(String table, Connection dbConn) throws SQLException { Map> keyMap = new HashMap<>(); DatabaseMetaData meta = dbConn.getMetaData(); @@ -222,26 +255,38 @@ public static Map> getPrimaryOrUniqueKeys(String table, Conn return keyMap; } + /** + * 封装channel通道顺序 + * @param channels + * @return + */ public static Object[][] getParameterValues(final int channels){ - ParameterValuesProvider provider = new ParameterValuesProvider() { - @Override - public Serializable[][] getParameterValues() { - Integer[][] parameters = new Integer[channels][]; - for(int i = 0; i < channels; ++i) { - parameters[i] = new Integer[2]; - parameters[i][0] = channels; - parameters[i][1] = i; - } - return parameters; + ParameterValuesProvider provider = () -> { + Integer[][] parameters = new Integer[channels][]; + for(int i = 0; i < channels; ++i) { + parameters[i] = new Integer[2]; + parameters[i][0] = channels; + parameters[i][1] = i; } + return parameters; }; return provider.getParameterValues(); } - public static List analyzeTable(String dbURL,String username,String password,DatabaseInterface databaseInterface, - String table,List metaColumns) { - List ret = new ArrayList<>(); + /** + * 获取表列名类型列表 + * @param dbURL jdbc url + * @param username 数据库账号 + * @param password 数据库密码 + * @param databaseInterface DatabaseInterface + * @param table 表名 + * @param metaColumns MetaColumn列表 + * @return + */ + public static List analyzeTable(String dbURL, String username, String password, DatabaseInterface databaseInterface, + String table, List metaColumns) { + List ret = new ArrayList<>(metaColumns.size()); Connection dbConn = null; Statement stmt = null; ResultSet rs = null; @@ -251,7 +296,7 @@ public static List analyzeTable(String dbURL,String username,String pass rs = stmt.executeQuery(databaseInterface.getSQLQueryFields(databaseInterface.quoteTable(table))); ResultSetMetaData rd = rs.getMetaData(); - Map nameTypeMap = new HashMap<>(); + Map nameTypeMap = new HashMap<>((rd.getColumnCount() << 2) / 3); for(int i = 0; i < rd.getColumnCount(); ++i) { nameTypeMap.put(rd.getColumnName(i+1),rd.getColumnTypeName(i+1)); } 
@@ -272,6 +317,13 @@ public static List analyzeTable(String dbURL,String username,String pass return ret; } + /** + * 占位符设值 + * @param param 参数 + * @param statement PreparedStatement + * @param i 占位符位置 + * @throws SQLException + */ public static void setParameterValue(Object param,PreparedStatement statement,int i) throws SQLException{ if (param instanceof String) { statement.setString(i + 1, (String) param); @@ -305,53 +357,12 @@ public static void setParameterValue(Object param,PreparedStatement statement,in } } - public static void getRow(EDatabaseType dbType, Row row, List descColumnTypeList, ResultSet resultSet, - TypeConverterInterface typeConverter) throws Exception{ - for (int pos = 0; pos < row.getArity(); pos++) { - Object obj = resultSet.getObject(pos + 1); - if(obj != null) { - if (EDatabaseType.Oracle == dbType) { - if((obj instanceof java.util.Date || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { - obj = resultSet.getTimestamp(pos + 1); - } - } else if(EDatabaseType.MySQL == dbType) { - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - if(descColumnTypeList.get(pos).equalsIgnoreCase("year")) { - java.util.Date date = (java.util.Date) obj; - String year = DateUtil.dateToYearString(date); - System.out.println(year); - obj = year; - } else if(descColumnTypeList.get(pos).equalsIgnoreCase("tinyint")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 1 : 0); - } - } else if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 1 : 0); - } - } - } - } else if(EDatabaseType.SQLServer == dbType) { - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { - if(obj instanceof Boolean) { - obj = ((Boolean) obj ? 
1 : 0); - } - } - } - } else if(EDatabaseType.PostgreSQL == dbType){ - if(descColumnTypeList != null && descColumnTypeList.size() != 0) { - obj = typeConverter.convert(obj,descColumnTypeList.get(pos)); - } - } - - obj = clobToString(obj); - } - - row.setField(pos, obj); - } - } - + /** + * clob转string + * @param obj clob + * @return + * @throws Exception + */ public static Object clobToString(Object obj) throws Exception{ String dataStr; if(obj instanceof Clob){ @@ -370,110 +381,24 @@ public static Object clobToString(Object obj) throws Exception{ return dataStr; } - public static String buildIncrementFilter(DatabaseInterface databaseInterface,String incrementColType,String incrementCol, - String startLocation,String endLocation, String customSql, boolean useMaxFunc){ - StringBuilder filter = new StringBuilder(); - - if (StringUtils.isNotEmpty(customSql)){ - incrementCol = String.format("%s.%s", TEMPORARY_TABLE_NAME, databaseInterface.quoteColumn(incrementCol)); - } else { - incrementCol = databaseInterface.quoteColumn(incrementCol); - } - - String startFilter = buildStartLocationSql(databaseInterface, incrementColType, incrementCol, startLocation, useMaxFunc); - if (StringUtils.isNotEmpty(startFilter)){ - filter.append(startFilter); - } - - String endFilter = buildEndLocationSql(databaseInterface, incrementColType, incrementCol, endLocation); - if (StringUtils.isNotEmpty(endFilter)){ - if (filter.length() > 0){ - filter.append(" and ").append(endFilter); - } else { - filter.append(endFilter); - } - } - - return filter.toString(); - } - - public static String buildStartLocationSql(DatabaseInterface databaseInterface,String incrementColType, - String incrementCol,String startLocation,boolean useMaxFunc){ - if(StringUtils.isEmpty(startLocation) || NULL_STRING.equalsIgnoreCase(startLocation)){ - return null; - } - - String operator = " >= "; - if(!useMaxFunc){ - operator = " > "; - } - - return getLocationSql(databaseInterface, incrementColType, incrementCol, startLocation, operator); - } - - public static String buildEndLocationSql(DatabaseInterface databaseInterface,String incrementColType,String incrementCol, - String endLocation){ - if(StringUtils.isEmpty(endLocation) || NULL_STRING.equalsIgnoreCase(endLocation)){ - return null; - } - - return getLocationSql(databaseInterface, incrementColType, incrementCol, endLocation, " < "); - } - - private static String getLocationSql(DatabaseInterface databaseInterface, String incrementColType, String incrementCol, - String location, String operator) { - String endTimeStr; - String endLocationSql; - boolean isTimeType = ColumnType.isTimeType(incrementColType) - || (databaseInterface.getDatabaseType() == EDatabaseType.SQLServer && ColumnType.NVARCHAR.name().equals(incrementColType)); - if(isTimeType){ - endTimeStr = getTimeStr(databaseInterface.getDatabaseType(), Long.parseLong(location), incrementColType); - endLocationSql = incrementCol + operator + endTimeStr; - } else if(ColumnType.isNumberType(incrementColType)){ - endLocationSql = incrementCol + operator + location; - } else { - endTimeStr = String.format("'%s'",location); - endLocationSql = incrementCol + operator + endTimeStr; - } - - return endLocationSql; - } - - private static String getTimeStr(EDatabaseType databaseType,Long startLocation,String incrementColType){ - String timeStr; - Timestamp ts = new Timestamp(getMillis(startLocation)); - ts.setNanos(getNanos(startLocation)); - timeStr = getNanosTimeStr(ts.toString()); - - if(databaseType == EDatabaseType.SQLServer){ - timeStr = 
timeStr.substring(0,23);
-        } else {
-            timeStr = timeStr.substring(0,26);
-        }
-
-        if (databaseType == EDatabaseType.Oracle){
-            if(ColumnType.TIMESTAMP.name().equals(incrementColType)){
-                timeStr = String.format("TO_TIMESTAMP('%s','YYYY-MM-DD HH24:MI:SS:FF6')",timeStr);
-            } else {
-                timeStr = timeStr.substring(0, 19);
-                timeStr = String.format("TO_DATE('%s','YYYY-MM-DD HH24:MI:SS')", timeStr);
-            }
-        } else {
-            timeStr = String.format("'%s'",timeStr);
-        }
-
-        return timeStr;
-    }
-
-    private static String getNanosTimeStr(String timeStr){
+    /**
+     * Pad the timestamp string to full nanosecond precision (29 characters)
+     * @param timeStr the timestamp string to pad
+     * @return the padded string
+     */
+    public static String getNanosTimeStr(String timeStr){
        if(timeStr.length() < 29){
            timeStr += StringUtils.repeat("0",29 - timeStr.length());
        }
-
        return timeStr;
    }

-    private static int getNanos(long startLocation){
+    /**
+     * Convert the boundary location to its corresponding nanosecond value
+     * @param startLocation boundary location (start/end)
+     * @return the nanosecond value
+     */
+    public static int getNanos(long startLocation){
        String timeStr = String.valueOf(startLocation);
        int nanos;
        if (timeStr.length() == SECOND_LENGTH){
@@ -491,7 +416,12 @@ private static int getNanos(long startLocation){
        return nanos;
    }

-    private static long getMillis(long startLocation){
+    /**
+     * Convert the boundary location to its corresponding millisecond value
+     * @param startLocation boundary location (start/end)
+     * @return the millisecond value
+     */
+    public static long getMillis(long startLocation){
        String timeStr = String.valueOf(startLocation);
        long millisSecond;
        if (timeStr.length() == SECOND_LENGTH){
@@ -509,46 +439,41 @@ private static long getMillis(long startLocation){
        return millisSecond;
    }

-    public static String formatJdbcUrl(String pluginName,String dbUrl){
-        if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_READER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.POSTGRESQL_READER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_WRITER)
-                || pluginName.equalsIgnoreCase(PluginNameConstrant.GBASE_WRITER) ){
-            String[] splits = dbUrl.split("\\?");
-
-            Map paramMap = new HashMap();
-            if(splits.length > 1) {
-                String[] pairs = splits[1].split("&");
-                for(String pair : pairs) {
-                    String[] leftRight = pair.split("=");
-                    paramMap.put(leftRight[0], leftRight[1]);
-                }
-            }
-
-            paramMap.put("useCursorFetch", "true");
-            paramMap.put("rewriteBatchedStatements", "true");
-            if(pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQL_READER)
-                    || pluginName.equalsIgnoreCase(PluginNameConstrant.MYSQLD_READER)){
-                paramMap.put("zeroDateTimeBehavior","convertToNull");
+    /**
+     * Format the jdbc connection URL, e.g. jdbc:mysql://host:3306/db becomes
+     * jdbc:mysql://host:3306/db?useCursorFetch=true&rewriteBatchedStatements=true
+     * @param dbUrl the original jdbc URL
+     * @param extParamMap extra parameters to append
+     * @return the formatted jdbc URL string
+     */
+    public static String formatJdbcUrl(String dbUrl, Map<String, String> extParamMap){
+        String[] splits = DB_PATTERN.split(dbUrl);
+
+        Map<String, String> paramMap = new HashMap<>();
+        if(splits.length > 1) {
+            String[] pairs = splits[1].split("&");
+            for(String pair : pairs) {
+                String[] leftRight = pair.split("=");
+                paramMap.put(leftRight[0], leftRight[1]);
            }
+        }
-            StringBuffer sb = new StringBuffer(splits[0]);
-            if(paramMap.size() != 0) {
-                sb.append("?");
-                int index = 0;
-                for(Map.Entry entry : paramMap.entrySet()) {
-                    if(index != 0) {
-                        sb.append("&");
-                    }
-                    sb.append(entry.getKey() + "=" + entry.getValue());
-                    index++;
-                }
+        if(!CollectionUtil.isNullOrEmpty(extParamMap)){
+            paramMap.putAll(extParamMap);
+        }
+        paramMap.put("useCursorFetch", "true");
+        paramMap.put("rewriteBatchedStatements", "true");
+
+        StringBuffer sb = new StringBuffer(dbUrl.length() + 128);
+        sb.append(splits[0]).append("?");
+        int index = 0;
+        for(Map.Entry<String, String> entry : paramMap.entrySet()) {
+            if(index != 
0) { + sb.append("&"); } - - dbUrl = sb.toString(); + sb.append(entry.getKey()).append("=").append(entry.getValue()); + index++; } - return dbUrl; + return sb.toString(); } } diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java index a3e9da992e..5c265935df 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java @@ -28,6 +28,7 @@ import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.DataReader; import com.dtstack.flinkx.reader.MetaColumn; +import org.apache.commons.lang.StringUtils; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; @@ -63,7 +64,7 @@ public class DistributedJdbcDataReader extends DataReader { protected int queryTimeOut; - private List connectionConfigs; + protected List connectionConfigs; private static String DISTRIBUTED_TAG = "d"; @@ -84,7 +85,7 @@ protected DistributedJdbcDataReader(DataTransferConfig config, StreamExecutionEn @Override public DataStream readData() { - DistributedJdbcInputFormatBuilder builder = new DistributedJdbcInputFormatBuilder(); + DistributedJdbcInputFormatBuilder builder = new DistributedJdbcInputFormatBuilder(databaseInterface.getDatabaseType().name()); builder.setDrivername(databaseInterface.getDriverClass()); builder.setUsername(username); builder.setPassword(password); @@ -104,14 +105,12 @@ public DataStream readData() { return createInput(format, (databaseInterface.getDatabaseType() + DISTRIBUTED_TAG + "reader").toLowerCase()); } - private List buildConnections(){ - List sourceList = new ArrayList<>(); + protected List buildConnections(){ + List sourceList = new ArrayList<>(connectionConfigs.size()); for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { - String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) - ? username : connectionConfig.getUsername(); - String curPassword = (connectionConfig.getPassword() == null || connectionConfig.getPassword().length() == 0) - ? password : connectionConfig.getPassword(); - String curJdbcUrl = DBUtil.formatJdbcUrl(pluginName,connectionConfig.getJdbcUrl().get(0)); + String curUsername = (StringUtils.isBlank(connectionConfig.getUsername())) ? username : connectionConfig.getUsername(); + String curPassword = (StringUtils.isBlank(connectionConfig.getPassword())) ? 
password : connectionConfig.getPassword();
+            String curJdbcUrl = DBUtil.formatJdbcUrl(connectionConfig.getJdbcUrl().get(0), null);
            for (String table : connectionConfig.getTable()) {
                DataSource source = new DataSource();
                source.setTable(table);
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java
index 103283a9f0..5ae1fc6a6a 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/IncrementConfig.java
@@ -27,8 +27,17 @@
 */
public class IncrementConfig implements Serializable {

+    /**
+     * Whether this is an incremental job
+     */
    private boolean increment;

+    /**
+     * Marks whether the record(s) at the endLocation position are kept:
+     * true: not kept
+     * false (default): kept
+     * In some situations the last few records can end up recorded twice;
+     * set this to true to avoid that
+     */
    private boolean useMaxFunc;

    private int columnIndex;
@@ -39,6 +48,9 @@ public class IncrementConfig implements Serializable {

    private String startLocation;

+    /**
+     * Interval between requests querying the accumulator
+     */
    private int requestAccumulatorInterval;

    public int getRequestAccumulatorInterval() {
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java
index dc016efb52..0a1d19b5da 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/JdbcDataReader.java
@@ -20,11 +20,10 @@

import com.dtstack.flinkx.config.DataTransferConfig;
import com.dtstack.flinkx.config.ReaderConfig;
+import com.dtstack.flinkx.inputformat.RichInputFormat;
import com.dtstack.flinkx.rdb.DatabaseInterface;
import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder;
-import com.dtstack.flinkx.inputformat.RichInputFormat;
import com.dtstack.flinkx.rdb.type.TypeConverterInterface;
-import com.dtstack.flinkx.rdb.util.DBUtil;
import com.dtstack.flinkx.reader.DataReader;
import com.dtstack.flinkx.reader.MetaColumn;
import org.apache.commons.lang3.StringUtils;
@@ -84,7 +83,6 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env)

        ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader();
        dbUrl = readerConfig.getParameter().getConnection().get(0).getJdbcUrl().get(0);
-        dbUrl = DBUtil.formatJdbcUrl(readerConfig.getName(),dbUrl);
        username = readerConfig.getParameter().getStringVal(JdbcConfigKeys.KEY_USER_NAME);
        password = readerConfig.getParameter().getStringVal(JdbcConfigKeys.KEY_PASSWORD);
        table = readerConfig.getParameter().getConnection().get(0).getTable().get(0);
@@ -101,7 +99,7 @@ public JdbcDataReader(DataTransferConfig config, StreamExecutionEnvironment env)

    @Override
    public DataStream<Row> readData() {
-        JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder();
+        JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(databaseInterface.getDatabaseType().name());
        builder.setDrivername(databaseInterface.getDriverClass());
        builder.setDBUrl(dbUrl);
        builder.setUsername(username);
@@ -142,7 +140,7 @@ private void buildIncrementConfig(ReaderConfig readerConfig){
        String incrementColStr = String.valueOf(incrementColumn);

        if(NumberUtils.isNumber(incrementColStr)){
-            MetaColumn metaColumn = metaColumns.get(Integer.valueOf(incrementColStr));
+            MetaColumn metaColumn = 
metaColumns.get(Integer.parseInt(incrementColStr)); type = metaColumn.getType(); name = metaColumn.getName(); index = metaColumn.getIndex(); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java index 3f585c6f0a..8b365ebe72 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/QuerySqlBuilder.java @@ -19,10 +19,8 @@ package com.dtstack.flinkx.rdb.datareader; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.StringUtil; import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; @@ -34,25 +32,25 @@ */ public class QuerySqlBuilder { - private static final String CUSTOM_SQL_TEMPLATE = "select * from (%s) %s"; - private static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; - private static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; - private static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; - private static final String SQL_SPLIT_WITH_ROW_NUM = "SELECT * FROM (%s) tmp WHERE %s"; - private static final String ROW_NUM_COLUMN_ALIAS = "FLINKX_ROWNUM"; - - private DatabaseInterface databaseInterface; - private String table; - private List metaColumns; - private String splitKey; - private String customFilter; - private String customSql; - private boolean isSplitByKey; - private boolean isIncrement; - private String incrementColumn; - private String restoreColumn; - private boolean isRestore; - private String orderByColumn; + protected static final String CUSTOM_SQL_TEMPLATE = "select * from (%s) %s"; + protected static final String TEMPORARY_TABLE_NAME = "flinkx_tmp"; + protected static final String INCREMENT_FILTER_PLACEHOLDER = "${incrementFilter}"; + protected static final String RESTORE_FILTER_PLACEHOLDER = "${restoreFilter}"; + protected static final String SQL_SPLIT_WITH_ROW_NUM = "SELECT * FROM (%s) tmp WHERE %s"; + protected static final String ROW_NUM_COLUMN_ALIAS = "FLINKX_ROWNUM"; + + protected DatabaseInterface databaseInterface; + protected String table; + protected List metaColumns; + protected String splitKey; + protected String customFilter; + protected String customSql; + protected boolean isSplitByKey; + protected boolean isIncrement; + protected String incrementColumn; + protected String restoreColumn; + protected boolean isRestore; + protected String orderByColumn; public QuerySqlBuilder(JdbcDataReader reader) { databaseInterface = reader.databaseInterface; @@ -92,7 +90,7 @@ public String buildSql(){ return query; } - private String buildQuerySql(){ + protected String buildQuerySql(){ List selectColumns = buildSelectColumns(databaseInterface, metaColumns); boolean splitWithRowNum = addRowNumColumn(databaseInterface, selectColumns, isSplitByKey, splitKey); @@ -124,10 +122,6 @@ private String buildQuerySql(){ sb.append(filter); - if(EDatabaseType.PostgreSQL.equals(databaseInterface.getDatabaseType())){ - sb.append(buildOrderSql()); - } - if(isSplitByKey && splitWithRowNum){ return String.format(SQL_SPLIT_WITH_ROW_NUM, sb.toString(), databaseInterface.getSplitFilter(ROW_NUM_COLUMN_ALIAS)); } else { @@ -135,7 +129,7 @@ private String buildQuerySql(){ } } - private String buildOrderSql(){ + protected String 
buildOrderSql(){ String column; if(isIncrement){ column = incrementColumn; @@ -168,7 +162,7 @@ private String buildQuerySqlWithCustomSql(){ return querySql.toString(); } - private static List buildSelectColumns(DatabaseInterface databaseInterface, List metaColumns){ + protected static List buildSelectColumns(DatabaseInterface databaseInterface, List metaColumns){ List selectColumns = new ArrayList<>(); if(metaColumns.size() == 1 && "*".equals(metaColumns.get(0).getName())){ selectColumns.add("*"); @@ -185,7 +179,7 @@ private static List buildSelectColumns(DatabaseInterface databaseInterfa return selectColumns; } - private static boolean addRowNumColumn(DatabaseInterface databaseInterface, List selectColumns, boolean isSplitByKey,String splitKey){ + protected static boolean addRowNumColumn(DatabaseInterface databaseInterface, List selectColumns, boolean isSplitByKey,String splitKey){ if(!isSplitByKey || !splitKey.contains("(")){ return false; } diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java index a48ff5489b..8c4e41df64 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java @@ -18,7 +18,6 @@ package com.dtstack.flinkx.rdb.inputformat; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.rdb.DataSource; import com.dtstack.flinkx.rdb.DatabaseInterface; @@ -33,7 +32,10 @@ import org.apache.flink.types.Row; import java.io.*; -import java.sql.*; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -66,15 +68,15 @@ public class DistributedJdbcInputFormat extends RichInputFormat { protected List sourceList; - private transient int sourceIndex; + protected transient int sourceIndex; - private transient Connection currentConn; + protected transient Connection currentConn; - private transient Statement currentStatement; + protected transient Statement currentStatement; - private transient ResultSet currentResultSet; + protected transient ResultSet currentResultSet; - private transient Row currentRecord; + protected transient Row currentRecord; protected String username; @@ -111,10 +113,10 @@ protected void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed." 
+ e.getMessage(), e);
        }

-        LOG.info("JdbcInputFormat[" + jobName + "]open: end");
+        LOG.info("JdbcInputFormat[{}]open: end", jobName);
    }

-    private void openNextSource() throws SQLException{
+    protected void openNextSource() throws SQLException{
        DataSource currentSource = sourceList.get(sourceIndex);
        currentConn = DBUtil.getConnection(currentSource.getJdbcUrl(), currentSource.getUserName(), currentSource.getPassword());
        currentConn.setAutoCommit(false);
@@ -133,12 +135,7 @@ private void openNextSource() throws SQLException{
            }
        }

-        if(databaseInterface.getDatabaseType() == EDatabaseType.MySQL){
-            currentStatement.setFetchSize(Integer.MIN_VALUE);
-        } else {
-            currentStatement.setFetchSize(fetchSize);
-        }
-
+        currentStatement.setFetchSize(fetchSize);
        currentStatement.setQueryTimeout(queryTimeOut);
        currentResultSet = currentStatement.executeQuery(queryTemplate);
        columnCount = currentResultSet.getMetaData().getColumnCount();
@@ -148,10 +145,10 @@ private void openNextSource() throws SQLException{
                    currentSource.getPassword(),databaseInterface, currentSource.getTable(),metaColumns);
        }

-        LOG.info("open source:" + currentSource.getJdbcUrl() + ",table:" + currentSource.getTable());
+        LOG.info("open source: {}, table: {}", currentSource.getJdbcUrl(), currentSource.getTable());
    }

-    private boolean readNextRecord() throws IOException{
+    protected boolean readNextRecord() throws IOException{
        try{
            if(currentConn == null){
                openNextSource();
@@ -160,7 +157,6 @@ private boolean readNextRecord() throws IOException{
            hasNext = currentResultSet.next();
            if (hasNext){
                currentRecord = new Row(columnCount);
-                DBUtil.getRow(databaseInterface.getDatabaseType(),currentRecord,descColumnTypeList,currentResultSet,typeConverter);
                if(!"*".equals(metaColumns.get(0).getName())){
                    for (int i = 0; i < columnCount; i++) {
                        Object val = currentRecord.getField(i);
@@ -195,7 +191,7 @@ protected Row nextRecordInternal(Row row) throws IOException {
        return currentRecord;
    }

-    private void closeCurrentSource(){
+    protected void closeCurrentSource(){
        try {
            DBUtil.closeDBResources(currentResultSet,currentStatement,currentConn, true);
            currentConn = null;
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java
index 1c1ad058e7..f8b4bb04e0 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java
@@ -21,6 +21,7 @@
import com.dtstack.flinkx.inputformat.RichInputFormatBuilder;
import com.dtstack.flinkx.rdb.DataSource;
import com.dtstack.flinkx.rdb.DatabaseInterface;
+import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader;
import com.dtstack.flinkx.rdb.type.TypeConverterInterface;
import com.dtstack.flinkx.reader.MetaColumn;
import org.apache.commons.lang.StringUtils;
@@ -35,10 +36,12 @@
 */
public class DistributedJdbcInputFormatBuilder extends RichInputFormatBuilder {

+    private static final String DISTRIBUTED_TAG = "d";
    private DistributedJdbcInputFormat format;

-    public DistributedJdbcInputFormatBuilder() {
-        super.format = this.format = new DistributedJdbcInputFormat();
+    public DistributedJdbcInputFormatBuilder(String name) {
+        JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(name + DISTRIBUTED_TAG, JdbcFormatLoader.INPUT_FORMAT);
+        super.format = format = 
(DistributedJdbcInputFormat) jdbcFormatLoader.getFormatInstance(); } public void setDrivername(String driverName) { diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 9a40ccfb23..c2c3abfad2 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -18,18 +18,15 @@ package com.dtstack.flinkx.rdb.inputformat; -import com.dtstack.flinkx.enums.ColumnType; import com.dtstack.flinkx.constants.Metrics; -import com.dtstack.flinkx.enums.EDatabaseType; +import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.inputformat.RichInputFormat; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; -import com.dtstack.flinkx.util.URLUtil; +import com.dtstack.flinkx.util.*; import com.google.gson.Gson; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.Accumulator; @@ -41,12 +38,6 @@ import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.CloseableHttpClient; import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.HttpClientBuilder; import org.apache.flink.types.Row; -import java.io.IOException; -import java.sql.*; -import java.util.*; -import java.util.Date; - -import com.dtstack.flinkx.inputformat.RichInputFormat; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -54,6 +45,11 @@ import org.apache.hadoop.io.IOUtils; import org.codehaus.jackson.map.ObjectMapper; +import java.io.IOException; +import java.sql.*; +import java.util.Date; +import java.util.*; + /** * InputFormat for reading data from a database and generating Rows.
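Note: the builder constructors above and below stop hard-coding a single format class and instead ask JdbcFormatLoader for a database-specific subclass by name. A rough sketch of what such a reflective loader can look like follows; the naming rule and class here are assumptions for illustration, not FlinkX's actual JdbcFormatLoader:

    // Hypothetical sketch: resolve e.g. ("mysql", "InputFormat") to a concrete format class.
    public class FormatLoaderSketch {
        public static Object getFormatInstance(String dataType, String suffix) {
            String simpleName = Character.toUpperCase(dataType.charAt(0))
                    + dataType.substring(1).toLowerCase() + suffix;
            String className = String.format("com.dtstack.flinkx.%s.format.%s",
                    dataType.toLowerCase(), simpleName);
            try {
                // Plain reflective no-arg construction; a real loader might cache the Class object
                return Class.forName(className).getConstructor().newInstance();
            } catch (ReflectiveOperationException e) {
                throw new IllegalArgumentException("No format class found for " + dataType, e);
            }
        }
    }

The point of the indirection is that flinkx-rdb no longer needs compile-time knowledge of each database plugin's format class.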
* @@ -102,12 +98,18 @@ public class JdbcInputFormat extends RichInputFormat { protected int fetchSize; + /** + * Query timeout returned by each DatabaseMeta.getQueryTimeout(); defaults to 1000ms + */ protected int queryTimeOut; protected int numPartitions; protected String customSql; + /** + * Incremental task configuration + */ protected IncrementConfig incrementConfig; protected StringAccumulator tableColAccumulator; @@ -158,33 +160,17 @@ public void openInternal(InputSplit inputSplit) throws IOException { if(!canReadData(inputSplit)){ LOG.warn("Not reading data when the start location is equal to the end location"); - hasNext = false; return; } dbConn = DBUtil.getConnection(dbURL, username, password); - // Some drivers need auto-commit disabled for the featchSize parameter to take effect + // Some drivers need auto-commit disabled for the fetchSize parameter to take effect dbConn.setAutoCommit(false); - - // Commit the transaction before reading, so that if the program exits abnormally the read order from PG stays the same next time - if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){ - dbConn.commit(); - } - Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); - if(EDatabaseType.MySQL == databaseInterface.getDatabaseType() - || EDatabaseType.GBase == databaseInterface.getDatabaseType()){ - statement.setFetchSize(Integer.MIN_VALUE); - } else { - statement.setFetchSize(fetchSize); - } - - if(EDatabaseType.Carbondata != databaseInterface.getDatabaseType()) { - statement.setQueryTimeout(queryTimeOut); - } - + statement.setFetchSize(fetchSize); + statement.setQueryTimeout(queryTimeOut); String querySql = buildQuerySql(inputSplit); resultSet = statement.executeQuery(querySql); columnCount = resultSet.getMetaData().getColumnCount(); @@ -209,7 +195,7 @@ public void openInternal(InputSplit inputSplit) throws IOException { throw new IllegalArgumentException("open() failed." + se.getMessage(), se); } - LOG.info("JdbcInputFormat[" + jobName + "]open: end"); + LOG.info("JdbcInputFormat[{}]open: end", jobName); } @@ -240,13 +226,7 @@ public boolean reachedEnd() throws IOException { @Override public Row nextRecordInternal(Row row) throws IOException { - row = new Row(columnCount); try { - if (!hasNext) { - return null; - } - - DBUtil.getRow(databaseInterface.getDatabaseType(),row,descColumnTypeList,resultSet,typeConverter); if(!"*".equals(metaColumns.get(0).getName())){ for (int i = 0; i < columnCount; i++) { Object val = row.getField(i); @@ -269,31 +249,45 @@ public Row nextRecordInternal(Row row) throws IOException { //update hasNext after we've read the record hasNext = resultSet.next(); return row; - } catch (SQLException se) { - throw new IOException("Couldn't read data - " + se.getMessage(), se); - } catch (Exception npe) { - throw new IOException("Couldn't access resultSet", npe); + } catch (SQLException e) { + throw new IOException("Couldn't access resultSet", e); } } - private void initMetric(InputSplit split){ + @Override + public void closeInternal() throws IOException { + if(incrementConfig.isIncrement() && hadoopConfig != null) { + uploadMetricData(); + } + DBUtil.closeDBResources(resultSet,statement,dbConn, true); + } + + /** + * Initialize the metrics of the incremental task + * @param split input split + */ + protected void initMetric(InputSplit split){ if (!incrementConfig.isIncrement()){ return; } + //Get all the accumulators Map> accumulatorMap = getRuntimeContext().getAllAccumulators(); + //If there is no tableCol accumulator, create one to record table-incrementColumn and save it in the runtime context if(!accumulatorMap.containsKey(Metrics.TABLE_COL)){ tableColAccumulator = new StringAccumulator(); tableColAccumulator.add(table + "-" + incrementConfig.getColumnName()); getRuntimeContext().addAccumulator(Metrics.TABLE_COL,tableColAccumulator); } + //Create an accumulator that records the start location startLocationAccumulator = new
StringAccumulator(); if (incrementConfig.getStartLocation() != null){ startLocationAccumulator.add(incrementConfig.getStartLocation()); } getRuntimeContext().addAccumulator(Metrics.START_LOCATION,startLocationAccumulator); + //Create an accumulator that records the end location endLocationAccumulator = new MaximumAccumulator(); String endLocation = ((JdbcInputSplit)split).getEndLocation(); if(endLocation != null && incrementConfig.isUseMaxFunc()){ @@ -304,9 +298,15 @@ private void initMetric(InputSplit split){ getRuntimeContext().addAccumulator(Metrics.END_LOCATION,endLocationAccumulator); } - private void getMaxValue(InputSplit inputSplit){ + /** + * Store the max value of the incremental task's data into the accumulator + * @param inputSplit input split + */ + protected void getMaxValue(InputSplit inputSplit){ String maxValue = null; + //Channel 0 creates the accumulator and saves the max value; with multiple channels, the other channels fetch the max value from the historyServer if (inputSplit.getSplitNumber() == 0){ + //Query the current max value of the increment column from the database maxValue = getMaxValueFromDb(); maxValueAccumulator = new StringAccumulator(); maxValueAccumulator.add(maxValue); @@ -336,6 +336,7 @@ private void getMaxValue(InputSplit inputSplit){ */ int maxAcquireTimes = (queryTimeOut / incrementConfig.getRequestAccumulatorInterval()) + 10; + //Current number of retries int acquireTimes = 0; while (StringUtils.isEmpty(maxValue) && acquireTimes < maxAcquireTimes){ try { @@ -358,6 +359,13 @@ private void getMaxValue(InputSplit inputSplit){ ((JdbcInputSplit) inputSplit).setEndLocation(maxValue); } + /** + * Fetch the incremental max value from the historyServer + * @param httpClient httpClient + * @param monitors the URLs to request + * @return + */ + @SuppressWarnings("unchecked") private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[] monitors){ String maxValue = null; Gson gson = new Gson(); @@ -388,7 +396,13 @@ private String getMaxvalueFromAccumulator(CloseableHttpClient httpClient,String[ return maxValue; } - private boolean canReadData(InputSplit split){ + /** + * Check whether the incremental task can continue reading data + * For an incremental task, return false when startLocation = endLocation and neither is null; otherwise return true + * @param split input split + * @return + */ + protected boolean canReadData(InputSplit split){ if (!incrementConfig.isIncrement()){ return true; } @@ -401,21 +415,27 @@ private boolean canReadData(InputSplit split){ return !StringUtils.equals(jdbcInputSplit.getStartLocation(), jdbcInputSplit.getEndLocation()); } - private String buildQuerySql(InputSplit inputSplit){ + /** + * Build the query sql + * @param inputSplit input split + * @return the constructed sql string + */ + protected String buildQuerySql(InputSplit inputSplit){ + //queryTemplate built by QuerySqlBuilder String querySql = queryTemplate; if (inputSplit == null){ - LOG.warn(String.format("Executing sql is: '%s'", querySql)); + LOG.warn("Executing sql is: '{}'", querySql); return querySql; } JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit; if (StringUtils.isNotEmpty(splitKey)){ - querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions)) .replace("${M}", String.valueOf(indexOfSubtask)); + querySql = queryTemplate.replace("${N}", String.valueOf(numPartitions)).replace("${M}", String.valueOf(indexOfSubtask)); } + //Whether resume from checkpoint is enabled if (restoreConfig.isRestore()){ if(formatState == null){ querySql = querySql.replace(DBUtil.RESTORE_FILTER_PLACEHOLDER, StringUtils.EMPTY); @@ -425,8 +445,12 @@ private String buildQuerySql(InputSplit inputSplit){ } } else { String startLocation = getLocation(restoreColumn.getType(), formatState.getState()); - String restoreFilter = DBUtil.buildIncrementFilter(databaseInterface, restoreColumn.getType(), - restoreColumn.getName(), startLocation, jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); + String
restoreFilter = buildIncrementFilter(restoreColumn.getType(), + restoreColumn.getName(), + startLocation, + jdbcInputSplit.getEndLocation(), + customSql, + incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(restoreFilter)){ restoreFilter = " and " + restoreFilter; @@ -440,15 +464,24 @@ private String buildQuerySql(InputSplit inputSplit){ querySql = buildIncrementSql(jdbcInputSplit, querySql); } - LOG.warn(String.format("Executing sql is: '%s'", querySql)); + LOG.warn("Executing sql is: '{}'", querySql); return querySql; } + /** + * Build the query sql for the incremental task + * @param jdbcInputSplit input split + * @param querySql the query sql built so far + * @return + */ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql){ - String incrementFilter = DBUtil.buildIncrementFilter(databaseInterface, incrementConfig.getColumnType(), - incrementConfig.getColumnName(), jdbcInputSplit.getStartLocation(), - jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); + String incrementFilter = buildIncrementFilter(incrementConfig.getColumnType(), + incrementConfig.getColumnName(), + jdbcInputSplit.getStartLocation(), + jdbcInputSplit.getEndLocation(), + customSql, + incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(incrementFilter)){ incrementFilter = " and " + incrementFilter; @@ -457,6 +490,120 @@ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql) return querySql.replace(DBUtil.INCREMENT_FILTER_PLACEHOLDER, incrementFilter); } + /** + * Build the filter condition of the incremental task's query sql + * @param incrementColType type of the increment column + * @param incrementCol name of the increment column + * @param startLocation start location + * @param endLocation end location + * @param customSql user-defined sql + * @param useMaxFunc whether to keep the data at the end location + * @return + */ + protected String buildIncrementFilter(String incrementColType,String incrementCol, String startLocation,String endLocation, String customSql, boolean useMaxFunc){ + StringBuilder filter = new StringBuilder(128); + + if (org.apache.commons.lang.StringUtils.isNotEmpty(customSql)){ + incrementCol = String.format("%s.%s", DBUtil.TEMPORARY_TABLE_NAME, databaseInterface.quoteColumn(incrementCol)); + } else { + incrementCol = databaseInterface.quoteColumn(incrementCol); + } + + String startFilter = buildStartLocationSql(incrementColType, incrementCol, startLocation, useMaxFunc); + if (org.apache.commons.lang.StringUtils.isNotEmpty(startFilter)){ + filter.append(startFilter); + } + + String endFilter = buildEndLocationSql(incrementColType, incrementCol, endLocation); + if (org.apache.commons.lang.StringUtils.isNotEmpty(endFilter)){ + if (filter.length() > 0){ + filter.append(" and ").append(endFilter); + } else { + filter.append(endFilter); + } + } + + return filter.toString(); + } + + /** + * Build the start-location sql + * @param incrementColType type of the increment column + * @param incrementCol name of the increment column + * @param startLocation start location + * @param useMaxFunc whether to keep the data at the end location + * @return + */ + protected String buildStartLocationSql(String incrementColType, String incrementCol, String startLocation, boolean useMaxFunc){ + if(org.apache.commons.lang.StringUtils.isEmpty(startLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(startLocation)){ + return null; + } + + String operator = useMaxFunc?"
>= ":" > "; + + return getLocationSql(incrementColType, incrementCol, startLocation, operator); + } + + /** + * 构建结束位置sql + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param endLocation 结束位置 + * @return + */ + public String buildEndLocationSql(String incrementColType, String incrementCol, String endLocation){ + if(org.apache.commons.lang.StringUtils.isEmpty(endLocation) || DBUtil.NULL_STRING.equalsIgnoreCase(endLocation)){ + return null; + } + + return getLocationSql(incrementColType, incrementCol, endLocation, " < "); + } + + /** + * 构建边界位置sql + * @param incrementColType 增量字段类型 + * @param incrementCol 增量字段名称 + * @param location 边界位置(起始/结束) + * @param operator 判断符( >, >=, <) + * @return + */ + protected String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { + String endTimeStr; + String endLocationSql; + if(ColumnType.isTimeType(incrementColType)){ + endTimeStr = getTimeStr(Long.parseLong(location), incrementColType); + endLocationSql = incrementCol + operator + endTimeStr; + } else if(ColumnType.isNumberType(incrementColType)){ + endLocationSql = incrementCol + operator + location; + } else { + endTimeStr = String.format("'%s'",location); + endLocationSql = incrementCol + operator + endTimeStr; + } + + return endLocationSql; + } + + /** + * 构建时间边界字符串 + * @param location 边界位置(起始/结束) + * @param incrementColType 增量字段类型 + * @return + */ + protected String getTimeStr(Long location, String incrementColType){ + String timeStr; + Timestamp ts = new Timestamp(DBUtil.getMillis(location)); + ts.setNanos(DBUtil.getNanos(location)); + timeStr = DBUtil.getNanosTimeStr(ts.toString()); + timeStr = timeStr.substring(0,26); + timeStr = String.format("'%s'",timeStr); + + return timeStr; + } + + /** + * 从数据库中查询增量字段的最大值 + * @return + */ private String getMaxValueFromDb() { String maxValue = null; Connection conn = null; @@ -474,8 +621,10 @@ private String getMaxValueFromDb() { databaseInterface.quoteColumn(incrementConfig.getColumnName()), databaseInterface.quoteTable(table)); } - String startSql = DBUtil.buildStartLocationSql(databaseInterface, incrementConfig.getColumnType(), - databaseInterface.quoteColumn(incrementConfig.getColumnName()), incrementConfig.getStartLocation(), incrementConfig.isUseMaxFunc()); + String startSql = buildStartLocationSql(incrementConfig.getColumnType(), + databaseInterface.quoteColumn(incrementConfig.getColumnName()), + incrementConfig.getStartLocation(), + incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(startSql)){ queryMaxValueSql += " where " + startSql; } @@ -499,6 +648,12 @@ private String getMaxValueFromDb() { } } + /** + * 边界位置值转字符串 + * @param columnType 边界字段类型 + * @param columnVal 边界值 + * @return + */ private String getLocation(String columnType, Object columnVal){ String location; if (columnVal == null){ @@ -533,6 +688,10 @@ private String getLocation(String columnType, Object columnVal){ return location; } + /** + * 上传累加器数据 + * @throws IOException + */ private void uploadMetricData() throws IOException { FSDataOutputStream out = null; try { @@ -567,12 +726,4 @@ private void uploadMetricData() throws IOException { } } - @Override - public void closeInternal() throws IOException { - if(incrementConfig.isIncrement() && hadoopConfig != null) { - uploadMetricData(); - } - DBUtil.closeDBResources(resultSet,statement,dbConn, true); - } - } \ No newline at end of file diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java 
b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java index 658c3cf0b0..d0976b6a13 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.inputformat.RichInputFormatBuilder; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; +import com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang.StringUtils; @@ -38,8 +39,9 @@ public class JdbcInputFormatBuilder extends RichInputFormatBuilder { private JdbcInputFormat format; - public JdbcInputFormatBuilder() { - super.format = format = new JdbcInputFormat(); + public JdbcInputFormatBuilder(String dataType) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(dataType, JdbcFormatLoader.INPUT_FORMAT); + super.format = format = (JdbcInputFormat) jdbcFormatLoader.getFormatInstance(); } public void setDrivername(String drivername) { diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java index 595baf6ed7..0d70d2361a 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java @@ -23,7 +23,6 @@ import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; -import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.writer.DataWriter; import org.apache.flink.streaming.api.datastream.DataStream; @@ -69,6 +68,7 @@ public void setDatabaseInterface(DatabaseInterface databaseInterface) { this.databaseInterface = databaseInterface; } + @SuppressWarnings("unchecked") public JdbcDataWriter(DataTransferConfig config) { super(config); @@ -76,8 +76,6 @@ public JdbcDataWriter(DataTransferConfig config) { WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter(); dbUrl = writerConfig.getParameter().getConnection().get(0).getJdbcUrl(); - dbUrl = DBUtil.formatJdbcUrl(writerConfig.getName(), dbUrl); - username = writerConfig.getParameter().getStringVal(KEY_USERNAME); password = writerConfig.getParameter().getStringVal(KEY_PASSWORD); table = writerConfig.getParameter().getConnection().get(0).getTable().get(0); @@ -95,7 +93,7 @@ public JdbcDataWriter(DataTransferConfig config) { @Override public DataStreamSink writeData(DataStream dataSet) { - JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(); + JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(databaseInterface.getDatabaseType().name()); builder.setDriverName(databaseInterface.getDriverClass()); builder.setDBUrl(dbUrl); builder.setUsername(username); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 83b5acaa62..bc60fc02ec 100644 --- 
a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -18,7 +18,6 @@ package com.dtstack.flinkx.rdb.outputformat; import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.enums.EWriteMode; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.outputformat.RichOutputFormat; @@ -28,14 +27,13 @@ import com.dtstack.flinkx.restore.FormatState; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.DateUtil; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ObjectUtils; import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.sql.*; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -86,7 +84,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected List fullColumnType; - private List columnType = new ArrayList<>(); + protected List columnType = new ArrayList<>(); protected TypeConverterInterface typeConverter; @@ -96,7 +94,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected long rowsOfCurrentTransaction; - private final static String GET_ORACLE_INDEX_SQL = "SELECT " + + protected final static String GET_ORACLE_INDEX_SQL = "SELECT " + "t.INDEX_NAME," + "t.COLUMN_NAME " + "FROM " + @@ -110,7 +108,7 @@ public class JdbcOutputFormat extends RichOutputFormat { protected final static String CONN_CLOSE_ERROR_MSG = "No operations allowed"; protected PreparedStatement prepareTemplates() throws SQLException { - if(fullColumn == null || fullColumn.size() == 0) { + if(CollectionUtils.isEmpty(fullColumn)) { fullColumn = column; } @@ -131,7 +129,7 @@ protected PreparedStatement prepareTemplates() throws SQLException { } @Override - protected void openInternal(int taskNumber, int numTasks) throws IOException { + protected void openInternal(int taskNumber, int numTasks){ try { ClassUtil.forName(driverName, getClass().getClassLoader()); dbConn = DBUtil.getConnection(dbURL, username, password); @@ -140,7 +138,7 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { dbConn.setAutoCommit(false); } - if(fullColumn == null || fullColumn.size() == 0) { + if(CollectionUtils.isEmpty(fullColumn)) { fullColumn = probeFullColumns(table, dbConn); } @@ -166,7 +164,7 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { preparedStatement = prepareTemplates(); readyCheckpoint = false; - LOG.info("subtask[" + taskNumber + "] wait finished"); + LOG.info("subTask[{}] wait finished", taskNumber); } catch (SQLException sqe) { throw new IllegalArgumentException("open() failed.", sqe); } @@ -184,7 +182,7 @@ private List analyzeTable() { ret.add(rd.getColumnTypeName(i+1)); } - if(fullColumn == null || fullColumn.size() == 0){ + if(CollectionUtils.isEmpty(fullColumn)){ for(int i = 0; i < rd.getColumnCount(); ++i) { fullColumn.add(rd.getColumnName(i+1)); } @@ -273,8 +271,7 @@ public FormatState getFormatState(){ } try { - LOG.info("readyCheckpoint:" + readyCheckpoint); - LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); + LOG.info("readyCheckpoint: {}, rowsOfCurrentTransaction: {}", readyCheckpoint, rowsOfCurrentTransaction); if (readyCheckpoint || rowsOfCurrentTransaction >
restoreConfig.getMaxRowNumForCheckpoint()){ @@ -321,75 +318,21 @@ protected Object getField(Row row, int index) { field = ((java.util.Date) field).getTime(); } - field=dealOracleTimestampToVarcharOrLong(databaseInterface.getDatabaseType(),field,type); - - - if(EDatabaseType.PostgreSQL == databaseInterface.getDatabaseType()){ - field = typeConverter.convert(field,type); - } - - return field; - } - - /** - * oracle timestamp to oracle varchar or varchar2 or long field format - * @param databaseType - * @param field - * @param type - * @return - */ - private Object dealOracleTimestampToVarcharOrLong(EDatabaseType databaseType, Object field, String type) { - if (EDatabaseType.Oracle!=databaseInterface.getDatabaseType()){ - return field; - } - - if (!(field instanceof Timestamp)){ - return field; - } - - if (type.equalsIgnoreCase(ColumnType.VARCHAR.name()) || type.equalsIgnoreCase(ColumnType.VARCHAR2.name())){ - SimpleDateFormat format = DateUtil.getDateTimeFormatter(); - field= format.format(field); - } - - if (type.equalsIgnoreCase(ColumnType.LONG.name()) ){ - field = ((Timestamp) field).getTime(); - } return field; } protected List probeFullColumns(String table, Connection dbConn) throws SQLException { - String schema =null; - if(EDatabaseType.Oracle == databaseInterface.getDatabaseType()) { - String[] parts = table.split("\\."); - if(parts.length == 2) { - schema = parts[0].toUpperCase(); - table = parts[1]; - } - } - List ret = new ArrayList<>(); - ResultSet rs = dbConn.getMetaData().getColumns(null, schema, table, null); + ResultSet rs = dbConn.getMetaData().getColumns(null, null, table, null); while(rs.next()) { ret.add(rs.getString("COLUMN_NAME")); } return ret; } - - protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { Map> map = new HashMap<>(); - ResultSet rs; - if(EDatabaseType.Oracle == databaseInterface.getDatabaseType()){ - PreparedStatement ps = dbConn.prepareStatement(String.format(GET_ORACLE_INDEX_SQL,table)); - rs = ps.executeQuery(); - } else if(EDatabaseType.DB2 == databaseInterface.getDatabaseType()){ - rs = dbConn.getMetaData().getIndexInfo(null, null, table.toUpperCase(), true, false); - } else { - rs = dbConn.getMetaData().getIndexInfo(null, null, table, true, false); - } - + ResultSet rs = dbConn.getMetaData().getIndexInfo(null, null, table, true, false); while(rs.next()) { String indexName = rs.getString("INDEX_NAME"); if(!map.containsKey(indexName)) { @@ -428,7 +371,7 @@ public void closeInternal() { @Override protected boolean needWaitBeforeWriteRecords() { - return preSql != null && preSql.size() != 0; + return CollectionUtils.isNotEmpty(preSql); } @Override @@ -440,7 +383,7 @@ protected void beforeWriteRecords() { @Override protected boolean needWaitBeforeCloseInternal() { - return postSql != null && postSql.size() != 0; + return CollectionUtils.isNotEmpty(postSql); } @Override diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java index 0bd3adff21..0cc8516216 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormatBuilder.java @@ -19,6 +19,7 @@ import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.outputformat.RichOutputFormatBuilder; +import 
com.dtstack.flinkx.rdb.loader.JdbcFormatLoader; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import java.util.List; @@ -32,8 +33,9 @@ public class JdbcOutputFormatBuilder extends RichOutputFormatBuilder { private JdbcOutputFormat format; - public JdbcOutputFormatBuilder() { - super.format = format = new JdbcOutputFormat(); + public JdbcOutputFormatBuilder(String dataType) { + JdbcFormatLoader jdbcFormatLoader = new JdbcFormatLoader(dataType, JdbcFormatLoader.OUTPUT_FORMAT); + super.format = format = (JdbcOutputFormat) jdbcFormatLoader.getFormatInstance(); } public JdbcOutputFormatBuilder(JdbcOutputFormat format) { diff --git a/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java new file mode 100644 index 0000000000..fc95e68534 --- /dev/null +++ b/flinkx-sqlserver/flinkx-sqlserver-reader/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverInputFormat.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.sqlserver.format; + +import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.util.DBUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.flink.types.Row; + +import java.io.IOException; +import java.sql.Timestamp; + +import static com.dtstack.flinkx.rdb.util.DBUtil.clobToString; + +/** + * Date: 2019/09/19 + * Company: www.dtstack.com + * + * @author tudou + */ +public class SqlserverInputFormat extends JdbcInputFormat { + + @Override + public Row nextRecordInternal(Row row) throws IOException { + if (!hasNext) { + return null; + } + row = new Row(columnCount); + + try { + for (int pos = 0; pos < row.getArity(); pos++) { + Object obj = resultSet.getObject(pos + 1); + if(obj != null) { + if(CollectionUtils.isNotEmpty(descColumnTypeList)) { + if(descColumnTypeList.get(pos).equalsIgnoreCase("bit")) { + if(obj instanceof Boolean) { + obj = ((Boolean) obj ? 
1 : 0); + } + } + obj = clobToString(obj); + } + + row.setField(pos, obj); + } + return super.nextRecordInternal(row); + } catch (Exception e) { + throw new IOException("Couldn't read data - " + e.getMessage(), e); + } + } + + /** + * Build the boundary-location sql + * @param incrementColType type of the increment column + * @param incrementCol name of the increment column + * @param location boundary location (start/end) + * @param operator comparison operator ( >, >=, < ) + * @return + */ + @Override + protected String getLocationSql(String incrementColType, String incrementCol, String location, String operator) { + String endTimeStr; + String endLocationSql; + boolean isTimeType = ColumnType.isTimeType(incrementColType) + || ColumnType.NVARCHAR.name().equals(incrementColType); + if(isTimeType){ + endTimeStr = getTimeStr(Long.parseLong(location), incrementColType); + endLocationSql = incrementCol + operator + endTimeStr; + } else if(ColumnType.isNumberType(incrementColType)){ + endLocationSql = incrementCol + operator + location; + } else { + endTimeStr = String.format("'%s'",location); + endLocationSql = incrementCol + operator + endTimeStr; + } + + return endLocationSql; + } + + /** + * Build the time-boundary string + * @param location boundary location (start/end) + * @param incrementColType type of the increment column + * @return + */ + @Override + protected String getTimeStr(Long location, String incrementColType){ + String timeStr; + Timestamp ts = new Timestamp(DBUtil.getMillis(location)); + ts.setNanos(DBUtil.getNanos(location)); + timeStr = DBUtil.getNanosTimeStr(ts.toString()); + timeStr = timeStr.substring(0,23); + timeStr = String.format("'%s'",timeStr); + + return timeStr; + } +} diff --git a/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java b/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java new file mode 100644 index 0000000000..d574881b56 --- /dev/null +++ b/flinkx-sqlserver/flinkx-sqlserver-writer/src/main/java/com/dtstack/flinkx/sqlserver/format/SqlserverOutputFormat.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
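Note: the SqlserverInputFormat override above trims the timestamp literal to 23 characters (millisecond precision) where the base JdbcInputFormat keeps 26 (microseconds). A small, self-contained illustration of how a numeric location presumably becomes a quoted literal — the millis/nanos split below stands in for DBUtil.getMillis/getNanos, whose exact contract is an assumption here:

    import java.sql.Timestamp;

    public class TimeLiteralSketch {
        public static void main(String[] args) {
            long location = 1570700743123456789L;           // assumed nanosecond-precision location
            long millis = location / 1_000_000L;            // stand-in for DBUtil.getMillis(location)
            int nanos = (int) (location % 1_000_000_000L);  // stand-in for DBUtil.getNanos(location)

            Timestamp ts = new Timestamp(millis);
            ts.setNanos(nanos);                             // fractional part becomes .123456789

            String full = ts.toString();
            // prints something like '2019-10-10 17:45:43.123' (local time zone)
            System.out.println("'" + full.substring(0, 23) + "'");
        }
    }

Truncating to milliseconds matches the precision a SQL Server DATETIME comparison can actually distinguish, which is presumably why the override exists.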
+ */ +package com.dtstack.flinkx.sqlserver.format; + +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; + +/** + * Date: 2019/09/20 + * Company: www.dtstack.com + * + * @author tudou + */ +public class SqlserverOutputFormat extends JdbcOutputFormat { +} From 0aea79429b7476467ab7867a26ee4960ece6fc03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B1=9F=E5=8D=9A=5F=E6=B1=9F=E5=8D=9A?= Date: Thu, 10 Oct 2019 16:45:01 +0800 Subject: [PATCH 23/62] =?UTF-8?q?Revert=20"=E5=8E=BB=E6=8E=89=E5=9B=BA?= =?UTF-8?q?=E5=AE=9A=E7=9A=84stateBackend"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a657c1b747f37d0cd6d260f8ee91508509634814 --- flinkx-core/src/main/java/com/dtstack/flinkx/Main.java | 6 ++++++ .../java/com/dtstack/flinkx/constants/ConfigConstrant.java | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index cd4bcb17c5..53c734694a 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -151,6 +151,12 @@ private static StreamExecutionEnvironment openCheckpointConf(StreamExecutionEnvi env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + String backendPath = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); + if(backendPath != null){ + //set checkpoint save path on file system,hdfs://, file:// + env.setStateBackend(new FsStateBackend(backendPath.trim())); + LOG.info("Set StateBackend:" + backendPath); + } } return env; } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConfigConstrant.java b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConfigConstrant.java index 502b3327f2..baa2362e27 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConfigConstrant.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConfigConstrant.java @@ -31,4 +31,10 @@ public class ConfigConstrant { public static final String FLINK_CHECKPOINT_INTERVAL_KEY = "flink.checkpoint.interval"; public static final String FLINK_CHECKPOINT_TIMEOUT_KEY = "flink.checkpoint.timeout"; + + public static final String FLINK_MAXCONCURRENTCHECKPOINTS_KEY = "flink.max.concurrent.checkpoints"; + + public static final String FLINK_CHECKPOINT_CLEANUPMODE_KEY = "flink.checkpoint.cleanup.mode"; + + public static final String FLINK_CHECKPOINT_DATAURI_KEY = "flink.checkpoint.stateBackend"; } From d6689d820e83f9e191890ce551e196afd760cda5 Mon Sep 17 00:00:00 2001 From: tudou Date: Thu, 10 Oct 2019 20:45:43 +0800 Subject: [PATCH 24/62] =?UTF-8?q?=E3=80=90=E6=B5=81=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=E3=80=91=E4=BD=BF=E7=94=A8CheckpointListener=E7=A1=AE=E4=BF=9D?= =?UTF-8?q?checkpoint=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/outputformat/FileOutputFormat.java | 18 +++++++++++------- .../flinkx/outputformat/RichOutputFormat.java | 9 +++++++-- .../sink/DtOutputFormatSinkFunction.java | 12 ++++++++++-- .../flinkx/hive/writer/HiveOutputFormat.java | 9 ++++----- .../rdb/outputformat/JdbcOutputFormat.java | 10 +++++----- 5 files changed, 37 insertions(+), 21 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java 
b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index dfbd5ce624..6fb45aaa11 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -127,7 +127,7 @@ protected void actionBeforeWriteData(){ try{ // In overwrite mode, when not recovering from a checkpoint, delete the data directory first - if(!APPEND_MODE.equalsIgnoreCase(writeMode) && formatState.getState() == null){ + if(!APPEND_MODE.equalsIgnoreCase(writeMode) && formatState != null && formatState.getState() == null){ coverageData(); } } catch (Exception e){ @@ -210,6 +210,16 @@ public FormatState getFormatState() { return null; } + if (restoreConfig.isStream() || readyCheckpoint){ + super.getFormatState(); + return formatState; + } + + return null; + } + + @Override + public void flushOutputFormat() { if (restoreConfig.isStream() || readyCheckpoint){ try{ flushData(); @@ -231,14 +241,8 @@ public FormatState getFormatState() { if (!restoreConfig.isStream()){ formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); } - sumRowsOfBlock = 0; - - super.getFormatState(); - return formatState; } - - } @Override diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java index 66ef1e557d..d9380f4a7d 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java @@ -27,7 +27,6 @@ import com.dtstack.flinkx.metrics.AccumulatorCollector; import com.dtstack.flinkx.metrics.BaseMetric; import com.dtstack.flinkx.restore.FormatState; -import com.dtstack.flinkx.util.DataConvertUtil; import com.dtstack.flinkx.util.ExceptionUtil; import com.dtstack.flinkx.util.URLUtil; import com.dtstack.flinkx.writer.DirtyDataManager; @@ -42,14 +41,15 @@ import org.apache.flink.hadoop.shaded.org.apache.http.impl.client.HttpClientBuilder; import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; import org.apache.flink.types.Row; -import org.apache.flink.util.ExceptionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; + import static com.dtstack.flinkx.writer.WriteErrorTypes.*; /** @@ -534,6 +534,11 @@ public FormatState getFormatState(){ return formatState; } + /** + * Flush the data after the checkpoint has been saved successfully + */ + public void flushOutputFormat() {} + public void setRestoreState(FormatState formatState) { this.formatState = formatState; } diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java index 72c7f22bcc..f0528f783f 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java @@ -32,6 +32,7 @@ import org.apache.flink.api.java.typeutils.InputTypeConfigurable; import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.CheckpointListener; import org.apache.flink.runtime.state.FunctionInitializationContext; import org.apache.flink.runtime.state.FunctionSnapshotContext; import
org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; @@ -52,7 +53,7 @@ */ @PublicEvolving @Deprecated -public class DtOutputFormatSinkFunction extends OutputFormatSinkFunction implements CheckpointedFunction { +public class DtOutputFormatSinkFunction extends OutputFormatSinkFunction implements CheckpointedFunction, CheckpointListener { private static final long serialVersionUID = 1L; @@ -135,9 +136,16 @@ private void cleanup() { @Override public void snapshotState(FunctionSnapshotContext context) throws Exception { + ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).getFormatState(); + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + LOG.info("notifyCheckpointComplete checkpointId = {}", checkpointId); FormatState formatState = ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).getFormatState(); + ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).flushOutputFormat(); if (formatState != null){ - LOG.info("OutputFormat format state:{}", formatState.toString()); + LOG.info("OutputFormat format state:{}", formatState); unionOffsetStates.clear(); unionOffsetStates.add(formatState); } diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index 8fac3e3c16..2b1fdd0a47 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -28,6 +28,7 @@ import com.dtstack.flinkx.hive.util.PathConverterUtil; import com.dtstack.flinkx.outputformat.RichOutputFormat; import com.dtstack.flinkx.restore.FormatState; +import com.dtstack.flinkx.util.ExceptionUtil; import org.apache.commons.collections.MapUtils; import org.apache.commons.math3.util.Pair; import org.apache.flink.types.Row; @@ -130,14 +131,12 @@ public FormatState getFormatState() { LOG.info("return null for formatState"); return null; } - - flushOutputFormat(); - super.getFormatState(); return formatState; } - private void flushOutputFormat() { + @Override + public void flushOutputFormat() { Iterator> entryIterator = outputFormats.entrySet().iterator(); while (entryIterator.hasNext()) { Map.Entry entry = entryIterator.next(); @@ -146,7 +145,7 @@ private void flushOutputFormat() { try { entry.getValue().close(); } catch (Exception e) { - logger.error("", e); + logger.error(ExceptionUtil.getErrorMessage(e)); } finally { entryIterator.remove(); } diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 83b5acaa62..90fd456768 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -271,7 +271,12 @@ public FormatState getFormatState(){ LOG.info("return null for formatState"); return null; } + super.getFormatState(); + return formatState; + } + @Override + public void flushOutputFormat() { try { LOG.info("readyCheckpoint:" + readyCheckpoint); LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); @@ -289,12 +294,7 @@ public FormatState getFormatState(){ 
formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); LOG.info("format state:{}", formatState.getState()); - - super.getFormatState(); - return formatState; } - - return null; } catch (Exception e){ try { LOG.warn("getFormatState:Start rollback"); From 4afb0832b2129e3d458f91562cbb505ec989e292 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Fri, 11 Oct 2019 09:58:24 +0800 Subject: [PATCH 25/62] =?UTF-8?q?fix=E7=BC=96=E8=AF=91=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 4e68ac184e..93f07c1bef 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -26,10 +26,7 @@ import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.ClassUtil; -import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; -import com.dtstack.flinkx.util.URLUtil; +import com.dtstack.flinkx.util.*; import com.google.gson.Gson; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.Accumulator; From 72bdd3ec948a95b78e6ec438dc9e2c81200d4e5c Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 11 Oct 2019 10:57:20 +0800 Subject: [PATCH 26/62] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E7=AC=AC=E4=B8=80?= =?UTF-8?q?=E4=BB=BD=E5=BF=AB=E7=85=A7=E6=B2=A1=E6=9C=89=E6=AD=A3=E7=A1=AE?= =?UTF-8?q?=E4=BF=9D=E5=AD=98bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/outputformat/FileOutputFormat.java | 19 +++++++++---------- .../sink/DtOutputFormatSinkFunction.java | 13 ++++++------- .../rdb/outputformat/JdbcOutputFormat.java | 15 +++++---------- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index 6fb45aaa11..831262e6c8 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -211,6 +211,13 @@ public FormatState getFormatState() { } if (restoreConfig.isStream() || readyCheckpoint){ + lastWriteSize = bytesWriteCounter.getLocalValue(); + snapshotWriteCounter.add(sumRowsOfBlock); + formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); + if (!restoreConfig.isStream()){ + formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); + } + sumRowsOfBlock = 0; super.getFormatState(); return formatState; } @@ -223,9 +230,8 @@ public void flushOutputFormat() { if (restoreConfig.isStream() || readyCheckpoint){ try{ flushData(); - lastWriteSize = bytesWriteCounter.getLocalValue(); } catch (Exception e){ - throw new RuntimeException("Flush data error when create snapshot:", e); + throw new RuntimeException("Flush data error 
:", e); } try{ @@ -233,15 +239,8 @@ public void flushOutputFormat() { moveTemporaryDataFileToDirectory(); } } catch (Exception e){ - throw new RuntimeException("Move temporary file to data directory error when create snapshot:", e); + throw new RuntimeException("Move temporary file to data directory error when flush data:", e); } - - snapshotWriteCounter.add(sumRowsOfBlock); - formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); - if (!restoreConfig.isStream()){ - formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); - } - sumRowsOfBlock = 0; } } diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java index f0528f783f..53aa7cf30f 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java @@ -136,14 +136,7 @@ private void cleanup() { @Override public void snapshotState(FunctionSnapshotContext context) throws Exception { - ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).getFormatState(); - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - LOG.info("notifyCheckpointComplete checkpointId = {}", checkpointId); FormatState formatState = ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).getFormatState(); - ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).flushOutputFormat(); if (formatState != null){ LOG.info("OutputFormat format state:{}", formatState); unionOffsetStates.clear(); @@ -151,6 +144,12 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception { } } + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + LOG.info("notifyCheckpointComplete checkpointId = {}", checkpointId); + ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).flushOutputFormat(); + } + @Override public void initializeState(FunctionInitializationContext context) throws Exception { LOG.info("Start initialize output format state"); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 90fd456768..8641bca861 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -271,7 +271,12 @@ public FormatState getFormatState(){ LOG.info("return null for formatState"); return null; } + snapshotWriteCounter.add(rowsOfCurrentTransaction); + rowsOfCurrentTransaction = 0; + formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); + formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); super.getFormatState(); + LOG.info("format state:{}", formatState.getState()); return formatState; } @@ -280,20 +285,11 @@ public void flushOutputFormat() { try { LOG.info("readyCheckpoint:" + readyCheckpoint); LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); - if (readyCheckpoint || rowsOfCurrentTransaction > restoreConfig.getMaxRowNumForCheckpoint()){ - LOG.info("getFormatState:Start commit connection"); preparedStatement.executeBatch(); dbConn.commit(); 
LOG.info("getFormatState:Commit connection success"); - - snapshotWriteCounter.add(rowsOfCurrentTransaction); - rowsOfCurrentTransaction = 0; - - formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); - formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); - LOG.info("format state:{}", formatState.getState()); } } catch (Exception e){ try { @@ -303,7 +299,6 @@ public void flushOutputFormat() { } catch (SQLException sqlE){ throw new RuntimeException("Rollback error:", e); } - throw new RuntimeException("Return format state error:", e); } } From 779ee2e19a4ff25e20ffef34adbfe7c606d1b3dc Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 11 Oct 2019 11:07:14 +0800 Subject: [PATCH 27/62] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=BD=93=E5=89=8D?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E8=AE=B0=E5=BD=95=E4=B8=AA=E6=95=B0=E7=9A=84?= =?UTF-8?q?=E6=B8=85=E7=A9=BA=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/outputformat/FileOutputFormat.java | 2 +- .../com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index 831262e6c8..f79d13b3e2 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -217,7 +217,6 @@ public FormatState getFormatState() { if (!restoreConfig.isStream()){ formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); } - sumRowsOfBlock = 0; super.getFormatState(); return formatState; } @@ -241,6 +240,7 @@ public void flushOutputFormat() { } catch (Exception e){ throw new RuntimeException("Move temporary file to data directory error when flush data:", e); } + sumRowsOfBlock = 0; } } diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 8641bca861..1c190cf038 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -272,7 +272,6 @@ public FormatState getFormatState(){ return null; } snapshotWriteCounter.add(rowsOfCurrentTransaction); - rowsOfCurrentTransaction = 0; formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); super.getFormatState(); @@ -286,6 +285,7 @@ public void flushOutputFormat() { LOG.info("readyCheckpoint:" + readyCheckpoint); LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); if (readyCheckpoint || rowsOfCurrentTransaction > restoreConfig.getMaxRowNumForCheckpoint()){ + rowsOfCurrentTransaction = 0; LOG.info("getFormatState:Start commit connection"); preparedStatement.executeBatch(); dbConn.commit(); From a27c3bd4d44145e8c9573932d4542a8262c42345 Mon Sep 17 00:00:00 2001 From: tudou Date: Sat, 12 Oct 2019 14:02:26 +0800 Subject: [PATCH 28/62] =?UTF-8?q?=E3=80=90fix#19341=E3=80=91=E8=A7=A3?= =?UTF-8?q?=E5=86=B3=E5=AE=9E=E6=97=B6=E9=87=87=E9=9B=86hive=E8=A1=A8?= =?UTF-8?q?=E6=97=A0=E6=95=B0=E6=8D=AEbug?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/outputformat/FileOutputFormat.java | 2 +- .../java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index f79d13b3e2..949a47ee9f 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -236,11 +236,11 @@ public void flushOutputFormat() { try{ if (sumRowsOfBlock != 0) { moveTemporaryDataFileToDirectory(); + sumRowsOfBlock = 0; } } catch (Exception e){ throw new RuntimeException("Move temporary file to data directory error when flush data:", e); } - sumRowsOfBlock = 0; } } diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index 2b1fdd0a47..5aeb588920 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -137,10 +137,12 @@ public FormatState getFormatState() { @Override public void flushOutputFormat() { + LOG.info("flushOutputFormat start"); Iterator> entryIterator = outputFormats.entrySet().iterator(); while (entryIterator.hasNext()) { Map.Entry entry = entryIterator.next(); - entry.getValue().getFormatState(); + entry.getValue().flushOutputFormat(); + LOG.info("flushOutputFormat entry = {}", entry); if (partitionFormat.isTimeout(entry.getValue().getLastWriteTime())) { try { entry.getValue().close(); From 73f8c0277d500e19074b1ab4e4aef5492cef916a Mon Sep 17 00:00:00 2001 From: tudou Date: Sat, 12 Oct 2019 18:29:33 +0800 Subject: [PATCH 29/62] =?UTF-8?q?=E3=80=90fix#19356=E3=80=91=E8=A7=A3?= =?UTF-8?q?=E5=86=B3=E5=AE=9E=E6=97=B6=E9=87=87=E9=9B=86=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E4=B8=A2=E5=A4=B1=E5=8F=8A=E6=9B=B2=E7=BA=BF=E4=B8=8D=E9=87=8D?= =?UTF-8?q?=E5=90=88bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/outputformat/FileOutputFormat.java | 2 +- .../com/dtstack/flinkx/hive/writer/HiveOutputFormat.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index 1c62500b92..25f8fe6312 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -213,7 +213,7 @@ public FormatState getFormatState() { if (restoreConfig.isStream() || readyCheckpoint){ lastWriteSize = bytesWriteCounter.getLocalValue(); snapshotWriteCounter.add(sumRowsOfBlock); - formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); + formatState.setNumberWrite(numWriteCounter.getLocalValue()); if (!restoreConfig.isStream()){ formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); } diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java 
b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index 9069451549..2828067b9f 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -139,6 +139,11 @@ public FormatState getFormatState() { LOG.info("return null for formatState"); return null; } + Iterator<Map.Entry<String, HdfsOutputFormat>> entryIterator = outputFormats.entrySet().iterator(); + while (entryIterator.hasNext()) { + Map.Entry<String, HdfsOutputFormat> entry = entryIterator.next(); + entry.getValue().getFormatState(); + } super.getFormatState(); return formatState; } From cefa556bb348bd17be095eab452c6aa4a73a40b1 Mon Sep 17 00:00:00 2001 From: tudou Date: Sat, 12 Oct 2019 19:41:30 +0800 Subject: [PATCH 30/62] [fix#19362] Fix mismatched data counts in real-time collection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/outputformat/RichOutputFormat.java | 2 +- .../com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java index 69d3c69233..ed2626f686 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java @@ -241,7 +241,7 @@ protected void initRestoreInfo(){ otherErrCounter.add(formatState.getMetricValue(Metrics.NUM_OTHER_ERRORS)); //use snapshot write count - numWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); + numWriteCounter.add(formatState.getMetricValue(Metrics.NUM_WRITES)); snapshotWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); bytesWriteCounter.add(formatState.getMetricValue(Metrics.WRITE_BYTES)); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 1c190cf038..72ad7bc12e 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -273,7 +273,7 @@ public FormatState getFormatState(){ } snapshotWriteCounter.add(rowsOfCurrentTransaction); formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); - formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); + formatState.setNumberWrite(numWriteCounter.getLocalValue()); super.getFormatState(); LOG.info("format state:{}", formatState.getState()); return formatState;
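Patches 29 and 30 (and the later reverts in patches 37/38) keep trading two counters when building the snapshot. The distinction they are negotiating, as a rough sketch under assumed semantics (numWrites counts every row handed to the writer, snapshotWrites counts rows covered by committed transactions; illustrative names, not FlinkX source):

    public class WriteCounters {
        private long numWrites;       // running total, shown in metrics
        private long snapshotWrites;  // committed baseline, safe to restore from

        public void restoreFrom(long committedInLastSnapshot) {
            snapshotWrites = committedInLastSnapshot;
            numWrites = committedInLastSnapshot; // resume the total from committed rows
        }

        public void onRowWritten() { numWrites++; }

        public void onTransactionCommitted(long rows) { snapshotWrites += rows; }
    }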
From 22e89acd006e75fdaba2414f3a56bbb38222da18 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 14 Oct 2019 10:40:30 +0800 Subject: [PATCH 31/62] [Job failure near the end of a task causes duplicate data][19364] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/outputformat/FileOutputFormat.java | 7 ++++ .../dtstack/flinkx/restore/FormatState.java | 20 +++++++++++ .../flinkx/ftp/writer/FtpOutputFormat.java | 33 +++++++++++++++++ .../flinkx/hdfs/writer/HdfsOutputFormat.java | 36 +++++++++++++++++++ 4 files changed, 96 insertions(+) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index 949a47ee9f..b3a455ea16 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -130,6 +130,11 @@ protected void actionBeforeWriteData(){ if(!APPEND_MODE.equalsIgnoreCase(writeMode) && formatState != null && formatState.getState() == null){ coverageData(); } + + // Clean up the dirty data left behind when the previous job failed abnormally + if (restoreConfig.isRestore() && formatState != null) { + cleanDirtyData(); + } } catch (Exception e){ LOG.error("writeMode = {}, formatState = {}, e = {}", writeMode, formatState.getState(), ExceptionUtil.getErrorMessage(e)); throw new RuntimeException(e); @@ -339,6 +344,8 @@ public long getLastWriteTime() { return lastWriteTime; } + protected abstract void cleanDirtyData(); + protected abstract void createActionFinishedTag(); protected abstract void waitForActionFinishedBeforeWrite(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java b/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java index 7ab83a0b82..ed3dd814d0 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java @@ -43,6 +43,10 @@ public class FormatState implements Serializable { private long numberWrite; + private String jobId; + + private int fileIndex; + public FormatState() { } @@ -51,6 +55,22 @@ public FormatState(int numOfSubTask, Object state) { this.state = state; } + public String getJobId() { + return jobId; + } + + public void setJobId(String jobId) { + this.jobId = jobId; + } + + public int getFileIndex() { + return fileIndex; + } + + public void setFileIndex(int fileIndex) { + this.fileIndex = fileIndex; + } + public long getNumberRead() { return numberRead; } diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java index d130c3e5b8..8d4b1d2be3 100644 --- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java +++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java @@ -26,12 +26,15 @@ import com.dtstack.flinkx.outputformat.FileOutputFormat; import com.dtstack.flinkx.util.StringUtil; import com.dtstack.flinkx.util.SysUtil; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.flink.types.Row; import java.io.IOException; import java.io.OutputStream; import java.util.Arrays; import java.util.List; +import java.util.function.Predicate; + import static com.dtstack.flinkx.ftp.FtpConfigConstants.SFTP_PROTOCOL; /** @@ -102,6 +105,36 @@ protected void checkOutputDir() { } } + @Override + protected void cleanDirtyData() { + int fileIndex = formatState.getFileIndex(); + String lastJobId = formatState.getJobId(); + + List<String> files = ftpHandler.getFiles(outputFilePath); + files.removeIf(new Predicate<String>() { + @Override + public boolean test(String file) { + String fileName = file.substring(file.lastIndexOf(SP) + 1); 
if(!fileName.contains(lastJobId)){ + return false; + } + + String[] splits = fileName.split("\\."); + if (splits.length == 3) { + return Integer.parseInt(splits[2]) <= fileIndex; + } + + return true; + } + }); + + if(CollectionUtils.isNotEmpty(files)){ + for (String file : files) { + ftpHandler.deleteAllFilesInDir(file, null); + } + } + } + @Override protected void nextBlock(){ super.nextBlock(); diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index fb5e54cf76..573c7b337f 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -121,6 +121,42 @@ protected void waitForActionFinishedBeforeWrite() { } } + @Override + protected void cleanDirtyData() { + int fileIndex = formatState.getFileIndex(); + String lastJobId = formatState.getJobId(); + + PathFilter filter = new PathFilter() { + @Override + public boolean accept(Path path) { + String fileName = path.getName(); + if(!fileName.contains(lastJobId)){ + return false; + } + + String[] splits = fileName.split("\\."); + if (splits.length == 3) { + return Integer.parseInt(splits[2]) > fileIndex; + } + + return false; + } + }; + + try{ + FileStatus[] dirtyData = fs.listStatus(new Path(outputFilePath), filter); + if(dirtyData != null && dirtyData.length > 0){ + for (FileStatus dirtyDatum : dirtyData) { + fs.delete(dirtyDatum.getPath(), false); + LOG.info("Delete dirty data file:{}", dirtyDatum.getPath()); + } + } + } catch (Exception e){ + LOG.error("Clean dirty data error:", e); + throw new RuntimeException(e); + } + } + @Override protected void openSource() throws IOException{ conf = HdfsUtil.getHadoopConfig(hadoopConfig, defaultFS); From 7d1169f0c7aa71882adf56208dd46adea30860c7 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 14 Oct 2019 10:45:28 +0800 Subject: [PATCH 32/62] [Job failure near the end of a task causes duplicate data][19364] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java index 8d4b1d2be3..bc037a382b 100644 --- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java +++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java @@ -116,7 +116,7 @@ protected void cleanDirtyData() { public boolean test(String file) { String fileName = file.substring(file.lastIndexOf(SP) + 1); if(!fileName.contains(lastJobId)){ - return false; + return true; } String[] splits = fileName.split("\\.");
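The cleanDirtyData() filters above encode a naming convention for block files: the name contains the writing job's id, and the last dot-separated segment is the block index recorded in FormatState. What the other segments mean is not visible here, so the sample name below is an assumption; the logic only needs jobId containment plus a three-part name:

    // Standalone version of the HDFS filter: true means "dirty, delete it".
    public class DirtyFileFilter {
        public static boolean isDirty(String fileName, String lastJobId, int fileIndex) {
            if (!fileName.contains(lastJobId)) {
                return false; // another job's file is never ours to delete
            }
            String[] splits = fileName.split("\\.");
            if (splits.length == 3) {
                return Integer.parseInt(splits[2]) > fileIndex; // written after the snapshot
            }
            return false;
        }

        public static void main(String[] args) {
            System.out.println(isDirty("0.jobA.7", "jobA", 5)); // true
            System.out.println(isDirty("0.jobA.3", "jobA", 5)); // false
        }
    }

The FTP variant expresses the same test through removeIf on the deletion candidates, which inverts the boolean; patch 32's one-line change is exactly that inversion.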
From afc8c3ae7d9c7e6e1ee1f31569d26df98bedfc96 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 14 Oct 2019 10:48:17 +0800 Subject: [PATCH 33/62] [Job failure near the end of a task causes duplicate data][19364] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/outputformat/FileOutputFormat.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index b3a455ea16..d7023996b0 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -222,6 +222,10 @@ public FormatState getFormatState() { if (!restoreConfig.isStream()){ formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); } + + formatState.setJobId(jobId); + formatState.setFileIndex(blockIndex); + super.getFormatState(); return formatState; } From f7cf7d8cb952ee572e3a82921bcd5f51f26bbec8 Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 14 Oct 2019 10:59:35 +0800 Subject: [PATCH 34/62] [fix#19356] Fix data loss and mismatched metric curves in real-time collection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dtstack/flinkx/outputformat/FileOutputFormat.java | 2 +- .../dtstack/flinkx/outputformat/RichOutputFormat.java | 3 +-- .../com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java | 7 ++++--- .../com/dtstack/flinkx/hive/writer/HiveOutputFormat.java | 9 ++++++++- .../flinkx/rdb/outputformat/JdbcOutputFormat.java | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index f79d13b3e2..949a47ee9f 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -236,11 +236,11 @@ public void flushOutputFormat() { try{ if (sumRowsOfBlock != 0) { moveTemporaryDataFileToDirectory(); + sumRowsOfBlock = 0; } } catch (Exception e){ throw new RuntimeException("Move temporary file to data directory error when flush data:", e); } - sumRowsOfBlock = 0; } } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java index d9380f4a7d..163b9a3980 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java @@ -240,8 +240,7 @@ protected void initRestoreInfo(){ conversionErrCounter.add(formatState.getMetricValue(Metrics.NUM_CONVERSION_ERRORS)); otherErrCounter.add(formatState.getMetricValue(Metrics.NUM_OTHER_ERRORS)); - //use snapshot write count - numWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); + numWriteCounter.add(formatState.getMetricValue(Metrics.NUM_WRITES)); snapshotWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); bytesWriteCounter.add(formatState.getMetricValue(Metrics.WRITE_BYTES)); diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index fb5e54cf76..c851e9040e 100644 --- 
a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -22,14 +22,15 @@ import com.dtstack.flinkx.outputformat.FileOutputFormat; import com.dtstack.flinkx.util.ColumnTypeUtil; import com.dtstack.flinkx.util.SysUtil; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import java.io.IOException; -import java.util.*; -import org.apache.hadoop.conf.Configuration; +import java.io.IOException; +import java.util.List; +import java.util.Map; /** diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index 2b1fdd0a47..d332ead341 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -131,16 +131,23 @@ public FormatState getFormatState() { LOG.info("return null for formatState"); return null; } + Iterator<Map.Entry<String, HdfsOutputFormat>> entryIterator = outputFormats.entrySet().iterator(); + while (entryIterator.hasNext()) { + Map.Entry<String, HdfsOutputFormat> entry = entryIterator.next(); + entry.getValue().getFormatState(); + } super.getFormatState(); return formatState; } @Override public void flushOutputFormat() { + LOG.info("flushOutputFormat start"); Iterator<Map.Entry<String, HdfsOutputFormat>> entryIterator = outputFormats.entrySet().iterator(); while (entryIterator.hasNext()) { Map.Entry<String, HdfsOutputFormat> entry = entryIterator.next(); - entry.getValue().getFormatState(); + LOG.info("flushOutputFormat entry = {}", entry); + entry.getValue().flushOutputFormat(); if (partitionFormat.isTimeout(entry.getValue().getLastWriteTime())) { try { entry.getValue().close(); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 1c190cf038..72ad7bc12e 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -273,7 +273,7 @@ public FormatState getFormatState(){ } snapshotWriteCounter.add(rowsOfCurrentTransaction); formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); - formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); + formatState.setNumberWrite(numWriteCounter.getLocalValue()); super.getFormatState(); LOG.info("format state:{}", formatState.getState()); return formatState; From 9f33f0144f1135accab7d42dcd0e6787b75b332b Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 14 Oct 2019 14:49:59 +0800 Subject: [PATCH 35/62] Fix NullPointerException when deleting dirty data files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java 
b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index aff07210f4..40e20c583b 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -22,6 +22,7 @@ import com.dtstack.flinkx.outputformat.FileOutputFormat; import com.dtstack.flinkx.util.ColumnTypeUtil; import com.dtstack.flinkx.util.SysUtil; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -126,6 +127,10 @@ protected void waitForActionFinishedBeforeWrite() { protected void cleanDirtyData() { int fileIndex = formatState.getFileIndex(); String lastJobId = formatState.getJobId(); + LOG.info("fileIndex = {}, lastJobId = {}",fileIndex, lastJobId); + if(StringUtils.isBlank(lastJobId)){ + return; + } PathFilter filter = new PathFilter() { @Override From 7bf9c726885a78258a89b3a117446c280273d3bb Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 14 Oct 2019 15:05:22 +0800 Subject: [PATCH 36/62] Fix NullPointerException when deleting dirty data files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java index bc037a382b..f3e9940181 100644 --- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java +++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java @@ -109,7 +109,10 @@ protected void cleanDirtyData() { int fileIndex = formatState.getFileIndex(); String lastJobId = formatState.getJobId(); - + LOG.info("fileIndex = {}, lastJobId = {}",fileIndex, lastJobId); + if(org.apache.commons.lang3.StringUtils.isBlank(lastJobId)){ + return; + } List<String> files = ftpHandler.getFiles(outputFilePath); files.removeIf(new Predicate<String>() { @Override From 9805af9c46146e3edd2dfa2d0b9bff1977a69eca Mon Sep 17 00:00:00 2001 From: tudou Date: Tue, 15 Oct 2019 10:01:03 +0800 Subject: [PATCH 37/62] revert checkPointListener --- .../flinkx/outputformat/FileOutputFormat.java | 40 +++++++++---------- .../flinkx/outputformat/RichOutputFormat.java | 8 +--- .../dtstack/flinkx/restore/FormatState.java | 4 ++ .../sink/DtOutputFormatSinkFunction.java | 8 +--- .../flinkx/hdfs/writer/HdfsOutputFormat.java | 2 +- .../flinkx/hive/writer/HiveOutputFormat.java | 15 +++---- .../rdb/outputformat/JdbcOutputFormat.java | 25 +++++++----- 7 files changed, 46 insertions(+), 56 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java index d7023996b0..f2db577b44 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/FileOutputFormat.java @@ -216,15 +216,31 @@ public FormatState getFormatState() { } if (restoreConfig.isStream() || readyCheckpoint){ - lastWriteSize = 
bytesWriteCounter.getLocalValue(); + try{ + flushData(); + lastWriteSize = bytesWriteCounter.getLocalValue(); + } catch (Exception e){ + throw new RuntimeException("Flush data error when create snapshot:", e); + } + + try{ + if (sumRowsOfBlock != 0) { + moveTemporaryDataFileToDirectory(); + } + } catch (Exception e){ + throw new RuntimeException("Move temporary file to data directory error when create snapshot:", e); + } + snapshotWriteCounter.add(sumRowsOfBlock); formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); if (!restoreConfig.isStream()){ formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); } + sumRowsOfBlock = 0; formatState.setJobId(jobId); - formatState.setFileIndex(blockIndex); + formatState.setFileIndex(blockIndex-1); + LOG.info("jobId = {}, blockIndex = {}", jobId, blockIndex); super.getFormatState(); return formatState; @@ -233,26 +249,6 @@ public FormatState getFormatState() { return null; } - @Override - public void flushOutputFormat() { - if (restoreConfig.isStream() || readyCheckpoint){ - try{ - flushData(); - } catch (Exception e){ - throw new RuntimeException("Flush data error :", e); - } - - try{ - if (sumRowsOfBlock != 0) { - moveTemporaryDataFileToDirectory(); - sumRowsOfBlock = 0; - } - } catch (Exception e){ - throw new RuntimeException("Move temporary file to data directory error when flush data:", e); - } - } - } - @Override public void closeInternal() throws IOException { readyCheckpoint = false; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java index 163b9a3980..4a977b6a74 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java @@ -240,7 +240,8 @@ protected void initRestoreInfo(){ conversionErrCounter.add(formatState.getMetricValue(Metrics.NUM_CONVERSION_ERRORS)); otherErrCounter.add(formatState.getMetricValue(Metrics.NUM_OTHER_ERRORS)); - numWriteCounter.add(formatState.getMetricValue(Metrics.NUM_WRITES)); + //use snapshot write count + numWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); snapshotWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); bytesWriteCounter.add(formatState.getMetricValue(Metrics.WRITE_BYTES)); @@ -533,11 +534,6 @@ public FormatState getFormatState(){ return formatState; } - /** - * flush the data after saving checkPoint successfully - */ - public void flushOutputFormat() {} - public void setRestoreState(FormatState formatState) { this.formatState = formatState; } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java b/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java index ed3dd814d0..eb374e95d7 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/restore/FormatState.java @@ -124,6 +124,10 @@ public String toString() { "numOfSubTask=" + numOfSubTask + ", state=" + state + ", metric=" + metric + + ", numberRead=" + numberRead + + ", numberWrite=" + numberWrite + + ", jobId='" + jobId + '\'' + + ", fileIndex=" + fileIndex + '}'; } } diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java index 53aa7cf30f..c6a9f16cc6 100644 
--- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/sink/DtOutputFormatSinkFunction.java @@ -53,7 +53,7 @@ */ @PublicEvolving @Deprecated -public class DtOutputFormatSinkFunction<IT> extends OutputFormatSinkFunction<IT> implements CheckpointedFunction, CheckpointListener { +public class DtOutputFormatSinkFunction<IT> extends OutputFormatSinkFunction<IT> implements CheckpointedFunction { private static final long serialVersionUID = 1L; @@ -144,12 +144,6 @@ public void snapshotState(FunctionSnapshotContext context) throws Exception { } } - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - LOG.info("notifyCheckpointComplete checkpointId = {}", checkpointId); - ((com.dtstack.flinkx.outputformat.RichOutputFormat) format).flushOutputFormat(); - } - @Override public void initializeState(FunctionInitializationContext context) throws Exception { LOG.info("Start initialize output format state"); diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java index 40e20c583b..b459fa052e 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormat.java @@ -127,7 +127,7 @@ protected void cleanDirtyData() { int fileIndex = formatState.getFileIndex(); String lastJobId = formatState.getJobId(); - LOG.info("fileIndex = {}, lastJobId = {}",fileIndex, lastJobId); + LOG.info("start to cleanDirtyData, fileIndex = {}, lastJobId = {}",fileIndex, lastJobId); if(StringUtils.isBlank(lastJobId)){ return; } diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index d332ead341..3ba054dc7e 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -131,23 +131,18 @@ public FormatState getFormatState() { LOG.info("return null for formatState"); return null; } - Iterator<Map.Entry<String, HdfsOutputFormat>> entryIterator = outputFormats.entrySet().iterator(); - while (entryIterator.hasNext()) { - Map.Entry<String, HdfsOutputFormat> entry = entryIterator.next(); - entry.getValue().getFormatState(); - } + + flushOutputFormat(); + super.getFormatState(); return formatState; } - @Override - public void flushOutputFormat() { - LOG.info("flushOutputFormat start"); + private void flushOutputFormat() { Iterator<Map.Entry<String, HdfsOutputFormat>> entryIterator = outputFormats.entrySet().iterator(); while (entryIterator.hasNext()) { Map.Entry<String, HdfsOutputFormat> entry = entryIterator.next(); - LOG.info("flushOutputFormat entry = {}", entry); - entry.getValue().flushOutputFormat(); + entry.getValue().getFormatState(); if (partitionFormat.isTimeout(entry.getValue().getLastWriteTime())) { try { entry.getValue().close(); diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index 72ad7bc12e..83b5acaa62 100644 --- 
a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -271,26 +271,30 @@ public FormatState getFormatState(){ LOG.info("return null for formatState"); return null; } - snapshotWriteCounter.add(rowsOfCurrentTransaction); - formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); - formatState.setNumberWrite(numWriteCounter.getLocalValue()); - super.getFormatState(); - LOG.info("format state:{}", formatState.getState()); - return formatState; - } - @Override - public void flushOutputFormat() { try { LOG.info("readyCheckpoint:" + readyCheckpoint); LOG.info("rowsOfCurrentTransaction:" + rowsOfCurrentTransaction); + if (readyCheckpoint || rowsOfCurrentTransaction > restoreConfig.getMaxRowNumForCheckpoint()){ - rowsOfCurrentTransaction = 0; + LOG.info("getFormatState:Start commit connection"); preparedStatement.executeBatch(); dbConn.commit(); LOG.info("getFormatState:Commit connection success"); + + snapshotWriteCounter.add(rowsOfCurrentTransaction); + rowsOfCurrentTransaction = 0; + + formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); + formatState.setNumberWrite(snapshotWriteCounter.getLocalValue()); + LOG.info("format state:{}", formatState.getState()); + + super.getFormatState(); + return formatState; } + + return null; } catch (Exception e){ try { LOG.warn("getFormatState:Start rollback"); @@ -299,6 +303,7 @@ public void flushOutputFormat() { } catch (SQLException sqlE){ throw new RuntimeException("Rollback error:", e); } + throw new RuntimeException("Return format state error:", e); } } From 4dee19725329d0528f0bc36b0a072bcf985d3903 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Tue, 15 Oct 2019 10:32:15 +0800 Subject: [PATCH 38/62] revert checkpointListener --- .../java/com/dtstack/flinkx/outputformat/RichOutputFormat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java index feef367fb2..347ebeeecb 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java @@ -241,7 +241,7 @@ protected void initRestoreInfo(){ otherErrCounter.add(formatState.getMetricValue(Metrics.NUM_OTHER_ERRORS)); //use snapshot write count - numWriteCounter.add(formatState.getMetricValue(Metrics.NUM_WRITES)); + numWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); snapshotWriteCounter.add(formatState.getMetricValue(Metrics.SNAPSHOT_WRITES)); bytesWriteCounter.add(formatState.getMetricValue(Metrics.WRITE_BYTES)); From 42d2801daef59ae8be3eaed7924d615c457ffee0 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Tue, 15 Oct 2019 11:40:41 +0800 Subject: [PATCH 39/62] Fix recording of the incremental column value read from the database MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/inputformat/RichInputFormat.java | 1 - .../JdbcInputFormat.java | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java 
b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java index 973f0d61ea..b4beb1f50b 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/RichInputFormat.java @@ -197,7 +197,6 @@ private Row setChannelInformation(Row internalRow){ */ public FormatState getFormatState() { if (formatState != null && numReadCounter != null && inputMetric!= null) { - formatState.setState(numReadCounter.getLocalValue()); formatState.setMetric(inputMetric.getMetricCounters()); } return formatState; diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 9a40ccfb23..912422d3ee 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -26,6 +26,7 @@ import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.rdb.util.DBUtil; import com.dtstack.flinkx.reader.MetaColumn; +import com.dtstack.flinkx.restore.FormatState; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.DateUtil; import com.dtstack.flinkx.util.StringUtil; @@ -120,6 +121,8 @@ public class JdbcInputFormat extends RichInputFormat { private MetaColumn restoreColumn; + private Row lastRow = null; + /** * The hadoop config for metric */ @@ -268,6 +271,11 @@ public Row nextRecordInternal(Row row) throws IOException { //update hasNext after we've read the record hasNext = resultSet.next(); + + if (restoreConfig.isRestore()) { + lastRow = row; + } + return row; } catch (SQLException se) { throw new IOException("Couldn't read data - " + se.getMessage(), se); } } + @Override + public FormatState getFormatState() { + super.getFormatState(); + + if (formatState != null && lastRow != null) { + formatState.setState(lastRow.getField(restoreConfig.getRestoreColumnIndex())); + } + return formatState; + } + private void initMetric(InputSplit split){ if (!incrementConfig.isIncrement()){ return;
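Patch 39 moves the reader's restore state from "rows read so far" to the restore column of the last emitted row, which is what a resumed incremental read actually needs. A self-contained sketch of that idea (illustrative types; FlinkX keeps the value in FormatState):

    public class OffsetTracker {
        private Object lastRestoreValue;

        public void onRecord(Object[] row, int restoreColumnIndex) {
            lastRestoreValue = row[restoreColumnIndex];
        }

        public Object snapshotState() {
            return lastRestoreValue; // null until the first record arrives
        }

        public static void main(String[] args) {
            OffsetTracker tracker = new OffsetTracker();
            tracker.onRecord(new Object[]{42, "2019-10-15 11:40:41"}, 1);
            System.out.println(tracker.snapshotState()); // 2019-10-15 11:40:41
        }
    }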
From 88aa758711ee2b74560baa739a7f866580c6aba9 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Tue, 15 Oct 2019 16:29:13 +0800 Subject: [PATCH 40/62] Take the checkpoint lock around nextRecord MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../source/DtInputFormatSourceFunction.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java index 955f4e84ec..70fa22dd59 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java @@ -114,12 +114,14 @@ public void run(SourceContext<OUT> ctx) throws Exception { // was called by checking the isRunning flag while (isRunning && !format.reachedEnd()) { - nextElement = format.nextRecord(nextElement); - if (nextElement != null) { - ctx.collect(nextElement); - } else { - break; - } + synchronized (ctx.getCheckpointLock()){ + nextElement = format.nextRecord(nextElement); + if (nextElement != null) { + ctx.collect(nextElement); + } else { + break; + } + } } format.close(); completedSplitsCounter.inc(); From 19c735d7ce881f164fcea42c5849f3fed416b92b Mon Sep 17 00:00:00 2001 From: jiangbo Date: Wed, 16 Oct 2019 14:03:01 +0800 Subject: [PATCH 41/62] Provide RocksDB's RocksDBStateBackend as a FlinkX checkpoint StateBackend option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flinkx-core/src/main/java/com/dtstack/flinkx/Main.java | 6 ------ .../src/main/java/com/dtstack/flinkx/test/LocalTest.java | 8 -------- 2 files changed, 14 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index 84f7b2fd95..8f607307ca 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -171,12 +171,6 @@ private static StreamExecutionEnvironment openCheckpointConf(StreamExecutionEnvi env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); - String backendPath = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); - if(backendPath != null){ - //set checkpoint save path on file system,hdfs://, file:// - env.setStateBackend(new FsStateBackend(backendPath.trim())); - LOG.info("Set StateBackend:" + backendPath); - } } return env; } diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java index 762b8acf39..0a802370b7 100644 --- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java +++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java @@ -261,14 +261,6 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); - String backendPath = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); - if(backendPath != null){ - //set checkpoint save path on file system,hdfs://, file:// - env.setStateBackend(new FsStateBackend(backendPath)); - - LOG.info("Set StateBackend:" + backendPath); - } - env.setRestartStrategy(RestartStrategies.failureRateRestart( FAILURE_RATE, Time.of(FAILURE_INTERVAL, TimeUnit.MINUTES), From b5e99019019f17daa128926e3fe4966b55accf7c Mon Sep 17 00:00:00 2001 From: 李江博_江博 Date: Wed, 16 Oct 2019 14:21:53 +0800 Subject: [PATCH 42/62] Revert "Merge branch 'feature_rocksdb' into '1.5_dev' " This reverts merge request !120 --- flinkx-core/src/main/java/com/dtstack/flinkx/Main.java | 6 ++++++ .../src/main/java/com/dtstack/flinkx/test/LocalTest.java | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index 8f607307ca..84f7b2fd95 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -171,6 +171,12 @@ private static StreamExecutionEnvironment openCheckpointConf(StreamExecutionEnvi 
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + String backendPath = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); + if(backendPath != null){ + //set checkpoint save path on file system,hdfs://, file:// + env.setStateBackend(new FsStateBackend(backendPath.trim())); + LOG.info("Set StateBackend:" + backendPath); + } } return env; } diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java index 0a802370b7..762b8acf39 100644 --- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java +++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java @@ -261,6 +261,14 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + String backendPath = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); + if(backendPath != null){ + //set checkpoint save path on file system,hdfs://, file:// + env.setStateBackend(new FsStateBackend(backendPath)); + + LOG.info("Set StateBackend:" + backendPath); + } + env.setRestartStrategy(RestartStrategies.failureRateRestart( FAILURE_RATE, Time.of(FAILURE_INTERVAL, TimeUnit.MINUTES), From bdd288e5da9fc7e3bddc10d69d074dbbe2ee0d71 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Wed, 16 Oct 2019 14:27:44 +0800 Subject: [PATCH 43/62] Provide RocksDB's RocksDBStateBackend as a FlinkX checkpoint StateBackend option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flinkx-core/src/main/java/com/dtstack/flinkx/Main.java | 6 ------ .../src/main/java/com/dtstack/flinkx/test/LocalTest.java | 8 -------- 2 files changed, 14 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index 53c734694a..cd4bcb17c5 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -151,12 +151,6 @@ private static StreamExecutionEnvironment openCheckpointConf(StreamExecutionEnvi env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); - String backendPath = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); - if(backendPath != null){ - //set checkpoint save path on file system,hdfs://, file:// - env.setStateBackend(new FsStateBackend(backendPath.trim())); - LOG.info("Set StateBackend:" + backendPath); - } } return env; } diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java index 762b8acf39..0a802370b7 100644 --- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java +++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java @@ -261,14 +261,6 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); - String backendPath = 
properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_DATAURI_KEY); - if(backendPath != null){ - //set checkpoint save path on file system,hdfs://, file:// - env.setStateBackend(new FsStateBackend(backendPath)); - - LOG.info("Set StateBackend:" + backendPath); - } - env.setRestartStrategy(RestartStrategies.failureRateRestart( FAILURE_RATE, Time.of(FAILURE_INTERVAL, TimeUnit.MINUTES),
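Patches 41 to 43 add and then remove the FsStateBackend wiring while a RocksDB option is debated. For reference, a minimal sketch of configuring either backend against the Flink 1.x APIs of this era (the RocksDB variant additionally requires the flink-statebackend-rocksdb dependency):

    import org.apache.flink.contrib.streaming.state.RocksDBStateBackend;
    import org.apache.flink.runtime.state.filesystem.FsStateBackend;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class StateBackendExample {
        public static void configure(StreamExecutionEnvironment env,
                                     String checkpointUri, boolean useRocksDb) throws Exception {
            if (useRocksDb) {
                // the second argument enables incremental checkpoints
                env.setStateBackend(new RocksDBStateBackend(checkpointUri, true));
            } else {
                env.setStateBackend(new FsStateBackend(checkpointUri));
            }
        }
    }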
From 0b4eaeaec06705ad8f11ac3307b3f4fe21862faf Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 18 Oct 2019 13:50:12 +0800 Subject: [PATCH 44/62] Fix dirty data and NullPointerException bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/kudu/writer/KuduOutputFormat.java | 25 +++++++++++-------- .../flinkx/kudu/writer/KuduWriter.java | 2 ++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java index 5cbbd82fb9..3bc4fe03aa 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java @@ -26,6 +26,7 @@ import com.dtstack.flinkx.outputformat.RichOutputFormat; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.ExceptionUtil; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.types.Row; import org.apache.kudu.client.*; @@ -62,17 +63,21 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { session.setMutationBufferSpace(batchInterval); kuduTable = client.openTable(kuduConfig.getTable()); - switch (kuduConfig.getFlushMode().toLowerCase()){ - case "auto_flush_background": - session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); - break; - case "manual_flush": - session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); - break; - default: - session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + if(StringUtils.isBlank(kuduConfig.getFlushMode())){ + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + }else { + switch (kuduConfig.getFlushMode().toLowerCase()) { + case "auto_flush_background": + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_BACKGROUND); + break; + case "manual_flush": + session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH); + break; + default: + session.setFlushMode(SessionConfiguration.FlushMode.AUTO_FLUSH_SYNC); + } + } + } - } @Override protected void writeSingleRecordInternal(Row row) throws WriteRecordException { diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java index 2a4e626b3b..39d979dd2e 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java @@ -79,6 +79,8 @@ public DataStreamSink<Row> writeData(DataStream<Row> dataSet) { builder.setKuduConfig(kuduConfig); builder.setWriteMode(writeMode); builder.setBatchInterval(batchInterval); + builder.setErrors(errors); + builder.setErrorRatio(errorRatio); DtOutputFormatSinkFunction formatSinkFunction = new DtOutputFormatSinkFunction(builder.finish()); DataStreamSink dataStreamSink = dataSet.addSink(formatSinkFunction); From 126622bfb0f9cd480a74684bd341acd5799f821e Mon Sep 17 00:00:00 2001 From: jiangbo Date: Fri, 18 Oct 2019 16:42:30 +0800 Subject: [PATCH 45/62] Fix checkpoint deadlock when real-time collection has no data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../source/DtInputFormatSourceFunction.java | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java index 70fa22dd59..478392203b 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java @@ -69,6 +69,8 @@ public class DtInputFormatSourceFunction extends InputFormatSourceFunction< private transient ListState<FormatState> unionOffsetStates; + private boolean isStream; + @SuppressWarnings("unchecked") public DtInputFormatSourceFunction(InputFormat<OUT, ?> format, TypeInformation<OUT> typeInfo) { super(format, typeInfo); @@ -85,8 +87,11 @@ public void open(Configuration parameters) throws Exception { ((RichInputFormat) format).setRuntimeContext(context); } - if (format instanceof com.dtstack.flinkx.inputformat.RichInputFormat && formatStateMap != null){ - ((com.dtstack.flinkx.inputformat.RichInputFormat) format).setRestoreState(formatStateMap.get(context.getIndexOfThisSubtask())); + if (format instanceof com.dtstack.flinkx.inputformat.RichInputFormat){ + isStream = ((com.dtstack.flinkx.inputformat.RichInputFormat) format).getRestoreConfig().isStream(); + if(formatStateMap != null){ + ((com.dtstack.flinkx.inputformat.RichInputFormat) format).setRestoreState(formatStateMap.get(context.getIndexOfThisSubtask())); + } } format.configure(parameters); @@ -114,14 +119,19 @@ public void run(SourceContext<OUT> ctx) throws Exception { // was called by checking the isRunning flag while (isRunning && !format.reachedEnd()) { - synchronized (ctx.getCheckpointLock()){ + if(isStream){ nextElement = format.nextRecord(nextElement); - if (nextElement != null) { - ctx.collect(nextElement); - } else { - break; + } else { + synchronized (ctx.getCheckpointLock()){ + nextElement = format.nextRecord(nextElement); } } + + if (nextElement != null) { + ctx.collect(nextElement); + } else { + break; + } } format.close(); completedSplitsCounter.inc();
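The locking split introduced by patch 45 is worth spelling out: a blocking nextRecord must not run under the checkpoint lock, or a source with no incoming data holds the lock indefinitely and snapshotState can never run, which is the reported deadlock. A simplified sketch of the loop shape (not the actual Flink source function):

    import java.util.Iterator;

    public class SourceLoopSketch {
        private final Object checkpointLock = new Object();

        public void run(boolean isStream, Iterator<String> records) {
            while (records.hasNext()) {
                String next;
                if (isStream) {
                    next = records.next(); // may block; the lock is not held
                } else {
                    synchronized (checkpointLock) { // bounded batch read
                        next = records.next();
                    }
                }
                System.out.println(next); // stands in for ctx.collect(next)
            }
        }
    }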
From 4d46d4012e8f6c676c92ecc0b9053adc5b8671fd Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 18 Oct 2019 17:58:08 +0800 Subject: [PATCH 46/62] revert --- README.md | 4 ++++ flinkx-test/pom.xml | 11 +++++++++++ pom.xml | 1 + 3 files changed, 16 insertions(+) diff --git a/README.md b/README.md index a503809857..64daf68da2 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,8 @@ reader and writer contain name and parameter, the plugin name and the plugin parameters * [Carbondata reader plugin](docs/carbondatareader.md) * [MySQL binlog reader plugin](docs/binlog.md) * [KafKa reader plugin](docs/kafkareader.md) +* [Kudu reader plugin](docs/kudureader.md) + ### 5.2 Writer plugins @@ -250,6 +252,8 @@ reader and writer contain name and parameter, the plugin name and the plugin parameters * [Hive writer plugin](docs/hivewriter.md) [Resumable transfer and real-time collection overview](docs/restore.md) +* [Kudu writer plugin](docs/kuduwriter.md) + ## 6. Release notes diff --git a/flinkx-test/pom.xml b/flinkx-test/pom.xml index 4452e52497..f699eb3dea 100644 --- a/flinkx-test/pom.xml +++ b/flinkx-test/pom.xml @@ -291,6 +291,17 @@ <version>1.6</version> + <dependency> + <groupId>com.dtstack.flinkx</groupId> + <artifactId>flinkx-kudu-reader</artifactId> + <version>1.6</version> + </dependency> + + <dependency> + <groupId>com.dtstack.flinkx</groupId> + <artifactId>flinkx-kudu-writer</artifactId> + <version>1.6</version> + </dependency> diff --git a/pom.xml b/pom.xml index 1378ebe2b4..d70a7a7193 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,7 @@ <module>flinkx-kafka09</module> <module>flinkx-kafka10</module> <module>flinkx-kafka11</module> + <module>flinkx-kudu</module> From ba820073f2684ace6522bbd4740e9c3e16eb6db6 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Sat, 19 Oct 2019 13:46:49 +0800 Subject: [PATCH 47/62] [Data sync job hangs][19507] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flinkx-core/src/main/java/com/dtstack/flinkx/util/URLUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/URLUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/URLUtil.java index 04ef38c524..d42ad1ce15 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/URLUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/URLUtil.java @@ -64,6 +64,7 @@ public String call() throws Exception{ respBody = EntityUtils.toString(entity,charset); } + response.close(); return respBody; } },MAX_RETRY_TIMES,SLEEP_TIME_MILLI_SECOND,false);
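Patch 47's one-liner closes the HTTP response inside the retry callable, releasing the pooled connection that was previously leaked on every poll and left the sync job hanging. Assuming the Apache HttpClient 4.x types that URLUtil appears to use, the same guarantee reads more simply with try-with-resources:

    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.util.EntityUtils;

    public class HttpFetchSketch {
        // The response (and its pooled connection) is released even when
        // EntityUtils.toString throws.
        public static String fetch(CloseableHttpClient client, String url) throws Exception {
            try (CloseableHttpResponse response = client.execute(new HttpGet(url))) {
                return response.getEntity() == null
                        ? null
                        : EntityUtils.toString(response.getEntity(), "UTF-8");
            }
        }
    }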
From 5462c382b6abde84275d11d65ee85b4a80ce08f5 Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 21 Oct 2019 10:05:00 +0800 Subject: [PATCH 48/62] Fix documentation layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index fdc265c005..67f20c68e7 100644 --- a/README.md +++ b/README.md @@ -250,10 +250,9 @@ reader and writer contain name and parameter, the plugin name and the plugin parameters * [Carbondata writer plugin](docs/carbondatawriter.md) * [Kafka writer plugin](docs/kafkawriter.md) * [Hive writer plugin](docs/hivewriter.md) - -[Resumable transfer and real-time collection overview](docs/restore.md) * [Kudu writer plugin](docs/kuduwriter.md) +[Resumable transfer and real-time collection overview](docs/restore.md) [Enabling Kerberos on data sources](docs/kerberos.md) From 0625596101850ac10da0d692cd30a385500478b9 Mon Sep 17 00:00:00 2001 From: jiangbo Date: Mon, 21 Oct 2019 13:37:42 +0800 Subject: [PATCH 49/62] [Load each FlinkX plugin package with its own classloader][18818] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/java/com/dtstack/flinkx/Main.java | 36 ++++---- .../classloader/ClassLoaderManager.java | 70 +++++++++++++++ .../classloader/ClassLoaderSupplier.java | 44 +++++++++ .../ClassLoaderSupplierCallBack.java | 38 ++++++++ .../PluginUtil.java} | 90 +++++++------------ .../com/dtstack/flinkx/reader/DataReader.java | 13 --- .../flinkx/reader/DataReaderFactory.java | 25 +++--- .../com/dtstack/flinkx/writer/DataWriter.java | 11 --- .../flinkx/writer/DataWriterFactory.java | 23 +++-- .../dev_test_job/stream_template.json | 5 +- 10 files changed, 236 insertions(+), 119 deletions(-) create mode 100644 flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java create mode 100644 flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderSupplier.java create mode 100644 flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderSupplierCallBack.java rename flinkx-core/src/main/java/com/dtstack/flinkx/{plugin/PluginLoader.java => classloader/PluginUtil.java} (54%) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index 84f7b2fd95..c948d873ec 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -19,6 +19,7 @@ package com.dtstack.flinkx; import com.dtstack.flink.api.java.MyLocalStreamEnvironment; +import com.dtstack.flinkx.classloader.ClassLoaderManager; import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.constants.ConfigConstrant; import com.dtstack.flinkx.reader.DataReader; @@ -34,12 +35,14 @@ import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.api.common.time.Time; +import org.apache.flink.client.program.ContextEnvironment; import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings; import org.apache.flink.runtime.state.filesystem.FsStateBackend; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.CheckpointingMode; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.CheckpointConfig; +import org.apache.flink.streaming.api.environment.StreamContextEnvironment; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.transformations.PartitionTransformation; import org.apache.flink.streaming.runtime.partitioner.DTRebalancePartitioner; @@ -48,13 +51,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.reflect.Field; import java.net.URL; import java.net.URLClassLoader; import java.net.URLDecoder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Properties; +import java.util.*; import java.util.concurrent.TimeUnit; /** @@ -122,28 +123,33 @@ public static void main(String[] args) throws Exception { dataWriter.writeData(dataStream); if(env instanceof MyLocalStreamEnvironment) { - List<URL> urlList = new ArrayList<>(); - URLClassLoader readerClassLoader = (URLClassLoader) dataReader.getClass().getClassLoader(); - urlList.addAll(Arrays.asList(readerClassLoader.getURLs())); - URLClassLoader writerClassLoader = (URLClassLoader) dataWriter.getClass().getClassLoader(); - for (URL url : writerClassLoader.getURLs()) { - if (!urlList.contains(url)) { - urlList.add(url); - } - } - ((MyLocalStreamEnvironment) env).setClasspaths(urlList); - if(StringUtils.isNotEmpty(savepointPath)){ ((MyLocalStreamEnvironment) env).setSettings(SavepointRestoreSettings.forPath(savepointPath)); } } + Set<URL> classPathSet = ClassLoaderManager.getClassPath(); + addEnvClassPath(env, classPathSet); + JobExecutionResult result = env.execute(jobIdString); if(env instanceof MyLocalStreamEnvironment){ ResultPrintUtil.printResult(result); } } + private static void addEnvClassPath(StreamExecutionEnvironment env, Set<URL> classPathSet) throws Exception{ + if(env instanceof MyLocalStreamEnvironment){ + ((MyLocalStreamEnvironment) env).setClasspaths(new ArrayList<>(classPathSet)); + } else if(env instanceof StreamContextEnvironment){ + Field field = env.getClass().getDeclaredField("ctx"); + field.setAccessible(true); + ContextEnvironment contextEnvironment= (ContextEnvironment) field.get(env); + for(URL url : classPathSet){ + 
contextEnvironment.getClasspaths().add(url); + } + } + } + private static Properties parseConf(String confStr) throws Exception{ if(StringUtils.isEmpty(confStr)){ return new Properties(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java new file mode 100644 index 0000000000..fced5508c8 --- /dev/null +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.classloader; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URL; +import java.net.URLClassLoader; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; + +/** + * company: www.dtstack.com + * author: toutian + * create: 2019/10/14 + */ +public class ClassLoaderManager { + + private static final Logger LOG = LoggerFactory.getLogger(ClassLoaderManager.class); + + private static Map<String, URLClassLoader> pluginClassLoader = new ConcurrentHashMap<>(); + + public static <R> R newInstance(Set<URL> jarUrls, ClassLoaderSupplier<R> supplier) throws Exception { + ClassLoader classLoader = retrieveClassLoad(new ArrayList<>(jarUrls)); + return ClassLoaderSupplierCallBack.callbackAndReset(supplier, classLoader); + } + + private static ClassLoader retrieveClassLoad(List<URL> jarUrls) { + jarUrls.sort(Comparator.comparing(URL::toString)); + String jarUrlkey = StringUtils.join(jarUrls, "_"); + return pluginClassLoader.computeIfAbsent(jarUrlkey, k -> { + try { + URL[] urls = jarUrls.toArray(new URL[jarUrls.size()]); + ClassLoader parentClassLoader = Thread.currentThread().getContextClassLoader(); + URLClassLoader classLoader = new URLClassLoader(urls, parentClassLoader); + LOG.info("jarUrl:{} create ClassLoad successful...", jarUrlkey); + return classLoader; + } catch (Throwable e) { + LOG.error("retrieve ClassLoad happens error:{}", e); + throw new RuntimeException("retrieve ClassLoad happens error"); + } + }); + } + + public static Set<URL> getClassPath() { + Set<URL> classPaths = new HashSet<>(); + for (Map.Entry<String, URLClassLoader> entry : pluginClassLoader.entrySet()) { + classPaths.addAll(Arrays.asList(entry.getValue().getURLs())); + } + return classPaths; + } +}
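Call shape for the new manager, derived from the factory changes later in this patch (the plugin class name below is a placeholder):

    import com.dtstack.flinkx.classloader.ClassLoaderManager;
    import java.net.URL;
    import java.util.Set;

    public class PluginLoadSketch {
        // Instantiate a plugin class inside that plugin's own classloader; the
        // supplier body runs with the context classloader swapped in and restored.
        public static Object createPlugin(Set<URL> pluginJars, String className) throws Exception {
            return ClassLoaderManager.newInstance(pluginJars, cl -> {
                Class<?> clazz = cl.loadClass(className);
                return clazz.getConstructor().newInstance();
            });
        }
    }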
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.classloader; + +/** + * Represents a supplier of results. + * + *
<p>
There is no requirement that a new or distinct result be returned each + * time the supplier is invoked. + * + *
<p>
This is a functional interface + * whose functional method is {@link #get()}. + * + * @param the type of results supplied by this supplier + * + * @since 1.8 + */ +@FunctionalInterface +public interface ClassLoaderSupplier { + + /** + * Gets a result. + * + * @return a result + */ + T get(ClassLoader cl) throws Exception; +} diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderSupplierCallBack.java b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderSupplierCallBack.java new file mode 100644 index 0000000000..07e1855e54 --- /dev/null +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderSupplierCallBack.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flinkx.classloader; + +/** + * company: www.dtstack.com + * author: toutian + * create: 2019/10/14 + */ +public class ClassLoaderSupplierCallBack { + + public static R callbackAndReset(ClassLoaderSupplier supplier, ClassLoader toSetClassLoader) throws Exception { + ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); + Thread.currentThread().setContextClassLoader(toSetClassLoader); + try { + return supplier.get(toSetClassLoader); + } finally { + Thread.currentThread().setContextClassLoader(oldClassLoader); + } + } +} diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/plugin/PluginLoader.java b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java similarity index 54% rename from flinkx-core/src/main/java/com/dtstack/flinkx/plugin/PluginLoader.java rename to flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java index f7b7c0704c..fd51f7e65c 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/plugin/PluginLoader.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,61 +16,63 @@ * limitations under the License. 
*/ -package com.dtstack.flinkx.plugin; -import com.dtstack.flinkx.loader.DTClassLoader; +package com.dtstack.flinkx.classloader; + import com.dtstack.flinkx.util.SysUtil; -import org.apache.flink.util.Preconditions; + import java.io.File; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** - * FlinkX Plguin Loader - * - * Company: www.dtstack.com - * @author huyifan.zju@163.com + * @author jiangbo + * @date 2019/10/21 */ -public class PluginLoader{ - - private String pluginRoot; +public class PluginUtil { - private String pluginName; + private static final String COMMON_DIR = "common"; - private String pluginClassName; + private static final String READER_SUFFIX = "reader"; - private List urlList = new ArrayList<>(); + private static final String WRITER_SUFFIX = "writer"; - private final String pkgPrefix = "com.dtstack.flinkx."; + private static final String PACKAGE_PREFIX = "com.dtstack.flinkx."; - private final String COMMON_DIR = "common"; + public static Set getJarFileDirPath(String pluginName, String pluginRoot){ + Set urlList = new HashSet<>(); - private final String READER_SUFFIX = "reader"; - - private final String WRITER_SUFFIX = "writer"; - - public PluginLoader(String pluginName, String pluginRoot) { + File commonDir = new File(pluginRoot + File.separator + COMMON_DIR + File.separator); + File pluginDir = new File(pluginRoot + File.separator + pluginName); - Preconditions.checkArgument(pluginName != null && pluginName.trim().length() != 0); - Preconditions.checkArgument(pluginRoot != null); + try { + urlList.addAll(SysUtil.findJarsInDir(commonDir)); + urlList.addAll(SysUtil.findJarsInDir(pluginDir)); - this.pluginName = pluginName; - this.pluginRoot = pluginRoot; - String lowerPluginName = pluginName.toLowerCase(); + return urlList; + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } - if(lowerPluginName.endsWith(READER_SUFFIX)) { - pluginClassName = pkgPrefix + camelize(pluginName, READER_SUFFIX); - } else if(lowerPluginName.endsWith(WRITER_SUFFIX)) { - pluginClassName = pkgPrefix + camelize(pluginName, WRITER_SUFFIX); + public static String getPluginClassName(String pluginName){ + String pluginClassName; + if(pluginName.toLowerCase().endsWith(READER_SUFFIX)) { + pluginClassName = PACKAGE_PREFIX + camelize(pluginName, READER_SUFFIX); + } else if(pluginName.toLowerCase().endsWith(WRITER_SUFFIX)) { + pluginClassName = PACKAGE_PREFIX + camelize(pluginName, WRITER_SUFFIX); } else { throw new IllegalArgumentException("Plugin Name should end with reader, writer or database"); } + return pluginClassName; } - private String camelize(String pluginName, String suffix) { + private static String camelize(String pluginName, String suffix) { int pos = pluginName.indexOf(suffix); String left = pluginName.substring(0, pos); left = left.toLowerCase(); @@ -81,30 +83,4 @@ private String camelize(String pluginName, String suffix) { sb.append(suffix.substring(0,1).toUpperCase() + suffix.substring(1)); return sb.toString(); } - - public Class getPluginClass() { - File commonDir = new File(pluginRoot + File.separator + COMMON_DIR + File.separator); - File pluginDir = new File(pluginRoot + File.separator + pluginName); - - try { - urlList.addAll(SysUtil.findJarsInDir(commonDir)); - urlList.addAll(SysUtil.findJarsInDir(pluginDir)); - } catch (MalformedURLException e) { - throw new RuntimeException(e); - } - - - URL[] urls = urlList.toArray(new URL[urlList.size()]); 
- DTClassLoader classLoader = new DTClassLoader(urls); - Class clazz = null; - - try { - clazz = classLoader.loadClass(this.pluginClassName); - } catch (ClassNotFoundException e) { - throw new RuntimeException(e); - } - - return clazz; - } - } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReader.java b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReader.java index 485d4eed71..acf5e1c58c 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReader.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReader.java @@ -21,7 +21,6 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.config.RestoreConfig; import com.dtstack.flinkx.config.DirtyConfig; -import com.dtstack.flinkx.plugin.PluginLoader; import org.apache.flink.api.common.io.InputFormat; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.TypeExtractor; @@ -51,8 +50,6 @@ public abstract class DataReader { protected String monitorUrls; - protected PluginLoader pluginLoader; - protected RestoreConfig restoreConfig; protected List srcCols = new ArrayList<>(); @@ -80,16 +77,6 @@ public void setSrcCols(List srcCols) { this.srcCols = srcCols; } - public PluginLoader getPluginLoader() { - return pluginLoader; - } - - public void setPluginLoader(PluginLoader pluginLoader) { - this.pluginLoader = pluginLoader; - } - - protected List jarNameList = new ArrayList<>(); - protected DataReader(DataTransferConfig config, StreamExecutionEnvironment env) { this.env = env; this.numPartitions = config.getJob().getSetting().getSpeed().getChannel(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReaderFactory.java b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReaderFactory.java index 574a8948f4..6b4e8c3e31 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReaderFactory.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/DataReaderFactory.java @@ -18,11 +18,13 @@ package com.dtstack.flinkx.reader; -import com.dtstack.flinkx.config.ReaderConfig; +import com.dtstack.flinkx.classloader.ClassLoaderManager; +import com.dtstack.flinkx.classloader.PluginUtil; import com.dtstack.flinkx.config.DataTransferConfig; -import com.dtstack.flinkx.plugin.PluginLoader; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import java.lang.reflect.Constructor; +import java.net.URL; +import java.util.Set; /** * The factory of DataReader @@ -35,20 +37,19 @@ public class DataReaderFactory { private DataReaderFactory() { } - public static DataReader getDataReader(DataTransferConfig config, StreamExecutionEnvironment env) { - - ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader(); - - PluginLoader pluginLoader = new PluginLoader(readerConfig.getName().toLowerCase(), config.getPluginRoot()); - Class clz = pluginLoader.getPluginClass(); - try { - Constructor constructor = clz.getConstructor(DataTransferConfig.class, StreamExecutionEnvironment.class); - return (DataReader) constructor.newInstance(config, env); + String pluginName = config.getJob().getContent().get(0).getReader().getName(); + String pluginClassName = PluginUtil.getPluginClassName(pluginName); + Set urlList = PluginUtil.getJarFileDirPath(pluginName, config.getPluginRoot()); + + return ClassLoaderManager.newInstance(urlList, cl -> { + Class clazz = cl.loadClass(pluginClassName); + Constructor constructor = clazz.getConstructor(DataTransferConfig.class, 
StreamExecutionEnvironment.class); + return (DataReader)constructor.newInstance(config, env); + }); } catch (Exception e) { throw new RuntimeException(e); } - } } \ No newline at end of file diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriter.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriter.java index ec03fcca0a..fa852f2916 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriter.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriter.java @@ -21,7 +21,6 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.config.DirtyConfig; import com.dtstack.flinkx.config.RestoreConfig; -import com.dtstack.flinkx.plugin.PluginLoader; import com.dtstack.flinkx.reader.MetaColumn; import org.apache.commons.lang.StringUtils; import org.apache.flink.api.common.io.OutputFormat; @@ -47,8 +46,6 @@ public abstract class DataWriter { protected String monitorUrls; - protected PluginLoader pluginLoader; - protected Integer errors; protected Double errorRatio; @@ -69,14 +66,6 @@ public void setSrcCols(List srcCols) { this.srcCols = srcCols; } - public PluginLoader getPluginLoader() { - return pluginLoader; - } - - public void setPluginLoader(PluginLoader pluginLoader) { - this.pluginLoader = pluginLoader; - } - public DataWriter(DataTransferConfig config) { this.monitorUrls = config.getMonitorUrls(); this.restoreConfig = config.getJob().getSetting().getRestoreConfig(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriterFactory.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriterFactory.java index bc6772f246..4d375c2d51 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriterFactory.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DataWriterFactory.java @@ -18,10 +18,13 @@ package com.dtstack.flinkx.writer; -import com.dtstack.flinkx.config.WriterConfig; +import com.dtstack.flinkx.classloader.ClassLoaderManager; +import com.dtstack.flinkx.classloader.PluginUtil; import com.dtstack.flinkx.config.DataTransferConfig; -import com.dtstack.flinkx.plugin.PluginLoader; + import java.lang.reflect.Constructor; +import java.net.URL; +import java.util.Set; /** * The factory of Writer plugins @@ -34,14 +37,16 @@ public class DataWriterFactory { private DataWriterFactory() {} public static DataWriter getDataWriter(DataTransferConfig config) { - - WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter(); - PluginLoader pluginLoader = new PluginLoader(writerConfig.getName(), config.getPluginRoot()); - Class clz = pluginLoader.getPluginClass(); - try { - Constructor constructor = clz.getConstructor(DataTransferConfig.class); - return (DataWriter) constructor.newInstance(config); + String pluginName = config.getJob().getContent().get(0).getWriter().getName(); + String pluginClassName = PluginUtil.getPluginClassName(pluginName); + Set urlList = PluginUtil.getJarFileDirPath(pluginName, config.getPluginRoot()); + + return ClassLoaderManager.newInstance(urlList, cl -> { + Class clazz = cl.loadClass(pluginClassName); + Constructor constructor = clazz.getConstructor(DataTransferConfig.class); + return (DataWriter)constructor.newInstance(config); + }); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/flinkx-test/src/main/resources/dev_test_job/stream_template.json b/flinkx-test/src/main/resources/dev_test_job/stream_template.json index 9bb64b06fe..5ead6d562c 100644 --- 
a/flinkx-test/src/main/resources/dev_test_job/stream_template.json +++ b/flinkx-test/src/main/resources/dev_test_job/stream_template.json @@ -4,7 +4,7 @@ { "reader": { "parameter": { - "sliceRecordCount": ["10"], + "sliceRecordCount": ["1000"], "column": [ { "name": "name", @@ -29,6 +29,7 @@ }, "writer": { "parameter": { + "print": true, "column": [ { "name": "cf:name", @@ -57,7 +58,7 @@ "hbase.zookeeper.quorum": "172.16.8.193:2181" } }, - "name": "hbasewriter" + "name": "streamwriter" } } ], From 9f570ce3ec92796a71a3c3d731cf0fe68ade006b Mon Sep 17 00:00:00 2001 From: tudou Date: Tue, 22 Oct 2019 20:35:46 +0800 Subject: [PATCH 50/62] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=A2=9E=E9=87=8F?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E5=BC=80=E5=90=AF=E6=96=AD=E7=82=B9=E7=BB=AD?= =?UTF-8?q?=E4=BC=A0=E6=97=B6=E6=97=A0=E6=B3=95=E8=8E=B7=E5=8F=96=E6=AD=A3?= =?UTF-8?q?=E7=A1=AEstartLocation=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../JdbcInputFormat.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 3b1de2f1b9..1646577111 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -155,7 +155,7 @@ public void configure(Configuration configuration) { @Override public void openInternal(InputSplit inputSplit) throws IOException { try { - LOG.info(inputSplit.toString()); + LOG.info("inputSplit = {}", inputSplit); ClassUtil.forName(drivername, getClass().getClassLoader()); @@ -449,7 +449,7 @@ protected String buildQuerySql(InputSplit inputSplit){ String querySql = queryTemplate; if (inputSplit == null){ - LOG.warn("Executing sql is: '{}'", querySql); + LOG.warn("inputSplit = null, Executing sql is: '{}'", querySql); return querySql; } @@ -469,9 +469,13 @@ protected String buildQuerySql(InputSplit inputSplit){ } } else { String startLocation = getLocation(restoreColumn.getType(), formatState.getState()); + if(StringUtils.isNotBlank(startLocation)){ + LOG.info("update startLocation, before = {}, after = {}", jdbcInputSplit.getStartLocation(), startLocation); + jdbcInputSplit.setStartLocation(startLocation); + } String restoreFilter = buildIncrementFilter(restoreColumn.getType(), restoreColumn.getName(), - startLocation, + jdbcInputSplit.getStartLocation(), jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); @@ -484,7 +488,7 @@ protected String buildQuerySql(InputSplit inputSplit){ } querySql = querySql.replace(DBUtil.INCREMENT_FILTER_PLACEHOLDER, StringUtils.EMPTY); - } else if (incrementConfig.isIncrement()){ + }else if (incrementConfig.isIncrement()){ querySql = buildIncrementSql(jdbcInputSplit, querySql); } @@ -506,7 +510,6 @@ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql) jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); - if(StringUtils.isNotEmpty(incrementFilter)){ incrementFilter = " and " + incrementFilter; } @@ -524,7 +527,8 @@ private String buildIncrementSql(JdbcInputSplit jdbcInputSplit, String querySql) * @param useMaxFunc 是否保存结束位置数据 * @return */ - protected String buildIncrementFilter(String incrementColType,String incrementCol, String startLocation,String endLocation, 
String customSql, boolean useMaxFunc){ + protected String buildIncrementFilter(String incrementColType, String incrementCol, String startLocation, String endLocation, String customSql, boolean useMaxFunc){ + LOG.info("buildIncrementFilter, incrementColType = {}, incrementCol = {}, startLocation = {}, endLocation = {}, customSql = {}, useMaxFunc = {}", incrementColType, incrementCol, startLocation, endLocation, customSql, useMaxFunc); StringBuilder filter = new StringBuilder(128); if (org.apache.commons.lang.StringUtils.isNotEmpty(customSql)){ From 2c6d81c73493c8a67b30bb7b2b03148eb510ab3f Mon Sep 17 00:00:00 2001 From: tudou Date: Wed, 23 Oct 2019 19:27:23 +0800 Subject: [PATCH 51/62] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E9=87=87=E9=9B=86=E8=84=8F=E6=95=B0=E6=8D=AE=E7=BB=9F=E8=AE=A1?= =?UTF-8?q?=E5=8F=8A=E5=86=99=E5=85=A5bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/outputformat/RichOutputFormat.java | 9 ++++++++ .../flinkx/writer/DirtyDataManager.java | 21 ++++++++++++++----- .../flinkx/hive/writer/HiveOutputFormat.java | 4 +++- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java index 4a977b6a74..8e69ede14b 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/RichOutputFormat.java @@ -165,6 +165,14 @@ public void setDirtyHadoopConfig(Map dirtyHadoopConfig) { this.dirtyHadoopConfig = dirtyHadoopConfig; } + public void setDirtyDataManager(DirtyDataManager dirtyDataManager) { + this.dirtyDataManager = dirtyDataManager; + } + + public void setErrorLimiter(ErrorLimiter errorLimiter) { + this.errorLimiter = errorLimiter; + } + public void setSrcFieldNames(List srcFieldNames) { this.srcFieldNames = srcFieldNames; } @@ -296,6 +304,7 @@ private void openDirtyDataManager(){ if(StringUtils.isNotBlank(dirtyPath)) { dirtyDataManager = new DirtyDataManager(dirtyPath, dirtyHadoopConfig, srcFieldNames.toArray(new String[srcFieldNames.size()])); dirtyDataManager.open(); + LOG.info("init dirtyDataManager, {}", this.dirtyDataManager); } } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index 27dcb88f09..08b19cd594 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -28,14 +28,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.BufferedWriter; import java.io.IOException; import java.io.OutputStreamWriter; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.UUID; +import java.util.*; + import static com.dtstack.flinkx.writer.WriteErrorTypes.*; /** @@ -46,10 +46,15 @@ */ public class DirtyDataManager { + private static final Logger LOG = LoggerFactory.getLogger(DirtyDataManager.class); + private String location; private Configuration config; private BufferedWriter bw; private String[] fieldNames; + private String jobId; + private long lastFlushTime; + private long flushInterval = 60000; private static final 
String FIELD_DELIMITER = "\u0001"; private static final String LINE_DELIMITER = "\n"; @@ -83,6 +88,12 @@ public String writeData(Row row, WriteRecordException ex) { try { bw.write(line); bw.write(LINE_DELIMITER); + long currentTimeMillis = System.currentTimeMillis(); + if(currentTimeMillis >= lastFlushTime + flushInterval){ + LOG.info("flush dirty data, currentTimeMillis = {}, lastFlushTime = {}", currentTimeMillis, lastFlushTime); + bw.flush(); + lastFlushTime = currentTimeMillis; + } return errorType; } catch (IOException e) { throw new RuntimeException(e); diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index 3ba054dc7e..c3ab1ad73f 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -47,7 +47,7 @@ */ public class HiveOutputFormat extends RichOutputFormat { - private static Logger logger = LoggerFactory.getLogger(HiveOutputFormat.class); + private static final Logger logger = LoggerFactory.getLogger(HiveOutputFormat.class); private static final String SP = "/"; @@ -222,6 +222,8 @@ private Pair getHdfsOutputFormat(String tablePath, hdfsOutputFormatBuilder.setColumnTypes(tableInfo.getColumnTypes()); outputFormat = (HdfsOutputFormat) hdfsOutputFormatBuilder.finish(); + outputFormat.setDirtyDataManager(dirtyDataManager); + outputFormat.setErrorLimiter(errorLimiter); outputFormat.setRuntimeContext(getRuntimeContext()); outputFormat.configure(parameters); outputFormat.open(taskNumber, numTasks); From 2e10ae735bc3d23a3245254ce5a501aa3c1534e7 Mon Sep 17 00:00:00 2001 From: tudou Date: Wed, 23 Oct 2019 19:30:51 +0800 Subject: [PATCH 52/62] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/java/com/dtstack/flinkx/writer/DirtyDataManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index 08b19cd594..18618872ee 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -52,7 +52,6 @@ public class DirtyDataManager { private Configuration config; private BufferedWriter bw; private String[] fieldNames; - private String jobId; private long lastFlushTime; private long flushInterval = 60000; From 3bee6c0c5741f03e254574d90ef0cb977eafd22d Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 25 Oct 2019 09:44:42 +0800 Subject: [PATCH 53/62] =?UTF-8?q?=E8=84=8F=E6=95=B0=E6=8D=AE=E5=AE=9E?= =?UTF-8?q?=E6=97=B6=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flinkx/writer/DirtyDataManager.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index 18618872ee..84e366522c 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -26,8 
+26,11 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.types.Row; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSOutputStream; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,8 +55,8 @@ public class DirtyDataManager { private Configuration config; private BufferedWriter bw; private String[] fieldNames; - private long lastFlushTime; - private long flushInterval = 60000; + FSDataOutputStream stream; + EnumSet syncFlags = EnumSet.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH); private static final String FIELD_DELIMITER = "\u0001"; private static final String LINE_DELIMITER = "\n"; @@ -85,14 +88,10 @@ public String writeData(Row row, WriteRecordException ex) { String errorType = retrieveCategory(ex); String line = StringUtils.join(new String[]{content,errorType, gson.toJson(ex.toString()), DateUtil.timestampToString(new Date()) }, FIELD_DELIMITER); try { - bw.write(line); - bw.write(LINE_DELIMITER); - long currentTimeMillis = System.currentTimeMillis(); - if(currentTimeMillis >= lastFlushTime + flushInterval){ - LOG.info("flush dirty data, currentTimeMillis = {}, lastFlushTime = {}", currentTimeMillis, lastFlushTime); - bw.flush(); - lastFlushTime = currentTimeMillis; - } + stream.writeChars(line); + stream.writeChars(LINE_DELIMITER); + DFSOutputStream dfsOutputStream = (DFSOutputStream) stream.getWrappedStream(); + dfsOutputStream.hsync(syncFlags); return errorType; } catch (IOException e) { throw new RuntimeException(e); @@ -117,8 +116,9 @@ public void open() { FileSystem fs = FileSystem.get(config); Path path = new Path(location); bw = new BufferedWriter(new OutputStreamWriter(fs.create(path, true))); - } catch (IOException e) { - throw new RuntimeException(e); + stream = fs.create(path, true); + } catch (Exception e) { + throw new RuntimeException("Open dirty manager error", e); } } @@ -132,5 +132,4 @@ public void close() { } } } - } From 65de17ef87d3ec45a18423c3c41e171969609c3f Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 25 Oct 2019 09:48:40 +0800 Subject: [PATCH 54/62] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=85=B3=E9=97=AD?= =?UTF-8?q?=E6=B5=81=E5=AF=B9=E8=B1=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/writer/DirtyDataManager.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index 84e366522c..271ba830f3 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -53,7 +53,6 @@ public class DirtyDataManager { private String location; private Configuration config; - private BufferedWriter bw; private String[] fieldNames; FSDataOutputStream stream; EnumSet syncFlags = EnumSet.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH); @@ -115,7 +114,6 @@ public void open() { try { FileSystem fs = FileSystem.get(config); Path path = new Path(location); - bw = new BufferedWriter(new OutputStreamWriter(fs.create(path, true))); stream = fs.create(path, true); } catch (Exception e) { throw new RuntimeException("Open dirty manager error", e); @@ -123,10 +121,10 @@ public void open() { } public 
void close() { - if(bw != null) { + if(stream != null) { try { - bw.flush(); - bw.close(); + stream.flush(); + stream.close(); } catch (IOException e) { throw new RuntimeException(e); } From 9e36941f853e9814823cd042a391b88d57a819fa Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 25 Oct 2019 11:51:37 +0800 Subject: [PATCH 55/62] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E5=AF=BC=E5=85=A5=E5=8C=85=E5=8F=8Alogger?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/dtstack/flinkx/writer/DirtyDataManager.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index 271ba830f3..5b87980499 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -31,12 +31,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSOutputStream; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.BufferedWriter; import java.io.IOException; -import java.io.OutputStreamWriter; import java.util.*; import static com.dtstack.flinkx.writer.WriteErrorTypes.*; @@ -49,8 +45,6 @@ */ public class DirtyDataManager { - private static final Logger LOG = LoggerFactory.getLogger(DirtyDataManager.class); - private String location; private Configuration config; private String[] fieldNames; From dd95fd0746d320b553a826b8f74d6a997f975149 Mon Sep 17 00:00:00 2001 From: tudou Date: Fri, 25 Oct 2019 11:54:27 +0800 Subject: [PATCH 56/62] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=8F=98=E9=87=8F?= =?UTF-8?q?=E4=BF=AE=E9=A5=B0=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/java/com/dtstack/flinkx/writer/DirtyDataManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index 5b87980499..73d09bd2a8 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -48,8 +48,8 @@ public class DirtyDataManager { private String location; private Configuration config; private String[] fieldNames; - FSDataOutputStream stream; - EnumSet syncFlags = EnumSet.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH); + private FSDataOutputStream stream; + private EnumSet syncFlags = EnumSet.of(HdfsDataOutputStream.SyncFlag.UPDATE_LENGTH); private static final String FIELD_DELIMITER = "\u0001"; private static final String LINE_DELIMITER = "\n"; From 3f52f56bbaeab710e3f967e38acb9b261f9cfe7f Mon Sep 17 00:00:00 2001 From: jiangbo Date: Sat, 26 Oct 2019 17:49:37 +0800 Subject: [PATCH 57/62] =?UTF-8?q?[flinkx=E6=8F=92=E8=BF=9B=E5=8C=85?= =?UTF-8?q?=E6=8C=89=E6=AF=8F=E4=B8=AA=E6=8F=92=E4=BB=B6=E4=B8=80=E4=B8=AA?= =?UTF-8?q?classloader][18818]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/java/com/dtstack/flinkx/Main.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index c948d873ec..a503beac9e 100644 --- 
a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -144,8 +144,16 @@ private static void addEnvClassPath(StreamExecutionEnvironment env, Set cla Field field = env.getClass().getDeclaredField("ctx"); field.setAccessible(true); ContextEnvironment contextEnvironment= (ContextEnvironment) field.get(env); - for(URL url : classPathSet){ - contextEnvironment.getClasspaths().add(url); + + List originUrlList = new ArrayList<>(); + for (URL url : contextEnvironment.getClasspaths()) { + originUrlList.add(url.toString()); + } + + for (URL url : classPathSet) { + if (!originUrlList.contains(url.toString())){ + contextEnvironment.getClasspaths().add(url); + } } } } From 43111e6bc6044d9c3a0b8ec3905714b7cba94fec Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 28 Oct 2019 11:06:17 +0800 Subject: [PATCH 58/62] =?UTF-8?q?mongoDB=E6=8F=92=E4=BB=B6=E6=94=AF?= =?UTF-8?q?=E6=8C=81URL=E8=BF=9E=E6=8E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/mongodb/MongodbConfigKeys.java | 2 ++ .../java/com/dtstack/flinkx/mongodb/MongodbUtil.java | 11 +++++++---- .../dtstack/flinkx/mongodb/reader/MongodbReader.java | 4 ++++ .../dtstack/flinkx/mongodb/writer/MongodbWriter.java | 4 ++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbConfigKeys.java b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbConfigKeys.java index 1f5617b915..760052591b 100644 --- a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbConfigKeys.java +++ b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbConfigKeys.java @@ -32,6 +32,8 @@ public class MongodbConfigKeys { public final static String KEY_PASSWORD = "password"; + public final static String KEY_URL = "url"; + public final static String KEY_DATABASE = "database"; public final static String KEY_COLLECTION = "collectionName"; diff --git a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java index 917bc3e64c..2cad2ec418 100644 --- a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java +++ b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java @@ -23,7 +23,6 @@ import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.DateUtil; import com.dtstack.flinkx.util.TelnetUtil; -import com.google.common.collect.Lists; import com.mongodb.*; import com.mongodb.client.MongoCursor; import org.apache.commons.collections.MapUtils; @@ -36,6 +35,7 @@ import java.math.BigDecimal; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -83,12 +83,15 @@ public static MongoClient getMongoClient(Map mongodbConfig){ String username = MapUtils.getString(mongodbConfig, KEY_USERNAME); String password = MapUtils.getString(mongodbConfig, KEY_PASSWORD); String database = MapUtils.getString(mongodbConfig, KEY_DATABASE); + String url = MapUtils.getString(mongodbConfig, KEY_URL); - if(StringUtils.isEmpty(username)){ + if(StringUtils.isNotBlank(url)){ + mongoClient = new MongoClient(new MongoClientURI(url)); + }else if(StringUtils.isEmpty(username)){ 
mongoClient = new MongoClient(serverAddress,options); } else { MongoCredential credential = MongoCredential.createScramSha1Credential(username, database, password.toCharArray()); - List credentials = Lists.newArrayList(); + List credentials = new ArrayList<>(); credentials.add(credential); mongoClient = new MongoClient(serverAddress,credentials,options); @@ -147,7 +150,7 @@ private static Object convertField(Object val,MetaColumn column){ * parse server address from hostPorts string */ private static List getServerAddress(String hostPorts) { - List addresses = Lists.newArrayList(); + List addresses = new ArrayList<>(); for (String hostPort : hostPorts.split(HOST_SPLIT_REGEX)) { if(hostPort.length() == 0){ diff --git a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbReader.java b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbReader.java index 2a32a73aa1..1d6466cc12 100644 --- a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbReader.java +++ b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbReader.java @@ -46,6 +46,8 @@ public class MongodbReader extends DataReader { protected String password; + protected String url; + protected String database; protected String collection; @@ -65,6 +67,7 @@ public MongodbReader(DataTransferConfig config, StreamExecutionEnvironment env) hostPorts = readerConfig.getParameter().getStringVal(KEY_HOST_PORTS); username = readerConfig.getParameter().getStringVal(KEY_USERNAME); password = readerConfig.getParameter().getStringVal(KEY_PASSWORD); + url = readerConfig.getParameter().getStringVal(KEY_URL); database = readerConfig.getParameter().getStringVal(KEY_DATABASE); collection = readerConfig.getParameter().getStringVal(KEY_COLLECTION); filter = readerConfig.getParameter().getStringVal(KEY_FILTER); @@ -75,6 +78,7 @@ public MongodbReader(DataTransferConfig config, StreamExecutionEnvironment env) mongodbConfig.put(KEY_HOST_PORTS, hostPorts); mongodbConfig.put(KEY_USERNAME, username); mongodbConfig.put(KEY_PASSWORD, password); + mongodbConfig.put(KEY_URL, url); mongodbConfig.put(KEY_DATABASE, database); } diff --git a/flinkx-mongodb/flinkx-mongodb-writer/src/main/java/com/dtstack/flinkx/mongodb/writer/MongodbWriter.java b/flinkx-mongodb/flinkx-mongodb-writer/src/main/java/com/dtstack/flinkx/mongodb/writer/MongodbWriter.java index 85ba03bd4c..33b80d1586 100644 --- a/flinkx-mongodb/flinkx-mongodb-writer/src/main/java/com/dtstack/flinkx/mongodb/writer/MongodbWriter.java +++ b/flinkx-mongodb/flinkx-mongodb-writer/src/main/java/com/dtstack/flinkx/mongodb/writer/MongodbWriter.java @@ -50,6 +50,8 @@ public class MongodbWriter extends DataWriter { protected String database; + protected String url; + protected String collection; protected List columns; @@ -65,6 +67,7 @@ public MongodbWriter(DataTransferConfig config) { hostPorts = writerConfig.getParameter().getStringVal(KEY_HOST_PORTS); username = writerConfig.getParameter().getStringVal(KEY_USERNAME); password = writerConfig.getParameter().getStringVal(KEY_PASSWORD); + url = writerConfig.getParameter().getStringVal(KEY_URL); database = writerConfig.getParameter().getStringVal(KEY_DATABASE); collection = writerConfig.getParameter().getStringVal(KEY_COLLECTION); mode = writerConfig.getParameter().getStringVal(KEY_MODE, WriteMode.INSERT.getMode()); @@ -76,6 +79,7 @@ public MongodbWriter(DataTransferConfig config) { mongodbConfig.put(KEY_HOST_PORTS, 
hostPorts); mongodbConfig.put(KEY_USERNAME, username); mongodbConfig.put(KEY_PASSWORD, password); + mongodbConfig.put(KEY_URL, url); mongodbConfig.put(KEY_DATABASE, database); } From 2b7a965f204ecc27ba546bec7070d6ee89d8edf8 Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 28 Oct 2019 12:00:38 +0800 Subject: [PATCH 59/62] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=A2=9E=E9=87=8F?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E5=BC=80=E5=90=AF=E6=96=AD=E7=82=B9=E7=BB=AD?= =?UTF-8?q?=E4=BC=A0=E6=97=B6=E6=97=A0=E6=B3=95=E8=8E=B7=E5=8F=96=E6=AD=A3?= =?UTF-8?q?=E7=A1=AEstartLocation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 662a27fe05..0736f11075 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -444,8 +444,12 @@ private String buildQuerySql(InputSplit inputSplit){ } } else { String startLocation = getLocation(restoreColumn.getType(), formatState.getState()); + if(StringUtils.isNotBlank(startLocation)){ + LOG.info("update startLocation, before = {}, after = {}", jdbcInputSplit.getStartLocation(), startLocation); + jdbcInputSplit.setStartLocation(startLocation); + } String restoreFilter = DBUtil.buildIncrementFilter(databaseInterface, restoreColumn.getType(), - restoreColumn.getName(), startLocation, jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); + restoreColumn.getName(), jdbcInputSplit.getStartLocation(), jdbcInputSplit.getEndLocation(), customSql, incrementConfig.isUseMaxFunc()); if(StringUtils.isNotEmpty(restoreFilter)){ restoreFilter = " and " + restoreFilter; From beddb0ee58ec4eecb177608827845d8d2af5dcd5 Mon Sep 17 00:00:00 2001 From: tudou Date: Mon, 28 Oct 2019 17:18:57 +0800 Subject: [PATCH 60/62] =?UTF-8?q?fix=20flinkx=E4=BB=A5yarn=EF=BC=8CyarnPer?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E8=BF=90=E8=A1=8C=E6=97=B6=EF=BC=8Cjob?= =?UTF-8?q?=E5=8F=82=E6=95=B0=E8=A7=A3=E6=9E=90=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/dtstack/flinkx/launcher/Launcher.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java index 11fd434817..94faabf40c 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java +++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java @@ -33,6 +33,7 @@ import org.apache.flink.util.Preconditions; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FilenameFilter; import java.net.MalformedURLException; @@ -76,7 +77,8 @@ private static List analyzeUserClasspath(String content, String pluginRoot) List urlList = new ArrayList<>(); - DataTransferConfig config = DataTransferConfig.parse(content); + String jobJson = readJob(content); + DataTransferConfig config = DataTransferConfig.parse(jobJson); Preconditions.checkNotNull(pluginRoot); @@ -177,4 +179,17 @@ public boolean accept(File dir, 
String name) { return coreJarFileName; } + + private static String readJob(String job) { + try { + File file = new File(job); + FileInputStream in = new FileInputStream(file); + byte[] fileContent = new byte[(int) file.length()]; + in.read(fileContent); + in.close(); + return new String(fileContent, "UTF-8"); + } catch (Exception e){ + throw new RuntimeException(e); + } + } } From 3e8f329860015ab6e69b17792585646039085b7f Mon Sep 17 00:00:00 2001 From: jiangbo Date: Tue, 29 Oct 2019 10:18:22 +0800 Subject: [PATCH 61/62] Merger 1.5_dev into 1.8_dev --- flinkx-core/src/main/java/com/dtstack/flinkx/Main.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index b27a3746a3..768461fc6d 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -32,10 +32,8 @@ import org.apache.commons.lang.StringUtils; import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.common.restartstrategy.RestartStrategies; -import org.apache.flink.api.common.time.Time; import org.apache.flink.client.program.ContextEnvironment; import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings; -import org.apache.flink.runtime.state.filesystem.FsStateBackend; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.CheckpointingMode; import org.apache.flink.streaming.api.datastream.DataStream; @@ -50,14 +48,11 @@ import java.lang.reflect.Field; import java.net.URL; -import java.net.URLClassLoader; import java.net.URLDecoder; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Properties; import java.util.*; -import java.util.concurrent.TimeUnit; /** * The main class entry From 437d2154e7c103a9ecb2fceef40401ec934c5494 Mon Sep 17 00:00:00 2001 From: tudou Date: Tue, 29 Oct 2019 15:14:42 +0800 Subject: [PATCH 62/62] fix npe --- .../api/functions/source/DtInputFormatSourceFunction.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java index 478392203b..d136f05916 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/functions/source/DtInputFormatSourceFunction.java @@ -17,6 +17,7 @@ package org.apache.flink.streaming.api.functions.source; +import com.dtstack.flinkx.config.RestoreConfig; import com.dtstack.flinkx.restore.FormatState; import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.io.InputFormat; @@ -88,7 +89,8 @@ public void open(Configuration parameters) throws Exception { } if (format instanceof com.dtstack.flinkx.inputformat.RichInputFormat){ - isStream = ((com.dtstack.flinkx.inputformat.RichInputFormat) format).getRestoreConfig().isStream(); + RestoreConfig restoreConfig = ((com.dtstack.flinkx.inputformat.RichInputFormat) format).getRestoreConfig(); + isStream = restoreConfig != null && restoreConfig.isStream(); if(formatStateMap != null){ ((com.dtstack.flinkx.inputformat.RichInputFormat) format).setRestoreState(formatStateMap.get(context.getIndexOfThisSubtask())); }
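For reference, the per-plugin classloader mechanism introduced in this series (ClassLoaderManager, ClassLoaderSupplier, ClassLoaderSupplierCallBack, and the DataReaderFactory/DataWriterFactory rewrites) is spread across several diffs above. The following is a minimal, self-contained sketch of that pattern, assuming only the JDK; the class name ClassLoaderSwapSketch, the loaderFor helper, and the demo main() are illustrative stand-ins and not part of the FlinkX source.

    import java.net.URL;
    import java.net.URLClassLoader;
    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    public class ClassLoaderSwapSketch {

        /** Callback that runs with a plugin-specific classloader, as in the patch. */
        @FunctionalInterface
        interface ClassLoaderSupplier<T> {
            T get(ClassLoader cl) throws Exception;
        }

        // One URLClassLoader per distinct jar set, keyed by the sorted URL list,
        // mirroring ClassLoaderManager.retrieveClassLoad in the patch.
        private static final Map<String, URLClassLoader> CACHE = new ConcurrentHashMap<>();

        static URLClassLoader loaderFor(Set<URL> jarUrls) {
            List<URL> sorted = new ArrayList<>(jarUrls);
            sorted.sort(Comparator.comparing(URL::toString));
            StringBuilder key = new StringBuilder();
            for (URL url : sorted) {
                key.append(url).append('_');
            }
            return CACHE.computeIfAbsent(key.toString(), k ->
                    new URLClassLoader(sorted.toArray(new URL[0]),
                            Thread.currentThread().getContextClassLoader()));
        }

        // Swap the thread's context classloader for the duration of the callback and
        // always restore the old one, as ClassLoaderSupplierCallBack.callbackAndReset does.
        static <T> T newInstance(Set<URL> jarUrls, ClassLoaderSupplier<T> supplier) throws Exception {
            ClassLoader old = Thread.currentThread().getContextClassLoader();
            URLClassLoader cl = loaderFor(jarUrls);
            Thread.currentThread().setContextClassLoader(cl);
            try {
                return supplier.get(cl);
            } finally {
                Thread.currentThread().setContextClassLoader(old);
            }
        }

        public static void main(String[] args) throws Exception {
            // Illustrative call only: with an empty jar set the loader delegates to its
            // parent. A real caller would pass the plugin's jar URLs and invoke
            // cl.loadClass(pluginClassName) inside the callback, as the factories do.
            String loaded = newInstance(new HashSet<>(),
                    cl -> cl.loadClass("java.lang.String").getName());
            System.out.println(loaded); // prints "java.lang.String"
        }
    }

Caching loaders by the sorted, joined jar list means a reader and a writer built from the same jars share one loader, while the try/finally swap keeps a plugin's classloader from leaking into later calls on the same thread; together these are what let each plugin run with its own classloader inside a single Flink job, as the commit subject of PATCH 57 describes.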