com.dtstack.flinkx
diff --git a/flinkx-kafka/flinkx-kafka-reader/pom.xml b/flinkx-kafka/flinkx-kafka-reader/pom.xml
index 908f857952..b2b936be42 100644
--- a/flinkx-kafka/flinkx-kafka-reader/pom.xml
+++ b/flinkx-kafka/flinkx-kafka-reader/pom.xml
@@ -68,7 +68,7 @@
+ tofile="${basedir}/../../syncplugins/kafkareader/${project.name}-${package.name}.jar"/>
diff --git a/flinkx-kafka/flinkx-kafka-writer/pom.xml b/flinkx-kafka/flinkx-kafka-writer/pom.xml
index 652593dd25..84efdd2b15 100644
--- a/flinkx-kafka/flinkx-kafka-writer/pom.xml
+++ b/flinkx-kafka/flinkx-kafka-writer/pom.xml
@@ -67,7 +67,7 @@
+ tofile="${basedir}/../../syncplugins/kafkawriter/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kafka09/flinkx-kafka09-reader/.gitignore b/flinkx-kafka09/flinkx-kafka09-reader/.gitignore
deleted file mode 100644
index ca7ca55c4c..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-reader/.gitignore
+++ /dev/null
@@ -1,13 +0,0 @@
-target
-.idea/
-/.idea/*
-*.pyc
-*.swp
-.DS_Store
-/target
-target
-.class
-.project
-.classpath
-*.eclipse.*
-*.iml
diff --git a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/client/Kafka09Client.java b/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/client/Kafka09Client.java
deleted file mode 100644
index faacd2710d..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/client/Kafka09Client.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.kafka09.client;
-
-import com.dtstack.flinkx.decoder.IDecode;
-import com.dtstack.flinkx.kafkabase.client.IClient;
-import com.dtstack.flinkx.kafkabase.entity.kafkaState;
-import com.dtstack.flinkx.kafkabase.format.KafkaBaseInputFormat;
-import com.dtstack.flinkx.util.ExceptionUtil;
-import kafka.consumer.ConsumerIterator;
-import kafka.consumer.KafkaStream;
-import kafka.message.MessageAndMetadata;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Map;
-
-/**
- * Date: 2019/12/25
- * Company: www.dtstack.com
- *
- * @author tudou
- */
-public class Kafka09Client implements IClient {
-
- private static final Logger LOG = LoggerFactory.getLogger(Kafka09Client.class);
-
- private volatile boolean running = true;
- private KafkaStream<byte[], byte[]> mStream;
- private IDecode decode;
- private KafkaBaseInputFormat format;
-
- public Kafka09Client(KafkaStream<byte[], byte[]> aStream, KafkaBaseInputFormat format) {
- this.mStream = aStream;
- this.decode = format.getDecode();
- this.format = format;
- }
-
- @Override
- public void run() {
- Thread.currentThread().setUncaughtExceptionHandler((t, e) -> {
- LOG.warn("KafkaClient run failed, Throwable = {}", ExceptionUtil.getErrorMessage(e));
- });
- try {
- while (running) {
- ConsumerIterator<byte[], byte[]> it = mStream.iterator();
- while (it.hasNext()) {
- String m = null;
- try {
- MessageAndMetadata<byte[], byte[]> next = it.next();
- processMessage(new String(next.message(), format.getEncoding()),
- next.topic(),
- next.partition(),
- next.offset(),
- null);
- } catch (Exception e) {
- LOG.error("process event = {}, e = {}", m, ExceptionUtil.getErrorMessage(e));
- }
- }
- }
- } catch (Exception t) {
- LOG.error("kafka Consumer fetch error, e = {}", ExceptionUtil.getErrorMessage(t));
- }
- }
-
- @Override
- public void processMessage(String message, String topic, Integer partition, Long offset, Long timestamp) {
- Map event = decode.decode(message);
- if (event != null && event.size() > 0) {
- format.processEvent(Pair.of(event, new kafkaState(topic, partition, offset, timestamp)));
- }
- }
-
- @Override
- public void close() {
- running = false;
- }
-}
diff --git a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/client/Kafka09Consumer.java b/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/client/Kafka09Consumer.java
deleted file mode 100644
index 110ef449d5..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/client/Kafka09Consumer.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.kafka09.client;
-
-import com.dtstack.flinkx.kafkabase.KafkaInputSplit;
-import com.dtstack.flinkx.kafkabase.client.KafkaBaseConsumer;
-import com.dtstack.flinkx.kafkabase.format.KafkaBaseInputFormat;
-import kafka.consumer.KafkaStream;
-
-import java.util.Properties;
-
-/**
- * @company: www.dtstack.com
- * @author: toutian
- * @create: 2019/7/5
- */
-public class Kafka09Consumer extends KafkaBaseConsumer {
- private KafkaStream<byte[], byte[]> mStream;
-
- public Kafka09Consumer(KafkaStream<byte[], byte[]> aStream) {
- super(new Properties());
- this.mStream = aStream;
- }
-
- @Override
- public KafkaBaseConsumer createClient(String topic, String group, KafkaBaseInputFormat format, KafkaInputSplit kafkaInputSplit) {
- client = new Kafka09Client(mStream, format);
- return this;
- }
-}
diff --git a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/format/Kafka09InputFormat.java b/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/format/Kafka09InputFormat.java
deleted file mode 100644
index bc4bcd9992..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/format/Kafka09InputFormat.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.kafka09.format;
-
-import com.dtstack.flinkx.kafka09.client.Kafka09Consumer;
-import com.dtstack.flinkx.kafkabase.KafkaInputSplit;
-import com.dtstack.flinkx.kafkabase.enums.KafkaVersion;
-import com.dtstack.flinkx.kafkabase.format.KafkaBaseInputFormat;
-import com.dtstack.flinkx.kafkabase.util.KafkaUtil;
-import kafka.consumer.ConsumerConfig;
-import kafka.consumer.KafkaStream;
-import kafka.javaapi.consumer.ConsumerConnector;
-import org.apache.flink.core.io.InputSplit;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * @company: www.dtstack.com
- * @author: toutian
- * @create: 2019/7/5
- */
-public class Kafka09InputFormat extends KafkaBaseInputFormat {
-
- private transient ConsumerConnector consumerConnector;
-
- @Override
- public void openInputFormat() throws IOException {
- super.openInputFormat();
- Properties props = KafkaUtil.geneConsumerProp(consumerSettings, mode);
- consumerConnector = kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
- }
-
- @Override
- protected void openInternal(InputSplit inputSplit) {
- Map<String, Integer> topicCountMap = Collections.singletonMap(topic, 1);
- Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumerConnector.createMessageStreams(topicCountMap);
-
- List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
- for (final KafkaStream<byte[], byte[]> stream : streams) {
- consumer = new Kafka09Consumer(stream);
- }
- consumer.createClient(topic, groupId, this, (KafkaInputSplit)inputSplit).execute();
- running = true;
- }
-
- @Override
- protected void closeInternal() {
- if (running) {
- consumerConnector.commitOffsets(true);
- consumerConnector.shutdown();
- consumer.close();
- running = false;
- LOG.warn("input kafka release.");
- }
- }
-
- @Override
- public KafkaVersion getKafkaVersion() {
- return KafkaVersion.kafka09;
- }
-}
diff --git a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Reader.java b/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Reader.java
deleted file mode 100644
index 31c6c6c5b4..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Reader.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.kafka09.reader;
-
-import com.dtstack.flinkx.config.DataTransferConfig;
-import com.dtstack.flinkx.kafka09.format.Kafka09InputFormat;
-import com.dtstack.flinkx.kafkabase.KafkaConfigKeys;
-import com.dtstack.flinkx.kafkabase.format.KafkaBaseInputFormatBuilder;
-import com.dtstack.flinkx.kafkabase.reader.KafkaBaseReader;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-
-/**
- * @company: www.dtstack.com
- * @author: toutian
- * @create: 2019/7/4
- */
-public class Kafka09Reader extends KafkaBaseReader {
-
- public Kafka09Reader(DataTransferConfig config, StreamExecutionEnvironment env) {
- super(config, env);
- // keep compatibility with legacy job scripts
- String id = consumerSettings.get(KafkaConfigKeys.GROUP_ID);
- if(StringUtils.isNotBlank(id)){
- super.groupId = id;
- }
- }
-
- @Override
- public KafkaBaseInputFormatBuilder getBuilder(){
- return new KafkaBaseInputFormatBuilder(new Kafka09InputFormat());
- }
-}
diff --git a/flinkx-kafka09/flinkx-kafka09-writer/.gitignore b/flinkx-kafka09/flinkx-kafka09-writer/.gitignore
deleted file mode 100644
index ca7ca55c4c..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-writer/.gitignore
+++ /dev/null
@@ -1,13 +0,0 @@
-target
-.idea/
-/.idea/*
-*.pyc
-*.swp
-.DS_Store
-/target
-target
-.class
-.project
-.classpath
-*.eclipse.*
-*.iml
diff --git a/flinkx-kafka09/flinkx-kafka09-writer/pom.xml b/flinkx-kafka09/flinkx-kafka09-writer/pom.xml
deleted file mode 100644
index cf7820adba..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-writer/pom.xml
+++ /dev/null
@@ -1,79 +0,0 @@
-
-
-
- flinkx-kafka09
- com.dtstack.flinkx
- 1.6
-
- 4.0.0
-
- flinkx-kafka09-writer
-
-
-
- com.dtstack.flinkx
- flinkx-kb-writer
- 1.6
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-shade-plugin
- 3.1.0
-
-
- package
-
- shade
-
-
- false
-
-
- com.google.common
- shade.core.com.google.common
-
-
- com.google.thirdparty
- shade.core.com.google.thirdparty
-
-
-
-
-
-
-
- maven-antrun-plugin
- 1.2
-
-
- copy-resources
-
- package
-
- run
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/flinkx-kafka09/flinkx-kafka09-writer/src/main/java/com/dtstack/flinkx/kafka09/format/Kafka09OutputFormat.java b/flinkx-kafka09/flinkx-kafka09-writer/src/main/java/com/dtstack/flinkx/kafka09/format/Kafka09OutputFormat.java
deleted file mode 100644
index add067e1c7..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-writer/src/main/java/com/dtstack/flinkx/kafka09/format/Kafka09OutputFormat.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.kafka09.format;
-
-import com.dtstack.flinkx.kafkabase.format.KafkaBaseOutputFormat;
-import com.dtstack.flinkx.kafkabase.util.Formatter;
-import com.dtstack.flinkx.kafkabase.writer.HeartBeatController;
-import com.dtstack.flinkx.util.MapUtil;
-import com.dtstack.flinkx.util.TelnetUtil;
-import org.apache.flink.configuration.Configuration;
-import org.apache.kafka.clients.producer.KafkaProducer;
-import org.apache.kafka.clients.producer.ProducerRecord;
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.Objects;
-import java.util.concurrent.TimeUnit;
-
-/**
- * @company: www.dtstack.com
- * @author: toutian
- * @create: 2019/7/5
- */
-public class Kafka09OutputFormat extends KafkaBaseOutputFormat {
-
- private String encoding;
- private String brokerList;
- private transient KafkaProducer producer;
- private HeartBeatController heartBeatController;
-
- @Override
- public void configure(Configuration parameters) {
- props.put("key.serializer", org.apache.kafka.common.serialization.StringSerializer.class.getName());
- props.put("value.serializer", org.apache.kafka.common.serialization.StringSerializer.class.getName());
- props.put("producer.type", "sync");
- props.put("compression.codec", "none");
- props.put("request.required.acks", "1");
- props.put("batch.num.messages", "1024");
- props.put("partitioner.class", DefaultPartitioner.class.getName());
-
- props.put("client.id", "");
-
- if (producerSettings != null) {
- props.putAll(producerSettings);
- }
- props.put("metadata.broker.list", brokerList);
- producer = new KafkaProducer<>(props);
-
- LOG.info("brokerList {}", brokerList);
- String broker = brokerList.split(",")[0];
- String[] split = broker.split(":");
-
- try {
- TelnetUtil.telnet(split[0], Integer.parseInt(split[1]));
- }catch (Exception e){
- throw new RuntimeException("telnet error, brokerList = " + brokerList);
- }
- }
-
- @Override
- protected void emit(Map event) throws IOException {
- heartBeatController.acquire();
- String tp = Formatter.format(event, topic, timezone);
- producer.send(new ProducerRecord<>(tp, event.toString(), MapUtil.writeValueAsString(event)), (metadata, exception) -> {
- if (Objects.nonNull(exception)) {
- LOG.warn("kafka writeSingleRecordInternal error:{}", exception.getMessage(), exception);
- heartBeatController.onFailed(exception);
- } else {
- heartBeatController.onSuccess();
- }
- });
- }
-
- @Override
- public void closeInternal() {
- LOG.info("kafka output closeInternal.");
- // without an explicit timeout the default close timeout is Long.MAX_VALUE, which can block this method indefinitely, so specify a 20s timeout explicitly
- producer.close(KafkaBaseOutputFormat.CLOSE_TIME, TimeUnit.MILLISECONDS);
- }
-
- public void setEncoding(String encoding) {
- this.encoding = encoding;
- }
-
- public void setBrokerList(String brokerList) {
- this.brokerList = brokerList;
- }
-
- public void setHeartBeatController(HeartBeatController heartBeatController) {
- this.heartBeatController = heartBeatController;
- }
-}
diff --git a/flinkx-kafka09/flinkx-kafka09-writer/src/main/java/com/dtstack/flinkx/kafka09/writer/Kafka09Writer.java b/flinkx-kafka09/flinkx-kafka09-writer/src/main/java/com/dtstack/flinkx/kafka09/writer/Kafka09Writer.java
deleted file mode 100644
index 4caa61ba91..0000000000
--- a/flinkx-kafka09/flinkx-kafka09-writer/src/main/java/com/dtstack/flinkx/kafka09/writer/Kafka09Writer.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.kafka09.writer;
-
-import com.dtstack.flinkx.config.DataTransferConfig;
-import com.dtstack.flinkx.config.WriterConfig;
-import com.dtstack.flinkx.kafka09.format.Kafka09OutputFormat;
-import com.dtstack.flinkx.kafkabase.KafkaConfigKeys;
-import com.dtstack.flinkx.kafkabase.writer.HeartBeatController;
-import com.dtstack.flinkx.kafkabase.writer.KafkaBaseWriter;
-import org.apache.commons.lang.StringUtils;
-import org.apache.flink.streaming.api.datastream.DataStream;
-import org.apache.flink.streaming.api.datastream.DataStreamSink;
-import org.apache.flink.types.Row;
-
-import java.nio.charset.StandardCharsets;
-
-/**
- * @company: www.dtstack.com
- * @author: toutian
- * @create: 2019/7/4
- */
-public class Kafka09Writer extends KafkaBaseWriter {
-
- private String encoding;
- private String brokerList;
-
- public Kafka09Writer(DataTransferConfig config) {
- super(config);
- WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter();
- encoding = writerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_ENCODING, StandardCharsets.UTF_8.name());
- brokerList = writerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_BROKER_LIST);
- if (StringUtils.isBlank(brokerList)) {
- throw new RuntimeException("brokerList can not be empty!");
- }
- }
-
- @Override
- public DataStreamSink> writeData(DataStream dataSet) {
- Kafka09OutputFormat format = new Kafka09OutputFormat();
- format.setTimezone(timezone);
- format.setEncoding(encoding);
- format.setTopic(topic);
- format.setTableFields(tableFields);
- format.setBrokerList(brokerList);
- format.setProducerSettings(producerSettings);
- format.setRestoreConfig(restoreConfig);
- format.setHeartBeatController(new HeartBeatController());
-
- format.setDirtyPath(dirtyPath);
- format.setDirtyHadoopConfig(dirtyHadoopConfig);
- format.setSrcFieldNames(srcCols);
- return createOutput(dataSet, format);
- }
-}
diff --git a/flinkx-kafka09/pom.xml b/flinkx-kafka09/pom.xml
deleted file mode 100644
index 3a8a141fcc..0000000000
--- a/flinkx-kafka09/pom.xml
+++ /dev/null
@@ -1,74 +0,0 @@
-
-
-
- flinkx-all
- com.dtstack.flinkx
- 1.6
-
- 4.0.0
- pom
-
- flinkx-kafka09
-
-
- flinkx-kafka09-reader
- flinkx-kafka09-writer
-
-
-
-
- com.dtstack.flinkx
- flinkx-core
- 1.6
- provided
-
-
- ch.qos.logback
- logback-classic
-
-
- ch.qos.logback
- logback-core
-
-
-
-
- org.apache.kafka
- kafka_2.11
- 0.9.0.1
-
-
- slf4j-api
- org.slf4j
-
-
- slf4j-log4j12
- org.slf4j
-
-
- log4j
- log4j
-
-
- scala-library
- org.scala-lang
-
-
- netty
- io.netty
-
-
- snappy-java
- org.xerial.snappy
-
-
- junit
- junit
-
-
-
-
-
-
\ No newline at end of file
diff --git a/flinkx-kafka10/flinkx-kafka10-reader/pom.xml b/flinkx-kafka10/flinkx-kafka10-reader/pom.xml
index 6d1d444762..9170712ee0 100644
--- a/flinkx-kafka10/flinkx-kafka10-reader/pom.xml
+++ b/flinkx-kafka10/flinkx-kafka10-reader/pom.xml
@@ -67,7 +67,7 @@
+ tofile="${basedir}/../../syncplugins/kafka10reader/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kafka10/flinkx-kafka10-writer/pom.xml b/flinkx-kafka10/flinkx-kafka10-writer/pom.xml
index 44a97b4ce4..40234ddd1c 100644
--- a/flinkx-kafka10/flinkx-kafka10-writer/pom.xml
+++ b/flinkx-kafka10/flinkx-kafka10-writer/pom.xml
@@ -67,7 +67,7 @@
+ tofile="${basedir}/../../syncplugins/kafka10writer/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kafka11/flinkx-kafka11-reader/pom.xml b/flinkx-kafka11/flinkx-kafka11-reader/pom.xml
index 280ae7bf26..4ce9b9fd74 100644
--- a/flinkx-kafka11/flinkx-kafka11-reader/pom.xml
+++ b/flinkx-kafka11/flinkx-kafka11-reader/pom.xml
@@ -67,7 +67,7 @@
+ tofile="${basedir}/../../syncplugins/kafka11reader/${project.name}-${package.name}.jar"/>
diff --git a/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/client/Kafka11Client.java b/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/client/Kafka11Client.java
index 2b6ff0bf0d..007afbc7c5 100644
--- a/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/client/Kafka11Client.java
+++ b/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/client/Kafka11Client.java
@@ -25,7 +25,7 @@
* @author tudou
*/
public class Kafka11Client implements IClient {
- private static Logger LOG = LoggerFactory.getLogger(Kafka11Consumer.class);
+ private static Logger LOG = LoggerFactory.getLogger(Kafka11Client.class);
private volatile boolean running = true;
private long pollTimeout;
private boolean blankIgnore;
diff --git a/flinkx-kafka11/flinkx-kafka11-writer/pom.xml b/flinkx-kafka11/flinkx-kafka11-writer/pom.xml
index 7ca967cbd6..907e45992d 100644
--- a/flinkx-kafka11/flinkx-kafka11-writer/pom.xml
+++ b/flinkx-kafka11/flinkx-kafka11-writer/pom.xml
@@ -68,7 +68,7 @@
+ tofile="${basedir}/../../syncplugins/kafka11writer/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/KafkaConfigKeys.java b/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/KafkaConfigKeys.java
index d24df8c172..7d44c2bd0f 100755
--- a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/KafkaConfigKeys.java
+++ b/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/KafkaConfigKeys.java
@@ -44,11 +44,6 @@ public class KafkaConfigKeys {
public static final String KEY_OFFSET = "offset";
public static final String KEY_TIMESTAMP = "timestamp";
public static List KEY_ASSIGNER_DEFAULT_RULE = Arrays.asList("database", "schema", "table");
- /**
- * kafka 09
- */
- public static final String KEY_ENCODING = "encoding";
- public static final String KEY_BROKER_LIST = "brokerList";
public static final String GROUP_ID = "group.id";
diff --git a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/entity/kafkaState.java b/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/entity/kafkaState.java
index 7ee7669f54..ca8e7babb9 100644
--- a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/entity/kafkaState.java
+++ b/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/entity/kafkaState.java
@@ -78,9 +78,9 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
kafkaState that = (kafkaState) o;
- return partition == that.partition &&
- offset == that.offset &&
- timestamp == that.timestamp &&
+ return offset.equals(that.offset) &&
+ timestamp.equals(that.timestamp) &&
+ partition.equals(that.partition) &&
topic.equals(that.topic);
}
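
Note: the equals() rewrite above matters because kafkaState's partition, offset and timestamp fields are boxed (Integer/Long, as the processMessage(String, String, Integer, Long, Long) signature earlier in this diff suggests), and == on boxed values compares references, not contents. A minimal standalone sketch of the pitfall (illustrative only, not project code):

    import java.util.Objects;

    public class BoxedEqualityDemo {
        public static void main(String[] args) {
            Long a = 128L;   // outside the autobox cache (-128..127), so a and b are distinct objects
            Long b = 128L;
            System.out.println(a == b);               // typically false: reference comparison
            System.out.println(a.equals(b));          // true: value comparison
            System.out.println(Objects.equals(a, b)); // true, and also null-safe
        }
    }
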
diff --git a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormat.java b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormat.java
index 5f09e75d31..1daeee612c 100644
--- a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormat.java
+++ b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormat.java
@@ -59,7 +59,6 @@ public class KafkaBaseInputFormat extends BaseRichInputFormat {
protected String groupId;
protected String codec;
protected boolean blankIgnore;
- protected String encoding;
protected StartupMode mode;
protected String offset;
protected Long timestamp;
@@ -174,10 +173,6 @@ public Object getState(){
return formatState == null ? null : formatState.getState();
}
- public String getEncoding() {
- return encoding;
- }
-
public IDecode getDecode() {
return decode;
}
diff --git a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormatBuilder.java b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormatBuilder.java
index b140a6747d..f46655dbf6 100644
--- a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormatBuilder.java
+++ b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/format/KafkaBaseInputFormatBuilder.java
@@ -64,10 +64,6 @@ public void setConsumerSettings(Map consumerSettings) {
format.consumerSettings = consumerSettings;
}
- public void setEncoding(String encoding) {
- format.encoding = encoding;
- }
-
public void setMode(StartupMode mode) {
format.mode = mode;
}
diff --git a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java
index dacfc4ba0e..614dbbd7ae 100644
--- a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java
+++ b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java
@@ -29,7 +29,6 @@
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;
-import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
@@ -59,7 +58,6 @@ public KafkaBaseReader(DataTransferConfig config, StreamExecutionEnvironment env
groupId = readerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_GROUP_ID, "default");
codec = readerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_CODEC, "text");
blankIgnore = readerConfig.getParameter().getBooleanVal(KafkaConfigKeys.KEY_BLANK_IGNORE, false);
- encoding = readerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_ENCODING, StandardCharsets.UTF_8.name());
mode = readerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_MODE, StartupMode.GROUP_OFFSETS.name);
offset = readerConfig.getParameter().getStringVal(KafkaConfigKeys.KEY_OFFSET, "");
timestamp = readerConfig.getParameter().getLongVal(KafkaConfigKeys.KEY_TIMESTAMP, -1L);
@@ -76,7 +74,6 @@ public DataStream readData() {
builder.setGroupId(groupId);
builder.setCodec(codec);
builder.setBlankIgnore(blankIgnore);
- builder.setEncoding(encoding);
builder.setConsumerSettings(consumerSettings);
builder.setMode(StartupMode.getFromName(mode));
builder.setOffset(offset);
diff --git a/flinkx-kingbase/flinkx-kingbase-core/pom.xml b/flinkx-kingbase/flinkx-kingbase-core/pom.xml
index 7ce87ab996..899f3ecdfd 100644
--- a/flinkx-kingbase/flinkx-kingbase-core/pom.xml
+++ b/flinkx-kingbase/flinkx-kingbase-core/pom.xml
@@ -1,6 +1,6 @@
-
flinkx-kingbase
diff --git a/flinkx-kingbase/flinkx-kingbase-reader/pom.xml b/flinkx-kingbase/flinkx-kingbase-reader/pom.xml
index 613433025e..38c18a7e05 100644
--- a/flinkx-kingbase/flinkx-kingbase-reader/pom.xml
+++ b/flinkx-kingbase/flinkx-kingbase-reader/pom.xml
@@ -1,6 +1,6 @@
-
flinkx-kingbase
@@ -94,7 +94,7 @@
+ tofile="${basedir}/../../syncplugins/kingbasereader/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kingbase/flinkx-kingbase-writer/pom.xml b/flinkx-kingbase/flinkx-kingbase-writer/pom.xml
index a4a8831300..5a1bd1a2bc 100644
--- a/flinkx-kingbase/flinkx-kingbase-writer/pom.xml
+++ b/flinkx-kingbase/flinkx-kingbase-writer/pom.xml
@@ -1,6 +1,6 @@
-
flinkx-kingbase
@@ -94,7 +94,7 @@
+ tofile="${basedir}/../../syncplugins/kingbasewriter/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kingbase/pom.xml b/flinkx-kingbase/pom.xml
index 9915d66707..14ceabbcf7 100644
--- a/flinkx-kingbase/pom.xml
+++ b/flinkx-kingbase/pom.xml
@@ -1,6 +1,6 @@
-
flinkx-all
diff --git a/flinkx-kudu/flinkx-kudu-reader/pom.xml b/flinkx-kudu/flinkx-kudu-reader/pom.xml
index f3ba6a805a..53568ff52a 100644
--- a/flinkx-kudu/flinkx-kudu-reader/pom.xml
+++ b/flinkx-kudu/flinkx-kudu-reader/pom.xml
@@ -82,7 +82,7 @@
+ tofile="${basedir}/../../syncplugins/kudureader/${project.name}-${package.name}.jar" />
diff --git a/flinkx-kudu/flinkx-kudu-writer/pom.xml b/flinkx-kudu/flinkx-kudu-writer/pom.xml
index c1a0afdb78..c0030ab1bd 100644
--- a/flinkx-kudu/flinkx-kudu-writer/pom.xml
+++ b/flinkx-kudu/flinkx-kudu-writer/pom.xml
@@ -82,7 +82,7 @@
+ tofile="${basedir}/../../syncplugins/kuduwriter/${project.name}-${package.name}.jar" />
diff --git a/flinkx-launcher/pom.xml b/flinkx-launcher/pom.xml
index 9c52b9d958..8d26b84b5d 100644
--- a/flinkx-launcher/pom.xml
+++ b/flinkx-launcher/pom.xml
@@ -13,9 +13,9 @@
- ch.qos.logback
- logback-classic
- 1.1.7
+ ch.qos.logback
+ logback-classic
+ 1.1.7
@@ -30,8 +30,6 @@
-
-
com.google.code.gson
gson
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java
index 2a464515f5..4d6c46c039 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java
@@ -124,13 +124,14 @@ public static JobGraph buildJobGraph(Options launcherOptions, String[] remoteArg
}
PackagedProgram program = PackagedProgram.newBuilder()
.setJarFile(jarFile)
- .setUserClassPaths(urlList)
.setEntryPointClassName(MAIN_CLASS)
.setConfiguration(launcherOptions.loadFlinkConfiguration())
.setSavepointRestoreSettings(savepointRestoreSettings)
.setArguments(remoteArgs)
.build();
- return PackagedProgramUtils.createJobGraph(program, launcherOptions.loadFlinkConfiguration(), Integer.parseInt(launcherOptions.getParallelism()), false);
+ JobGraph jobGraph = PackagedProgramUtils.createJobGraph(program, launcherOptions.loadFlinkConfiguration(), Integer.parseInt(launcherOptions.getParallelism()), false);
+ jobGraph.addJars(urlList);
+ return jobGraph;
}
public static List analyzeUserClasspath(String content, String pluginRoot) {
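
Note: the Launcher change above stops handing the plugin jar URLs to PackagedProgram.setUserClassPaths and instead attaches them to the generated JobGraph via addJars, so they are shipped with the job submission. A minimal sketch of that call sequence (hypothetical helper class; the Flink calls are the same ones used in the hunk above):

    import java.net.URL;
    import java.util.List;

    import org.apache.flink.client.program.PackagedProgram;
    import org.apache.flink.client.program.PackagedProgramUtils;
    import org.apache.flink.client.program.ProgramInvocationException;
    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.runtime.jobgraph.JobGraph;

    public final class JobGraphJarAttachSketch {
        // Build the JobGraph first, then attach the user/plugin jars so they travel with the job
        // instead of having to be visible on the client-side program classpath.
        static JobGraph buildWithJars(PackagedProgram program,
                                      Configuration flinkConfig,
                                      int parallelism,
                                      List<URL> pluginJars) throws ProgramInvocationException {
            JobGraph jobGraph = PackagedProgramUtils.createJobGraph(program, flinkConfig, parallelism, false);
            jobGraph.addJars(pluginJars);
            return jobGraph;
        }
    }
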
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/FlinkPerJobUtil.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/FlinkPerJobUtil.java
index 2ee9028014..5718ba229a 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/FlinkPerJobUtil.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/FlinkPerJobUtil.java
@@ -43,9 +43,9 @@ public class FlinkPerJobUtil {
* the minimum memory should be higher than the min heap cutoff
*/
public final static int MIN_JM_MEMORY = 768;
- public final static int MIN_TM_MEMORY = 768;
- public final static String JOBMANAGER_MEMORY_MB = "jobmanager.memory.mb";
- public final static String TASKMANAGER_MEMORY_MB = "taskmanager.memory.mb";
+ public final static int MIN_TM_MEMORY = 1024;
+ public final static String JOBMANAGER_MEMORY_MB = "jobmanager.memory.process.size";
+ public final static String TASKMANAGER_MEMORY_MB = "taskmanager.memory.process.size";
public final static String SLOTS_PER_TASKMANAGER = "taskmanager.slots";
private static final Logger LOG = LoggerFactory.getLogger(FlinkPerJobUtil.class);
@@ -56,16 +56,16 @@ public class FlinkPerJobUtil {
* @return
*/
public static ClusterSpecification createClusterSpecification(Properties conProp) {
- int jobmanagerMemoryMb = 768;
- int taskmanagerMemoryMb = 768;
+ int jobManagerMemoryMb = 768;
+ int taskManagerMemoryMb = 1024;
int slotsPerTaskManager = 1;
if (conProp != null) {
- if (conProp.containsKey(JOBMANAGER_MEMORY_MB)) {
- jobmanagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(JOBMANAGER_MEMORY_MB)));
+ if (conProp.containsKey(JOBMANAGER_MEMORY_MB)) {
+ jobManagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(JOBMANAGER_MEMORY_MB)));
}
- if (conProp.containsKey(TASKMANAGER_MEMORY_MB)) {
- taskmanagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(TASKMANAGER_MEMORY_MB)));
+ if (conProp.containsKey(TASKMANAGER_MEMORY_MB)) {
+ taskManagerMemoryMb = Math.max(MIN_TM_MEMORY, ValueUtil.getInt(conProp.getProperty(TASKMANAGER_MEMORY_MB)));
}
if (conProp.containsKey(SLOTS_PER_TASKMANAGER)) {
slotsPerTaskManager = ValueUtil.getInt(conProp.get(SLOTS_PER_TASKMANAGER));
@@ -73,8 +73,8 @@ public static ClusterSpecification createClusterSpecification(Properties conProp
}
return new ClusterSpecification.ClusterSpecificationBuilder()
- .setMasterMemoryMB(jobmanagerMemoryMb)
- .setTaskManagerMemoryMB(taskmanagerMemoryMb)
+ .setMasterMemoryMB(jobManagerMemoryMb)
+ .setTaskManagerMemoryMB(taskManagerMemoryMb)
.setSlotsPerTaskManager(slotsPerTaskManager)
.createClusterSpecification();
}
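
Note: the constants above move from the pre-1.10 heap keys (jobmanager.memory.mb / taskmanager.memory.mb) to the unified process-memory keys, and the TaskManager floor/default is raised to 1024 MB. A minimal sketch of the clamping logic, assuming the values arrive as plain MB integers in the job Properties (key literals mirror the constants above; note that Properties#containsKey must be used, since Properties#contains checks values, not keys):

    import java.util.Properties;

    public final class ClusterMemorySketch {
        static final int MIN_JM_MEMORY_MB = 768;
        static final int MIN_TM_MEMORY_MB = 1024;

        // Resolve JobManager/TaskManager memory in MB, clamping to the minimums.
        static int[] resolveMemory(Properties conProp) {
            int jm = MIN_JM_MEMORY_MB;
            int tm = MIN_TM_MEMORY_MB;
            if (conProp != null) {
                if (conProp.containsKey("jobmanager.memory.process.size")) {
                    jm = Math.max(MIN_JM_MEMORY_MB, Integer.parseInt(conProp.getProperty("jobmanager.memory.process.size")));
                }
                if (conProp.containsKey("taskmanager.memory.process.size")) {
                    tm = Math.max(MIN_TM_MEMORY_MB, Integer.parseInt(conProp.getProperty("taskmanager.memory.process.size")));
                }
            }
            return new int[]{jm, tm};
        }

        public static void main(String[] args) {
            Properties p = new Properties();
            p.setProperty("taskmanager.memory.process.size", "2048");
            int[] mem = resolveMemory(p);
            System.out.println("jm=" + mem[0] + " MB, tm=" + mem[1] + " MB"); // jm=768 MB, tm=2048 MB
        }
    }
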
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/PerJobClusterClientBuilder.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/PerJobClusterClientBuilder.java
index cf8c8caef5..f13251a5ed 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/PerJobClusterClientBuilder.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perJob/PerJobClusterClientBuilder.java
@@ -26,8 +26,8 @@
import org.apache.flink.runtime.security.SecurityUtils;
import org.apache.flink.yarn.YarnClientYarnClusterInformationRetriever;
import org.apache.flink.yarn.YarnClusterDescriptor;
-import org.apache.flink.yarn.cli.FlinkYarnSessionCli;
import org.apache.flink.yarn.configuration.YarnConfigOptionsInternal;
+import org.apache.flink.yarn.configuration.YarnLogConfigUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -99,13 +99,13 @@ public YarnClusterDescriptor createPerJobClusterDescriptor(Options launcherOptio
throw new IllegalArgumentException("The Flink jar path is null");
}
- File log4j = new File(launcherOptions.getFlinkconf()+ File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME);
+ File log4j = new File(launcherOptions.getFlinkconf()+ File.separator + YarnLogConfigUtil.CONFIG_FILE_LOG4J_NAME);
if(log4j.exists()){
- flinkConfig.setString(YarnConfigOptionsInternal.APPLICATION_LOG_CONFIG_FILE, launcherOptions.getFlinkconf()+ File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME);
+ flinkConfig.setString(YarnConfigOptionsInternal.APPLICATION_LOG_CONFIG_FILE, launcherOptions.getFlinkconf()+ File.separator + YarnLogConfigUtil.CONFIG_FILE_LOG4J_NAME);
} else{
- File logback = new File(launcherOptions.getFlinkconf()+ File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME);
+ File logback = new File(launcherOptions.getFlinkconf()+ File.separator + YarnLogConfigUtil.CONFIG_FILE_LOGBACK_NAME);
if(logback.exists()){
- flinkConfig.setString(YarnConfigOptionsInternal.APPLICATION_LOG_CONFIG_FILE, launcherOptions.getFlinkconf()+ File.separator + FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME);
+ flinkConfig.setString(YarnConfigOptionsInternal.APPLICATION_LOG_CONFIG_FILE, launcherOptions.getFlinkconf()+ File.separator + YarnLogConfigUtil.CONFIG_FILE_LOGBACK_NAME);
}
}
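
Note: FlinkYarnSessionCli no longer exposes the log config file name constants in this Flink version; they live in YarnLogConfigUtil ("log4j.properties" / "logback.xml"). The selection logic above can be sketched on its own like this (hypothetical helper; the conf directory is whatever launcherOptions.getFlinkconf() returns):

    import java.io.File;

    public final class YarnLogConfigSketch {
        // Prefer log4j.properties, fall back to logback.xml, otherwise leave the option unset.
        static String pickLogConfig(String flinkConfDir) {
            File log4j = new File(flinkConfDir, "log4j.properties");
            if (log4j.exists()) {
                return log4j.getAbsolutePath();
            }
            File logback = new File(flinkConfDir, "logback.xml");
            return logback.exists() ? logback.getAbsolutePath() : null;
        }
    }
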
diff --git a/flinkx-launcher/src/main/java/org/apache/flink/client/deployment/ClusterSpecification.java b/flinkx-launcher/src/main/java/org/apache/flink/client/deployment/ClusterSpecification.java
index 3f37518c41..f95543a97f 100644
--- a/flinkx-launcher/src/main/java/org/apache/flink/client/deployment/ClusterSpecification.java
+++ b/flinkx-launcher/src/main/java/org/apache/flink/client/deployment/ClusterSpecification.java
@@ -19,10 +19,7 @@
package org.apache.flink.client.deployment;
import org.apache.flink.client.program.PackagedProgram;
-import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
-import org.apache.flink.configuration.JobManagerOptions;
-import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -62,20 +59,6 @@ private ClusterSpecification(int masterMemoryMB, int taskManagerMemoryMB, int nu
this.priority = priority;
}
- public static ClusterSpecification fromConfiguration(Configuration configuration) {
- int slots = configuration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
-
- int jobManagerMemoryMb = configuration.getInteger(JobManagerOptions.JOB_MANAGER_HEAP_MEMORY_MB);
- int taskManagerMemoryMb = configuration.getInteger(TaskManagerOptions.TASK_MANAGER_HEAP_MEMORY_MB);
-
- return new ClusterSpecificationBuilder()
- .setMasterMemoryMB(jobManagerMemoryMb)
- .setTaskManagerMemoryMB(taskManagerMemoryMb)
- .setNumberTaskManagers(1)
- .setSlotsPerTaskManager(slots)
- .createClusterSpecification();
- }
-
public PackagedProgram getProgram() {
return program;
}
@@ -200,7 +183,7 @@ public String toString() {
*/
public static class ClusterSpecificationBuilder {
private int masterMemoryMB = 768;
- private int taskManagerMemoryMB = 768;
+ private int taskManagerMemoryMB = 1024;
private int numberTaskManagers = 1;
private int slotsPerTaskManager = 1;
private int parallelism = 1;
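
Note: with the builder default above, a ClusterSpecification built without an explicit TaskManager size now gets 1024 MB. Usage sketch against the builder methods shown in this hunk (values are illustrative only):

    import org.apache.flink.client.deployment.ClusterSpecification;

    public final class ClusterSpecUsageSketch {
        public static void main(String[] args) {
            ClusterSpecification spec = new ClusterSpecification.ClusterSpecificationBuilder()
                    .setMasterMemoryMB(768)
                    .setSlotsPerTaskManager(1)
                    .createClusterSpecification(); // taskManagerMemoryMB falls back to the new 1024 MB default
            System.out.println(spec);
        }
    }
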
diff --git a/flinkx-launcher/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java b/flinkx-launcher/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java
index 3827005790..3d13479ef5 100644
--- a/flinkx-launcher/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java
+++ b/flinkx-launcher/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java
@@ -18,6 +18,9 @@
package org.apache.flink.yarn;
+import com.dtstack.flinkx.constants.ConfigConstant;
+import com.dtstack.flinkx.constants.ConstantValue;
+import com.dtstack.flinkx.launcher.perJob.FlinkPerJobUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.cache.DistributedCache;
@@ -26,32 +29,60 @@
import org.apache.flink.client.deployment.ClusterDescriptor;
import org.apache.flink.client.deployment.ClusterRetrieveException;
import org.apache.flink.client.deployment.ClusterSpecification;
+import org.apache.flink.client.deployment.application.ApplicationConfiguration;
import org.apache.flink.client.program.ClusterClientProvider;
import org.apache.flink.client.program.PackagedProgram;
import org.apache.flink.client.program.PackagedProgramUtils;
import org.apache.flink.client.program.rest.RestClusterClient;
-import org.apache.flink.configuration.*;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.ConfigUtils;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ConfigurationUtils;
+import org.apache.flink.configuration.CoreOptions;
+import org.apache.flink.configuration.HighAvailabilityOptions;
+import org.apache.flink.configuration.IllegalConfigurationException;
+import org.apache.flink.configuration.JobManagerOptions;
+import org.apache.flink.configuration.PipelineOptions;
+import org.apache.flink.configuration.ResourceManagerOptions;
+import org.apache.flink.configuration.RestOptions;
+import org.apache.flink.configuration.SecurityOptions;
import org.apache.flink.core.plugin.PluginConfig;
import org.apache.flink.core.plugin.PluginUtils;
import org.apache.flink.runtime.clusterframework.BootstrapTools;
import org.apache.flink.runtime.entrypoint.ClusterEntrypoint;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;
+import org.apache.flink.runtime.jobmanager.JobManagerProcessSpec;
+import org.apache.flink.runtime.jobmanager.JobManagerProcessUtils;
+import org.apache.flink.runtime.util.HadoopUtils;
import org.apache.flink.util.FlinkException;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.ShutdownHookUtil;
import org.apache.flink.yarn.configuration.YarnConfigOptions;
import org.apache.flink.yarn.configuration.YarnConfigOptionsInternal;
+import org.apache.flink.yarn.configuration.YarnDeploymentTarget;
+import org.apache.flink.yarn.configuration.YarnLogConfigUtil;
+import org.apache.flink.yarn.entrypoint.YarnApplicationClusterEntryPoint;
import org.apache.flink.yarn.entrypoint.YarnJobClusterEntrypoint;
import org.apache.flink.yarn.entrypoint.YarnSessionClusterEntrypoint;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
-import org.apache.hadoop.yarn.api.records.*;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.NodeReport;
+import org.apache.hadoop.yarn.api.records.NodeState;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.QueueInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.YarnApplicationState;
+import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -62,30 +93,36 @@
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
-import java.nio.file.FileVisitResult;
-import java.nio.file.Files;
-import java.nio.file.SimpleFileVisitor;
-import java.nio.file.attribute.BasicFileAttributes;
-import java.util.*;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
import java.util.stream.Collectors;
-import static com.dtstack.flinkx.constants.ConfigConstant.FLINK_PLUGIN_LOAD_MODE_KEY;
-import static com.dtstack.flinkx.constants.ConstantValue.SHIP_FILE_PLUGIN_LOAD_MODE;
-import static com.dtstack.flinkx.launcher.perJob.FlinkPerJobUtil.buildProgram;
-import static com.dtstack.flinkx.launcher.perJob.FlinkPerJobUtil.getUrlFormat;
import static org.apache.flink.configuration.ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR;
import static org.apache.flink.configuration.ConfigConstants.ENV_FLINK_LIB_DIR;
import static org.apache.flink.runtime.entrypoint.component.FileJobGraphRetriever.JOB_GRAPH_FILE_PATH;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
-import static org.apache.flink.yarn.cli.FlinkYarnSessionCli.CONFIG_FILE_LOG4J_NAME;
-import static org.apache.flink.yarn.cli.FlinkYarnSessionCli.CONFIG_FILE_LOGBACK_NAME;
+import static org.apache.flink.yarn.YarnConfigKeys.LOCAL_RESOURCE_DESCRIPTOR_SEPARATOR;
/**
* The descriptor with deployment information for deploying a Flink cluster on Yarn.
@@ -106,11 +143,17 @@ public class YarnClusterDescriptor implements ClusterDescriptor {
private final List shipFiles = new LinkedList<>();
private final String yarnQueue;
+
+ private Path flinkJarPath;
+
private final Configuration flinkConfiguration;
+
private final String customName;
+
private final String nodeLabel;
+
private final String applicationType;
- private Path flinkJarPath;
+
private String zookeeperNamespace;
private YarnConfigOptions.UserJarInclusion userJarInclusion;
@@ -142,161 +185,6 @@ public YarnClusterDescriptor(
this.zookeeperNamespace = flinkConfiguration.getString(HighAvailabilityOptions.HA_CLUSTER_ID, null);
}
- /**
- * Uploads and registers a single resource and adds it to localResources .
- *
- * @param key
- * the key to add the resource under
- * @param fs
- * the remote file system to upload to
- * @param appId
- * application ID
- * @param localSrcPath
- * local path to the file
- * @param localResources
- * map of resources
- *
- * @return the remote path to the uploaded resource
- */
- private static Path setupSingleLocalResource(
- String key,
- FileSystem fs,
- ApplicationId appId,
- Path localSrcPath,
- Map localResources,
- Path targetHomeDir,
- String relativeTargetPath) throws IOException {
- Tuple2 resource = Utils.setupLocalResource(
- fs,
- appId.toString(),
- localSrcPath,
- targetHomeDir,
- relativeTargetPath);
-
- localResources.put(key, resource.f1);
-
- return resource.f0;
- }
-
- /**
- * Match file name for "flink-dist*.jar " pattern.
- *
- * @param fileName file name to check
- * @return true if file is a dist jar
- */
- private static boolean isDistJar(String fileName) {
- return fileName.startsWith("flink-dist") && fileName.endsWith("jar");
- }
-
- /**
- * Recursively uploads (and registers) any (user and system) files in shipFiles except
- * for files matching "flink-dist*.jar " which should be uploaded separately.
- *
- * @param shipFiles
- * files to upload
- * @param fs
- * file system to upload to
- * @param targetHomeDir
- * remote home directory to upload to
- * @param appId
- * application ID
- * @param remotePaths
- * paths of the remote resources (uploaded resources will be added)
- * @param localResources
- * map of resources (uploaded resources will be added)
- * @param localResourcesDirectory
- * the directory the localResources are uploaded to
- * @param envShipFileList
- * list of shipped files in a format understood by {@link Utils#createTaskExecutorContext}
- *
- * @return list of class paths with the the proper resource keys from the registration
- */
- static List uploadAndRegisterFiles(
- Collection shipFiles,
- FileSystem fs,
- Path targetHomeDir,
- ApplicationId appId,
- List remotePaths,
- Map localResources,
- String localResourcesDirectory,
- StringBuilder envShipFileList) throws IOException {
- final List localPaths = new ArrayList<>();
- final List relativePaths = new ArrayList<>();
- for (File shipFile : shipFiles) {
- if (shipFile.isDirectory()) {
- // add directories to the classpath
- final java.nio.file.Path shipPath = shipFile.toPath();
- final java.nio.file.Path parentPath = shipPath.getParent();
- Files.walkFileTree(shipPath, new SimpleFileVisitor() {
- @Override
- public FileVisitResult visitFile(java.nio.file.Path file, BasicFileAttributes attrs) {
- localPaths.add(new Path(file.toUri()));
- relativePaths.add(new Path(localResourcesDirectory, parentPath.relativize(file).toString()));
- return FileVisitResult.CONTINUE;
- }
- });
- } else {
- localPaths.add(new Path(shipFile.toURI()));
- relativePaths.add(new Path(localResourcesDirectory, shipFile.getName()));
- }
- }
-
- final Set archives = new HashSet<>();
- final Set resources = new HashSet<>();
- for (int i = 0; i < localPaths.size(); i++) {
- final Path localPath = localPaths.get(i);
- final Path relativePath = relativePaths.get(i);
- if (!isDistJar(relativePath.getName())) {
- final String key = relativePath.toString();
- final Path remotePath = setupSingleLocalResource(
- key,
- fs,
- appId,
- localPath,
- localResources,
- targetHomeDir,
- relativePath.getParent().toString());
- remotePaths.add(remotePath);
- envShipFileList.append(key).append("=").append(remotePath).append(",");
- // add files to the classpath
- if (key.endsWith("jar")) {
- archives.add(relativePath.toString());
- } else {
- resources.add(relativePath.getParent().toString());
- }
- }
- }
-
- // construct classpath, we always want resource directories to go first, we also sort
- // both resources and archives in order to make classpath deterministic
- final ArrayList classPaths = new ArrayList<>();
- resources.stream().sorted().forEach(classPaths::add);
- archives.stream().sorted().forEach(classPaths::add);
- return classPaths;
- }
-
- private static YarnConfigOptions.UserJarInclusion getUserJarInclusionMode(org.apache.flink.configuration.Configuration config) {
- return config.getEnum(YarnConfigOptions.UserJarInclusion.class, YarnConfigOptions.CLASSPATH_INCLUDE_USER_JAR);
- }
-
- private static boolean isUsrLibDirIncludedInShipFiles(List shipFiles) {
- return shipFiles.stream()
- .filter(File::isDirectory)
- .map(File::getName)
- .noneMatch(name -> name.equals(DEFAULT_FLINK_USR_LIB_DIR));
- }
-
- public static void logDetachedClusterInformation(ApplicationId yarnApplicationId, Logger logger) {
- logger.info(
- "The Flink YARN session cluster has been started in detached mode. In order to " +
- "stop Flink gracefully, use the following command:\n" +
- "$ echo \"stop\" | ./bin/yarn-session.sh -id {}\n" +
- "If this should not be possible, then you can also kill Flink via YARN's web interface or via:\n" +
- "$ yarn application -kill {}\n" +
- "Note that killing Flink might not clean up all job artifacts and temporary files.",
- yarnApplicationId, yarnApplicationId);
- }
-
private Optional> decodeDirsToShipToCluster(final Configuration configuration) {
checkNotNull(configuration);
@@ -358,10 +246,6 @@ public Configuration getFlinkConfiguration() {
return flinkConfiguration;
}
- // -------------------------------------------------------------
- // Lifecycle management
- // -------------------------------------------------------------
-
public void setLocalJarPath(Path localJarPath) {
if (!localJarPath.toString().endsWith("jar")) {
throw new IllegalArgumentException("The passed jar path ('" + localJarPath + "') does not end with the 'jar' extension");
@@ -369,27 +253,12 @@ public void setLocalJarPath(Path localJarPath) {
this.flinkJarPath = localJarPath;
}
- // -------------------------------------------------------------
- // ClusterClient overrides
- // -------------------------------------------------------------
-
- /**
- * Adds the given files to the list of files to ship.
- *
- * Note that any file matching "flink-dist*.jar " will be excluded from the upload by
- * {@link #uploadAndRegisterFiles(Collection, FileSystem, Path, ApplicationId, List, Map, String, StringBuilder)}
- * since we upload the Flink uber jar ourselves and do not need to deploy it multiple times.
- *
- * @param shipFiles files to ship
- */
- public void addShipFiles(List shipFiles) {
- checkArgument(userJarInclusion != YarnConfigOptions.UserJarInclusion.DISABLED || isUsrLibDirIncludedInShipFiles(shipFiles),
- "This is an illegal ship directory : %s. When setting the %s to %s the name of ship directory can not be %s.",
- ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR,
- YarnConfigOptions.CLASSPATH_INCLUDE_USER_JAR.key(),
- YarnConfigOptions.UserJarInclusion.DISABLED,
- ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR);
- this.shipFiles.addAll(shipFiles);
+ private static String encodeYarnLocalResourceDescriptorListToString(List<YarnLocalResourceDescriptor> resources) {
+ return String.join(
+ LOCAL_RESOURCE_DESCRIPTOR_SEPARATOR,
+ resources.stream()
+ .map(YarnLocalResourceDescriptor::toString)
+ .collect(Collectors.toList()));
}
private void isReadyForDeployment(ClusterSpecification clusterSpecification) throws Exception {
@@ -444,6 +313,10 @@ public String getNodeLabel() {
return nodeLabel;
}
+ // -------------------------------------------------------------
+ // Lifecycle management
+ // -------------------------------------------------------------
+
@Override
public void close() {
if (!sharedYarnClient) {
@@ -451,6 +324,10 @@ public void close() {
}
}
+ // -------------------------------------------------------------
+ // ClusterClient overrides
+ // -------------------------------------------------------------
+
@Override
public ClusterClientProvider retrieve(ApplicationId applicationId) throws ClusterRetrieveException {
@@ -500,6 +377,10 @@ public ClusterClientProvider deploySessionCluster(ClusterSpecific
}
}
+ private static YarnConfigOptions.UserJarInclusion getUserJarInclusionMode(org.apache.flink.configuration.Configuration config) {
+ return config.getEnum(YarnConfigOptions.UserJarInclusion.class, YarnConfigOptions.CLASSPATH_INCLUDE_USER_JAR);
+ }
+
@Override
public ClusterClientProvider deployJobCluster(
ClusterSpecification clusterSpecification,
@@ -517,13 +398,119 @@ public ClusterClientProvider deployJobCluster(
}
}
+ private static boolean isUsrLibDirIncludedInShipFiles(List<File> shipFiles) {
+ return shipFiles.stream()
+ .filter(File::isDirectory)
+ .map(File::getName)
+ .noneMatch(name -> name.equals(DEFAULT_FLINK_USR_LIB_DIR));
+ }
+
+ public static void logDetachedClusterInformation(ApplicationId yarnApplicationId, Logger logger) {
+ logger.info(
+ "The Flink YARN session cluster has been started in detached mode. In order to " +
+ "stop Flink gracefully, use the following command:\n" +
+ "$ echo \"stop\" | ./bin/yarn-session.sh -id {}\n" +
+ "If this should not be possible, then you can also kill Flink via YARN's web interface or via:\n" +
+ "$ yarn application -kill {}\n" +
+ "Note that killing Flink might not clean up all job artifacts and temporary files.",
+ yarnApplicationId, yarnApplicationId);
+ }
+
+ /**
+ * Adds the given files to the list of files to ship.
+ *
+ * Note that any file matching "flink-dist*.jar " will be excluded from the upload by
+ * {@link YarnApplicationFileUploader#registerMultipleLocalResources(Collection, String)}
+ * since we upload the Flink uber jar ourselves and do not need to deploy it multiple times.
+ *
+ * @param shipFiles files to ship
+ */
+ public void addShipFiles(List<File> shipFiles) {
+ checkArgument(userJarInclusion != YarnConfigOptions.UserJarInclusion.DISABLED || isUsrLibDirIncludedInShipFiles(shipFiles),
+ "This is an illegal ship directory : %s. When setting the %s to %s the name of ship directory can not be %s.",
+ ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR,
+ YarnConfigOptions.CLASSPATH_INCLUDE_USER_JAR.key(),
+ YarnConfigOptions.UserJarInclusion.DISABLED,
+ ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR);
+ this.shipFiles.addAll(shipFiles);
+ }
+
+ @Override
+ public ClusterClientProvider<ApplicationId> deployApplicationCluster(
+ final ClusterSpecification clusterSpecification,
+ final ApplicationConfiguration applicationConfiguration) throws ClusterDeploymentException {
+ checkNotNull(clusterSpecification);
+ checkNotNull(applicationConfiguration);
+
+ final YarnDeploymentTarget deploymentTarget = YarnDeploymentTarget.fromConfig(flinkConfiguration);
+ if (YarnDeploymentTarget.APPLICATION != deploymentTarget) {
+ throw new ClusterDeploymentException(
+ "Couldn't deploy Yarn Application Cluster." +
+ " Expected deployment.target=" + YarnDeploymentTarget.APPLICATION.getName() +
+ " but actual one was \"" + deploymentTarget.getName() + "\"");
+ }
+
+ applicationConfiguration.applyToConfiguration(flinkConfiguration);
+
+ final List<String> pipelineJars = flinkConfiguration.getOptional(PipelineOptions.JARS).orElse(Collections.emptyList());
+ Preconditions.checkArgument(pipelineJars.size() == 1, "Should only have one jar");
+
+ try {
+ return deployInternal(
+ clusterSpecification,
+ "Flink Application Cluster",
+ YarnApplicationClusterEntryPoint.class.getName(),
+ null,
+ false);
+ } catch (Exception e) {
+ throw new ClusterDeploymentException("Couldn't deploy Yarn Application Cluster", e);
+ }
+ }
+
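deployApplicationCluster above only proceeds when the deployment target is yarn-application and pipeline.jars carries exactly one entry. A hedged sketch of the client-side configuration it expects (the option classes are Flink's public configuration API; the jar path is a made-up example):

    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.configuration.DeploymentOptions;
    import org.apache.flink.configuration.PipelineOptions;

    import java.util.Collections;

    class ApplicationModeConfigSketch {
        public static void main(String[] args) {
            Configuration conf = new Configuration();
            // Any target other than yarn-application is rejected with a ClusterDeploymentException.
            conf.set(DeploymentOptions.TARGET, "yarn-application");
            // Exactly one user jar is expected in pipeline.jars.
            conf.set(PipelineOptions.JARS, Collections.singletonList("local:///opt/flink/usrlib/my-job.jar"));
            System.out.println(conf);
        }
    }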
+ private void checkYarnQueues(YarnClient yarnClient) {
+ try {
+ List<QueueInfo> queues = yarnClient.getAllQueues();
+ if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
+ boolean queueFound = false;
+ for (QueueInfo queue : queues) {
+ if (queue.getQueueName().equals(this.yarnQueue)) {
+ queueFound = true;
+ break;
+ }
+ }
+ if (!queueFound) {
+ String queueNames = "";
+ for (QueueInfo queue : queues) {
+ queueNames += queue.getQueueName() + ", ";
+ }
+ LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " +
+ "Available queues: " + queueNames);
+ }
+ } else {
+ LOG.debug("The YARN cluster does not have any queues configured");
+ }
+ } catch (Throwable e) {
+ LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Error details", e);
+ }
+ }
+ }
+
@Override
public void killCluster(ApplicationId applicationId) throws FlinkException {
try {
yarnClient.killApplication(applicationId);
- Utils.deleteApplicationFiles(Collections.singletonMap(
- YarnConfigKeys.FLINK_YARN_FILES,
- getYarnFilesDir(applicationId).toUri().toString()));
+
+ try (final FileSystem fs = FileSystem.get(yarnConfiguration)) {
+ final Path applicationDir = YarnApplicationFileUploader
+ .getApplicationDirPath(fs.getHomeDirectory(), applicationId);
+
+ Utils.deleteApplicationFiles(Collections.singletonMap(
+ YarnConfigKeys.FLINK_YARN_FILES,
+ applicationDir.toUri().toString()));
+ }
+
} catch (YarnException | IOException e) {
throw new FlinkException("Could not kill the Yarn Flink cluster with id " + applicationId + '.', e);
}
@@ -545,18 +532,13 @@ private ClusterClientProvider deployInternal(
@Nullable JobGraph jobGraph,
boolean detached) throws Exception {
- if (UserGroupInformation.isSecurityEnabled()) {
- // note: UGI::hasKerberosCredentials inaccurately reports false
- // for logins based on a keytab (fixed in Hadoop 2.6.1, see HADOOP-10786),
- // so we check only in ticket cache scenario.
+ final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
+ if (HadoopUtils.isKerberosSecurityEnabled(currentUser)) {
boolean useTicketCache = flinkConfiguration.getBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE);
- UserGroupInformation loginUser = UserGroupInformation.getCurrentUser();
- if (loginUser.getAuthenticationMethod() == UserGroupInformation.AuthenticationMethod.KERBEROS
- && useTicketCache && !loginUser.hasKerberosCredentials()) {
- LOG.error("Hadoop security with Kerberos is enabled but the login user does not have Kerberos credentials");
+ if (!HadoopUtils.areKerberosCredentialsValid(currentUser, useTicketCache)) {
throw new RuntimeException("Hadoop security with Kerberos is enabled but the login user " +
- "does not have Kerberos credentials");
+ "does not have Kerberos credentials or delegation tokens!");
}
}
@@ -573,12 +555,12 @@ private ClusterClientProvider deployInternal(
final GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();
if(clusterSpecification.isCreateProgramDelay()){
- String url = getUrlFormat(clusterSpecification.getYarnConfiguration(), yarnClient) + "/" + appResponse.getApplicationId().toString();
- PackagedProgram program = buildProgram(url,clusterSpecification);
+ String url = FlinkPerJobUtil.getUrlFormat(clusterSpecification.getYarnConfiguration(), yarnClient) + "/" + appResponse.getApplicationId().toString();
+ PackagedProgram program = FlinkPerJobUtil.buildProgram(url,clusterSpecification);
clusterSpecification.setProgram(program);
jobGraph = PackagedProgramUtils.createJobGraph(program, clusterSpecification.getConfiguration(), clusterSpecification.getParallelism(), false);
- String pluginLoadMode = clusterSpecification.getConfiguration().getString(FLINK_PLUGIN_LOAD_MODE_KEY);
- if(StringUtils.equalsIgnoreCase(pluginLoadMode, SHIP_FILE_PLUGIN_LOAD_MODE)){
+ String pluginLoadMode = clusterSpecification.getConfiguration().getString(ConfigConstant.FLINK_PLUGIN_LOAD_MODE_KEY);
+ if(StringUtils.equalsIgnoreCase(pluginLoadMode, ConstantValue.SHIP_FILE_PLUGIN_LOAD_MODE)){
jobGraph.getClasspaths().forEach(jarFile -> {
try {
shipFiles.add(new File(jarFile.toURI()));
@@ -660,12 +642,8 @@ private ClusterSpecification validateClusterResources(
int jobManagerMemoryMb = clusterSpecification.getMasterMemoryMB();
final int taskManagerMemoryMb = clusterSpecification.getTaskManagerMemoryMB();
- if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
- LOG.warn("The JobManager or TaskManager memory is below the smallest possible YARN Container size. "
- + "The value of 'yarn.scheduler.minimum-allocation-mb' is '" + yarnMinAllocationMB + "'. Please increase the memory size." +
- "YARN will allocate the smaller containers but the scheduler will account for the minimum-allocation-mb, maybe not all instances " +
- "you requested will start.");
- }
+ logIfComponentMemNotIntegerMultipleOfYarnMinAllocation("JobManager", jobManagerMemoryMb, yarnMinAllocationMB);
+ logIfComponentMemNotIntegerMultipleOfYarnMinAllocation("TaskManager", taskManagerMemoryMb, yarnMinAllocationMB);
// set the memory to minAllocationMB to do the next checks correctly
if (jobManagerMemoryMb < yarnMinAllocationMB) {
@@ -705,33 +683,36 @@ private ClusterSpecification validateClusterResources(
}
- private void checkYarnQueues(YarnClient yarnClient) {
+ private void logIfComponentMemNotIntegerMultipleOfYarnMinAllocation(
+ String componentName,
+ int componentMemoryMB,
+ int yarnMinAllocationMB) {
+ int normalizedMemMB = (componentMemoryMB + (yarnMinAllocationMB - 1)) / yarnMinAllocationMB * yarnMinAllocationMB;
+ if (normalizedMemMB <= 0) {
+ normalizedMemMB = yarnMinAllocationMB;
+ }
+ if (componentMemoryMB != normalizedMemMB) {
+ LOG.info("The configured {} memory is {} MB. YARN will allocate {} MB to make up an integer multiple of its "
+ + "minimum allocation memory ({} MB, configured via 'yarn.scheduler.minimum-allocation-mb'). The extra {} MB "
+ + "may not be used by Flink.", componentName, componentMemoryMB, normalizedMemMB, yarnMinAllocationMB,
+ normalizedMemMB - componentMemoryMB);
+ }
+ }
+
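A worked example of the rounding above, with hypothetical numbers: a 1600 MB request against a 1024 MB minimum allocation is rounded up to 2048 MB, so 448 MB may never be used by Flink. In plain Java:

    class YarnRoundingSketch {
        public static void main(String[] args) {
            int requestedMb = 1600;          // hypothetical JobManager memory
            int yarnMinAllocationMb = 1024;  // hypothetical yarn.scheduler.minimum-allocation-mb
            int normalized = (requestedMb + (yarnMinAllocationMb - 1)) / yarnMinAllocationMb * yarnMinAllocationMb;
            if (normalized <= 0) {
                normalized = yarnMinAllocationMb;
            }
            // Prints: normalized=2048 MB, unused=448 MB
            System.out.println("normalized=" + normalized + " MB, unused=" + (normalized - requestedMb) + " MB");
        }
    }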
+ /**
+ * Kills YARN application and stops YARN client.
+ *
+ * Use this method to kill the App before it has been properly deployed
+ */
+ private void failSessionDuringDeployment(YarnClient yarnClient, YarnClientApplication yarnApplication) {
+ LOG.info("Killing YARN application");
+
try {
- List queues = yarnClient.getAllQueues();
- if (queues.size() > 0 && this.yarnQueue != null) { // check only if there are queues configured in yarn and for this session.
- boolean queueFound = false;
- for (QueueInfo queue : queues) {
- if (queue.getQueueName().equals(this.yarnQueue)) {
- queueFound = true;
- break;
- }
- }
- if (!queueFound) {
- String queueNames = "";
- for (QueueInfo queue : queues) {
- queueNames += queue.getQueueName() + ", ";
- }
- LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. " +
- "Available queues: " + queueNames);
- }
- } else {
- LOG.debug("The YARN cluster does not have any queues configured");
- }
- } catch (Throwable e) {
- LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
- if (LOG.isDebugEnabled()) {
- LOG.debug("Error details", e);
- }
+ yarnClient.killApplication(yarnApplication.getNewApplicationResponse().getApplicationId());
+ } catch (Exception e) {
+ // we only log a debug message here because the "killApplication" call is a best-effort
+ // call (we don't know if the application has been deployed when the error occurred).
+ LOG.debug("Error while killing YARN application", e);
}
}
@@ -750,11 +731,7 @@ private ApplicationReport startAppMaster(
configuration,
PluginUtils.createPluginManagerFromRootFolder(configuration));
- // initialize file system
- // Copy the application master jar to the filesystem
- // Create a local resource to point to the destination jar path
final FileSystem fs = FileSystem.get(yarnConfiguration);
- final Path homeDir = fs.getHomeDirectory();
// hard-coded check for the GoogleHDFS client because it's not overriding the getScheme() method.
if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") &&
@@ -765,10 +742,18 @@ private ApplicationReport startAppMaster(
}
ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
+
+ final List<Path> providedLibDirs = getRemoteSharedPaths(configuration);
+
+ final YarnApplicationFileUploader fileUploader = YarnApplicationFileUploader.from(
+ fs,
+ fs.getHomeDirectory(),
+ providedLibDirs,
+ appContext.getApplicationId(),
+ getFileReplication());
+
// The files need to be shipped and added to classpath.
Set<File> systemShipFiles = new HashSet<>(shipFiles.size());
- // The files only need to be shipped.
- Set shipOnlyFiles = new HashSet<>();
for (File file : shipFiles) {
systemShipFiles.add(file.getAbsoluteFile());
}
@@ -778,11 +763,6 @@ private ApplicationReport startAppMaster(
systemShipFiles.add(new File(logConfigFilePath));
}
- addLibFoldersToShipFiles(systemShipFiles);
-
- // Plugin files only need to be shipped and should not be added to classpath.
- addPluginsFoldersToShipFiles(shipOnlyFiles);
-
// Set-up ApplicationSubmissionContext for the application
final ApplicationId appId = appContext.getApplicationId();
@@ -814,21 +794,24 @@ private ApplicationReport startAppMaster(
1));
}
- final Set userJarFiles = (jobGraph == null)
- // not per-job submission
- ? Collections.emptySet()
- // add user code jars from the provided JobGraph
- : jobGraph.getUserJars().stream().map(f -> f.toUri()).map(File::new).collect(Collectors.toSet());
+ final Set<Path> userJarFiles = new HashSet<>();
+ if (jobGraph != null) {
+ userJarFiles.addAll(jobGraph.getUserJars().stream().map(f -> f.toUri()).map(Path::new).collect(Collectors.toSet()));
+ }
+
+ final List<URI> jarUrls = ConfigUtils.decodeListFromConfig(configuration, PipelineOptions.JARS, URI::create);
+ if (jarUrls != null && YarnApplicationClusterEntryPoint.class.getName().equals(yarnClusterEntrypoint)) {
+ userJarFiles.addAll(jarUrls.stream().map(Path::new).collect(Collectors.toSet()));
+ }
// only for per job mode
if (jobGraph != null) {
for (Map.Entry<String, DistributedCache.DistributedCacheEntry> entry : jobGraph.getUserArtifacts().entrySet()) {
- org.apache.flink.core.fs.Path path = new org.apache.flink.core.fs.Path(entry.getValue().filePath);
// only upload local files
- if (!path.getFileSystem().isDistributedFS()) {
- Path localPath = new Path(path.getPath());
+ if (!Utils.isRemotePath(entry.getValue().filePath)) {
+ Path localPath = new Path(entry.getValue().filePath);
Tuple2 remoteFileInfo =
- Utils.uploadLocalFileToRemote(fs, appId.toString(), localPath, homeDir, entry.getKey());
+ fileUploader.uploadLocalFileToRemote(localPath, entry.getKey());
jobGraph.setUserArtifactRemotePath(entry.getKey(), remoteFileInfo.f0.toString());
}
}
@@ -836,45 +819,33 @@ private ApplicationReport startAppMaster(
jobGraph.writeUserArtifactEntriesToConfiguration();
}
- // local resource map for Yarn
- final Map localResources = new HashMap<>(2 + systemShipFiles.size() + userJarFiles.size());
- // list of remote paths (after upload)
- final List paths = new ArrayList<>(2 + systemShipFiles.size() + userJarFiles.size());
- // ship list that enables reuse of resources for task manager containers
- StringBuilder envShipFileList = new StringBuilder();
+ if (providedLibDirs == null || providedLibDirs.isEmpty()) {
+ addLibFoldersToShipFiles(systemShipFiles);
+ }
- // upload and register ship files, these files will be added to classpath.
- List systemClassPaths = uploadAndRegisterFiles(
- systemShipFiles,
- fs,
- homeDir,
- appId,
- paths,
- localResources,
- Path.CUR_DIR,
- envShipFileList);
+ // Register all files in provided lib dirs as local resources with public visibility
+ // and upload the remaining dependencies as local resources with APPLICATION visibility.
+ final List<String> systemClassPaths = fileUploader.registerProvidedLocalResources();
+ final List<String> uploadedDependencies = fileUploader.registerMultipleLocalResources(
+ systemShipFiles.stream().map(e -> new Path(e.toURI())).collect(Collectors.toSet()),
+ Path.CUR_DIR);
+ systemClassPaths.addAll(uploadedDependencies);
// upload and register ship-only files
- uploadAndRegisterFiles(
- shipOnlyFiles,
- fs,
- homeDir,
- appId,
- paths,
- localResources,
- Path.CUR_DIR,
- envShipFileList);
-
- final List userClassPaths = uploadAndRegisterFiles(
+ // Plugin files only need to be shipped and should not be added to classpath.
+ if (providedLibDirs == null || providedLibDirs.isEmpty()) {
+ Set<File> shipOnlyFiles = new HashSet<>();
+ addPluginsFoldersToShipFiles(shipOnlyFiles);
+ fileUploader.registerMultipleLocalResources(
+ shipOnlyFiles.stream().map(e -> new Path(e.toURI())).collect(Collectors.toSet()),
+ Path.CUR_DIR);
+ }
+
+ // Upload and register user jars
+ final List<String> userClassPaths = fileUploader.registerMultipleLocalResources(
userJarFiles,
- fs,
- homeDir,
- appId,
- paths,
- localResources,
userJarInclusion == YarnConfigOptions.UserJarInclusion.DISABLED ?
- ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR : Path.CUR_DIR,
- envShipFileList);
+ ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR : Path.CUR_DIR);
if (userJarInclusion == YarnConfigOptions.UserJarInclusion.ORDER) {
systemClassPaths.addAll(userClassPaths);
@@ -896,17 +867,39 @@ private ApplicationReport startAppMaster(
}
// Setup jar for ApplicationMaster
- Path remotePathJar = setupSingleLocalResource(
- flinkJarPath.getName(),
- fs,
- appId,
- flinkJarPath,
- localResources,
- homeDir,
- "");
+ final YarnLocalResourceDescriptor localResourceDescFlinkJar = fileUploader.uploadFlinkDist(flinkJarPath);
+ classPathBuilder.append(localResourceDescFlinkJar.getResourceKey()).append(File.pathSeparator);
+
+ // write job graph to tmp file and add it to local resource
+ // TODO: the server should use the user's main method to generate the job graph
+ if (jobGraph != null) {
+ File tmpJobGraphFile = null;
+ try {
+ tmpJobGraphFile = File.createTempFile(appId.toString(), null);
+ try (FileOutputStream output = new FileOutputStream(tmpJobGraphFile);
+ ObjectOutputStream obOutput = new ObjectOutputStream(output)) {
+ obOutput.writeObject(jobGraph);
+ }
- paths.add(remotePathJar);
- classPathBuilder.append(flinkJarPath.getName()).append(File.pathSeparator);
+ final String jobGraphFilename = "job.graph";
+ configuration.setString(JOB_GRAPH_FILE_PATH, jobGraphFilename);
+
+ fileUploader.registerSingleLocalResource(
+ jobGraphFilename,
+ new Path(tmpJobGraphFile.toURI()),
+ "",
+ true,
+ false);
+ classPathBuilder.append(jobGraphFilename).append(File.pathSeparator);
+ } catch (Exception e) {
+ LOG.warn("Add job graph to local resource fail.");
+ throw e;
+ } finally {
+ if (tmpJobGraphFile != null && !tmpJobGraphFile.delete()) {
+ LOG.warn("Fail to delete temporary file {}.", tmpJobGraphFile.toPath());
+ }
+ }
+ }
// Upload the flink configuration
// write out configuration file
@@ -916,16 +909,12 @@ private ApplicationReport startAppMaster(
BootstrapTools.writeConfiguration(configuration, tmpConfigurationFile);
String flinkConfigKey = "flink-conf.yaml";
- Path remotePathConf = setupSingleLocalResource(
+ fileUploader.registerSingleLocalResource(
flinkConfigKey,
- fs,
- appId,
new Path(tmpConfigurationFile.getAbsolutePath()),
- localResources,
- homeDir,
- "");
- envShipFileList.append(flinkConfigKey).append("=").append(remotePathConf).append(",");
- paths.add(remotePathConf);
+ "",
+ true,
+ true);
classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);
} finally {
if (tmpConfigurationFile != null && !tmpConfigurationFile.delete()) {
@@ -939,43 +928,6 @@ private ApplicationReport startAppMaster(
}
}
- // write job graph to tmp file and add it to local resource
- if (jobGraph != null) {
- File tmpJobGraphFile = null;
- try {
- tmpJobGraphFile = File.createTempFile(appId.toString(), null);
- try (FileOutputStream output = new FileOutputStream(tmpJobGraphFile);
- ObjectOutputStream obOutput = new ObjectOutputStream(output);){
- obOutput.writeObject(jobGraph);
- }
-
- final String jobGraphFilename = "job.graph";
- flinkConfiguration.setString(JOB_GRAPH_FILE_PATH, jobGraphFilename);
-
- Path pathFromYarnURL = setupSingleLocalResource(
- jobGraphFilename,
- fs,
- appId,
- new Path(tmpJobGraphFile.toURI()),
- localResources,
- homeDir,
- "");
- paths.add(pathFromYarnURL);
- classPathBuilder.append(jobGraphFilename).append(File.pathSeparator);
- } catch (Exception e) {
- LOG.warn("Add job graph to local resource fail");
- throw e;
- } finally {
- if (tmpJobGraphFile != null && !tmpJobGraphFile.delete()) {
- LOG.warn("Fail to delete temporary file {}.", tmpConfigurationFile.toPath());
- }
- }
- }
-
- final Path yarnFilesDir = getYarnFilesDir(appId);
- FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
- fs.setPermission(yarnFilesDir, permission); // set permission for path.
-
//To support Yarn Secure Integration Test Scenario
//In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML
//and KRB5 configuration files. We are adding these files as container local resources for the container
@@ -987,89 +939,93 @@ private ApplicationReport startAppMaster(
File f = new File(System.getenv("YARN_CONF_DIR"), Utils.YARN_SITE_FILE_NAME);
LOG.info("Adding Yarn configuration {} to the AM container local resource bucket", f.getAbsolutePath());
Path yarnSitePath = new Path(f.getAbsolutePath());
- remoteYarnSiteXmlPath = setupSingleLocalResource(
+ remoteYarnSiteXmlPath = fileUploader.registerSingleLocalResource(
Utils.YARN_SITE_FILE_NAME,
- fs,
- appId,
yarnSitePath,
- localResources,
- homeDir,
- "");
+ "",
+ false,
+ false).getPath();
String krb5Config = System.getProperty("java.security.krb5.conf");
if (krb5Config != null && krb5Config.length() != 0) {
File krb5 = new File(krb5Config);
LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket", krb5.getAbsolutePath());
Path krb5ConfPath = new Path(krb5.getAbsolutePath());
- remoteKrb5Path = setupSingleLocalResource(
+ remoteKrb5Path = fileUploader.registerSingleLocalResource(
Utils.KRB5_FILE_NAME,
- fs,
- appId,
krb5ConfPath,
- localResources,
- homeDir,
- "");
+ "",
+ false,
+ false).getPath();
hasKrb5 = true;
}
}
- // setup security tokens
Path remotePathKeytab = null;
+ String localizedKeytabPath = null;
String keytab = configuration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);
if (keytab != null) {
- LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
- remotePathKeytab = setupSingleLocalResource(
- Utils.KEYTAB_FILE_NAME,
- fs,
- appId,
- new Path(keytab),
- localResources,
- homeDir,
- "");
+ boolean localizeKeytab = flinkConfiguration.getBoolean(YarnConfigOptions.SHIP_LOCAL_KEYTAB);
+ localizedKeytabPath = flinkConfiguration.getString(YarnConfigOptions.LOCALIZED_KEYTAB_PATH);
+ if (localizeKeytab) {
+ // Localize the keytab to YARN containers via local resource.
+ LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
+ remotePathKeytab = fileUploader.registerSingleLocalResource(
+ localizedKeytabPath,
+ new Path(keytab),
+ "",
+ false,
+ false).getPath();
+ } else {
+ // Assume the keytab is pre-installed in the container.
+ localizedKeytabPath = flinkConfiguration.getString(YarnConfigOptions.LOCALIZED_KEYTAB_PATH);
+ }
}
- final boolean hasLogback = logConfigFilePath != null && logConfigFilePath.endsWith(CONFIG_FILE_LOGBACK_NAME);
- final boolean hasLog4j = logConfigFilePath != null && logConfigFilePath.endsWith(CONFIG_FILE_LOG4J_NAME);
-
+ final JobManagerProcessSpec processSpec = JobManagerProcessUtils.processSpecFromConfigWithNewOptionToInterpretLegacyHeap(
+ flinkConfiguration,
+ JobManagerOptions.TOTAL_PROCESS_MEMORY);
final ContainerLaunchContext amContainer = setupApplicationMasterContainer(
yarnClusterEntrypoint,
- hasLogback,
- hasLog4j,
hasKrb5,
- clusterSpecification.getMasterMemoryMB());
+ processSpec);
+ // setup security tokens
if (UserGroupInformation.isSecurityEnabled()) {
// set HDFS delegation tokens when security is enabled
LOG.info("Adding delegation token to the AM container.");
- Utils.setTokensFor(amContainer, paths, yarnConfiguration);
+ Utils.setTokensFor(amContainer, fileUploader.getRemotePaths(), yarnConfiguration);
}
- amContainer.setLocalResources(localResources);
- fs.close();
+ amContainer.setLocalResources(fileUploader.getRegisteredLocalResources());
+ fileUploader.close();
// Setup CLASSPATH and environment variables for ApplicationMaster
final Map<String, String> appMasterEnv = new HashMap<>();
// set user specified app master environment variables
appMasterEnv.putAll(
- BootstrapTools.getEnvironmentVariables(ResourceManagerOptions.CONTAINERIZED_MASTER_ENV_PREFIX, configuration));
+ ConfigurationUtils.getPrefixedKeyValuePairs(ResourceManagerOptions.CONTAINERIZED_MASTER_ENV_PREFIX, configuration));
// set Flink app class path
appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());
// set Flink on YARN internal configuration values
- appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
+ appMasterEnv.put(YarnConfigKeys.FLINK_DIST_JAR, localResourceDescFlinkJar.toString());
appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
- appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, homeDir.toString());
- appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
+ appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fileUploader.getHomeDir().toString());
+ appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, encodeYarnLocalResourceDescriptorListToString(fileUploader.getEnvShipResourceList()));
appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());
- appMasterEnv.put(YarnConfigKeys.FLINK_YARN_FILES, yarnFilesDir.toUri().toString());
+ appMasterEnv.put(YarnConfigKeys.FLINK_YARN_FILES, fileUploader.getApplicationDir().toUri().toString());
// https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
- if (remotePathKeytab != null) {
- appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString());
+ if (localizedKeytabPath != null) {
+ appMasterEnv.put(YarnConfigKeys.LOCAL_KEYTAB_PATH, localizedKeytabPath);
String principal = configuration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);
+ if (remotePathKeytab != null) {
+ appMasterEnv.put(YarnConfigKeys.REMOTE_KEYTAB_PATH, remotePathKeytab.toString());
+ }
}
//To support Yarn Secure Integration Test Scenario
@@ -1113,7 +1069,7 @@ private ApplicationReport startAppMaster(
setApplicationTags(appContext);
// add a hook to clean up in case deployment fails
- Thread deploymentFailureHook = new DeploymentFailureHook(yarnApplication, yarnFilesDir);
+ Thread deploymentFailureHook = new DeploymentFailureHook(yarnApplication, fileUploader.getApplicationDir());
Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
LOG.info("Submitting application master " + appId);
yarnClient.submitApplication(appContext);
@@ -1163,34 +1119,6 @@ private ApplicationReport startAppMaster(
return report;
}
- /**
- * Returns the Path where the YARN application files should be uploaded to.
- *
- * @param appId YARN application id
- */
- private Path getYarnFilesDir(final ApplicationId appId) throws IOException {
- final FileSystem fileSystem = FileSystem.get(yarnConfiguration);
- final Path homeDir = fileSystem.getHomeDirectory();
- return new Path(homeDir, ".flink/" + appId + '/');
- }
-
- /**
- * Kills YARN application and stops YARN client.
- *
- * Use this method to kill the App before it has been properly deployed
- */
- private void failSessionDuringDeployment(YarnClient yarnClient, YarnClientApplication yarnApplication) {
- LOG.info("Killing YARN application");
-
- try {
- yarnClient.killApplication(yarnApplication.getNewApplicationResponse().getApplicationId());
- } catch (Exception e) {
- // we only log a debug message here because the "killApplication" call is a best-effort
- // call (we don't know if the application has been deployed when the error occured).
- LOG.debug("Error while killing YARN application", e);
- }
- }
-
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient) throws YarnException, IOException {
List nodes = yarnClient.getNodeReports(NodeState.RUNNING);
@@ -1289,38 +1217,30 @@ private void setApplicationNodeLabel(final ApplicationSubmissionContext appConte
}
}
- @VisibleForTesting
- void addLibFoldersToShipFiles(Collection effectiveShipFiles) {
- // Add lib folder to the ship files if the environment variable is set.
- // This is for convenience when running from the command-line.
- // (for other files users explicitly set the ship files)
- String libDir = System.getenv().get(ENV_FLINK_LIB_DIR);
- if (libDir != null) {
- File directoryFile = new File(libDir);
- if (directoryFile.isDirectory()) {
- effectiveShipFiles.add(directoryFile);
- } else {
- throw new YarnDeploymentException("The environment variable '" + ENV_FLINK_LIB_DIR +
- "' is set to '" + libDir + "' but the directory doesn't exist.");
- }
- } else if (shipFiles.isEmpty()) {
- LOG.warn("Environment variable '{}' not set and ship files have not been provided manually. " +
- "Not shipping any library files.", ENV_FLINK_LIB_DIR);
- }
+ private int getFileReplication() {
+ final int yarnFileReplication = yarnConfiguration.getInt(DFSConfigKeys.DFS_REPLICATION_KEY, DFSConfigKeys.DFS_REPLICATION_DEFAULT);
+ final int fileReplication = flinkConfiguration.getInteger(YarnConfigOptions.FILE_REPLICATION);
+ return fileReplication > 0 ? fileReplication : yarnFileReplication;
}
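The replication fallback above reads as: a positive Flink-side file replication setting wins, and only a non-positive (unset) value falls back to the cluster's dfs.replication. A tiny sketch with made-up values:

    class FileReplicationSketch {
        // Mirrors the fallback above: prefer the Flink option when it is positive.
        static int chooseReplication(int flinkFileReplication, int dfsReplicationDefault) {
            return flinkFileReplication > 0 ? flinkFileReplication : dfsReplicationDefault;
        }

        public static void main(String[] args) {
            System.out.println(chooseReplication(-1, 3)); // 3: option unset in Flink, HDFS default applies
            System.out.println(chooseReplication(5, 3));  // 5: explicit Flink setting wins
        }
    }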
- @VisibleForTesting
- void addPluginsFoldersToShipFiles(Collection effectiveShipFiles) {
- final Optional pluginsDir = PluginConfig.getPluginsDir();
- pluginsDir.ifPresent(effectiveShipFiles::add);
+ private List<Path> getRemoteSharedPaths(Configuration configuration) throws IOException, FlinkException {
+ final List<Path> providedLibDirs = ConfigUtils.decodeListFromConfig(
+ configuration, YarnConfigOptions.PROVIDED_LIB_DIRS, Path::new);
+
+ for (Path path : providedLibDirs) {
+ if (!Utils.isRemotePath(path.toString())) {
+ throw new FlinkException(
+ "The \"" + YarnConfigOptions.PROVIDED_LIB_DIRS.key() + "\" should only contain" +
+ " dirs accessible from all worker nodes, while the \"" + path + "\" is local.");
+ }
+ }
+ return providedLibDirs;
}
ContainerLaunchContext setupApplicationMasterContainer(
String yarnClusterEntrypoint,
- boolean hasLogback,
- boolean hasLog4j,
boolean hasKrb5,
- int jobManagerMemoryMb) {
+ JobManagerProcessSpec processSpec) {
// ------------------ Prepare Application Master Container ------------------------------
// respect custom JVM options in the YAML file
@@ -1340,26 +1260,12 @@ ContainerLaunchContext setupApplicationMasterContainer(
final Map<String, String> startCommandValues = new HashMap<>();
startCommandValues.put("java", "$JAVA_HOME/bin/java");
- int heapSize = BootstrapTools.calculateHeapSize(jobManagerMemoryMb, flinkConfiguration);
- String jvmHeapMem = String.format("-Xms%sm -Xmx%sm", heapSize, heapSize);
+ String jvmHeapMem = JobManagerProcessUtils.generateJvmParametersStr(processSpec, flinkConfiguration);
startCommandValues.put("jvmmem", jvmHeapMem);
startCommandValues.put("jvmopts", javaOpts);
- String logging = "";
-
- if (hasLogback || hasLog4j) {
- logging = "-Dlog.file=\"" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.log\"";
+ startCommandValues.put("logging", YarnLogConfigUtil.getLoggingYarnCommand(flinkConfiguration));
- if (hasLogback) {
- logging += " -Dlogback.configurationFile=file:" + CONFIG_FILE_LOGBACK_NAME;
- }
-
- if (hasLog4j) {
- logging += " -Dlog4j.configuration=file:" + CONFIG_FILE_LOG4J_NAME;
- }
- }
-
- startCommandValues.put("logging", logging);
startCommandValues.put("class", yarnClusterEntrypoint);
startCommandValues.put("redirects",
"1> " + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out " +
@@ -1379,6 +1285,32 @@ ContainerLaunchContext setupApplicationMasterContainer(
return amContainer;
}
+ @VisibleForTesting
+ void addLibFoldersToShipFiles(Collection<File> effectiveShipFiles) {
+ // Add lib folder to the ship files if the environment variable is set.
+ // This is for convenience when running from the command-line.
+ // (for other files users explicitly set the ship files)
+ String libDir = System.getenv().get(ENV_FLINK_LIB_DIR);
+ if (libDir != null) {
+ File directoryFile = new File(libDir);
+ if (directoryFile.isDirectory()) {
+ effectiveShipFiles.add(directoryFile);
+ } else {
+ throw new YarnDeploymentException("The environment variable '" + ENV_FLINK_LIB_DIR +
+ "' is set to '" + libDir + "' but the directory doesn't exist.");
+ }
+ } else if (shipFiles.isEmpty()) {
+ LOG.warn("Environment variable '{}' not set and ship files have not been provided manually. " +
+ "Not shipping any library files.", ENV_FLINK_LIB_DIR);
+ }
+ }
+
+ @VisibleForTesting
+ void addPluginsFoldersToShipFiles(Collection<File> effectiveShipFiles) {
+ final Optional<File> pluginsDir = PluginConfig.getPluginsDir();
+ pluginsDir.ifPresent(effectiveShipFiles::add);
+ }
+
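addLibFoldersToShipFiles above is a command-line convenience: when FLINK_LIB_DIR is set it must point at an existing directory, and that directory is shipped wholesale. A standalone sketch of the same behaviour (not the descriptor's method itself):

    import java.io.File;
    import java.util.ArrayList;
    import java.util.Collection;

    class LibFolderShipSketch {
        static void addLibFolder(Collection<File> effectiveShipFiles) {
            String libDir = System.getenv("FLINK_LIB_DIR");
            if (libDir == null) {
                return; // nothing to ship automatically; ship files may have been set explicitly
            }
            File dir = new File(libDir);
            if (!dir.isDirectory()) {
                throw new IllegalStateException("FLINK_LIB_DIR points to '" + libDir + "' but it is not a directory");
            }
            effectiveShipFiles.add(dir);
        }

        public static void main(String[] args) {
            Collection<File> shipFiles = new ArrayList<>();
            addLibFolder(shipFiles);
            System.out.println("ship files: " + shipFiles);
        }
    }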
private void setClusterEntrypointInfoToConfig(final ApplicationReport report) {
checkNotNull(report);
@@ -1397,15 +1329,15 @@ private void setClusterEntrypointInfoToConfig(final ApplicationReport report) {
flinkConfiguration.set(YarnConfigOptions.APPLICATION_ID, ConverterUtils.toString(clusterId));
}
- private static class ClusterResourceDescription {
- public final int totalFreeMemory;
- public final int containerLimit;
- public final int[] nodeManagersFree;
+ private static class YarnDeploymentException extends RuntimeException {
+ private static final long serialVersionUID = -812040641215388943L;
- public ClusterResourceDescription(int totalFreeMemory, int containerLimit, int[] nodeManagersFree) {
- this.totalFreeMemory = totalFreeMemory;
- this.containerLimit = containerLimit;
- this.nodeManagersFree = nodeManagersFree;
+ public YarnDeploymentException(String message) {
+ super(message);
+ }
+
+ public YarnDeploymentException(String message, Throwable cause) {
+ super(message, cause);
}
}
@@ -1426,15 +1358,22 @@ private static class ApplicationSubmissionContextReflector {
private static final ApplicationSubmissionContextReflector instance =
new ApplicationSubmissionContextReflector(ApplicationSubmissionContext.class);
+
+ public static ApplicationSubmissionContextReflector getInstance() {
+ return instance;
+ }
+
private static final String APPLICATION_TAGS_METHOD_NAME = "setApplicationTags";
private static final String ATTEMPT_FAILURES_METHOD_NAME = "setAttemptFailuresValidityInterval";
private static final String KEEP_CONTAINERS_METHOD_NAME = "setKeepContainersAcrossApplicationAttempts";
private static final String NODE_LABEL_EXPRESSION_NAME = "setNodeLabelExpression";
+
private final Method applicationTagsMethod;
private final Method attemptFailuresValidityIntervalMethod;
private final Method keepContainersMethod;
@Nullable
private final Method nodeLabelExpressionMethod;
+
private ApplicationSubmissionContextReflector(Class<ApplicationSubmissionContext> clazz) {
Method applicationTagsMethod;
Method attemptFailuresValidityIntervalMethod;
@@ -1488,10 +1427,6 @@ private ApplicationSubmissionContextReflector(Class applicationTags) throws InvocationTargetException, IllegalAccessException {
@@ -1552,15 +1487,15 @@ public void setKeepContainersAcrossApplicationAttempts(
}
}
- private static class YarnDeploymentException extends RuntimeException {
- private static final long serialVersionUID = -812040641215388943L;
-
- public YarnDeploymentException(String message) {
- super(message);
- }
+ private static class ClusterResourceDescription {
+ public final int totalFreeMemory;
+ public final int containerLimit;
+ public final int[] nodeManagersFree;
- public YarnDeploymentException(String message, Throwable cause) {
- super(message, cause);
+ public ClusterResourceDescription(int totalFreeMemory, int containerLimit, int[] nodeManagersFree) {
+ this.totalFreeMemory = totalFreeMemory;
+ this.containerLimit = containerLimit;
+ this.nodeManagersFree = nodeManagersFree;
}
}
diff --git a/flinkx-launcher/src/main/resources/log4j2.xml b/flinkx-launcher/src/main/resources/log4j2.xml
new file mode 100644
index 0000000000..e9fc82c633
--- /dev/null
+++ b/flinkx-launcher/src/main/resources/log4j2.xml
@@ -0,0 +1,25 @@
+
+
+
+
+
+ %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ${pattern}
+
+
+
+
\ No newline at end of file
diff --git a/flinkx-launcher/src/main/resources/logback.xml b/flinkx-launcher/src/main/resources/logback.xml
new file mode 100644
index 0000000000..0125d733de
--- /dev/null
+++ b/flinkx-launcher/src/main/resources/logback.xml
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+ ${CONSOLE_LOG_PATTERN}
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/constants/MetaDataEs6Cons.java b/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/constants/MetaDataEs6Cons.java
deleted file mode 100644
index 85b3193e2a..0000000000
--- a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/constants/MetaDataEs6Cons.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package com.dtstack.flinkx.metadataes6.constants;
-
-public class MetaDataEs6Cons {
-
- public static final String KEY_INDICES = "indices";
-
- public static final String KEY_ADDRESS = "address";
-
- public static final String KEY_USERNAME = "username";
-
- public static final String KEY_PASSWORD = "password";
-
- public static final String KEY_TIMEOUT = "timeout";
-
- public static final String KEY_PATH_PREFIX = "pathPrefix";
-
- public static final String KEY_INDEX_HEALTH = "health"; // green is healthy, yellow means the index is unreliable (single node), red means the index is unavailable
-
- public static final String KEY_INDEX_STATUS = "status"; // whether the index is open
-
- public static final String KEY_INDEX = "index";
-
- public static final String KEY_INDEX_PROP = "indexProperties";
-
- public static final String KEY_INDEX_UUID = "uuid"; // unique identifier of the index
-
- public static final String KEY_INDEX_PRI = "indexPri"; // number of primary shards in the cluster
-
- public static final String KEY_INDEX_REP = "replicas";
-
- public static final String KEY_INDEX_DOCS_COUNT = "docs_count"; // document count
-
- public static final String KEY_INDEX_DOCS_DELETED = "docs_deleted"; // deleted document count
-
- public static final String KEY_INDEX_SIZE = "totalsize"; // total storage size of the index
-
- public static final String KEY_INDEX_PRI_SIZE = "pri_size"; // total size of the primary shards
-
- public static final String KEY_INDEX_CREATE_TIME = "createtime"; // index creation time
-
- public static final String KEY_TYPE_NAME = "type"; // type name under the index
-
- public static final String KEY_INDEX_SHARDS = "shards"; // shard count
-
- public static final String KEY_ALIAS = "alias"; // index alias
-
- public static final String KEY_COLUMN = "column";
-
- public static final String KEY_COLUMN_NAME = "column_name"; // column (document) name
-
- public static final String KEY_DATA_TYPE = "data_type"; // data type
-
- public static final String KEY_FIELDS = "fields"; // field mappings
-
- public static final String KEY_FIELD_NAME = "field_name"; // field mapping name
-
- public static final String KEY_FIELD_PROP = "field_prop"; // field mapping properties
-
- public static final String API_METHOD_GET = "GET"; // REST API request method: GET
-}
diff --git a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/format/Metadataes6InputFormat.java b/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/format/Metadataes6InputFormat.java
deleted file mode 100644
index 1cd7b1f4c4..0000000000
--- a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/format/Metadataes6InputFormat.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package com.dtstack.flinkx.metadataes6.format;
-
-import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
-import com.dtstack.flinkx.metadataes6.constants.MetaDataEs6Cons;
-import com.dtstack.flinkx.metadataes6.utils.Es6Util;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.flink.core.io.GenericInputSplit;
-import org.apache.flink.core.io.InputSplit;
-import org.apache.flink.types.Row;
-import org.elasticsearch.client.RestClient;
-
-import java.io.IOException;
-import java.util.*;
-
-public class Metadataes6InputFormat extends BaseRichInputFormat {
-
- protected String address;
-
- protected String username;
-
- protected String password;
-
- /**
- * Names of all indices that need to be queried
- */
- protected List indices;
-
- /**
- * Position in the list of the table currently being queried
- */
- protected int start;
-
- protected Map clientConfig;
-
- private transient RestClient restClient;
-
- protected static transient ThreadLocal> indexIterator = new ThreadLocal<>();
-
- @Override
- public void openInternal(InputSplit inputSplit) throws IOException {
-
- restClient = Es6Util.getClient(address, username, password, clientConfig);
- if (CollectionUtils.isEmpty(indices)) {
- indices = showIndices();
- }
-
- LOG.info("indicesSize = {}, indices = {}",indices.size(), indices);
- indexIterator.set(indices.iterator());
-
- }
-
- @Override
- public InputSplit[] createInputSplitsInternal(int splitNum) {
-
- InputSplit[] splits = new InputSplit[splitNum];
- for (int i = 0; i < splitNum; i++) {
- splits[i] = new GenericInputSplit(i,splitNum);
- }
-
- return splits;
-
- }
-
- @Override
- protected Row nextRecordInternal(Row row) throws IOException {
-
- Map metaData = new HashMap<>(16);
- String indexName = (String) indexIterator.get().next();
- metaData.putAll(queryMetaData(indexName));
- LOG.info("query metadata: {}", metaData);
-
- return Row.of(metaData);
-
- }
-
- @Override
- protected void closeInternal() throws IOException {
-
- if(restClient != null) {
- restClient.close();
- restClient = null;
- }
-
- }
-
- /**
- * Return all indices in the ES cluster
- * @return list of indices
- * @throws IOException
- */
- protected List showIndices() throws IOException {
-
- List indiceName = new ArrayList<>();
- String[] indices = Es6Util.queryIndicesByCat(restClient);
- int n = 2;
- while (n < indices.length)
- {
- indiceName.add(indices[n]);
- n += 10;
- }
-
- return indiceName;
-
- }
-
- /**
- * Query metadata
- * @param indexName index name
- * @return metadata
- * @throws IOException
- */
- protected Map queryMetaData(String indexName) throws IOException {
-
- Map result = new HashMap<>(16);
- Map indexProp = Es6Util.queryIndexProp(indexName,restClient);
- List> alias = Es6Util.queryAliases(indexName,restClient);
- List> column = Es6Util.queryColumns(indexName,restClient);
- result.put(MetaDataEs6Cons.KEY_INDEX,indexName);
- result.put(MetaDataEs6Cons.KEY_INDEX_PROP, indexProp);
- result.put(MetaDataEs6Cons.KEY_COLUMN,column);
- result.put(MetaDataEs6Cons.KEY_ALIAS,alias);
-
- return result;
-
- }
-
- @Override
- public boolean reachedEnd(){
- return !indexIterator.get().hasNext();
- }
-}
diff --git a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/format/Metadataes6InputFormatBuilder.java b/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/format/Metadataes6InputFormatBuilder.java
deleted file mode 100644
index 1d7f168482..0000000000
--- a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/format/Metadataes6InputFormatBuilder.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package com.dtstack.flinkx.metadataes6.format;
-
-
-import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
-
-import java.util.List;
-import java.util.Map;
-
-public class Metadataes6InputFormatBuilder extends BaseRichInputFormatBuilder {
-
- private Metadataes6InputFormat format;
-
- public Metadataes6InputFormatBuilder() {
- super.format = this.format = new Metadataes6InputFormat();
- }
-
- public Metadataes6InputFormatBuilder setAddress(String address) {
- format.address = address;
- return this;
- }
-
- public Metadataes6InputFormatBuilder setUsername(String username) {
- format.username = username;
- return this;
- }
-
- public Metadataes6InputFormatBuilder setPassword(String password) {
- format.password = password;
- return this;
- }
-
- public Metadataes6InputFormatBuilder setIndices(List indices){
- format.indices = indices;
- return this;
- }
-
- public Metadataes6InputFormatBuilder setClientConfig(Map clientConfig){
- format.clientConfig = clientConfig;
- return this;
- }
- @Override
- protected void checkFormat() {
- if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){
- throw new UnsupportedOperationException("This plugin not support restore from failed state");
- }
- }
-}
diff --git a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/reader/Metadataes6Reader.java b/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/reader/Metadataes6Reader.java
deleted file mode 100644
index 75ae19730e..0000000000
--- a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/reader/Metadataes6Reader.java
+++ /dev/null
@@ -1,59 +0,0 @@
-package com.dtstack.flinkx.metadataes6.reader;
-
-
-import com.dtstack.flinkx.config.DataTransferConfig;
-import com.dtstack.flinkx.config.ReaderConfig;
-import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
-import com.dtstack.flinkx.metadataes6.constants.MetaDataEs6Cons;
-import com.dtstack.flinkx.metadataes6.format.Metadataes6InputFormatBuilder;
-import com.dtstack.flinkx.reader.BaseDataReader;
-import org.apache.flink.streaming.api.datastream.DataStream;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.types.Row;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class Metadataes6Reader extends BaseDataReader {
-
- private String address; // database address
-
- private String username;
-
- private String password;
-
- private List indices; // list of indices
-
- private Map clientConfig;
-
- public Metadataes6Reader(DataTransferConfig config, StreamExecutionEnvironment env) {
- super(config, env);
- ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader();
- address = readerConfig.getParameter().getStringVal(MetaDataEs6Cons.KEY_ADDRESS);
- username = readerConfig.getParameter().getStringVal(MetaDataEs6Cons.KEY_USERNAME);
- password = readerConfig.getParameter().getStringVal(MetaDataEs6Cons.KEY_PASSWORD);
- indices = (List) readerConfig.getParameter().getVal(MetaDataEs6Cons.KEY_INDICES);
-
-
- clientConfig = new HashMap<>();
- clientConfig.put(MetaDataEs6Cons.KEY_TIMEOUT, readerConfig.getParameter().getVal(MetaDataEs6Cons.KEY_TIMEOUT));
- clientConfig.put(MetaDataEs6Cons.KEY_PATH_PREFIX, readerConfig.getParameter().getVal(MetaDataEs6Cons.KEY_PATH_PREFIX));
- }
-
- @Override
- public DataStream readData() {
- Metadataes6InputFormatBuilder builder = new Metadataes6InputFormatBuilder();
- builder.setDataTransferConfig(dataTransferConfig);
- builder.setAddress(address);
- builder.setPassword(password);
- builder.setUsername(username);
- builder.setIndices(indices);
- builder.setClientConfig(clientConfig);
-
- BaseRichInputFormat format = builder.finish();
-
- return createInput(format);
- }
-
-}
diff --git a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/utils/Es6Util.java b/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/utils/Es6Util.java
deleted file mode 100644
index b3189b40b5..0000000000
--- a/flinkx-metadata-es6/flinkx-metadata-es6-reader/src/main/java/com/dtstack/flinkx/metadataes6/utils/Es6Util.java
+++ /dev/null
@@ -1,257 +0,0 @@
-package com.dtstack.flinkx.metadataes6.utils;
-
-import com.dtstack.flinkx.metadataes6.constants.MetaDataEs6Cons;
-import com.dtstack.flinkx.util.DateUtil;
-import com.dtstack.flinkx.util.GsonUtil;
-import com.dtstack.flinkx.util.TelnetUtil;
-import org.apache.commons.collections.MapUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.http.util.EntityUtils;
-import org.elasticsearch.client.Response;
-import org.elasticsearch.client.RestClient;
-import org.elasticsearch.client.RestClientBuilder;
-import org.apache.http.HttpHost;
-import org.apache.http.auth.AuthScope;
-import org.apache.http.auth.UsernamePasswordCredentials;
-import org.apache.http.client.CredentialsProvider;
-import org.apache.http.impl.client.BasicCredentialsProvider;
-
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-
-public class Es6Util {
-
- /**
- * Create a LowLevelRestClient connection
- * @param address ES server address, "ip:port"
- * @param username username
- * @param password password
- * @param config configuration
- * @return LowLevelRestClient
- */
- public static RestClient getClient(String address, String username, String password, Map config) {
- List httpHostList = new ArrayList<>();
- String[] addr = address.split(",");
- for(String add : addr) {
- String[] pair = add.split(":");
- TelnetUtil.telnet(pair[0], Integer.parseInt(pair[1]));
- httpHostList.add(new HttpHost(pair[0], Integer.parseInt(pair[1]), "http"));
- }
-
- RestClientBuilder builder = RestClient.builder(httpHostList.toArray(new HttpHost[0]));
-
- Integer timeout = MapUtils.getInteger(config, MetaDataEs6Cons.KEY_TIMEOUT);
- if (timeout != null){
- builder.setMaxRetryTimeoutMillis(timeout * 1000);
- }
-
- String pathPrefix = MapUtils.getString(config, MetaDataEs6Cons.KEY_PATH_PREFIX);
- if (StringUtils.isNotEmpty(pathPrefix)){
- builder.setPathPrefix(pathPrefix);
- }
- if(StringUtils.isNotBlank(username)){
- CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
- credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
- builder.setHttpClientConfigCallback(httpClientBuilder -> {
- httpClientBuilder.disableAuthCaching();
- return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
- });
- }
-
- return builder.build();
- }
-
- /**
- * Return the configuration info of the given index
- * @param indexName index name
- * @param restClient ES6 LowLevelRestClient
- * @return configuration info of the index
- * @throws IOException
- */
- public static Map queryIndexProp(String indexName,RestClient restClient) throws IOException {
- Map indexProp = new HashMap<>(16);
-
- String [] prop_1 = queryIndexByCat(indexName,restClient);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_UUID,prop_1[3]);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_SIZE,prop_1[8]);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_DOCS_COUNT,prop_1[6]);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_DOCS_DELETED,prop_1[7]);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_PRI_SIZE,prop_1[9]);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_STATUS,prop_1[1]);
-
- Map index = queryIndex(indexName,restClient);
- Map settings = ( Map) (( Map) index.get(indexName)).get("settings");
- settings = ( Map) settings.get(MetaDataEs6Cons.KEY_INDEX);
- Object creation_date = formatDate(settings.get("creation_date"));
- Object shards = settings.get("number_of_shards");
- Object replicas = settings.get("number_of_replicas");
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_CREATE_TIME,creation_date);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_SHARDS,shards);
- indexProp.put(MetaDataEs6Cons.KEY_INDEX_REP,replicas);
-
- return indexProp;
- }
-
- /**
- * Query all field info under the given index
- * @param indexName index name
- * @param restClient ES6 LowLevelRestClient
- * @return field info
- * @throws IOException
- */
- public static List> queryColumns(String indexName,RestClient restClient) throws IOException {
-
- List> columnList = new ArrayList<>();
- Map index = queryIndex(indexName,restClient);
- Map mappings = (Map) ((Map) index.get(indexName)).get("mappings");
-
- if (mappings.isEmpty()){
- return columnList;
- }
-
- for (int i = 0; i < 2; i++) {
- List keys = new ArrayList(mappings.keySet());
- mappings = (Map) mappings.get(keys.get(0));
- }
-
- return getColumn(mappings,new StringBuilder(),new ArrayList<>());
- }
-
- /**
- * Return the list of columns
- * @param docs raw map containing all field info
- * @param columnName column name
- * @param columnList processed column list
- * @return list of columns
- */
- public static List> getColumn(Map docs,StringBuilder columnName,List> columnList){
- for(String key : docs.keySet()){
- if (key.equals("properties")){
- getColumn((Map) docs.get(key),columnName,columnList);
- break;
- }else if(key.equals("type")){
- Map column = new HashMap<>();
- StringBuilder column_name = new StringBuilder(columnName);
- column.put(MetaDataEs6Cons.KEY_COLUMN_NAME,column_name);
- column.put(MetaDataEs6Cons.KEY_DATA_TYPE,docs.get(key));
- if (docs.get(MetaDataEs6Cons.KEY_FIELDS) != null){
- column.put(MetaDataEs6Cons.KEY_FIELDS,getFieldList(docs));
- }
- int cursor = columnList.size() + 1;
- column.put("cursor",cursor);
- columnList.add(column);
- break;
- } else {
- StringBuilder temp = new StringBuilder(columnName);
- if (columnName.toString().equals("")){
- columnName.append(key);
- }else {
- columnName.append(".").append(key);
- }
- getColumn((Map) docs.get(key),columnName,columnList);
- columnName.delete(0,columnName.length());
- columnName.append(temp);
- }
- }
-
- return columnList;
- }
-
- /**
- * Return field mapping properties
- * @param docs map of the field's properties
- * @return
- */
- public static List> getFieldList(Map docs){
- Map fields = (Map) docs.get("fields");
- Iterator> it = fields.entrySet().iterator();
- List> fieldsList = new ArrayList<>();
- Map field = new HashMap();
- while (it.hasNext()){
- Map.Entry entry = it.next();
- field.put(MetaDataEs6Cons.KEY_FIELD_NAME,entry.getKey());
- field.put(MetaDataEs6Cons.KEY_FIELD_PROP,entry.getValue());
- fieldsList.add(field);
- }
-
- return fieldsList;
- }
-
- /**
- * Query index aliases
- * @param indexName index name
- * @param restClient ES6 LowLevelRestClient
- * @return
- * @throws IOException
- */
- public static List> queryAliases(String indexName,RestClient restClient) throws IOException {
- List> aliasList = new ArrayList<>();
- Map alias = new HashMap();
- Map index = queryIndex(indexName,restClient);
- Map aliases = (Map) ((Map) index.get(indexName)).get("aliases");
- Iterator> it = aliases.entrySet().iterator();
-
- while (it.hasNext()){
- Map.Entry entry = it.next();
- alias.put("aliase_name",entry.getKey());
- alias.put("aliase_prop",entry.getValue());
- aliasList.add(alias);
- }
- return aliasList;
- }
-
- /**
- * Query the given index via /_cat/indices{index}
- * @param restClient ES6 LowLevelRestClient
- * @param indexName index name
- * @return
- * @throws IOException
- */
- public static String[] queryIndexByCat(String indexName,RestClient restClient) throws IOException {
- String endpoint = "/_cat/indices";
- Map params = Collections.singletonMap(MetaDataEs6Cons.KEY_INDEX, indexName);
- Response response = restClient.performRequest(MetaDataEs6Cons.API_METHOD_GET,endpoint,params);
- String resBody = EntityUtils.toString(response.getEntity());
- String [] indices = resBody.split("\\s+");
- return indices;
- }
-
- /**
- * An indexName of * queries info for all indices
- * @param restClient ES6 LowLevelRestClient
- * @return
- * @throws IOException
- */
- public static String[] queryIndicesByCat(RestClient restClient) throws IOException {
- return queryIndexByCat("*",restClient);
- }
-
- /**
- * Query detailed info of the given index via /index
- * @param indexName index name
- * @param restClient ES6 LowLevelRestClient
- * @return
- * @throws IOException
- */
- public static Map queryIndex(String indexName,RestClient restClient) throws IOException {
- String endpoint = "/"+indexName;
- Response response = restClient.performRequest(MetaDataEs6Cons.API_METHOD_GET,endpoint);
- String resBody = EntityUtils.toString(response.getEntity());
- Map index = GsonUtil.GSON.fromJson(resBody, GsonUtil.gsonMapTypeToken);
- return index;
- }
-
- /**
- * Format a date
- * @param date
- * @return
- */
- public static Object formatDate (Object date){
- long long_time =Long.parseLong(date.toString());
- Date date_time = new Date(long_time);
- SimpleDateFormat format = DateUtil.getDateTimeFormatter();
- date = format.format(date_time);
- return date;
- }
-}
diff --git a/flinkx-metadata-es6/pom.xml b/flinkx-metadata-es6/pom.xml
deleted file mode 100644
index 5aeaf82134..0000000000
--- a/flinkx-metadata-es6/pom.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-
-
-
- flinkx-all
- com.dtstack.flinkx
- 1.6
-
- 4.0.0
-
- flinkx-metadata-es6
- pom
-
-
- flinkx-metadata-es6-reader
-
-
-
-
- com.dtstack.flinkx
- flinkx-core
- 1.6
- provided
-
-
-
\ No newline at end of file
diff --git a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/pom.xml b/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/pom.xml
deleted file mode 100644
index 56a4b7eab6..0000000000
--- a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/pom.xml
+++ /dev/null
@@ -1,265 +0,0 @@
-
-
-
- flinkx-metadata-hbase
- com.dtstack.flinkx
- 1.6
-
- 4.0.0
-
- flinkx-metadata-hbase-reader
-
-
-
- com.google.guava
- guava
- 12.0.1
-
-
- com.dtstack.flinkx
- flinkx-metadata-reader
- 1.6
-
-
-
-
- org.apache.hbase
- hbase-client
- 1.3.1
-
-
- org.apache.hadoop
- hadoop-common
-
-
- org.apache.hadoop
- hadoop-auth
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
-
-
- log4j
- log4j
-
-
- guava
- com.google.guava
-
-
- commons-codec
- commons-codec
-
-
- commons-collections
- commons-collections
-
-
- commons-lang
- commons-lang
-
-
- commons-logging
- commons-logging
-
-
- jackson-core-asl
- org.codehaus.jackson
-
-
- zookeeper
- org.apache.zookeeper
-
-
- jackson-mapper-asl
- org.codehaus.jackson
-
-
- slf4j-api
- org.slf4j
-
-
- slf4j-log4j12
- org.slf4j
-
-
-
-
-
- org.apache.curator
- curator-test
- 2.6.0
- test
-
-
- zookeeper
- org.apache.zookeeper
-
-
- guava
- com.google.guava
-
-
-
-
- org.apache.hbase
- hbase-mapreduce
- 2.2.5
- compile
-
-
- commons-codec
- commons-codec
-
-
- commons-io
- commons-io
-
-
- commons-lang3
- org.apache.commons
-
-
- commons-math3
- org.apache.commons
-
-
- hadoop-annotations
- org.apache.hadoop
-
-
- hadoop-auth
- org.apache.hadoop
-
-
- hadoop-common
- org.apache.hadoop
-
-
- hadoop-hdfs
- org.apache.hadoop
-
-
- hadoop-mapreduce-client-core
- org.apache.hadoop
-
-
- hbase-client
- org.apache.hbase
-
-
- hbase-common
- org.apache.hbase
-
-
- hbase-protocol
- org.apache.hbase
-
-
- zookeeper
- org.apache.zookeeper
-
-
- javassist
- org.javassist
-
-
- slf4j-log4j12
- org.slf4j
-
-
- slf4j-api
- org.slf4j
-
-
- log4j
- log4j
-
-
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-shade-plugin
- 3.1.0
-
-
- package
-
- shade
-
-
- false
-
-
- org.slf4j:slf4j-api
- log4j:log4j
- ch.qos.logback:*
-
-
-
-
- *:*
-
- META-INF/*.SF
- META-INF/*.DSA
- META-INF/*.RSA
-
-
-
-
-
- io.netty
- shade.metadatahbase.io.netty
-
-
- com.google.common
- shade.metadatahbase.com.google.common
-
-
- com.google.thirdparty
- shade.metadatahbase.com.google.thirdparty
-
-
-
-
-
-
-
-
- maven-antrun-plugin
- 1.2
-
-
- copy-resources
-
- package
-
- run
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/main/java/com/dtstack/flinkx/metadatahbase/inputformat/MetadatahbaseInputFormat.java b/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/main/java/com/dtstack/flinkx/metadatahbase/inputformat/MetadatahbaseInputFormat.java
deleted file mode 100644
index 71e4970aa9..0000000000
--- a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/main/java/com/dtstack/flinkx/metadatahbase/inputformat/MetadatahbaseInputFormat.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.dtstack.flinkx.metadatahbase.inputformat;
-
-import com.dtstack.flinkx.constants.ConstantValue;
-import com.dtstack.flinkx.enums.SizeUnitType;
-import com.dtstack.flinkx.metadata.inputformat.BaseMetadataInputFormat;
-import com.dtstack.flinkx.metadata.inputformat.MetadataInputSplit;
-import com.dtstack.flinkx.metadatahbase.util.HbaseHelper;
-import com.dtstack.flinkx.util.ExceptionUtil;
-import com.dtstack.flinkx.util.ZkHelper;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.flink.core.io.InputSplit;
-import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.RegionLoad;
-import org.apache.hadoop.hbase.ServerLoad;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.zookeeper.ZooKeeper;
-
-import java.io.IOException;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import static com.dtstack.flinkx.constants.ConstantValue.COMMA_SYMBOL;
-import static com.dtstack.flinkx.metadata.MetaDataCons.KEY_COLUMN;
-import static com.dtstack.flinkx.metadata.MetaDataCons.KEY_TABLE_PROPERTIES;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_COLUMN_FAMILY;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_CREATE_TIME;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_NAMESPACE;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_REGION_COUNT;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_STORAGE_SIZE;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_TABLE_NAME;
-import static com.dtstack.flinkx.util.ZkHelper.APPEND_PATH;
-
-/** Fetches table metadata from HBase
- * @author kunni@dtstack.com
- */
-public class MetadatahbaseInputFormat extends BaseMetadataInputFormat {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * Configuration used to connect to HBase
- */
- protected Map<String, Object> hadoopConfig;
-
- /**
- * HBase connection
- */
- protected Connection hbaseConnection;
-
- protected Admin admin;
-
- protected Map<String, Long> createTimeMap;
-
- protected Map<String, Integer> tableSizeMap;
-
- protected ZooKeeper zooKeeper;
-
- protected String path;
-
- /**
- * Overridden because the connection type differs from the base class
- * @param inputSplit a namespace and the tables to query within it
- */
- @Override
- protected void openInternal(InputSplit inputSplit) throws IOException{
- LOG.info("inputSplit = {}", inputSplit);
- currentDb.set(((MetadataInputSplit) inputSplit).getDbName());
- tableList = ((MetadataInputSplit) inputSplit).getTableList();
- try {
- createTimeMap = queryCreateTimeMap(hadoopConfig);
- hbaseConnection = HbaseHelper.getHbaseConnection(hadoopConfig);
- hadoopConfig.forEach((key,value)->{
- LOG.info("{}:{} ",key,value);
- });
- admin = hbaseConnection.getAdmin();
- tableSizeMap = generateTableSizeMap();
- if(CollectionUtils.isEmpty(tableList)){
- tableList = showTables();
- }
- LOG.info("current database = {}, tableSize = {}, tableList = {}",currentDb.get(), tableList.size(), tableList);
- tableIterator.set(tableList.iterator());
- }catch (Exception e){
- throw new IOException(e);
- }
- }
-
- /**
- * A table's storage size is the sum of its regions' store file sizes; the error is at most 1 MB per region
- * @return map from table name to total store file size in MB
- * @throws Exception if the cluster status cannot be read
- */
- private Map<String, Integer> generateTableSizeMap() throws Exception{
- Map<String, Integer> sizeMap = new HashMap<>(16);
- ClusterStatus clusterStatus = admin.getClusterStatus();
- for (ServerName serverName : clusterStatus.getServers()) {
- ServerLoad serverLoad = clusterStatus.getLoad(serverName);
- for (Map.Entry<byte[], RegionLoad> entry : serverLoad.getRegionsLoad().entrySet()) {
- RegionLoad regionLoad = entry.getValue();
- String regionName = new String(entry.getKey(), "UTF-8");
- String[] regionSplits = regionName.split(COMMA_SYMBOL);
- //regionSplits[0] is the table name
- int sumSize = sizeMap.getOrDefault(regionSplits[0], 0) + regionLoad.getStorefileSizeMB();
- sizeMap.put(regionSplits[0], sumSize);
- }
- }
- return sizeMap;
- }
-
- @Override
- protected void closeInternal() {
- HbaseHelper.closeAdmin(admin);
- HbaseHelper.closeConnection(hbaseConnection);
- }
-
- @Override
- protected List<String> showTables() throws SQLException {
- List<String> tableNameList = new LinkedList<>();
- try {
- HTableDescriptor[] tableNames = admin.listTableDescriptorsByNamespace(currentDb.get());
- for (HTableDescriptor table : tableNames){
- TableName tableName = table.getTableName();
- // skip system tables
- if(!tableName.isSystemTable()){
- //the table name carries the namespace at this point and needs to be stripped
- String tableWithNameSpace = tableName.getNameAsString();
- if(tableWithNameSpace.contains(ConstantValue.COLON_SYMBOL)){
- tableWithNameSpace = tableWithNameSpace.split(ConstantValue.COLON_SYMBOL)[1];
- }
- tableNameList.add(tableWithNameSpace);
- }
- }
- }catch (IOException e){
- LOG.error("query table list failed. currentDb = {}, Exception = {}", currentDb.get(), ExceptionUtil.getErrorMessage(e));
- throw new SQLException(e);
- }
- return tableNameList;
- }
-
- @Override
- protected void switchDatabase(String databaseName) {
- currentDb.set(databaseName);
- }
-
- @Override
- protected Map<String, Object> queryMetaData(String tableName) throws SQLException {
- Map<String, Object> result = new HashMap<>(16);
- tableName = String.format("%s:%s", currentDb.get(), tableName);
- result.put(KEY_TABLE_PROPERTIES, queryTableProperties(tableName));
- result.put(KEY_COLUMN, queryColumnList(tableName));
- return result;
- }
-
- /**
- * Fetches table-level metadata for an HBase table
- * @param tableName table name
- * @return table metadata
- * @throws SQLException sql exception
- */
- protected Map<String, Object> queryTableProperties(String tableName) throws SQLException {
- Map<String, Object> tableProperties = new HashMap<>(16);
- try{
- HTableDescriptor table = admin.getTableDescriptor(TableName.valueOf(tableName));
- List<HRegionInfo> regionInfos = admin.getTableRegions(table.getTableName());
- tableProperties.put(KEY_REGION_COUNT, regionInfos.size());
- //normalize the table size unit to bytes
- String tableSize = SizeUnitType.covertUnit(SizeUnitType.MB, SizeUnitType.B, Long.valueOf(tableSizeMap.get(table.getNameAsString())));
- tableProperties.put(KEY_STORAGE_SIZE, Long.valueOf(tableSize));
- tableProperties.put(KEY_CREATE_TIME, createTimeMap.get(table.getNameAsString()));
- //the table name here carries the namespace
- if(tableName.contains(ConstantValue.COLON_SYMBOL)){
- tableName = tableName.split(ConstantValue.COLON_SYMBOL)[1];
- }
- tableProperties.put(KEY_TABLE_NAME, tableName);
- tableProperties.put(KEY_NAMESPACE, currentDb.get());
- }catch (IOException e){
- LOG.error("query tableProperties failed. {}", ExceptionUtil.getErrorMessage(e));
- throw new SQLException(e);
- }
- return tableProperties;
- }
-
- /**
- * Fetches the column families of a table
- * @param tableName table name
- * @return column families
- */
- protected List<Map<String, Object>> queryColumnList(String tableName) throws SQLException {
- List<Map<String, Object>> columnList = new ArrayList<>();
- try{
- HTableDescriptor table = admin.getTableDescriptor(TableName.valueOf(tableName));
- HColumnDescriptor[] columnDescriptors = table.getColumnFamilies();
- for (HColumnDescriptor column : columnDescriptors){
- Map<String, Object> map = new HashMap<>(16);
- map.put(KEY_COLUMN_FAMILY, column.getNameAsString());
- columnList.add(map);
- }
- }catch (IOException e){
- LOG.error("query columnList failed. {}", ExceptionUtil.getErrorMessage(e));
- throw new SQLException(e);
- }
- return columnList;
- }
-
- /**
- * Queries the creation time of each HBase table from ZooKeeper.
- * Returns an empty map if ZooKeeper cannot be accessed.
- * @param hadoopConfig hadoop configuration
- * @return mapping from table name to creation time
- */
- protected Map<String, Long> queryCreateTimeMap(Map<String, Object> hadoopConfig) {
- Map<String, Long> createTimeMap = new HashMap<>(16);
- try{
- zooKeeper = ZkHelper.createZkClient((String) hadoopConfig.get(HConstants.ZOOKEEPER_QUORUM), ZkHelper.DEFAULT_TIMEOUT);
- List<String> tables = ZkHelper.getChildren(zooKeeper, path);
- if(tables != null){
- for(String table : tables){
- LOG.info(table);
- createTimeMap.put(table, ZkHelper.getCreateTime(zooKeeper,path + ConstantValue.SINGLE_SLASH_SYMBOL + table));
- }
- }
- }catch (Exception e){
- LOG.error("query createTime map failed, error {} ", ExceptionUtil.getErrorMessage(e));
- }finally {
- ZkHelper.closeZooKeeper(zooKeeper);
- }
- return createTimeMap;
- }
-
- public void setPath(String path){
- this.path = path + APPEND_PATH;
- }
-
- @Override
- protected String quote(String name) {
- return name;
- }
-
- public void setHadoopConfig(Map<String, Object> hadoopConfig){
- this.hadoopConfig = hadoopConfig;
- }
-}
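
For reference, the removed generateTableSizeMap() walks the cluster status and sums RegionLoad store file sizes per table; the segment of the region name before the first comma is the (possibly namespace-qualified) table name. A minimal standalone sketch of the same aggregation against the HBase 1.x client API follows; the class and method names are illustrative only, not part of the original module.

    import org.apache.hadoop.hbase.ClusterStatus;
    import org.apache.hadoop.hbase.RegionLoad;
    import org.apache.hadoop.hbase.ServerLoad;
    import org.apache.hadoop.hbase.ServerName;
    import org.apache.hadoop.hbase.client.Admin;

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;

    public class TableSizeSketch {
        /**
         * Sums store file sizes (MB) per table. Region names look like
         * "[namespace:]table,startKey,regionId.encodedName.", so everything
         * before the first comma is the table name.
         */
        public static Map<String, Integer> tableSizesMb(Admin admin) throws IOException {
            Map<String, Integer> sizes = new HashMap<>();
            ClusterStatus status = admin.getClusterStatus();
            for (ServerName server : status.getServers()) {
                ServerLoad load = status.getLoad(server);
                for (Map.Entry<byte[], RegionLoad> e : load.getRegionsLoad().entrySet()) {
                    String regionName = new String(e.getKey(), StandardCharsets.UTF_8);
                    String table = regionName.split(",")[0];
                    sizes.merge(table, e.getValue().getStorefileSizeMB(), Integer::sum);
                }
            }
            return sizes;
        }
    }

Because RegionLoad reports sizes in whole megabytes, the per-table total can be off by up to 1 MB per region, which is the error margin the original comment warns about.
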
diff --git a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/main/java/com/dtstack/flinkx/metadatahbase/reader/MetadatahbaseReader.java b/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/main/java/com/dtstack/flinkx/metadatahbase/reader/MetadatahbaseReader.java
deleted file mode 100644
index db539a1253..0000000000
--- a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/main/java/com/dtstack/flinkx/metadatahbase/reader/MetadatahbaseReader.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.dtstack.flinkx.metadatahbase.reader;
-
-import com.dtstack.flinkx.config.DataTransferConfig;
-import com.dtstack.flinkx.metadata.inputformat.MetadataInputFormatBuilder;
-import com.dtstack.flinkx.metadata.reader.MetadataReader;
-import com.dtstack.flinkx.metadatahbase.inputformat.MetadatahbaseInputFormat;
-import com.dtstack.flinkx.metadatahbase.inputformat.MetadatahbaseInputFormatBuilder;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.hadoop.hbase.HConstants;
-
-import java.util.Map;
-
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_HADOOP_CONFIG;
-import static com.dtstack.flinkx.metadatahbase.util.HbaseCons.KEY_PATH;
-import static com.dtstack.flinkx.util.ZkHelper.DEFAULT_PATH;
-
-/**
- * Reads the hbase config parameter and applies it to the reader
- * @author kunni@dtstack.com
- */
-public class MetadatahbaseReader extends MetadataReader {
-
- private Map<String, Object> hadoopConfig;
-
- private String path;
-
- @SuppressWarnings("unchecked")
- public MetadatahbaseReader(DataTransferConfig config, StreamExecutionEnvironment env) {
- super(config, env);
- hadoopConfig = (Map<String, Object>) config.getJob().getContent()
- .get(0).getReader().getParameter().getVal(KEY_HADOOP_CONFIG);
- if(!hadoopConfig.containsKey(HConstants.ZOOKEEPER_QUORUM)){
- hadoopConfig.put(HConstants.ZOOKEEPER_QUORUM, jdbcUrl);
- }
- path = config.getJob().getContent().get(0).getReader()
- .getParameter().getStringVal(KEY_PATH, DEFAULT_PATH);
- if(!hadoopConfig.containsKey(HConstants.ZOOKEEPER_ZNODE_PARENT)){
- hadoopConfig.put(HConstants.ZOOKEEPER_ZNODE_PARENT, path);
- }
- }
-
- @Override
- protected MetadataInputFormatBuilder getBuilder(){
- MetadatahbaseInputFormatBuilder builder = new MetadatahbaseInputFormatBuilder(new MetadatahbaseInputFormat());
- builder.setHadoopConfig(hadoopConfig);
- builder.setDataTransferConfig(dataTransferConfig);
- builder.setPath(path);
- return builder;
- }
-
-}
diff --git a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/test/java/com/dtstack/flinkx/metadatahbase/inputformat/MetadatahbaseInputFormatTest.java b/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/test/java/com/dtstack/flinkx/metadatahbase/inputformat/MetadatahbaseInputFormatTest.java
deleted file mode 100644
index a120eb501e..0000000000
--- a/flinkx-metadata-hbase/flinkx-metadata-hbase-reader/src/test/java/com/dtstack/flinkx/metadatahbase/inputformat/MetadatahbaseInputFormatTest.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.dtstack.flinkx.metadatahbase.inputformat;
-
-import org.apache.curator.framework.CuratorFramework;
-import org.apache.curator.framework.CuratorFrameworkFactory;
-import org.apache.curator.retry.ExponentialBackoffRetry;
-import org.apache.curator.test.TestingServer;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.zookeeper.CreateMode;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-public class MetadatahbaseInputFormatTest {
-
- protected MetadatahbaseInputFormat inputFormat = new MetadatahbaseInputFormat();
-
- private static TestingServer server;
-
- @Before
- public void createZkServer() throws Exception {
- server = new TestingServer(2191, true);
- server.start();
- CuratorFramework client = CuratorFrameworkFactory.builder()
- .connectString("localhost:2191")
- .connectionTimeoutMs(5000)
- .retryPolicy(new ExponentialBackoffRetry(1000, 3))
- .build();
- client.start();
- client.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath("/hbase/table/test1", "init".getBytes());
- client.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath("/hbase/table/test2", "init".getBytes());
- client.close();
- }
-
- @Test
- public void testSetPath(){
- inputFormat.setPath("/hbase");
- Assert.assertEquals(inputFormat.path, "/hbase/table");
- }
-
- @Test
- public void testQuote(){
- Assert.assertEquals(inputFormat.quote("table"), "table");
- }
-
- @Test
- public void testQueryCreateTimeMap(){
- Map<String, Object> hadoopConfig = new HashMap<>();
- hadoopConfig.put(HConstants.ZOOKEEPER_QUORUM, "localhost:2191");
- inputFormat.setPath("/hbase");
- Assert.assertEquals(inputFormat.queryCreateTimeMap(hadoopConfig).size(),0);
- }
-
- @After
- public void closedZkServer() throws IOException {
- server.close();
- }
-}
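
The test above exercises queryCreateTimeMap(), which reads the creation time of each table znode from ZooKeeper. A minimal sketch of that lookup using the plain ZooKeeper client follows, assuming the default /hbase/table layout; ZkHelper in the deleted code wraps equivalent calls, and the names here are illustrative.

    import org.apache.zookeeper.ZooKeeper;
    import org.apache.zookeeper.data.Stat;

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class CreateTimeSketch {
        /** Reads each table znode's ctime under the given parent, e.g. "/hbase/table". */
        public static Map<String, Long> createTimes(String quorum, String parent) throws Exception {
            ZooKeeper zk = new ZooKeeper(quorum, 5000, event -> { });
            try {
                Map<String, Long> result = new HashMap<>();
                List<String> tables = zk.getChildren(parent, false);
                for (String table : tables) {
                    Stat stat = zk.exists(parent + "/" + table, false);
                    if (stat != null) {
                        result.put(table, stat.getCtime());
                    }
                }
                return result;
            } finally {
                zk.close();
            }
        }
    }

If the quorum is unreachable or the znodes cannot be read, the deleted implementation logs the error and falls back to an empty map instead of failing the job, which is what the last test case asserts.
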
diff --git a/flinkx-metadata-hive1/flinkx-metadata-hive1-reader/pom.xml b/flinkx-metadata-hive1/flinkx-metadata-hive1-reader/pom.xml
deleted file mode 100644
index 5fd45d15ba..0000000000
--- a/flinkx-metadata-hive1/flinkx-metadata-hive1-reader/pom.xml
+++ /dev/null
@@ -1,246 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <parent>
-        <artifactId>flinkx-metadata-hive1</artifactId>
-        <groupId>com.dtstack.flinkx</groupId>
-        <version>1.6</version>
-    </parent>
-    <modelVersion>4.0.0</modelVersion>
-
-    <artifactId>flinkx-metadata-hive1-reader</artifactId>
-
-    <dependencies>
-        <dependency>
-            <groupId>com.dtstack.flinkx</groupId>
-            <artifactId>flinkx-metadata-reader</artifactId>
-            <version>1.6</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hive</groupId>
-            <artifactId>hive-jdbc</artifactId>
-            <version>1.1.1</version>
-            <exclusions>
-                <exclusion>
-                    <artifactId>slf4j-log4j12</artifactId>
-                    <groupId>org.slf4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>log4j-slf4j-impl</artifactId>
-                    <groupId>org.apache.logging.log4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>log4j-web</artifactId>
-                    <groupId>org.apache.logging.log4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>log4j-core</artifactId>
-                    <groupId>org.apache.logging.log4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>log4j-api</artifactId>
-                    <groupId>org.apache.logging.log4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>log4j-1.2-api</artifactId>
-                    <groupId>org.apache.logging.log4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>netty-all</artifactId>
-                    <groupId>io.netty</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>hive-common</artifactId>
-                    <groupId>org.apache.hive</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>parquet-hadoop-bundle</artifactId>
-                    <groupId>org.apache.parquet</groupId>
-                </exclusion>
-                <exclusion>
-                    <groupId>xerces</groupId>
-                    <artifactId>xercesImpl</artifactId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>hbase-client</artifactId>
-                    <groupId>org.apache.hbase</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>curator-framework</artifactId>
-                    <groupId>org.apache.curator</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>zookeeper</artifactId>
-                    <groupId>org.apache.zookeeper</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>slf4j-api</artifactId>
-                    <groupId>org.slf4j</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>commons-cli</artifactId>
-                    <groupId>commons-cli</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>commons-compress</artifactId>
-                    <groupId>org.apache.commons</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>commons-lang</artifactId>
-                    <groupId>commons-lang</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>guava</artifactId>
-                    <groupId>com.google.guava</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>gson</artifactId>
-                    <groupId>com.google.code.gson</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>avro</artifactId>
-                    <groupId>org.apache.avro</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>hbase-common</artifactId>
-                    <groupId>org.apache.hbase</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>hbase-hadoop2-compat</artifactId>
-                    <groupId>org.apache.hbase</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>hbase-server</artifactId>
-                    <groupId>org.apache.hbase</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>tephra-hbase-compat-1.0</artifactId>
-                    <groupId>co.cask.tephra</groupId>
-                </exclusion>
-                <exclusion>
-                    <artifactId>hbase-hadoop-compat</artifactId>
-                    <groupId>org.apache.hbase</groupId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.dtstack.flinkx</groupId>
-            <artifactId>flinkx-metadata-hive2-reader</artifactId>
-            <version>1.6</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.hive</groupId>
-                    <artifactId>hive-jdbc</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hive</groupId>
-                    <artifactId>hive-serde</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-    </dependencies>
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-shade-plugin</artifactId>
-                <version>3.1.0</version>
-                <executions>
-                    <execution>
-                        <phase>package</phase>
-                        <goals>
-                            <goal>shade</goal>
-                        </goals>
-                        <configuration>
-                            <createDependencyReducedPom>false</createDependencyReducedPom>
-                            <artifactSet>
-                                <excludes>
-                                    <exclude>org.slf4j:slf4j-api</exclude>
-                                    <exclude>log4j:log4j</exclude>
-                                    <exclude>ch.qos.logback:*</exclude>
-                                </excludes>
-                            </artifactSet>
-                            <filters>
-                                <filter>
-                                    <artifact>*:*</artifact>
-                                    <excludes>
-                                        <exclude>META-INF/*.SF</exclude>
-                                        <exclude>META-INF/*.DSA</exclude>
-                                        <exclude>META-INF/*.RSA</exclude>
-                                    </excludes>
-                                </filter>
-                            </filters>
-                            <relocations>
-                                <relocation>
-                                    <pattern>org.apache.hive.jdbc</pattern>
-                                    <shadedPattern>shade.hive1.jdbc</shadedPattern>
-                                </relocation>
-                                <relocation>
-                                    <pattern>org.apache.hadoop.hive.serde2</pattern>
-                                    <shadedPattern>shade.hive1.serde2</shadedPattern>
-                                </relocation>
-                                <relocation>
-                                    <pattern>org.apache.hive.service</pattern>
-                                    <shadedPattern>shade.hive1.service</shadedPattern>
-                                </relocation>
-                                <relocation>
-                                    <pattern>com.google.common</pattern>
-                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
-                                </relocation>
-                                <relocation>
-                                    <pattern>com.google.thirdparty</pattern>
-                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
-                                </relocation>
-                                <relocation>
-                                    <pattern>org.apache.http</pattern>
-                                    <shadedPattern>shade.metadatahive1.org.apache.http</shadedPattern>
-                                </relocation>
-                            </relocations>
-
-
-
- META-INF/services/java.sql.Driver
-
-
- META-INF/services
- java.sql.hive1.Driver
-
-
-
-
-
-
-
-
-            <plugin>
-                <artifactId>maven-antrun-plugin</artifactId>
-                <version>1.2</version>
-                <executions>
-                    <execution>
-                        <id>copy-resources</id>
-                        <phase>package</phase>
-                        <goals>
-                            <goal>run</goal>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/constants/Hive2MetaDataCons.java b/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/constants/Hive2MetaDataCons.java
deleted file mode 100644
index 004e69230c..0000000000
--- a/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/constants/Hive2MetaDataCons.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.metadatahive2.constants;
-
-import com.dtstack.flinkx.metadata.MetaDataCons;
-
-/**
- * @author : tiezhu
- * @date : 2020/3/9
- * @description :
- */
-@SuppressWarnings("all")
-public class Hive2MetaDataCons extends MetaDataCons {
- public static final String DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
- public static final String KEY_HADOOP_CONFIG = "hadoopConfig";
-
- public static final String KEY_SOURCE = "source";
- public static final String KEY_VERSION = "version";
-
- public static final String TEXT_FORMAT = "TextOutputFormat";
- public static final String ORC_FORMAT = "OrcOutputFormat";
- public static final String PARQUET_FORMAT = "MapredParquetOutputFormat";
-
- public static final String TYPE_TEXT = "text";
- public static final String TYPE_ORC = "orc";
- public static final String TYPE_PARQUET = "parquet";
-
- public static final String PARTITION_INFORMATION = "# Partition Information";
- public static final String TABLE_INFORMATION = "# Detailed Table Information";
-
- // column labels in the result of "desc formatted"
- public static final String KEY_RESULTSET_COL_NAME = "# col_name";
- public static final String KEY_RESULTSET_DATA_TYPE = "data_type";
- public static final String KEY_RESULTSET_COMMENT = "comment";
-
- public static final String KEY_COL_LOCATION = "Location:";
- public static final String KEY_COL_CREATETIME = "CreateTime:";
- public static final String KEY_COL_CREATE_TIME = "Create Time:";
- public static final String KEY_COL_LASTACCESSTIME = "LastAccessTime:";
- public static final String KEY_COL_LAST_ACCESS_TIME = "Last Access Time:";
- public static final String KEY_COL_OUTPUTFORMAT = "OutputFormat:";
- public static final String KEY_COL_TABLE_PARAMETERS = "Table Parameters:";
-
- public static final String KEY_LOCATION = "location";
- public static final String KEY_CREATETIME = "createTime";
- public static final String KEY_LASTACCESSTIME = "lastAccessTime";
- public static final String KEY_TOTALSIZE = "totalSize";
- public static final String KEY_TRANSIENT_LASTDDLTIME = "transient_lastDdlTime";
-
- public static final String KEY_NAME = "name";
- public static final String KEY_VALUE = "value";
-
-
- public static final String SQL_QUERY_DATA = "desc formatted %s";
- public static final String SQL_SHOW_PARTITIONS = "show partitions %s";
-}
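
These constants mirror the layout of Hive's "desc formatted" output: three result columns (col_name, data_type, comment), with rows starting with '#' such as "# Partition Information" and "# Detailed Table Information" acting as section markers that the reader keys on. A small JDBC sketch of issuing that statement and reading the three columns follows; the connection URL and table name are placeholders, not values from the original module.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class DescFormattedSketch {
        public static void main(String[] args) throws Exception {
            // Assumes a reachable HiveServer2; DRIVER_NAME above is the driver class used here.
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            try (Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
                 Statement stmt = conn.createStatement();
                 // SQL_QUERY_DATA = "desc formatted %s"; table names are wrapped in backticks by quote()
                 ResultSet rs = stmt.executeQuery(String.format("desc formatted %s", "`demo_table`"))) {
                while (rs.next()) {
                    // Columns 1..3 are col_name, data_type and comment.
                    System.out.printf("%s | %s | %s%n",
                            rs.getString(1), rs.getString(2), rs.getString(3));
                }
            }
        }
    }
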
diff --git a/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/constants/HiveDbUtil.java b/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/constants/HiveDbUtil.java
deleted file mode 100644
index f98b7860f8..0000000000
--- a/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/constants/HiveDbUtil.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.dtstack.flinkx.metadatahive2.constants;
-
-import com.dtstack.flinkx.authenticate.KerberosUtil;
-import com.dtstack.flinkx.util.ExceptionUtil;
-import com.dtstack.flinkx.util.FileSystemUtil;
-import com.dtstack.flinkx.util.RetryUtil;
-import org.apache.commons.collections.MapUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.security.PrivilegedAction;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- * @author toutian
- */
-
-public final class HiveDbUtil {
-
- public static final String SQLSTATE_USERNAME_PWD_ERROR = "28000";
- public static final String SQLSTATE_CANNOT_ACQUIRE_CONNECT = "08004";
- public static final int JDBC_PART_SIZE = 2;
- public static final String JDBC_REGEX = "[?|;|#]";
- public static final String KEY_VAL_DELIMITER = "=";
- public static final String PARAM_DELIMITER = "&";
- public static final String KEY_PRINCIPAL = "principal";
- private static Logger LOG = LoggerFactory.getLogger(HiveDbUtil.class);
- private static ReentrantLock lock = new ReentrantLock();
-
- private HiveDbUtil() {
- }
-
- public static Connection getConnection(ConnectionInfo connectionInfo) {
- if(openKerberos(connectionInfo.getJdbcUrl())){
- return getConnectionWithKerberos(connectionInfo);
- } else {
- return getConnectionWithRetry(connectionInfo);
- }
- }
-
- private static Connection getConnectionWithRetry(ConnectionInfo connectionInfo){
- try {
- return RetryUtil.executeWithRetry(() -> connect(connectionInfo), 1, 1000L, false);
- } catch (Exception e1) {
- throw new RuntimeException(String.format("Error occurred while connecting to %s: %s.", connectionInfo.getJdbcUrl(), ExceptionUtil.getErrorMessage(e1)));
- }
- }
-
- private static Connection getConnectionWithKerberos(ConnectionInfo connectionInfo){
- if(connectionInfo.getHiveConf() == null || connectionInfo.getHiveConf().isEmpty()){
- throw new IllegalArgumentException("hiveConf can not be null or empty");
- }
-
- String keytabFileName = KerberosUtil.getPrincipalFileName(connectionInfo.getHiveConf());
-
- keytabFileName = KerberosUtil.loadFile(connectionInfo.getHiveConf(), keytabFileName);
- String principal = KerberosUtil.getPrincipal(connectionInfo.getHiveConf(), keytabFileName);
- KerberosUtil.loadKrb5Conf(connectionInfo.getHiveConf());
-
- Configuration conf = FileSystemUtil.getConfiguration(connectionInfo.getHiveConf(), null);
-
- UserGroupInformation ugi;
- try {
- ugi = KerberosUtil.loginAndReturnUgi(conf, principal, keytabFileName);
- } catch (Exception e){
- throw new RuntimeException("Login kerberos error:", e);
- }
-
- LOG.info("current ugi:{}", ugi);
- return ugi.doAs((PrivilegedAction<Connection>) () -> getConnectionWithRetry(connectionInfo));
- }
-
- private static boolean openKerberos(final String jdbcUrl){
- String[] splits = jdbcUrl.split(JDBC_REGEX);
- if (splits.length != JDBC_PART_SIZE) {
- return false;
- }
-
- String paramsStr = splits[1];
- String[] paramArray = paramsStr.split(PARAM_DELIMITER);
- for (String param : paramArray) {
- String[] keyVal = param.split(KEY_VAL_DELIMITER);
- if(KEY_PRINCIPAL.equalsIgnoreCase(keyVal[0])){
- return true;
- }
- }
-
- return false;
- }
-
- public static Connection connect(ConnectionInfo connectionInfo) {
- lock.lock();
- try {
- Class.forName(connectionInfo.getDriver());
- DriverManager.setLoginTimeout(connectionInfo.getTimeout());
- if(StringUtils.isNotBlank(connectionInfo.getUsername())){
- return DriverManager.getConnection(connectionInfo.getJdbcUrl(), connectionInfo.getUsername(), connectionInfo.getPassword());
- }else{
- return DriverManager.getConnection(connectionInfo.getJdbcUrl());
- }
- } catch (SQLException e) {
- if (SQLSTATE_USERNAME_PWD_ERROR.equals(e.getSQLState())) {
- throw new RuntimeException("Invalid username or password.");
- } else if (SQLSTATE_CANNOT_ACQUIRE_CONNECT.equals(e.getSQLState())) {
- throw new RuntimeException("The application server refused the connection.");
- } else {
- throw new RuntimeException("Connection info: " + connectionInfo.getJdbcUrl() + ", error: " + ExceptionUtil.getErrorMessage(e));
- }
- } catch (Exception e1) {
- throw new RuntimeException("Connection info: " + connectionInfo.getJdbcUrl() + ", error: " + ExceptionUtil.getErrorMessage(e1));
- } finally {
- lock.unlock();
- }
- }
-
- public static class ConnectionInfo{
- private String jdbcUrl;
- private String username;
- private String password;
- private String driver;
- private int timeout = 30000;
- private Map<String, Object> hiveConf;
-
- public String getJdbcUrl() {
- return jdbcUrl;
- }
-
- public void setJdbcUrl(String jdbcUrl) {
- this.jdbcUrl = jdbcUrl;
- }
-
- public String getUsername() {
- return username;
- }
-
- public void setUsername(String username) {
- this.username = username;
- }
-
- public String getPassword() {
- return password;
- }
-
- public void setPassword(String password) {
- this.password = password;
- }
-
- public Map<String, Object> getHiveConf() {
- return hiveConf;
- }
-
- public void setHiveConf(Map<String, Object> hiveConf) {
- this.hiveConf = hiveConf;
- }
-
- public int getTimeout() {
- return timeout;
- }
-
- public void setTimeout(int timeout) {
- this.timeout = timeout;
- }
-
- public String getDriver(){
- return driver;
- }
-
- public void setDriver(String driver){
- this.driver = driver;
- }
-
- @Override
- public String toString() {
- return "ConnectionInfo{" +
- "jdbcUrl='" + jdbcUrl + '\'' +
- ", username='" + username + '\'' +
- ", password='" + password + '\'' +
- ", timeout='" + timeout + '\'' +
- ", driver='" + driver + '\'' +
- ", hiveConf=" + hiveConf +
- '}';
- }
- }
-
-}
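
The removed openKerberos() decides between a plain login and a Kerberos login by splitting the JDBC URL on the characters '?', ';' and '#' and scanning the parameter part for a principal key. A small self-contained sketch of that check follows; the class name and the sample URLs are illustrative.

    public class KerberosUrlCheckSketch {
        // Mirrors the deleted openKerberos(): a URL is treated as kerberized when its
        // parameter part (after '?', ';' or '#') contains a "principal" key.
        static boolean hasPrincipal(String jdbcUrl) {
            String[] parts = jdbcUrl.split("[?|;|#]");
            if (parts.length != 2) {
                return false;
            }
            for (String param : parts[1].split("&")) {
                String[] kv = param.split("=");
                if ("principal".equalsIgnoreCase(kv[0])) {
                    return true;
                }
            }
            return false;
        }

        public static void main(String[] args) {
            System.out.println(hasPrincipal("jdbc:hive2://host:10000/db"));                                   // false
            System.out.println(hasPrincipal("jdbc:hive2://host:10000/db;principal=hive/_HOST@EXAMPLE.COM"));  // true
        }
    }

When a principal is present, the deleted code loads the keytab and krb5.conf via KerberosUtil and opens the connection inside UserGroupInformation.doAs(); otherwise it connects directly with retry.
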
diff --git a/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/inputformat/Metadatahive2InputFormat.java b/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/inputformat/Metadatahive2InputFormat.java
deleted file mode 100644
index 4bbce68a59..0000000000
--- a/flinkx-metadata-hive2/flinkx-metadata-hive2-reader/src/main/java/com/dtstack/flinkx/metadatahive2/inputformat/Metadatahive2InputFormat.java
+++ /dev/null
@@ -1,368 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.metadatahive2.inputformat;
-
-import com.dtstack.flinkx.constants.ConstantValue;
-import com.dtstack.flinkx.metadata.inputformat.BaseMetadataInputFormat;
-import com.dtstack.flinkx.metadatahive2.constants.HiveDbUtil;
-import org.apache.commons.lang3.StringUtils;
-
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.sql.ResultSetMetaData;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-
-import static com.dtstack.flinkx.metadata.MetaDataCons.KEY_INDEX_COMMENT;
-import static com.dtstack.flinkx.metadata.MetaDataCons.KEY_TABLE_COMMENT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COLUMN;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COLUMN_COMMENT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COLUMN_DATA_TYPE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COLUMN_INDEX;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COLUMN_NAME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COLUMN_TYPE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_CREATETIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_CREATE_TIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_LASTACCESSTIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_LAST_ACCESS_TIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_LOCATION;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_NAME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_OUTPUTFORMAT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_COL_TABLE_PARAMETERS;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_CREATETIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_LASTACCESSTIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_LOCATION;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_NAME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_PARTITIONS;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_PARTITION_COLUMNS;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_RESULTSET_COL_NAME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_RESULTSET_COMMENT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_RESULTSET_DATA_TYPE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_STORED_TYPE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_TABLE_PROPERTIES;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_TOTALSIZE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_TRANSIENT_LASTDDLTIME;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.KEY_VALUE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.ORC_FORMAT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.PARQUET_FORMAT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.PARTITION_INFORMATION;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.SQL_QUERY_DATA;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.SQL_SHOW_PARTITIONS;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.SQL_SHOW_TABLES;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.SQL_SWITCH_DATABASE;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.TABLE_INFORMATION;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.TEXT_FORMAT;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.TYPE_ORC;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.TYPE_PARQUET;
-import static com.dtstack.flinkx.metadatahive2.constants.Hive2MetaDataCons.TYPE_TEXT;
-
-/**
- * @author : tiezhu
- * @date : 2020/3/9
- */
-public class Metadatahive2InputFormat extends BaseMetadataInputFormat {
-
- private static final long serialVersionUID = 1L;
-
- protected Map<String, Object> hadoopConfig;
-
- String paraFirst = KEY_COL_NAME;
- String paraSecond = KEY_COLUMN_DATA_TYPE;
-
- @Override
- protected void switchDatabase(String databaseName) throws SQLException {
- statement.get().execute(String.format(SQL_SWITCH_DATABASE, quote(databaseName)));
- }
-
- /**
- * Decodes Unicode escape sequences in a string
- *
- * @param string a string that may mix Unicode escapes and plain characters
- * @return the decoded string
- */
- public static String unicodeToStr(String string) {
- String prefix = "\\u";
- if (string == null || !string.contains(prefix)) {
- // return the input as-is if it is null or contains no Unicode escapes
- return string;
- }
-
- StringBuilder value = new StringBuilder(string.length() >> 2);
- String[] strings = string.split("\\\\u");
- String hex, mix;
- char hexChar;
- int ascii, n;
-
- if (strings[0].length() > 0) {
- // handle the plain prefix before the first escape
- value.append(strings[0]);
- }
-
- try {
- for (int i = 1; i < strings.length; i++) {
- hex = strings[i];
- if (hex.length() > 3) {
- mix = "";
- if (hex.length() > 4) {
- // handle the plain characters following the Unicode escape
- mix = hex.substring(4);
- }
- hex = hex.substring(0, 4);
-
- try {
- Integer.parseInt(hex, 16);
- } catch (Exception e) {
- // the current hex segment is not a valid number; append the original content and continue
- value.append(prefix).append(strings[i]);
- continue;
- }
-
- ascii = 0;
- for (int j = 0; j < hex.length(); j++) {
- hexChar = hex.charAt(j);
- // convert each hex digit of the Unicode escape to decimal
- n = Integer.parseInt(String.valueOf(hexChar), 16);
- // accumulate into the character code
- ascii += n * ((int) Math.pow(16, (hex.length() - j - 1)));
- }
-
- // append the decoded character and the trailing plain text
- value.append((char) ascii).append(mix);
- } else {
- // leave escapes of unexpected length untouched
- value.append(prefix).append(hex);
- }
- }
- } catch (Exception e) {
- // malformed Unicode escape, decoding failed
- return null;
- }
-
- return value.toString();
- }
-
- @Override
- protected String quote(String name) {
- return String.format("`%s`", name);
- }
-
- @Override
- protected List showTables() throws SQLException {
- List tables = new ArrayList<>();
- try (ResultSet rs = statement.get().executeQuery(SQL_SHOW_TABLES)) {
- int pos = rs.getMetaData().getColumnCount() == 1 ? 1 : 2;
- while (rs.next()) {
- tables.add(rs.getString(pos));
- }
- }
-
- return tables;
- }
-
- @Override
- protected Map<String, Object> queryMetaData(String tableName) throws SQLException {
- Map<String, Object> result = new HashMap<>(16);
- List<Map<String, Object>> columnList = new ArrayList<>();
- List<Map<String, Object>> partitionColumnList = new ArrayList<>();
- Map<String, Object> tableProperties = new HashMap<>(16);
-
- List<Map<String, String>> metaData = queryData(tableName);
- Iterator<Map<String, String>> it = metaData.iterator();
- int metaDataFlag = 0;
- while(it.hasNext()){
- Map<String, String> lineDataInternal = it.next();
- String colNameInternal = lineDataInternal.get(KEY_COL_NAME);
- if (StringUtils.isBlank(colNameInternal)) {
- continue;
- }
- if(colNameInternal.startsWith("#")){
- colNameInternal = StringUtils.trim(colNameInternal);
- switch (colNameInternal){
- case PARTITION_INFORMATION:
- metaDataFlag = 1;
- break;
- case TABLE_INFORMATION:
- metaDataFlag = 2;
- break;
- case KEY_RESULTSET_COL_NAME:
- paraFirst = KEY_RESULTSET_DATA_TYPE;
- paraSecond = KEY_RESULTSET_COMMENT;
- break;
- default:
- break;
- }
- continue;
- }
- switch (metaDataFlag){
- case 0:
- columnList.add(parseColumn(lineDataInternal, columnList.size()+1));
- break;
- case 1:
- partitionColumnList.add(parseColumn(lineDataInternal, partitionColumnList.size()+1));
- break;
- case 2:
- parseTableProperties(lineDataInternal, tableProperties, it);
- break;
- default:
- break;
- }
- }
-
- if (partitionColumnList.size() > 0) {
- List<String> partitionColumnNames = new ArrayList<>();
- for (Map<String, Object> partitionColumn : partitionColumnList) {
- partitionColumnNames.add(partitionColumn.get(KEY_COLUMN_NAME).toString());
- }
-
- columnList.removeIf(column -> partitionColumnNames.contains(column.get(KEY_COLUMN_NAME).toString()));
- result.put(KEY_PARTITIONS, showPartitions(tableName));
- }
- result.put(KEY_TABLE_PROPERTIES, tableProperties);
- result.put(KEY_PARTITION_COLUMNS, partitionColumnList);
- result.put(KEY_COLUMN, columnList);
-
- return result;
- }
-
- private Map<String, Object> parseColumn(Map<String, String> lineDataInternal, int index){
- String dataTypeInternal = lineDataInternal.get(KEY_COLUMN_DATA_TYPE);
- String commentInternal = lineDataInternal.get(KEY_INDEX_COMMENT);
- String colNameInternal = lineDataInternal.get(KEY_COL_NAME);
-
- Map<String, Object> lineResult = new HashMap<>(16);
- lineResult.put(KEY_COLUMN_NAME, colNameInternal);
- lineResult.put(KEY_COLUMN_TYPE, dataTypeInternal);
- lineResult.put(KEY_COLUMN_COMMENT, unicodeToStr(commentInternal));
- lineResult.put(KEY_COLUMN_INDEX, index);
- return lineResult;
- }
-
- private String getStoredType(String storedClass) {
- if (storedClass.endsWith(TEXT_FORMAT)){
- return TYPE_TEXT;
- } else if (storedClass.endsWith(ORC_FORMAT)){
- return TYPE_ORC;
- } else if (storedClass.endsWith(PARQUET_FORMAT)){
- return TYPE_PARQUET;
- } else {
- return storedClass;
- }
- }
-
-
- private List<Map<String, String>> queryData(String table) throws SQLException{
- try (ResultSet rs = statement.get().executeQuery(String.format(SQL_QUERY_DATA, quote(table)))) {
- ResultSetMetaData metaData = rs.getMetaData();
- int columnCount = metaData.getColumnCount();
- List<String> columnNames = new ArrayList<>(columnCount);
- for (int i = 0; i < columnCount; i++) {
- columnNames.add(metaData.getColumnName(i+1));
- }
-
- List<Map<String, String>> data = new ArrayList<>();
- while (rs.next()) {
- Map<String, String> lineData = new HashMap<>(Math.max((int) (columnCount / .75f) + 1, 16));
- for (String columnName : columnNames) {
- lineData.put(columnName, rs.getString(columnName));
- }
-
- data.add(lineData);
- }
-
- return data;
- }
- }
-
- private List<Map<String, String>> showPartitions(String table) throws SQLException{
- List<Map<String, String>> partitions = new ArrayList<>();
- try (ResultSet rs = statement.get().executeQuery(String.format(SQL_SHOW_PARTITIONS, quote(table)))) {
- while (rs.next()) {
- String str = rs.getString(1);
- String[] split = str.split(ConstantValue.EQUAL_SYMBOL);
- if(split.length == 2){
- Map