Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix][Connector-V2] Fix Hive fails to load *-site.xml file in hive.hadoop.conf-path #7733

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.seatunnel.connectors.seatunnel.hive.utils;

import org.apache.seatunnel.shade.com.google.common.collect.ImmutableList;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.connectors.seatunnel.file.hadoop.HadoopLoginFactory;
import org.apache.seatunnel.connectors.seatunnel.file.hdfs.source.config.HdfsSourceConfigOptions;
Expand All @@ -42,53 +40,53 @@
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

@Slf4j
public class HiveMetaStoreProxy {
private HiveMetaStoreClient hiveMetaStoreClient;
private static volatile HiveMetaStoreProxy INSTANCE = null;
private static final List<String> HADOOP_CONF_FILES = ImmutableList.of("hive-site.xml");

private HiveMetaStoreProxy(ReadonlyConfig readonlyConfig) {
String metastoreUri = readonlyConfig.get(HiveSourceOptions.METASTORE_URI);
String hiveHadoopConfigPath = readonlyConfig.get(HiveConfig.HADOOP_CONF_PATH);
String hiveSitePath = readonlyConfig.get(HiveConfig.HIVE_SITE_PATH);
HiveConf hiveConf = new HiveConf();
hiveConf.set("hive.metastore.uris", metastoreUri);
Configuration hadoopConf = new Configuration();
try {
if (StringUtils.isNotBlank(hiveHadoopConfigPath)) {
HADOOP_CONF_FILES.forEach(
confFile -> {
java.nio.file.Path path = Paths.get(hiveHadoopConfigPath, confFile);
if (Files.exists(path)) {
try {
hiveConf.addResource(path.toUri().toURL());
} catch (IOException e) {
log.warn(
"Error adding Hadoop resource {}, resource was not added",
path,
e);
}
}
});
getConfigFiles(hiveHadoopConfigPath)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like after this change we support other site files too, e.g. one named new-hive-site.xml — not only hive-site.xml?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. These files are located in a specific configured folder, ensuring that only the intended files are present. The folder can contain core-site.xml, hdfs-site.xml, and hive-site.xml, or any other intended file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks OK, can you provide some screenshots of it running successfully?

.forEach(
confFile -> {
try {
hadoopConf.addResource(confFile.toUri().toURL());
} catch (MalformedURLException e) {
log.warn(
"Error adding Hadoop resource {}, resource was not added",
confFile,
e);
}
});
}

if (StringUtils.isNotBlank(hiveSitePath)) {
hiveConf.addResource(new File(hiveSitePath).toURI().toURL());
hadoopConf.addResource(new File(hiveSitePath).toURI().toURL());
}

log.info("hive client conf:{}", hiveConf);
HiveConf hiveConf = new HiveConf(hadoopConf, HiveConf.class);
hiveConf.set("hive.metastore.uris", metastoreUri);

if (HiveMetaStoreProxyUtils.enableKerberos(readonlyConfig)) {
// login Kerberos
Configuration authConf = new Configuration();
authConf.set("hadoop.security.authentication", "kerberos");
hadoopConf.set("hadoop.security.authentication", "kerberos");
this.hiveMetaStoreClient =
HadoopLoginFactory.loginWithKerberos(
authConf,
hadoopConf,
readonlyConfig.get(HdfsSourceConfigOptions.KRB5_PATH),
readonlyConfig.get(HdfsSourceConfigOptions.KERBEROS_PRINCIPAL),
readonlyConfig.get(HdfsSourceConfigOptions.KERBEROS_KEYTAB_PATH),
Expand Down Expand Up @@ -119,7 +117,7 @@ private HiveMetaStoreProxy(ReadonlyConfig readonlyConfig) {
} catch (MalformedURLException e) {
String errorMsg =
String.format(
"Using this hive uris [%s], hive conf [%s] to initialize "
"Using this hive uris [%s], hive hadoopConf [%s] to initialize "
+ "hive metastore client instance failed",
metastoreUri, readonlyConfig.get(HiveSourceOptions.HIVE_SITE_PATH));
throw new HiveConnectorException(
Expand All @@ -132,6 +130,19 @@ private HiveMetaStoreProxy(ReadonlyConfig readonlyConfig) {
}
}

/**
 * Collects every Hadoop/Hive configuration file matching {@code *-site.xml}
 * (e.g. {@code core-site.xml}, {@code hdfs-site.xml}, {@code hive-site.xml})
 * located directly under the directory configured via
 * {@code hive.hadoop.conf-path}. Subdirectories are not searched.
 *
 * @param hiveHadoopConfigPath directory containing the {@code *-site.xml} files
 * @return the matching configuration files, sorted so that resources are added
 *         to the {@code Configuration} in a deterministic order
 * @throws IOException if the directory cannot be opened or read
 */
private static List<Path> getConfigFiles(String hiveHadoopConfigPath) throws IOException {
    List<Path> configFiles = new ArrayList<>();
    Path dirPath = Paths.get(hiveHadoopConfigPath);

    try (DirectoryStream<Path> stream = Files.newDirectoryStream(dirPath, "*-site.xml")) {
        for (Path entry : stream) {
            configFiles.add(entry);
        }
    }

    // DirectoryStream iteration order is unspecified; sort so that the order in
    // which resources (and therefore property overrides) are applied to the
    // Hadoop Configuration is reproducible across runs and platforms.
    configFiles.sort(Path::compareTo);

    return configFiles;
}

public static HiveMetaStoreProxy getInstance(ReadonlyConfig readonlyConfig) {
if (INSTANCE == null) {
synchronized (HiveMetaStoreProxy.class) {
Expand Down
Loading