Error :
Job failed with java.lang.ClassNotFoundException: org.apache.spark.AccumulatorParam
FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. Spark job failed during runtime. Please check stacktrace for the root cause.
Solution :
Reinstall Hive from scratch.
https://blog.csdn.net/qq_44226094/article/details/123218860
The key step is re-initializing the Hive metastore database.
Clear out the data in the existing metastore database, then run the initialization again:
schematool -initSchema -dbType mysql -verbose
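Putting the two steps together, a minimal sequence could look like the sketch below. The MySQL host, user, and database name are taken from the javax.jdo.option.ConnectionURL in my hive-site.xml further down; adjust them to your own environment.

# Assumption: the metastore lives in the MySQL database "metastore" on cpu102 (see the JDBC URL below)
# Step 1: wipe the old metastore by dropping and re-creating an empty database
mysql -h cpu102 -u root -p -e "DROP DATABASE IF EXISTS metastore; CREATE DATABASE metastore;"
# Step 2: re-create the metastore tables from the Hive installation
schematool -initSchema -dbType mysql -verbose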
Here are my configuration files.
Create a new hive-site.xml file under the conf directory:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- JDBC connection URL -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://cpu102:3306/metastore?useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
</property>
<!-- JDBC connection driver -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<!-- JDBC connection username -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<!-- JDBC connection password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>xxxxxx</value>
</property>
<!-- Hive's default working directory on HDFS -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<!-- Hive metastore schema verification -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!-- Address of the metastore service to connect to -->
<!--
<property>
<name>hive.metastore.uris</name>
<value>thrift://cpu101:9083</value>
</property>
-->
<!-- Port for HiveServer2 connections -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<!-- Host for HiveServer2 connections -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>cpu101</value>
</property>
<!-- Metastore event notification API authorization -->
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
<!-- Print column headers in the CLI -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- Show the current database in the CLI prompt -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!-- Spark dependency location with HDFS HA (mycluster is the HDFS nameservice, so no NameNode port appears here) -->
<property>
<name>spark.yarn.jars</name>
<value>hdfs://mycluster/spark-jars/*</value>
</property>
<!-- Hive execution engine -->
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
</configuration>
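Note that spark.yarn.jars assumes the Spark jars have already been uploaded to that HDFS path. A rough sketch of that step, where /spark-jars and SPARK_HOME are my assumptions about the layout rather than something stated in the original post:

# Assumption: SPARK_HOME points at the Spark installation whose jars Hive on Spark should use,
# and /spark-jars is the HDFS directory referenced by spark.yarn.jars above
hdfs dfs -mkdir -p /spark-jars
hdfs dfs -put $SPARK_HOME/jars/* /spark-jars/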
If HDFS HA is not configured :
<!-- Spark dependency location (note: the port 8020 must match the NameNode's RPC port) -->
<property>
<name>spark.yarn.jars</name>
<value>hdfs://cpu101:8020/spark-jars/*</value>
</property>
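To double-check that 8020 really is the NameNode RPC port on your cluster, you can query the client configuration (a generic check, not part of the original steps):

# Prints the default filesystem URI, e.g. hdfs://cpu101:8020
hdfs getconf -confKey fs.defaultFS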
spark-defaults.conf
spark.master yarn
spark.eventLog.enabled true
spark.eventLog.dir hdfs://cpu101:8020/spark-history
spark.executor.memory 1g
spark.driver.memory 1g
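spark.eventLog.dir should exist on HDFS before the first job writes to it; creating it is a one-liner (directory name taken from the setting above):

# Create the Spark event-log directory referenced by spark.eventLog.dir
hdfs dfs -mkdir -p /spark-history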