org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 1 times, most recent failure: Lost task 0.0 in stage 12.0 (TID 18, localhost, executor driver): scala.util.control.BreakControl
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1703)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1691)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1690)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1690)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:873)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:873)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:873)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1924)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1873)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1862)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:682)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2047)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2068)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1368)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:367)
at org.apache.spark.rdd.RDD.take(RDD.scala:1341)
at com.kingyea.datac.process.Explorations$$anonfun$main$2.apply(Explorations.scala:99)
at com.kingyea.datac.process.Explorations$$anonfun$main$2.apply(Explorations.scala:83)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at com.kingyea.datac.process.Explorations$.main(Explorations.scala:83)
at com.kingyea.datac.process.Explorations.main(Explorations.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:220)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:140)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: scala.util.control.BreakControl
I consulted a lot of resources on this error. At first I assumed that triggering an action was pulling all of the data back to the driver, but investigation showed that was not the cause. Other sources suggested the data volume was simply too large; I ruled that out in the program as well.
The real cause was a break() call I had written to exit a loop. Scala's break() works by throwing a scala.util.control.BreakControl exception that must be caught by an enclosing breakable block; when the break() executes inside a closure that Spark ships to an executor, there is no matching breakable on that task's call stack, so the exception escapes, the task fails, and the job aborts with the error above. It took me a whole day of debugging to track this down!
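For illustration, here is a minimal sketch of the failure and two ways out of it, using Spark's RDD API. The original Explorations.scala is not shown, so the dataset, the loop, and the cutoff condition below are hypothetical stand-ins:

import org.apache.spark.{SparkConf, SparkContext, SparkException}
import scala.util.control.Breaks.{break, breakable}

object BreakControlSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[*]"))
    val rdd = sc.parallelize(1 to 100)

    // BROKEN: breakable runs on the driver, but the closure passed to
    // foreach runs inside executor tasks. break() throws BreakControl,
    // nothing on the executor's call stack catches it, and the task
    // fails exactly as in the stack trace above.
    try {
      breakable {
        rdd.foreach { x =>
          if (x > 10) break()
        }
      }
    } catch {
      case e: SparkException => println("job aborted: " + e.getMessage)
    }

    // Fix 1: keep breakable and break() together inside the same task,
    // so the exception is thrown and caught on the same call stack.
    // Note this only short-circuits within each partition.
    rdd.foreachPartition { it =>
      breakable {
        for (x <- it) {
          if (x > 10) break()
          println(x)
        }
      }
    }

    // Fix 2 (usually cleaner): drop the imperative loop entirely and
    // express the early exit declaratively with filter/take, letting
    // Spark prune the work itself.
    rdd.filter(_ <= 10).take(10).foreach(println)

    sc.stop()
  }
}

If a truly global early exit is needed, restructuring the job around filter, take, or a similar declarative operation is more reliable than trying to make break() cross task boundaries.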