Hadoop Batch Ingestion on MapR cluster

Hi,

I have a 3-node MapR cluster with Hadoop 2.7.0-mapr-1808. I've launched the Druid tutorial cluster on one of the nodes. I've copied the Hadoop *.xml config files from /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/ into ./quickstart/tutorial/conf/druid/_common/hadoop-xml/. I've put wikiticker-2015-09-12-sampled.json.gz on HDFS and tried to ingest it with the following spec:

```json
{
  "type" : "index_hadoop",
  "spec" : {
    "dataSchema" : {
      "dataSource" : "wikipedia",
      "parser" : {
        "type" : "hadoopyString",
        "parseSpec" : {
          "format" : "json",
          "dimensionsSpec" : {
            "dimensions" : [
              "channel",
              "cityName",
              "comment",
              "countryIsoCode",
              "countryName",
              "isAnonymous",
              "isMinor",
              "isNew",
              "isRobot",
              "isUnpatrolled",
              "metroCode",
              "namespace",
              "page",
              "regionIsoCode",
              "regionName",
              "user",
              { "name": "added", "type": "long" },
              { "name": "deleted", "type": "long" },
              { "name": "delta", "type": "long" }
            ]
          },
          "timestampSpec" : {
            "format" : "auto",
            "column" : "time"
          }
        }
      },
      "metricsSpec" : [],
      "granularitySpec" : {
        "type" : "uniform",
        "segmentGranularity" : "day",
        "queryGranularity" : "none",
        "intervals" : ["2015-09-12/2015-09-13"],
        "rollup" : false
      }
    },
    "ioConfig" : {
      "type" : "hadoop",
      "inputSpec" : {
        "type" : "static",
        "paths" : "hdfs://destination.host.ip:7222/quickstart/wikiticker-2015-09-12-sampled.json.gz"
      }
    },
    "tuningConfig" : {
      "type" : "hadoop",
      "partitionsSpec" : {
        "type" : "hashed",
        "targetPartitionSize" : 5000000
      }
    }
  }
}
```
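For reference, the setup steps described above were roughly the following (the local path of the sample file is an assumption based on the Druid quickstart layout, so adjust it to your install):

```bash
# Copy the MapR Hadoop client configs so the Druid processes pick them up
cp /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/*.xml \
   ./quickstart/tutorial/conf/druid/_common/hadoop-xml/

# Upload the sample file to the cluster filesystem
hadoop fs -mkdir -p /quickstart
hadoop fs -put quickstart/tutorial/wikiticker-2015-09-12-sampled.json.gz /quickstart/
```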

But when I run the task, I get the following error:

```
[...]
2019-04-25T13:03:01,182 ERROR [task-runner-0-priority-0] org.apache.druid.indexing.common.task.HadoopIndexTask - Got invocation target exception in run(), cause:

java.lang.RuntimeException: java.io.IOException: Failed on local exception: java.io.IOException: Connection reset by peer; Host Details : local host is: "my.host.private.adress/my.host.adress"; destination host is: "destination.host.ip":7222;

at com.google.common.base.Throwables.propagate(Throwables.java:160) ~[guava-16.0.1.jar:?]

at org.apache.druid.indexer.DetermineHashedPartitionsJob.run(DetermineHashedPartitionsJob.java:223) ~[druid-indexing-hadoop-0.14.0-incubating.jar:0.14.0-incubating]

at org.apache.druid.indexer.JobHelper.runSingleJob(JobHelper.java:372) ~[druid-indexing-hadoop-0.14.0-incubating.jar:0.14.0-incubating]

at org.apache.druid.indexer.HadoopDruidDetermineConfigurationJob.run(HadoopDruidDetermineConfigurationJob.java:60) ~[druid-indexing-hadoop-0.14.0-incubating.jar:0.14.0-incubating]

at org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner.runTask(HadoopIndexTask.java:617) ~[druid-indexing-service-0.14.0-incubating.jar:0.14.0-incubating]

at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_181]

at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_181]

at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_181]

at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_181]

at org.apache.druid.indexing.common.task.HadoopIndexTask.runInternal(HadoopIndexTask.java:309) ~[druid-indexing-service-0.14.0-incubating.jar:0.14.0-incubating]

at org.apache.druid.indexing.common.task.HadoopIndexTask.run(HadoopIndexTask.java:244) [druid-indexing-service-0.14.0-incubating.jar:0.14.0-incubating]

at org.apache.druid.indexing.overlord.SingleTaskBackgroundRunner$SingleTaskBackgroundRunnerCallable.call(SingleTaskBackgroundRunner.java:419) [druid-indexing-service-0.14.0-incubating.jar:0.14.0-incubating]

at org.apache.druid.indexing.overlord.SingleTaskBackgroundRunner$SingleTaskBackgroundRunnerCallable.call(SingleTaskBackgroundRunner.java:391) [druid-indexing-service-0.14.0-incubating.jar:0.14.0-incubating]

at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_181]

at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_181]

at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_181]

at java.lang.Thread.run(Thread.java:748) [?:1.8.0_181]

Caused by: java.io.IOException: Failed on local exception: java.io.IOException: Connection reset by peer; Host Details : local host is: "my.host.private.ip/my.host.adress"; destination host is: "destination.host.ip":7222;

at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:782) ~[?:?]

at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1493) ~[?:?]

at org.apache.hadoop.ipc.Client.call(Client.java:1435) ~[?:?]

at org.apache.hadoop.ipc.Client.call(Client.java:1345) ~[?:?]

at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:227) ~[?:?]

at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116) ~[?:?]

at com.sun.proxy.$Proxy208.getFileInfo(Unknown Source) ~[?:?]

at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:796) ~[?:?]

at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_181]

at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_181]

at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_181]

at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_181]

at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:409) ~[?:?]

at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:163) ~[?:?]

at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:155) ~[?:?]

at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95) ~[?:?]

at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:346) ~[?:?]

at com.sun.proxy.$Proxy209.getFileInfo(Unknown Source) ~[?:?]

at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1717) ~[?:?]

at org.apache.hadoop.hdfs.DistributedFileSystem$27.doCall(DistributedFileSystem.java:1437) ~[?:?]

at org.apache.hadoop.hdfs.DistributedFileSystem$27.doCall(DistributedFileSystem.java:1434) ~[?:?]

at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) ~[?:?]

at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1434) ~[?:?]

at org.apache.hadoop.fs.Globber.getFileStatus(Globber.java:64) ~[?:?]

at org.apache.hadoop.fs.Globber.doGlob(Globber.java:269) ~[?:?]

at org.apache.hadoop.fs.Globber.glob(Globber.java:148) ~[?:?]

at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1705) ~[?:?]

at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:300) ~[?:?]

at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:271) ~[?:?]

at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getSplits(FileInputFormat.java:393) ~[?:?]

at org.apache.hadoop.mapreduce.lib.input.DelegatingInputFormat.getSplits(DelegatingInputFormat.java:115) ~[?:?]

at org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:303) ~[?:?]

at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:320) ~[?:?]

at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:198) ~[?:?]

at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341) ~[?:?]

at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338) ~[?:?]

at java.security.AccessController.doPrivileged(Native Method) ~[?:1.8.0_181]

at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_181]

at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1836) ~[?:?]

at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338) ~[?:?]

at org.apache.druid.indexer.DetermineHashedPartitionsJob.run(DetermineHashedPartitionsJob.java:126) ~[druid-indexing-hadoop-0.14.0-incubating.jar:0.14.0-incubating]

… 15 more

Caused by: java.io.IOException: Connection reset by peer

at sun.nio.ch.FileDispatcherImpl.read0(Native Method) ~[?:1.8.0_181]

at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39) ~[?:1.8.0_181]

at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) ~[?:1.8.0_181]

at sun.nio.ch.IOUtil.read(IOUtil.java:197) ~[?:1.8.0_181]

at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380) ~[?:1.8.0_181]

at org.apache.hadoop.net.SocketInputStream$Reader.performIO(SocketInputStream.java:57) ~[?:?]

at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:142) ~[?:?]

at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:161) ~[?:?]

at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:131) ~[?:?]

at java.io.FilterInputStream.read(FilterInputStream.java:133) ~[?:1.8.0_181]

at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) ~[?:1.8.0_181]

at java.io.BufferedInputStream.read(BufferedInputStream.java:265) ~[?:1.8.0_181]

at java.io.FilterInputStream.read(FilterInputStream.java:83) ~[?:1.8.0_181]

at java.io.FilterInputStream.read(FilterInputStream.java:83) ~[?:1.8.0_181]

at org.apache.hadoop.ipc.Client$Connection$PingInputStream.read(Client.java:554) ~[?:?]

at java.io.DataInputStream.readInt(DataInputStream.java:387) ~[?:1.8.0_181]

at org.apache.hadoop.ipc.Client$IpcStreams.readResponse(Client.java:1794) ~[?:?]

at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1163) ~[?:?]

at org.apache.hadoop.ipc.Client$Connection.run(Client.java:1059) ~[?:?]

2019-04-25T13:03:01,235 INFO [task-runner-0-priority-0] org.apache.druid.segment.realtime.firehose.ServiceAnnouncingChatHandlerProvider - Unregistering chat handler[index_hadoop_wikipedia_2019-04-25T13:02:46.714Z]

2019-04-25T13:03:01,236 INFO [task-runner-0-priority-0] org.apache.druid.indexing.overlord.TaskRunnerUtils - Task [index_hadoop_wikipedia_2019-04-25T13:02:46.714Z] status changed to [FAILED].

2019-04-25T13:03:01,238 INFO [task-runner-0-priority-0] org.apache.druid.indexing.worker.executor.ExecutorLifecycle - Task completed with status: {

"id" : "index_hadoop_wikipedia_2019-04-25T13:02:46.714Z",

"status" : "FAILED",

"duration" : 4987,

"errorMsg" : "java.lang.RuntimeException: java.io.IOException: Failed on local exception: java.io.IOException: Con…"

}
[…]

```

Any ideas what I should check?

Hi,

I don't have much of an idea, but from the error logs I see: local host is: "my.host.private.ip/my.host.adress"; destination host is: "destination.host.ip":7222;

Have the IPs been masked in the logs intentionally, or should my.host.private.ip and destination.host.ip be set somewhere?

Thanks,

Sashi

Hi,

They've been masked, just to be safe :slight_smile:

Hi,

Are you using MapR with the POSIX client, by mounting, or some other way?


Can you try giving a complete path - hdfs://destination.host.ip:7222/mapr/mapr.cluster.name/quickstart/wikiticker-2015-09-12-sampled.json.gz
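For example, you could first check that the full path resolves from the node running the Druid task, and then use exactly that URI in the spec's ioConfig "paths" (mapr.cluster.name stands for your actual cluster name here):

```bash
# Check that the file is visible via the full MapR path from the Druid node
hadoop fs -ls hdfs://destination.host.ip:7222/mapr/mapr.cluster.name/quickstart/wikiticker-2015-09-12-sampled.json.gz

# For comparison, the path used in the original spec
hadoop fs -ls hdfs://destination.host.ip:7222/quickstart/wikiticker-2015-09-12-sampled.json.gz
```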