[0.9.2-rc1][URGENT] KafkaIndexingService load spec is not being set correctly.

Hey,

There seems to be an issue with the latest version of Druid, 0.9.2-rc2. The load spec is not being set properly when segments are generated by the Kafka indexing service. Sample load spec from the metadata storage:

{"dataSource":"segment-data-final-2","interval":"2016-10-13T00:00:00.000Z/2016-10-14T00:00:00.000Z","version":"2016-10-14T11:58:13.969Z","loadSpec":{"type":"hdfs","path":"/segments/segment-data-final-2/20161013T000000.000Z_20161014T000000.000Z/2016-10-14T11_58_13.969Z/0/index.zip"},"dimensions":"event_id,lang,share_clicks,ts_bucket,old_hash_id,ab_test,event_name,title,noti_opened,fullstory_time_total,ts_back_valid,custom_title,targeted_city,at,short_view_event,published_dt,short_time,notification_type,variants,device_id,category,toss_opened,noti_shown,event_source,score,author,bookmark,is_video,source,like_count,vid_length,content,fullstory_view,ts_valid,targeted_country,video_event,shortened_url,toss_clicked,hashId,group_id,img_url,is_deleted","metrics":"count,fullstory_total_time,total_like_count,total_share_views,total_vid_length,total_short_time,distinct_user,distinct_event,distinct_hash_Id,total_bookmark,total_fullstory_view,total_noti_opened,total_noti_shown,total_toss_clicked,total_toss_opened,total_share_click,total_short_views,total_video_views,total_ts_valid,total_full_ts_valid,is_ab,ab_variants","shardSpec":{"type":"numbered","partitionNum":0,"partitions":0},"binaryVersion":9,"size":2656279,"identifier":"segment-data-final-2_2016-10-13T00:00:00.000Z_2016-10-14T00:00:00.000Z_2016-10-14T11:58:13.969Z"}

The path does not include the gs://bucket prefix.

Because of this issue, I am getting the following exception while loading the segments from deep storage:

2016-10-14T13:03:21,202 WARN [ZkCoordinator-1] com.metamx.common.RetryUtils - Failed on try 1, retrying in 1,123ms.

java.io.FileNotFoundException: File does not exist: /segments/segment-data-final-2/20161011T000000.000Z_20161012T000000.000Z/2016-10-14T11_58_12.714Z/2/index.zip

at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:71)

at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:61)

at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1828)

at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1799)

at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1712)

at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:587)

at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:365)

at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)

at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)

at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969)

at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049)

at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045)

at java.security.AccessController.doPrivileged(Native Method)

at javax.security.auth.Subject.doAs(Subject.java:422)

at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)

at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043)

at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) ~[?:1.8.0_101]

at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) ~[?:1.8.0_101]

at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) ~[?:1.8.0_101]

at java.lang.reflect.Constructor.newInstance(Constructor.java:423) ~[?:1.8.0_101]

at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) ~[hadoop-common-2.3.0.jar:?]

at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) ~[hadoop-common-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1133) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1121) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1111) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSInputStream.fetchLocatedBlocksAndGetLastBlockLength(DFSInputStream.java:272) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSInputStream.openInfo(DFSInputStream.java:239) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSInputStream.<init>(DFSInputStream.java:232) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1279) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:296) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DistributedFileSystem$3.doCall(DistributedFileSystem.java:292) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) ~[hadoop-common-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:292) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:765) ~[hadoop-common-2.3.0.jar:?]

at io.druid.storage.hdfs.HdfsDataSegmentPuller$1.openInputStream(HdfsDataSegmentPuller.java:107) ~[druid-hdfs-storage-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.storage.hdfs.HdfsDataSegmentPuller.getInputStream(HdfsDataSegmentPuller.java:298) ~[druid-hdfs-storage-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.storage.hdfs.HdfsDataSegmentPuller$3.openStream(HdfsDataSegmentPuller.java:241) ~[druid-hdfs-storage-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at com.metamx.common.CompressionUtils$1.call(CompressionUtils.java:138) ~[java-util-0.27.10.jar:?]

at com.metamx.common.CompressionUtils$1.call(CompressionUtils.java:134) ~[java-util-0.27.10.jar:?]

at com.metamx.common.RetryUtils.retry(RetryUtils.java:60) [java-util-0.27.10.jar:?]

at com.metamx.common.RetryUtils.retry(RetryUtils.java:78) [java-util-0.27.10.jar:?]

at com.metamx.common.CompressionUtils.unzip(CompressionUtils.java:132) [java-util-0.27.10.jar:?]

at io.druid.storage.hdfs.HdfsDataSegmentPuller.getSegmentFiles(HdfsDataSegmentPuller.java:235) [druid-hdfs-storage-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.storage.hdfs.HdfsLoadSpec.loadSegment(HdfsLoadSpec.java:62) [druid-hdfs-storage-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegmentFiles(SegmentLoaderLocalCacheManager.java:143) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:95) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:306) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:351) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:153) [druid-server-0.9.2-rc2-SNAPSHOT.jar:0.9.2-rc2-SNAPSHOT]

at org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) [curator-recipes-2.11.0.jar:?]

at org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) [curator-recipes-2.11.0.jar:?]

at org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) [curator-framework-2.11.0.jar:?]

at com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) [guava-16.0.1.jar:?]

at org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:84) [curator-framework-2.11.0.jar:?]

at org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:513) [curator-recipes-2.11.0.jar:?]

at org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) [curator-recipes-2.11.0.jar:?]

at org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:773) [curator-recipes-2.11.0.jar:?]

at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_101]

at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_101]

at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_101]

at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_101]

at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_101]

at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_101]

at java.lang.Thread.run(Thread.java:745) [?:1.8.0_101]

Caused by: org.apache.hadoop.ipc.RemoteException: File does not exist: /segments/segment-data-final-2/20161011T000000.000Z_20161012T000000.000Z/2016-10-14T11_58_12.714Z/2/index.zip

at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:71)

at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:61)

at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1828)

at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1799)

at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1712)

at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:587)

at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:365)

at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)

at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)

at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969)

at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049)

at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045)

at java.security.AccessController.doPrivileged(Native Method)

at javax.security.auth.Subject.doAs(Subject.java:422)

at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)

at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043)

at org.apache.hadoop.ipc.Client.call(Client.java:1406) ~[hadoop-common-2.3.0.jar:?]

at org.apache.hadoop.ipc.Client.call(Client.java:1359) ~[hadoop-common-2.3.0.jar:?]

at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) ~[hadoop-common-2.3.0.jar:?]

at com.sun.proxy.$Proxy59.getBlockLocations(Unknown Source) ~[?:?]

at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_101]

at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_101]

at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_101]

at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_101]

at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) ~[hadoop-common-2.3.0.jar:?]

at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) ~[hadoop-common-2.3.0.jar:?]

at com.sun.proxy.$Proxy59.getBlockLocations(Unknown Source) ~[?:?]

at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:206) ~[hadoop-hdfs-2.3.0.jar:?]

at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1131) ~[hadoop-hdfs-2.3.0.jar:?]

... 43 more

Thanks,

Saurabh

Raised https://github.com/druid-io/druid/issues/3576 to track. Thanks for the report.