Not able to run batch data ingestion

Hi,

I'm submitting a Hadoop batch ingestion job.

The task definition is as follows:

{
  "task": "index_hadoop_package_2018-05-22T17:49:47.440Z",
  "payload": {
    "id": "index_hadoop_package_2018-05-22T17:49:47.440Z",
    "spec": {
      "dataSchema": {
        "dataSource": "package",
        "parser": {
          "type": "hadoopyString",
          "parseSpec": {
            "format": "json",
            "timestampSpec": {
              "column": "ud",
              "format": "auto"
            },
            "dimensionsSpec": {
              "dimensions": [
                "wbn"
              ],
              "dimensionExclusions": [],
              "spatialDimensions": []
            }
          }
        },
        "metricsSpec": [
          {
            "type": "count",
            "name": "count"
          },
          {
            "type": "doubleSum",
            "name": "added",
            "fieldName": "added",
            "expression": null
          },
          {
            "type": "doubleSum",
            "name": "deleted",
            "fieldName": "deleted",
            "expression": null
          },
          {
            "type": "doubleSum",
            "name": "delta",
            "fieldName": "delta",
            "expression": null
          }
        ],
        "granularitySpec": {
          "type": "uniform",
          "segmentGranularity": "DAY",
          "queryGranularity": {
            "type": "none"
          },
          "rollup": true,
          "intervals": [
            "2018-03-24T00:00:00.000Z/2018-03-25T00:00:00.000Z"
          ]
        }
      },
      "ioConfig": {
        "type": "hadoop",
        "inputSpec": {
          "type": "static",
          "paths": "s3n://prod-integration-s3-package-ad-json/ad=2018-03-24-04/Integrate-Package.info+10+0085289945.json"
        },
        "metadataUpdateSpec": null,
        "segmentOutputPath": null
      },
      "tuningConfig": {
        "type": "hadoop",
        "workingPath": null,
        "version": "2018-05-22T17:49:47.440Z",
        "partitionsSpec": {
          "type": "hashed",
          "targetPartitionSize": -1,
          "maxPartitionSize": -1,
          "assumeGrouped": false,
          "numShards": -1,
          "partitionDimensions": []
        },
        "shardSpecs": {},
        "indexSpec": {
          "bitmap": {
            "type": "concise"
          },
          "dimensionCompression": "lz4",
          "metricCompression": "lz4",
          "longEncoding": "longs"
        },
        "maxRowsInMemory": 75000,
        "leaveIntermediate": false,
        "cleanupOnFailure": true,
        "overwriteFiles": false,
        "ignoreInvalidRows": false,
        "jobProperties": {
          "mapreduce.job.classloader": "true",
          "mapreduce.job.classloader.system.classes": "-javax.validation.,java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop.",
          "fs.s3.awsAccessKeyId": "<ACCESS_KEY>",
          "fs.s3.awsSecretAccessKey": "<SECRET_KEY>",
          "fs.s3n.awsAccessKeyId": "<ACCESS_KEY>",
          "fs.s3n.awsSecretAccessKey": "<SECRET_KEY>",
          "fs.s3.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
          "fs.s3n.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
          "io.compression.codecs": "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec"
        },
        "combineText": false,
        "useCombiner": false,
        "buildV9Directly": true,
        "numBackgroundPersistThreads": 0,
        "forceExtendableShardSpecs": false,
        "useExplicitVersion": false,
        "allowedHadoopPrefix": []
      },
      "uniqueId": "da4ff84e675b45f7908e6737a02435a7"
    },
    "hadoopDependencyCoordinates": [
      "org.apache.hadoop:hadoop-client:2.7.3",
      "org.apache.hadoop:hadoop-aws:2.7.3"
    ],
    "classpathPrefix": null,
    "context": null,
    "groupId": "index_hadoop_package_2018-05-22T17:49:47.440Z",
    "dataSource": "package",
    "resource": {
      "availabilityGroup": "index_hadoop_package_2018-05-22T17:49:47.440Z",
      "requiredCapacity": 1
    }
  }
}

But I'm still getting an error, even though I followed the doc at http://druid.io/docs/latest/operations/other-hadoop.html and fixed several issues along the way before getting stuck here:

2018-05-22T17:01:37,603 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job -  map 0% reduce 0%
2018-05-22T17:01:55,151 INFO [task-runner-0-priority-0] org.apache.hadoop.mapreduce.Job - Task Id : attempt_1527007990232_0001_m_000000_0, Status : FAILED
Error: org.apache.hadoop.fs.s3.S3Exception: org.jets3t.service.ServiceException: Request Error: java.lang.ClassCastException: org.jets3t.service.utils.RestUtils$ConnManagerFactory cannot be cast to org.apache.http.conn.ClientConnectionManagerFactory
	at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.processException(Jets3tNativeFileSystemStore.java:478)
	at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.processException(Jets3tNativeFileSystemStore.java:427)
	at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.handleException(Jets3tNativeFileSystemStore.java:411)
	at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.retrieveMetadata(Jets3tNativeFileSystemStore.java:181)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
	at org.apache.hadoop.fs.s3native.$Proxy31.retrieveMetadata(Unknown Source)
	at org.apache.hadoop.fs.s3native.NativeS3FileSystem.getFileStatus(NativeS3FileSystem.java:477)
	at org.apache.hadoop.fs.s3native.NativeS3FileSystem.open(NativeS3FileSystem.java:625)
	at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:773)
	at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.initialize(LineRecordReader.java:85)
	at org.apache.hadoop.mapreduce.lib.input.DelegatingRecordReader.initialize(DelegatingRecordReader.java:84)
	at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:557)
	at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:795)
	at org.apache.hadoop.mapred.MapTask.run(MapTask.java:342)
	at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
	at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: org.jets3t.service.ServiceException: Request Error: java.lang.ClassCastException: org.jets3t.service.utils.RestUtils$ConnManagerFactory cannot be cast to org.apache.http.conn.ClientConnectionManagerFactory
	at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRequest(RestStorageService.java:574)
	at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRequest(RestStorageService.java:281)
	at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRestHead(RestStorageService.java:942)
	at org.jets3t.service.impl.rest.httpclient.RestStorageService.getObjectImpl(RestStorageService.java:2148)
	at org.jets3t.service.impl.rest.httpclient.RestStorageService.getObjectDetailsImpl(RestStorageService.java:2075)
	at org.jets3t.service.StorageService.getObjectDetails(StorageService.java:1093)
	at org.jets3t.service.StorageService.getObjectDetails(StorageService.java:548)
	at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.retrieveMetadata(Jets3tNativeFileSystemStore.java:174)
	... 20 more
Caused by: java.lang.ClassCastException: org.jets3t.service.utils.RestUtils$ConnManagerFactory cannot be cast to org.apache.http.conn.ClientConnectionManagerFactory
	at org.apache.http.impl.client.AbstractHttpClient.createClientConnectionManager(AbstractHttpClient.java:284)
	at org.apache.http.impl.client.AbstractHttpClient.getConnectionManager(AbstractHttpClient.java:437)
	at org.apache.http.impl.client.AbstractHttpClient.createHttpContext(AbstractHttpClient.java:246)
	at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:771)
	at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
	at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:56)
	at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRequest(RestStorageService.java:334)
	... 27 more

Container killed by the ApplicationMaster.
Container killed on request. Exit code is 143
Container exited with a non-zero exit code 143

I solved it by upgrading EMR. It's working fine now.

I'm still getting the same error.
The problem wasn't actually resolved earlier: I had copied the wrong Hadoop XML files into the Druid conf, which caused the MiddleManager node to pick up the batch ingestion tasks and run them locally instead of pushing them to EMR.
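
For anyone hitting the same thing: the Hadoop client XMLs from the cluster (core-site.xml, hdfs-site.xml, yarn-site.xml, mapred-site.xml) need to be on the Druid classpath so the MapReduce job is actually submitted to the remote cluster. As a rough sketch, the same endpoints can also be supplied through jobProperties in the tuningConfig; the master hostname and the HDFS port below are placeholders/assumptions, not values from this setup:

"jobProperties": {
  "mapreduce.framework.name": "yarn",
  "fs.defaultFS": "hdfs://<EMR_MASTER_DNS>:8020",
  "yarn.resourcemanager.hostname": "<EMR_MASTER_DNS>"
}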

I got lucky and fixed it by setting mapreduce.job.user.classpath.first = true instead of mapreduce.job.classloader = true. Worth trying.
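
Concretely, that swap in the tuningConfig's jobProperties would look something like this (keeping the S3 and codec settings from the spec above; presumably the mapreduce.job.classloader* entries can be dropped, since the user-classpath-first setting replaces them):

"jobProperties": {
  "mapreduce.job.user.classpath.first": "true",
  "fs.s3.awsAccessKeyId": "<ACCESS_KEY>",
  "fs.s3.awsSecretAccessKey": "<SECRET_KEY>",
  "fs.s3n.awsAccessKeyId": "<ACCESS_KEY>",
  "fs.s3n.awsSecretAccessKey": "<SECRET_KEY>",
  "fs.s3.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
  "fs.s3n.impl": "org.apache.hadoop.fs.s3native.NativeS3FileSystem",
  "io.compression.codecs": "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec"
}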