Loading Avro files from an S3 bucket for ingestion into Druid

Hi All,

I'm trying to ingest Avro files, selected by a prefix (path) in an S3 bucket, into Druid using the batch ingest task as described on the Druid website:
http://druid.io/docs/latest/ingestion/tasks.html

Please find my ingestion spec below:

{
  "type": "index",
  "spec": {
    "dataSchema": {
      "dataSource": "test_data_source_1",
      "parser": {
        "type": "avro_hadoop",
        "parseSpec": {
          "format": "avro",
          "timestampSpec": {
            "column": "timestamp",
            "format": "millis"
          },
          "dimensionsSpec": {
            "dimensions": [
              "company",
              "project",
              "country",
              "adSpace",
              "adNetwork",
              "platform",
              "adAssetCriterion",
              "adSpacePlacement"
            ]
          }
        }
      },
      "metricsSpec": [
        { "type": "longSum", "name": "impressions", "fieldName": "impressions" },
        { "type": "longSum", "name": "clicks", "fieldName": "clicks" },
        { "type": "longSum", "name": "revenueInUSD", "fieldName": "revenueInUSD" },
        { "type": "longSum", "name": "demandSourceAdsReceived", "fieldName": "demandSourceAdsReceived" },
        { "type": "longSum", "name": "demandSourceAdsRequested", "fieldName": "demandSourceAdsRequested" },
        { "type": "longSum", "name": "videoStarts", "fieldName": "videoStarts" },
        { "type": "longSum", "name": "videoFirstQuarterCompletes", "fieldName": "videoFirstQuarterCompletes" },
        { "type": "longSum", "name": "videoHalfCompletes", "fieldName": "videoHalfCompletes" },
        { "type": "longSum", "name": "videoThirdQuarterCompletes", "fieldName": "videoThirdQuarterCompletes" },
        { "type": "longSum", "name": "videoCompletions", "fieldName": "videoCompletions" },
        { "type": "longSum", "name": "videoViews", "fieldName": "videoViews" }
      ],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "HOUR",
        "segmentGranularity": "HOUR",
        "intervals": ["2018-03-30T01:00:00Z/2018-03-30T02:00:00Z"]
      }
    },
    "ioConfig": {
      "type": "index",
      "firehose": {
        "type": "static-s3",
        "prefixes": ["s3://mainDir/subDir/2018033001/"],
        "fetchTimeout": "111111"
      }
    },
    "tuningConfig": {
      "type": "index",
      "rowFlushBoundary": 350000,
      "buildV9Directly": true
    }
  }
}
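
In case it matters, this is how I'm submitting the task to the overlord's standard task endpoint (the host name and spec file name below are placeholders for my setup):

# Submit the index task to the overlord; it returns the task id as JSON.
# "overlord-host" and "ingestion_spec.json" are placeholders.
curl -X POST \
  -H 'Content-Type: application/json' \
  -d @ingestion_spec.json \
  http://overlord-host:8090/druid/indexer/v1/task

The task is accepted and starts running, but then fails with the error below.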

I'm getting the following error. Can someone please advise as to what is going wrong here? Are my parser, parseSpec, and ioConfig right for this use case? I tried using

java.lang.ClassCastException: io.druid.segment.indexing.TransformingInputRowParser cannot be cast to io.druid.data.input.impl.StringInputRowParser
	at io.druid.data.input.impl.PrefetchableTextFilesFirehoseFactory.connect(PrefetchableTextFilesFirehoseFactory.java:107) ~[druid-api-0.11.1-1515709212-d5fa1b6-456.jar:0.11.1-1515709212-d5fa1b6-456]
	at io.druid.indexing.common.task.IndexTask.generateAndPublishSegments(IndexTask.java:619) ~[druid-indexing-service-0.11.1-1515709212-d5fa1b6-456.jar:0.11.1-1515709212-d5fa1b6-456]
	at io.druid.indexing.common.task.IndexTask.run(IndexTask.java:233) ~[druid-indexing-service-0.11.1-1515709212-d5fa1b6-456.jar:0.11.1-1515709212-d5fa1b6-456]
	at io.druid.indexing.overlord.ThreadPoolTaskRunner$ThreadPoolTaskRunnerCallable.call(ThreadPoolTaskRunner.java:436) [druid-indexing-service-0.11.1-1515709212-d5fa1b6-456.jar:0.11.1-1515709212-d5fa1b6-456]
	at io.druid.indexing.overlord.ThreadPoolTaskRunner$ThreadPoolTaskRunnerCallable.call(ThreadPoolTaskRunner.java:408) [druid-indexing-service-0.11.1-1515709212-d5fa1b6-456.jar:0.11.1-1515709212-d5fa1b6-456]
	at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_131]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_131]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_131]
	at java.lang.Thread.run(Thread.java:748) [?:1.8.0_131]
2018-04-19T00:01:44,816 INFO [task-runner-0-priority-0] io.druid.indexing.overlord.TaskRunnerUtils - Task [index_squall_test_data_source_1_2018-04-19T00:01:37.992Z] status changed to [FAILED].
2018-04-19T00:01:44,824 INFO [task-runner-0-priority-0] io.druid.indexing.worker.executor.ExecutorLifecycle - Task completed with status: {
  "id" : "index_squall_test_data_source_1_2018-04-19T00:01:37.992Z",
  "status" : "FAILED",
  "duration" : 372
}