java.lang.UnsupportedOperationException while indexing ORC file

Hi,

I am getting the error below when I try to index an ORC file residing on S3 into my Druid cluster:

2019-04-01T05:47:10,549 INFO [task-runner-0-priority-0] org.apache.druid.indexing.worker.executor.ExecutorLifecycle - Task completed with status: {
  "id" : "index_visitdests3ORC_2019-04-01T05:47:06.108Z",
  "status" : "FAILED",
  "duration" : 418,
  "errorMsg" : "java.lang.UnsupportedOperationException: not supported\n\tat org.apache.druid.data.input.impl.TimeAndD…"

I have included "druid-orc-extensions" in my druid.extensions.loadList.
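(For reference, the corresponding line in common.runtime.properties looks roughly like the following; "druid-s3-extensions" is shown here as an assumption, since the static-s3 firehose in the spec below comes from that extension.)

druid.extensions.loadList=["druid-s3-extensions", "druid-orc-extensions"]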

My index task JSON is as follows:

{
  "type": "index",
  "spec": {
    "ioConfig": {
      "type": "index",
      "firehose": {
        "type": "static-s3",
        "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat",
        "uris": ["s3://...folder/000000"],
        "fetchTimeout": 90000
      },
      "appendToExisting": false
    },
    "dataSchema": {
      "dataSource": "visitdests3ORC",
      "parser": {
        "type": "orc",
        "parseSpec": {
          "format": "timeAndDims",
          "timestampSpec": {
            "format": "auto",
            "column": "local_dt"
          },
          "dimensionsSpec": {
            "dimensions": [
              "site_name",
              "product_ln_name",
              "dest_id",
              "geo_name",
              "city_geo_id",
              "multi_city_vicinity_geo_id",
              "multi_city_vicinity_name",
              "province_state_geo_id",
              "province_state_name",
              "country_geo_id",
              "country_name",
              "continent_geo_id",
              "continent_name",
              "super_region_geo_id",
              "super_region_name",
              "parent_geo_id",
              "parent_geo_name",
              "unique_visitor",
              "page_views"
            ],
            "dimensionExclusions": [],
            "spatialDimensions": []
          }
        },
        "typeString": "struct<local_dt:date,site_name:string,product_ln_name:string,dest_id:string,geo_name:string,city_geo_id:string,multi_city_vicinity_geo_id:string,multi_city_vicinity_name:string,province_state_geo_id:string,province_state_name:string,country_geo_id:string,country_name:string,continent_geo_id:string,continent_name:string,super_region_geo_id:string,super_region_name:string,parent_geo_id:string,parent_geo_name:string,unique_visitor:bigint,page_views:bigint>"
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "day",
        "queryGranularity": "none",
        "intervals": ["2017-01-01/2017-01-01"],
        "rollup": false
      }
    },
    "tuningConfig": {
      "type": "index",
      "targetPartitionSize": 5000000,
      "maxRowsInMemory": 500,
      "forceExtendableShardSpecs": true,
      "reportParseExceptions": true
    }
  }
}

Can anyone please suggest a solution?

Thanks,

Anoosha

The ORC extension won’t work with the native batch indexing task; it only supports the Hadoop task presently.
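For reference, the same ingestion expressed as a Hadoop-based task would look roughly like the sketch below. This is only an outline: the index_hadoop task requires Hadoop client libraries and S3 credentials configured for your cluster, the s3a path scheme is an assumption that depends on your Hadoop setup, the one-day interval is a placeholder for whatever range the data covers, and the dimension list and typeString are abbreviated here to keep it short.

{
  "type": "index_hadoop",
  "spec": {
    "ioConfig": {
      "type": "hadoop",
      "inputSpec": {
        "type": "static",
        "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat",
        "paths": "s3a://...folder/000000"
      }
    },
    "dataSchema": {
      "dataSource": "visitdests3ORC",
      "parser": {
        "type": "orc",
        "parseSpec": {
          "format": "timeAndDims",
          "timestampSpec": {
            "format": "auto",
            "column": "local_dt"
          },
          "dimensionsSpec": {
            "dimensions": ["site_name", "product_ln_name", "dest_id"]
          }
        },
        "typeString": "struct<local_dt:date,site_name:string,product_ln_name:string,dest_id:string,...>"
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "segmentGranularity": "day",
        "queryGranularity": "none",
        "intervals": ["2017-01-01/2017-01-02"],
        "rollup": false
      }
    },
    "tuningConfig": {
      "type": "hadoop"
    }
  }
}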

Thanks,

Jon

The ORC extension won’t work with the native batch indexing task; it only supports the Hadoop task presently.

Has this changed with the recent 0.15.1 version of Druid, or does the ORC format still need the Hadoop dependency?

I am trying to accomplish the same thing as the OP: reading an ORC file from S3 into Druid.