Segments in Datasources

Hello,

I’m new to Druid and I’m trying out Kafka ingestion, but I’ve run into a problem.

I have a task running with this configuration:

{
  "dataSources" : {
    "sep-todo" : {
      "spec" : {
        "dataSchema" : {
          "dataSource" : "sep-todo",
          "parser" : {
            "type" : "string",
            "parseSpec" : {
              "timestampSpec" : {
                "column" : "fecha",
                "format" : "yyyy-MM-dd HH:mm:ss"
              },
              "flattenSpec" : {
                "useFieldDiscovery" : true,
                "fields" : [{
                  "type" : "path",
                  "name" : "fecha",
                  "expr" : "$.server.StartTime",
                  "format" : "auto"
                }]
              },
              "dimensionsSpec" : {
                "dimensions" : [
                  "action",
                  "action_field"
                ],
                "dimensionExclusions" : ["message", "primary_key"],
                "spatialDimensions" : []
              },
              "format" : "json"
            }
          },
          "granularitySpec" : {
            "type" : "uniform",
            "segmentGranularity" : "DAY",
            "queryGranularity" : "none"
          },
          "metricsSpec" : [
            {"type" : "longSum", "name" : "file_size", "fieldName" : "file_size"},
            {"type" : "longSum", "name" : "infected", "fieldName" : "infected"},
            {"type" : "longSum", "name" : "infected_count", "fieldName" : "infected_count"},
            {"type" : "longSum", "name" : "occurrences", "fieldName" : "occurrences"},
            {"type" : "longSum", "name" : "omitted", "fieldName" : "omitted"},
            {"type" : "longSum", "name" : "threat_count", "fieldName" : "threat_count"},
            {"type" : "longSum", "name" : "threats", "fieldName" : "threats"},
            {"type" : "longSum", "name" : "total_file_count", "fieldName" : "total_file_count"}
          ]
        },
        "ioConfig" : {
          "type" : "realtime"
        },
        "tuningConfig" : {
          "type" : "realtime",
          "maxRowsInMemory" : "100000",
          "intermediatePersistPeriod" : "PT40H",
          "windowPeriod" : "PT40H"
        }
      },
      "properties" : {
        "task.partitions" : "1",
        "task.replicants" : "1",
        "topicPattern" : "sep",
        "reportDropsAsExceptions" : true
      }
    }
  },
  "properties" : {
    "zookeeper.connect" : "localhost",
    "druid.discovery.curator.path" : "/druid/discovery",
    "druid.selectors.indexing.serviceName" : "druid/overlord",
    "commit.periodMillis" : "15000",
    "consumer.numThreads" : "2",
    "kafka.zookeeper.connect" : "localhost",
    "kafka.group.id" : "tranquility-kafka"
  }
}

When I run the task I don’t see any errors; in fact, it looks like it’s working fine:

[KafkaConsumer-CommitThread] INFO c.m.tranquility.kafka.KafkaConsumer - Flushed {sep={receivedCount=9510, sentCount=9510, droppedCount=0, unparseableCount=0}} pending messages in 0ms and committed offsets in 4ms.

But when I look for the datasource sep-todo in the coordinator console at localhost:8081/#/datasources, it hasn’t been created. I also noticed that there are no segment files for that datasource.

What am I missing?

Regards,

Joaquín Silva

I noticed that the ingested data goes to druid-0.9.2/var/druid/task/{task-id}/work/persist but is never written into a segment. When does it get moved into a segment?

This should happen near the end of the taskDuration. The tasks write to a temporary location first, copy the segment to deep storage once they’re done building it, and exit once a historical node has loaded that segment. Note that a segment can’t be finished until its time interval has closed plus the windowPeriod, so with segmentGranularity DAY and a windowPeriod of PT40H you may be waiting well over a day before anything shows up in the console.
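If you just want to see segments hand off sooner while testing, shortening those periods should help. Here’s a minimal tuningConfig sketch; the PT10M values are only illustrative (they’re the common Tranquility defaults), not a recommendation tuned to your data:

"tuningConfig" : {
  "type" : "realtime",
  "maxRowsInMemory" : "100000",
  "intermediatePersistPeriod" : "PT10M",
  "windowPeriod" : "PT10M"
}

Keep in mind that with a short windowPeriod, events whose timestamps fall more than that far outside the current segment’s interval are dropped (and with reportDropsAsExceptions set to true, as in your config, reported as exceptions), so only shrink it if your timestamps arrive close to real time.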