Druid segment is almost 900MB. How can we reduce its size?

Our traffic just increased a lot, and now each segment is around 900MB. How do we reduce it to within the 300-700MB range?

Here’s our spec file from Tranquility.

{

  • task: “index_realtime_sparrow-firehose-web_2016-07-12T03:00:00.000Z_0_0”,

  • payload:

    {

    • id: “index_realtime_sparrow-firehose-web_2016-07-12T03:00:00.000Z_0_0”,

    • resource:

      {

      • availabilityGroup: “sparrow-firehose-web-2016-07-12T03:00:00.000Z-0000”,

      • requiredCapacity: 1

      },

    • spec:

      {

      • dataSchema:

        {

        • dataSource: “sparrow-firehose-web”,

        • parser:

          {

          • type: “map”,

          • parseSpec:

            {

            • format: “json”,

            • timestampSpec:

              {

              • column: “_ts”,

              • format: “auto”,

              • missingValue: “2016-07-12T03:50:13.686Z”

              },

            • dimensionsSpec:

              {

              • dimensionExclusions:

                [

                • “uID”,

                • “uIP”,

                • “sID”,

                • “lCi”,

                • “pHp”,

                • “cTi”,

                • “xID”,

                • “pID”

                ],

              • spatialDimensions:

                [

                • {

                  • dimName: “latLng”,

                  • dims:

                    [

                    • “lLa”,

                    • “lLo”

                    ]

                  },

                • {

                  • dimName: “mousePosition”,

                  • dims:

                    [

                    • “uMx”,

                    • “uMy”

                    ]

                  }

                ]

              }

            }

          },

        • metricsSpec:

          [

          • {

            • type: “count”,

            • name: “count”

            },

          • {

            • type: “hyperUnique”,

            • name: “unique”,

            • fieldName: “uID”

            },

          • {

            • type: “hyperUnique”,

            • name: “session”,

            • fieldName: “sID”

            },

          • {

            • type: “hyperUnique”,

            • name: “pageSession”,

            • fieldName: “pID”

            },

          • {

            • type: “hyperUnique”,

            • name: “iunique”,

            • fieldName: “xID”

            },

          • {

            • type: “longSum”,

            • name: “audience”,

            • fieldName: “uUq”

            },

          • {

            • type: “longSum”,

            • name: “newAudience”,

            • fieldName: “uNw”

            },

          • {

            • type: “doubleSum”,

            • name: “valueSum”,

            • fieldName: “_v”

            },

          • {

            • type: “doubleSum”,

            • name: “valueSum1”,

            • fieldName: “_v1”

            },

          • {

            • type: “doubleSum”,

            • name: “valueSum2”,

            • fieldName: “_v2”

            }

          ],

        • granularitySpec:

          {

          • type: “uniform”,

          • segmentGranularity: “HOUR”,

          • queryGranularity:

            {

            • type: “none”

            },

          • intervals: null

          }

        },

      • ioConfig:

        {

        • type: “realtime”,

        • firehose:

          {

          • type: “clipped”,

          • delegate:

            {

            • type: “timed”,

            • delegate:

              {

              • type: “receiver”,

              • serviceName: “firehose:druid:overlord:sparrow-firehose-web-003-0000-0000”,

              • bufferSize: 100000

              },

            • shutoffTime: “2016-07-12T04:15:00.000Z”

            },

          • interval: “2016-07-12T03:00:00.000Z/2016-07-12T04:00:00.000Z”

          },

        • firehoseV2: null

        },

      • tuningConfig:

        {

        • type: “realtime”,

        • maxRowsInMemory: 75000,

        • intermediatePersistPeriod: “PT10M”,

        • windowPeriod: “PT10M”,

        • basePersistDirectory: “/tmp/1467066153915-0”,

        • versioningPolicy:

          {

          • type: “intervalStart”

          },

        • rejectionPolicy:

          {

          • type: “none”

          },

        • maxPendingPersists: 0,

        • shardSpec:

          {

          • type: “linear”,

          • partitionNum: 0

          },

        • indexSpec:

          {

          • bitmap:

            {

            • type: “concise”

            },

          • dimensionCompression: null,

          • metricCompression: null

          },

        • buildV9Directly: false,

        • persistThreadPriority: 0,

        • mergeThreadPriority: 0,

        • reportParseExceptions: false

        }

      },

    • context: null,

    • groupId: “index_realtime_sparrow-firehose-web”,

    • dataSource: “sparrow-firehose-web”

    }

}

Hey Noppanit,

You can do this by either increasing the number of partitions (task.partitions in Tranquility) to get more segments per interval, or by including fewer columns.