Getting a Java heap OutOfMemoryError when batch-ingesting data into Druid using Hadoop indexing

Hi,
I am trying to ingest data into Druid using a batch index_hadoop task, with the following tuning parameters specified in the ingestion config:

"granularitySpec" : {
    "type" : "uniform",
    "segmentGranularity" : "DAY",
    "queryGranularity" : "DAY",
    "intervals" : ["XREPLACE_WITH_START_DTX/XREPLACE_WITH_END_DTX"],
    "rollup" : true
  }
},
"ioConfig": {
  "type": "hadoop",
  "inputSpec": {
    "type": "static",
    "inputFormat": "io.druid.data.input.parquet.DruidParquetInputFormat",
    "paths": "XREPLACE_WITH_S3_PATHX/date_key=XREPLACE_WITH_START_DTX/"
  }
},
"tuningConfig": {
  "type": "hadoop",
  "partitionsSpec": {
    "type": "hashed",
    "targetPartitionSize": 400000
  },
  "forceExtendableShardSpecs" : true,
  "jobProperties": {
    "mapreduce.job.classloader": "true",
    "mapreduce.map.memory.mb": "8192",
    "mapreduce.reduce.memory.mb": "18288",
    "mapreduce.task.timeout": "1800000",
    "mapreduce.map.speculative": "false",
    "mapreduce.reduce.speculative": "false",
    "mapreduce.input.fileinputformat.split.minsize": "125829120",
    "mapreduce.input.fileinputformat.split.maxsize": "268435456",
    "mapreduce.map.java.opts": "-Xmx1639m -Duser.timezone=UTC -Dfile.encoding=UTF-8",
    "mapreduce.reduce.java.opts": "-Xmx3277m -Duser.timezone=UTC -Dfile.encoding=UTF-8"
  }
}

I get the following error while indexing. Can anyone provide suggestions on how I can fix this problem? Thanks.

ERROR [qtp419923787-71] com.sun.jersey.spi.container.ContainerResponse - The exception contained within MappableContainerException could not be mapped to a response, re-throwing to the HTTP container

java.lang.OutOfMemoryError: Java heap space

2019-03-09T04:59:32,144 WARN [qtp419923787-71] org.eclipse.jetty.servlet.ServletHandler -

javax.servlet.ServletException: java.lang.OutOfMemoryError: Java heap space

at com.sun.jersey.spi.container.servlet.WebComponent.service(WebComponent.java:420) ~[jersey-servlet-1.19.3.jar:1.19.3]

at com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:558) ~[jersey-servlet-1.19.3.jar:1.19.3]

at com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:733) ~[jersey-servlet-1.19.3.jar:1.19.3]

at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) ~[javax.servlet-api-3.1.0.jar:3.1.0]

at com.google.inject.servlet.ServletDefinition.doServiceImpl(ServletDefinition.java:286) ~[guice-servlet-4.1.0.jar:?]

at com.google.inject.servlet.ServletDefinition.doService(ServletDefinition.java:276) ~[guice-servlet-4.1.0.jar:?]

at com.google.inject.servlet.ServletDefinition.service(ServletDefinition.java:181) ~[guice-servlet-4.1.0.jar:?]

at com.google.inject.servlet.ManagedServletPipeline.service(ManagedServletPipeline.java:91) ~[guice-servlet-4.1.0.jar:?]

at com.google.inject.servlet.ManagedFilterPipeline.dispatch(ManagedFilterPipeline.java:120) ~[guice-servlet-4.1.0.jar:?]

at com.google.inject.servlet.GuiceFilter.doFilter(GuiceFilter.java:135) ~[guice-servlet-4.1.0.jar:?]

at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) ~[jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at io.druid.server.http.RedirectFilter.doFilter(RedirectFilter.java:72) ~[druid-server-0.12.2-iap5.jar:0.12.2-iap5]

at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) ~[jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at io.druid.server.security.PreResponseAuthorizationCheckFilter.doFilter(PreResponseAuthorizationCheckFilter.java:84) ~[druid-server-0.12.2-iap5.jar:0.12.2-iap5]

at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) ~[jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at io.druid.server.security.AllowOptionsResourceFilter.doFilter(AllowOptionsResourceFilter.java:76) ~[druid-server-0.12.2-iap5.jar:0.12.2-iap5]

at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) ~[jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at io.druid.server.security.AllowAllAuthenticator$1.doFilter(AllowAllAuthenticator.java:85) ~[druid-server-0.12.2-iap5.jar:0.12.2-iap5]

at io.druid.server.security.AuthenticationWrappingFilter.doFilter(AuthenticationWrappingFilter.java:60) ~[druid-server-0.12.2-iap5.jar:0.12.2-iap5]

at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) ~[jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at io.druid.server.security.SecuritySanityCheckFilter.doFilter(SecuritySanityCheckFilter.java:86) ~[druid-server-0.12.2-iap5.jar:0.12.2-iap5]

at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1759) ~[jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) [jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:224) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) [jetty-servlet-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:493) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:52) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.Server.handle(Server.java:534) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) [jetty-server-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:283) [jetty-io-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:108) [jetty-io-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) [jetty-io-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) [jetty-util-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148) [jetty-util-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136) [jetty-util-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671) [jetty-util-9.3.19.v20170502.jar:9.3.19.v20170502]

at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589) [jetty-util-9.3.19.v20170502.jar:9.3.19.v20170502]

at java.lang.Thread.run(Thread.java:748) [?:1.8.0_181]

Caused by: java.lang.OutOfMemoryError: Java heap space

2019-03-09T05:00:59,881 INFO [TaskQueue-StorageSync] io.druid.indexing.overlord.TaskQueue - Synced 1 tasks from storage (0 tasks added, 0 tasks removed).

Try updating to Druid 0.13.0. It has some improved automated memory tuning at ingestion time (see maxBytesInMemory). From your logs it looks like you’re using the Imply distribution, so that would be any 2.8.x (the latest is 2.8.18).

Anand, it appears from your settings that, although you have increased the mapper/reducer process memory, you haven’t increased the max heap size accordingly.

It is recommended that you set -Xmx to about 80% of the mapper/reducer process memory (i.e. of mapreduce.map.memory.mb and mapreduce.reduce.memory.mb, respectively).

"mapreduce.map.memory.mb": "8192",

// You may want to increase mapper -Xmx to -Xmx6550m from 1639m

"mapreduce.map.java.opts": "-Xmx1639m -Duser.timezone=UTC -Dfile.encoding=UTF-8",

"mapreduce.reduce.memory.mb": "18288",

// You may want to increase reducer -Xmx to -Xmx14630m from 3277m

"mapreduce.reduce.java.opts": "-Xmx3277m -Duser.timezone=UTC -Dfile.encoding=UTF-8"

Relevant: https://stackoverflow.com/questions/24070557/what-is-the-relation-between-mapreduce-map-memory-mb-and-mapred-map-child-jav

Arup