doris的BE节点报错:"OutOfMemoryError: Java heap space"

113 阅读3分钟

报错现象

W20250701 10:07:38.051013 205303 vhive_partition_writer.cpp:121] _file_format_transformer close failed, reason: [INTERNAL_ERROR]IOError: [INTERNAL_ERROR]write hdfs failed. namenode: hdfs://datalake-prd.bigdata3.prd.storage.local, path: /tmp/.doris_staging/srv_cdptraffic_prd/7a98748a3d254f77ba738fc70750f652/p_date=20250629/appkey=73083E073B5547BBA0EA5509EE65FCD6/p_funnel_type=1/23128e1ebbb8400f-80a5aac1399a42cf_c06a39b6-7fbc-4556-9588-1b534fd95c0e-0.parquet, error: (12), Cannot allocate memory), reason: OutOfMemoryError: Java heap space

        0#  doris::io::HdfsFileWriter::appendv(doris::Slice const*, unsigned long) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
        1#  doris::vectorized::ParquetOutputStream::Write(void const*, long) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:494
        2#  arrow::io::Writable::Write(std::shared_ptr<arrow::Buffer> const&)
        3#  parquet::BufferedPageWriter::Close(bool, bool)
        4#  parquet::ColumnWriterImpl::Close()
        5#  parquet::RowGroupWriter::Close()
        6#  parquet::arrow::FileWriterImpl::Close()
        7#  doris::vectorized::VParquetTransformer::close() at /home/zcp/repo_center/doris_release/doris/thirdparty/installed/include/arrow/status.h:274
        8#  doris::vectorized::VHivePartitionWriter::close(doris::Status const&) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:380
        9#  doris::vectorized::VHiveTableWriter::close(doris::Status) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:494
        10# doris::vectorized::AsyncResultWriter::process_block(doris::RuntimeState*, doris::RuntimeProfile*) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:360
        11# std::_Function_handler<void (), doris::vectorized::AsyncResultWriter::start_writer(doris::RuntimeState*, doris::RuntimeProfile*)::$_0>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/ext/atomicity.h:98
        12# doris::ThreadPool::dispatch_thread() at /home/zcp/repo_center/doris_release/doris/be/src/util/threadpool.cpp:0
        13# doris::Thread::supervise_thread(void*) at /var/local/ldb-toolchain/bin/../usr/include/pthread.h:562
        14# start_thread
        15# __clone


        0#  doris::to_doris_status(arrow::Status const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
        1#  doris::vectorized::VParquetTransformer::close() at /home/zcp/repo_center/doris_release/doris/be/src/vec/runtime/vparquet_transformer.cpp:0
        2#  doris::vectorized::VHivePartitionWriter::close(doris::Status const&) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:380
        3#  doris::vectorized::VHiveTableWriter::close(doris::Status) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:494
        4#  doris::vectorized::AsyncResultWriter::process_block(doris::RuntimeState*, doris::RuntimeProfile*) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:360
        5#  std::_Function_handler<void (), doris::vectorized::AsyncResultWriter::start_writer(doris::RuntimeState*, doris::RuntimeProfile*)::$_0>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/ext/atomicity.h:98
        6#  doris::ThreadPool::dispatch_thread() at /home/zcp/repo_center/doris_release/doris/be/src/util/threadpool.cpp:0
        7#  doris::Thread::supervise_thread(void*) at /var/local/ldb-toolchain/bin/../usr/include/pthread.h:562
        8#  start_thread
        9#  __clone
W20250701 10:07:38.815284 205303 vhive_table_writer.cpp:250] partition writer close failed for partition [INTERNAL_ERROR]IOError: [INTERNAL_ERROR]write hdfs failed. namenode: hdfs://datalake-prd.bigdata3.prd.storage.local, path: /tmp/.doris_staging/srv_cdptraffic_prd/7a98748a3d254f77ba738fc70750f652/p_date=20250629/appkey=73083E073B5547BBA0EA5509EE65FCD6/p_funnel_type=1/23128e1ebbb8400f-80a5aac1399a42cf_c06a39b6-7fbc-4556-9588-1b534fd95c0e-0.parquet, error: (12), Cannot allocate memory), reason: OutOfMemoryError: Java heap space

        0#  doris::io::HdfsFileWriter::appendv(doris::Slice const*, unsigned long) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
        1#  doris::vectorized::ParquetOutputStream::Write(void const*, long) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:494
        2#  arrow::io::Writable::Write(std::shared_ptr<arrow::Buffer> const&)
        3#  parquet::BufferedPageWriter::Close(bool, bool)
        4#  parquet::ColumnWriterImpl::Close()
        5#  parquet::RowGroupWriter::Close()
        6#  parquet::arrow::FileWriterImpl::Close()
        7#  doris::vectorized::VParquetTransformer::close() at /home/zcp/repo_center/doris_release/doris/thirdparty/installed/include/arrow/status.h:274
        8#  doris::vectorized::VHivePartitionWriter::close(doris::Status const&) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:380
        9#  doris::vectorized::VHiveTableWriter::close(doris::Status) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:494
        10# doris::vectorized::AsyncResultWriter::process_block(doris::RuntimeState*, doris::RuntimeProfile*) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:360
        11# std::_Function_handler<void (), doris::vectorized::AsyncResultWriter::start_writer(doris::RuntimeState*, doris::RuntimeProfile*)::$_0>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/ext/atomicity.h:98
        12# doris::ThreadPool::dispatch_thread() at /home/zcp/repo_center/doris_release/doris/be/src/util/threadpool.cpp:0
        13# doris::Thread::supervise_thread(void*) at /var/local/ldb-toolchain/bin/../usr/include/pthread.h:562
        14# start_thread
        15# __clone


        0#  doris::to_doris_status(arrow::Status const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
        1#  doris::vectorized::VParquetTransformer::close() at /home/zcp/repo_center/doris_release/doris/be/src/vec/runtime/vparquet_transformer.cpp:0
        2#  doris::vectorized::VHivePartitionWriter::close(doris::Status const&) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:380
        3#  doris::vectorized::VHiveTableWriter::close(doris::Status) at /home/zcp/repo_center/doris_release/doris/be/src/common/status.h:494
        4#  doris::vectorized::AsyncResultWriter::process_block(doris::RuntimeState*, doris::RuntimeProfile*) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:360
        5#  std::_Function_handler<void (), doris::vectorized::AsyncResultWriter::start_writer(doris::RuntimeState*, doris::RuntimeProfile*)::$_0>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/ext/atomicity.h:98
        6#  doris::ThreadPool::dispatch_thread() at /home/zcp/repo_center/doris_release/doris/be/src/util/threadpool.cpp:0
        7#  doris::Thread::supervise_thread(void*) at /var/local/ldb-toolchain/bin/../usr/include/pthread.h:562
        8#  start_thread
        9#  __clone

问题原因:

当前 BE 节点未配置 JVM 参数,因此 Doris BE 中的 JVM 在启动时使用默认参数,即最大堆内存(-Xmx)只有 1G。

默认的 1G 堆内存无法满足当前业务的写入需求:HdfsFileWriter 在向 HDFS 写入 Parquet 文件时分配不到足够的堆内存,因此报错“OutOfMemoryError: Java heap space”。


# For jdk 8
JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx2048m -DlogPath=$LOG_DIR/jni.log -Xloggc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Djavax.security.auth.useSubjectCredsOnly=false -Dsun.security.krb5.debug=true -Dsun.java.command=DorisBE -XX:-CriticalJNINatives  -Darrow.enable_null_check_for_get=false"

当前 BE 节点的 be.conf 中没有上面这行 JAVA_OPTS 配置。

解决方法:

在 be.conf 中增加 JVM 启动参数(将 -Xmx 调大到 8192m)。JVM 参数只在 BE 启动时读取,修改后需要重启 BE 才能生效。

cd /data1/be/conf
vim be.conf
#增加如下两行到be.conf中
# For jdk 8
JAVA_OPTS="-Dfile.encoding=UTF-8 -Xmx8192m -DlogPath=$LOG_DIR/jni.log -Xloggc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Djavax.security.auth.useSubjectCredsOnly=false -Dsun.security.krb5.debug=true -Dsun.java.command=DorisBE -XX:-CriticalJNINatives  -Darrow.enable_null_check_for_get=false"

相关案例:

doris 外表查询 paimon 的聚合表,be 报错 IOException: java.lang.OutOfMemoryError: Java heap space - Apache Doris

paimon2doris insert into select xxx OutOfMemoryError - Apache Doris 中文技术论坛