1 arthas火焰图
- 启动命令
java -jar arthas-boot.jar
java -jar arthas-boot.jar --target-ip 0.0.0.0 --http-port 8080
- 退出命令
退出arthas前,记得用stop命令退出(quit/exit只会断开当前客户端连接,arthas仍会继续运行在目标进程中)
- 抓取火焰图命令
启动采样,profiler start
查看采样状态,profiler status -- 显示抓取时长,一般抓取90s
停止采样并生成火焰图,profiler stop
- 可能会遇到的问题
Perf events unavailable. See stderr of the target process.
解决方法root用户:echo 1 > /proc/sys/kernel/perf_event_paranoid
参考:www.cnblogs.com/rongfenglia…
2 常用命令
2.1 trace
trace --> jdk native 类加载不出来
# 用-n参数指定捕捉结果的次数,达到次数后命令自动退出(例如 -n 1 表示捕捉到一次调用就退出命令,下面的 -n 20 表示捕捉20次后退出)
trace org.apache.hadoop.hdfs.server.namenode.FSNamesystem registerDatanode '#cost > 50' -n 20
trace org.apache.hadoop.hdfs.server.namenode.FSNamesystem resolveRemoteAddress '#cost > 50' -n 20
trace --skipJDKMethod false org.apache.hadoop.hdfs.server.blockmanagement.BlockManager computeInvalidateWork '#cost>100'
trace org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager resolveNetworkLocation '#cost > 50' -n 20
trace org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager getCurrentNsDatanodeDescriptor '#cost > 50' -n 20
trace org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager resolveIncludesDnHosts '#cost > 50' -n 5
日志:
[arthas@1840]$ trace org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager resolveIncludesDnHosts '#cost > 5' -n 5
Press Q or Ctrl+C to abort.
Affect(class-cnt:2 , method-cnt:1) cost in 120 ms.
`---ts=2021-07-28 10:57:53;thread_name=main;id=1;is_daemon=false;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[671.817143ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:resolveIncludesDnHosts()
+---[0.026207ms] org.apache.hadoop.util.Time:monotonicNow() #1217
+---[0.012928ms] org.apache.hadoop.hdfs.server.blockmanagement.HostConfigManager:getIncludes() #1220
+---[620.684968ms] org.apache.hadoop.net.DNSToSwitchMapping:resolve() #1223
+---[0.005863ms] org.apache.hadoop.util.Time:monotonicNow() #1225
`---[0.327295ms] org.slf4j.Logger:info() #1229
[arthas@25774]$ trace org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer refreshNodes -n 5 --skipJDKMethod false
Press Q or Ctrl+C to abort.
Affect(class-cnt:2 , method-cnt:1) cost in 278 ms.
`---ts=2021-12-14 14:22:29;thread_name=IPC Server handler 9 on default port 8023;id=2aa;is_daemon=true;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[7195.484803ms] org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer:refreshNodes()
+---[0.017304ms] org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer:checkNNStartup() #1333
`---[7194.440945ms] org.apache.hadoop.hdfs.server.namenode.FSNamesystem:refreshNodes() #1334
`---ts=2021-12-14 14:23:09;thread_name=IPC Server handler 1 on default port 8023;id=2a2;is_daemon=true;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[59965.973859ms] org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer:refreshNodes()
+---[0.006611ms] org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer:checkNNStartup() #1333
`---[59965.801931ms] org.apache.hadoop.hdfs.server.namenode.FSNamesystem:refreshNodes() #1334
`---ts=2021-12-14 14:27:31;thread_name=IPC Server handler 1 on default port 8023;id=2a2;is_daemon=true;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[25712.063457ms] org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer:refreshNodes()
+---[0.007589ms] org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer:checkNNStartup() #1333
`---[25711.858563ms] org.apache.hadoop.hdfs.server.namenode.FSNamesystem:refreshNodes() #1334
[arthas@25774]$ trace org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager refreshNodes -n 5 --skipJDKMethod false
Press Q or Ctrl+C to abort.
Affect(class-cnt:2 , method-cnt:1) cost in 253 ms.
`---ts=2021-12-14 14:39:14;thread_name=IPC Server handler 1 on default port 8023;id=2a2;is_daemon=true;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[28181.472948ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshNodes()
+---[26860.05101ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshHostsReader() #1208
+---[12.666944ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:resolveIncludesDnHosts() #1209
+---[0.278995ms] org.apache.hadoop.hdfs.server.namenode.Namesystem:writeLock() #1210
+---[1301.743071ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshDatanodes() #1212
+---[5.990685ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:countSoftwareVersions() #1213
`---[0.023196ms] org.apache.hadoop.hdfs.server.namenode.Namesystem:writeUnlock() #1215
`---ts=2021-12-14 14:42:52;thread_name=IPC Server handler 1 on default port 8023;id=2a2;is_daemon=true;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[5275.553658ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshNodes()
+---[3837.741763ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshHostsReader() #1208
+---[13.162538ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:resolveIncludesDnHosts() #1209
+---[1.667395ms] org.apache.hadoop.hdfs.server.namenode.Namesystem:writeLock() #1210
+---[1416.804633ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshDatanodes() #1212
+---[5.959973ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:countSoftwareVersions() #1213
`---[0.011837ms] org.apache.hadoop.hdfs.server.namenode.Namesystem:writeUnlock() #1215
`---ts=2021-12-14 14:44:49;thread_name=IPC Server handler 1 on default port 8023;id=2a2;is_daemon=true;priority=5;TCCL=jdk.internal.loader.ClassLoaders$AppClassLoader@22d8cfe0
`---[2524.478156ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshNodes()
+---[125.27665ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshHostsReader() #1208
+---[7.954986ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:resolveIncludesDnHosts() #1209
+---[0.005633ms] org.apache.hadoop.hdfs.server.namenode.Namesystem:writeLock() #1210
+---[2350.819566ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:refreshDatanodes() #1212
+---[39.294086ms] org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager:countSoftwareVersions() #1213
`---[0.017516ms] org.apache.hadoop.hdfs.server.namenode.Namesystem:writeUnlock() #1215
refreshHostsReader -- 125~26860ms(上面三次调用分别约为 26860ms、3838ms、125ms)
resolveIncludesDnHosts -- 8~13ms(上面三次调用分别约为 12.7ms、13.2ms、8.0ms)
real 1m1.796s
user 0m6.410s
sys 0m0.702s
2.2 watch
watch org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager getDatanodeDescriptor "{params,returnObj}" -n 1 -x 1
watch org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager resolveNetworkLocation "{params,returnObj}" -n 10 -x 3
watch org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager getCurrentNsDatanodeDescriptor "{params,returnObj}" -n 10 -x 3
watch org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager getCurrentNsDatanodeDescriptor "{params,returnObj}" "returnObj==null" -n 10 -x 3
watch org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager getCurrentNsDatanodeDescriptor "{params,returnObj}" "returnObj==null" -n 1 -x 1
watch org.apache.hadoop.hdfs.server.namenode.FSNamesystem handleHeartbeat '{params,throwExp}' -e -x 3
watch org.apache.hadoop.hdfs.server.namenode.FSNamesystem resolveRemoteAddress '{params,throwExp}' -e -x 3
watch 用法:watch <类名> <方法名> [观察表达式(可选)],表达式中可用 params(参数)、returnObj(返回值)、throwExp(异常)、target(当前对象)
watch org.apache.hadoop.hdfs.server.namenode.FSNamesystem handleHeartbeat target
watch org.apache.hadoop.hdfs.server.convertor.ConvertDefaultResolver scanJobAndDispatchTask '{params,throwExp}' -e -x 3
2.3 其它
sc *
sm Datanode*
sm * handleHeartbeat params.length>=1
stack