wall
WallClock obtains call stacks by sending a signal to each target thread.
Each tick samples at most 8 threads, which throttles the profiling overhead for applications with many threads.
_interval controls the sleep time when only running threads are sampled.
When sleeping threads are sampled as well, _interval is adjusted according to the thread count so that samples are spread evenly. (For example, with a 100 ms interval and 80 threads in total, the loop wakes every 10 ms and signals 8 threads each time.)
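A minimal sketch of that adjustment, assuming a round-up division so that all threads are visited within one interval (async-profiler's actual WallClock::adjustInterval behaves the same way):

long adjustInterval(long interval, int thread_count) {
    if (thread_count > THREADS_PER_TICK) {
        // Number of ticks needed to visit every thread, THREADS_PER_TICK at a time.
        // 100 ms / ceil(80 / 8) = 100 ms / 10 ticks -> wake every 10 ms.
        interval /= (thread_count + THREADS_PER_TICK - 1) / THREADS_PER_TICK;
    }
    return interval;
}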
// Maximum number of threads sampled in one iteration. This limit serves as a throttle
// when generating profiling signals. Otherwise applications with too many threads may
// suffer from a big profiling overhead. Also, keeping this limit low enough helps
// to avoid contention on a spin lock inside Profiler::recordSample().
const int THREADS_PER_TICK = 8;
// Set the hard limit for thread walking interval to 100 microseconds.
// Smaller intervals are practically unusable due to large overhead.
const long MIN_INTERVAL = 100000;
void WallClock::timerLoop() {
    int self = OS::threadId();
    ThreadFilter* thread_filter = Profiler::instance()->threadFilter();
    bool thread_filter_enabled = thread_filter->enabled();
    bool sample_idle_threads = _sample_idle_threads;

    ThreadList* thread_list = OS::listThreads();
    long long next_cycle_time = OS::nanotime();

    while (_running) {
        if (!_enabled) {
            OS::sleep(_interval);
            continue;
        }

        if (sample_idle_threads) {
            // Try to keep the wall clock interval stable, regardless of the number of profiled threads
            int estimated_thread_count = thread_filter_enabled ? thread_filter->size() : thread_list->size();
            next_cycle_time += adjustInterval(_interval, estimated_thread_count);
        }

        for (int count = 0; count < THREADS_PER_TICK; ) {
            int thread_id = thread_list->next();
            if (thread_id == -1) {
                thread_list->rewind();
                break;
            }

            if (thread_id == self || (thread_filter_enabled && !thread_filter->accept(thread_id))) {
                continue;
            }

            if (sample_idle_threads || OS::threadState(thread_id) == THREAD_RUNNING) {
                if (OS::sendSignalToThread(thread_id, _signal)) {
                    count++;
                }
            }
        }

        if (sample_idle_threads) {
            long long current_time = OS::nanotime();
            if (next_cycle_time - current_time > MIN_INTERVAL) {
                OS::sleep(next_cycle_time - current_time);
            } else {
                next_cycle_time = current_time + MIN_INTERVAL;
                OS::sleep(MIN_INTERVAL);
            }
        } else {
            OS::sleep(_interval);
        }
    }

    delete thread_list;
}
Here the pc address is used to distinguish the THREAD_SLEEPING state from the THREAD_RUNNING state.
ThreadState WallClock::getThreadState(void* ucontext) {
    StackFrame frame(ucontext);
    uintptr_t pc = frame.pc();

    // Consider a thread sleeping, if it has been interrupted in the middle of syscall execution,
    // either when PC points to the syscall instruction, or if syscall has just returned with EINTR
    if (StackFrame::isSyscall((instruction_t*)pc)) {
        return THREAD_SLEEPING;
    }

    // Make sure the previous instruction address is readable
    uintptr_t prev_pc = pc - SYSCALL_SIZE;
    if ((pc & 0xfff) >= SYSCALL_SIZE || Profiler::instance()->findLibraryByAddress((instruction_t*)prev_pc) != NULL) {
        if (StackFrame::isSyscall((instruction_t*)prev_pc) && frame.checkInterruptedSyscall()) {
            return THREAD_SLEEPING;
        }
    }

    return THREAD_RUNNING;
}
mutex
The mutex profiler subscribes to the JVMTI_EVENT_MONITOR_CONTENDED_ENTER and JVMTI_EVENT_MONITOR_CONTENDED_ENTERED events.
It also registers a park hook on Unsafe.park() to record acquisitions of java.util.concurrent locks such as ReentrantLock, ReentrantReadWriteLock, and Semaphore.
Error LockTracer::start(Arguments& args) {
    _ticks_to_nanos = 1e9 / TSC::frequency();
    _threshold = (jlong)(args._lock * (TSC::frequency() / 1e9));

    if (!_initialized) {
        initialize();
    }

    // Enable Java Monitor events
    jvmtiEnv* jvmti = VM::jvmti();
    jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTER, NULL);
    jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_CONTENDED_ENTERED, NULL);

    _start_time = TSC::ticks();

    // Intercept Unsafe.park() for tracing contended ReentrantLocks
    if (_orig_Unsafe_park != NULL) {
        bindUnsafePark(UnsafeParkHook);
    }

    return Error::OK;
}
bool LockTracer::isConcurrentLock(const char* lock_name) {
    // Do not count synchronizers other than ReentrantLock, ReentrantReadWriteLock and Semaphore
    return strncmp(lock_name, "Ljava/util/concurrent/locks/ReentrantLock", 41) == 0 ||
           strncmp(lock_name, "Ljava/util/concurrent/locks/ReentrantReadWriteLock", 50) == 0 ||
           strncmp(lock_name, "Ljava/util/concurrent/Semaphore", 31) == 0;
}
_threshold is not passed to the JVM; instead, it is used to filter events after JVMTI_EVENT_MONITOR_CONTENDED_ENTERED is received.
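A hypothetical sketch of that filtering (setEnterTime, getEnterTime, and recordContendedLock are illustrative names, not the actual async-profiler API): the ENTER callback stamps the time, and the ENTERED callback records a sample only when the contended duration reaches _threshold.

void JNICALL LockTracer::MonitorContendedEnter(jvmtiEnv* jvmti, JNIEnv* env, jthread thread, jobject object) {
    // Remember when this thread started waiting for the monitor
    setEnterTime(jvmti, thread, TSC::ticks());
}

void JNICALL LockTracer::MonitorContendedEntered(jvmtiEnv* jvmti, JNIEnv* env, jthread thread, jobject object) {
    jlong entered_time = TSC::ticks();
    jlong enter_time = getEnterTime(jvmti, thread);

    // Record a lock sample only if the thread waited at least _threshold ticks
    if (entered_time - enter_time >= _threshold) {
        recordContendedLock(object, entered_time - enter_time);
    }
}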
heap
Allocation takes one of two paths:
- inside a TLAB:
  - the TLAB already has enough room;
  - the TLAB is not big enough, so a new TLAB is created and filled;
- outside a TLAB.
HeapWord* MemAllocator::mem_allocate(Allocation& allocation) const {
  if (UseTLAB) {
    HeapWord* result = allocate_inside_tlab(allocation);
    if (result != NULL) {
      return result;
    }
  }

  return allocate_outside_tlab(allocation);
}

HeapWord* MemAllocator::allocate_inside_tlab(Allocation& allocation) const {
  assert(UseTLAB, "should use UseTLAB");

  // Try allocating from an existing TLAB.
  HeapWord* mem = _thread->tlab().allocate(_word_size);
  if (mem != NULL) {
    return mem;
  }

  // Try refilling the TLAB and allocating the object in it.
  return allocate_inside_tlab_slow(allocation);
}
Two kinds of callback events are received; size_in_bytes is the size of the object being allocated.
In other words, these two events are not emitted at a fixed rate of one sample per 512 KB allocated, as in Go, so they can reflect the overall allocation picture.
(Allocations that fit in the current TLAB are not recorded at all, so most recorded events presumably come from phases of heavy allocation, such as program startup.)
void MemAllocator::Allocation::notify_allocation_jfr_sampler() {
  HeapWord* mem = (HeapWord*)obj();
  size_t size_in_bytes = _allocator._word_size * HeapWordSize;

  if (_allocated_outside_tlab) {
    AllocTracer::send_allocation_outside_tlab(_allocator._klass, mem, size_in_bytes, _thread);
  } else if (_allocated_tlab_size != 0) {
    // TLAB was refilled
    AllocTracer::send_allocation_in_new_tlab(_allocator._klass, mem, _allocated_tlab_size * HeapWordSize,
                                             size_in_bytes, _thread);
  }
}
For send_allocation_in_new_tlab, total_size is the TLAB size.
For send_allocation_outside_tlab, total_size is alloc_size.
// Called whenever our breakpoint trap is hit
void AllocTracer::trapHandler(int signo, siginfo_t* siginfo, void* ucontext) {
    StackFrame frame(ucontext);
    EventType event_type;
    uintptr_t total_size;
    uintptr_t instance_size;

    // PC points either to BREAKPOINT instruction or to the next one
    if (_in_new_tlab.covers(frame.pc())) {
        // send_allocation_in_new_tlab(Klass* klass, HeapWord* obj, size_t tlab_size, size_t alloc_size, Thread* thread)
        // send_allocation_in_new_tlab_event(KlassHandle klass, size_t tlab_size, size_t alloc_size)
        event_type = ALLOC_SAMPLE;
        total_size = _trap_kind == 1 ? frame.arg2() : frame.arg1();
        instance_size = _trap_kind == 1 ? frame.arg3() : frame.arg2();
    } else if (_outside_tlab.covers(frame.pc())) {
        // send_allocation_outside_tlab(Klass* klass, HeapWord* obj, size_t alloc_size, Thread* thread)
        // send_allocation_outside_tlab_event(KlassHandle klass, size_t alloc_size);
        event_type = ALLOC_OUTSIDE_TLAB;
        total_size = _trap_kind == 1 ? frame.arg2() : frame.arg1();
        instance_size = 0;
    } else {
        // Not our trap
        Profiler::instance()->trapHandler(signo, siginfo, ucontext);
        return;
    }

    // Leave the trapped function by simulating "ret" instruction
    uintptr_t klass = frame.arg0();
    frame.ret();

    if (_enabled && updateCounter(_allocated_bytes, total_size, _interval)) {
        recordAllocation(ucontext, event_type, klass, total_size, instance_size);
    }
}
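The updateCounter call above throttles recording to roughly one sample per _interval bytes. A plausible sketch of such a counter, assuming a CAS loop over a shared u64 (the profiler's unsigned 64-bit alias; __sync_bool_compare_and_swap is the GCC builtin):

// Add `size` to the shared counter; return true when the accumulated total
// reaches `interval`, carrying the remainder over to the next sample.
static bool updateCounter(volatile u64& counter, u64 size, u64 interval) {
    if (interval <= 1) {
        return true;  // sample every allocation
    }

    while (true) {
        u64 prev = counter;
        u64 next = prev + size;
        if (next < interval) {
            if (__sync_bool_compare_and_swap(&counter, prev, next)) {
                return false;  // not enough bytes accumulated yet
            }
        } else {
            if (__sync_bool_compare_and_swap(&counter, prev, next % interval)) {
                return true;   // crossed the sampling boundary
            }
        }
    }
}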
for JDK 11+
--alloc uses the JVMTI SetHeapSamplingInterval function to record one sample per 512 KB of allocation (and the sample is only delivered when the allocation refilled a new TLAB or happened outside a TLAB).
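For reference, this is roughly how an agent enables the JDK 11+ heap sampler via JVMTI; a minimal sketch using standard JVMTI calls, with error handling omitted (VM::jvmti() is the document's helper; any jvmtiEnv works):

jvmtiEnv* jvmti = VM::jvmti();

jvmtiCapabilities caps = {0};
caps.can_generate_sampled_object_alloc_events = 1;  // required for the sampler
jvmti->AddCapabilities(&caps);

jvmti->SetHeapSamplingInterval(512 * 1024);         // one sample per ~512 KB allocated
jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_SAMPLED_OBJECT_ALLOC, NULL);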
In JDK 11+:
- outside the TLAB, the allocation size is simply the size of the object being allocated;
- inside the TLAB, the allocation size is the number of bytes filled into the TLAB since the last sample point, plus the size of the object being allocated.
void MemAllocator::Allocation::notify_allocation_jvmti_sampler() {
  // support for JVMTI VMObjectAlloc event (no-op if not enabled)
  JvmtiExport::vm_object_alloc_event_collector(obj());

  if (!ThreadHeapSampler::enabled()) {
    // Sampling disabled
    return;
  }

  if (!_allocated_outside_tlab && _allocated_tlab_size == 0 && !_tlab_end_reset_for_sample) {
    // Sample if it's a non-TLAB allocation, or a TLAB allocation that either refills the TLAB
    // or expands it due to taking a sampler induced slow path.
    return;
  }

  assert(JavaThread::current()->heap_sampler().add_sampling_collector(),
         "Should never return false.");

  // Only check if the sampler could actually sample something in this path.
  assert(!JvmtiExport::should_post_sampled_object_alloc() ||
         !JvmtiSampledObjectAllocEventCollector::object_alloc_is_safe_to_sample() ||
         _thread->heap_sampler().sampling_collector_present(),
         "Sampling collector not present.");

  if (JvmtiExport::should_post_sampled_object_alloc()) {
    // If we want to be sampling, protect the allocated object with a Handle
    // before doing the callback. The callback is done in the destructor of
    // the JvmtiSampledObjectAllocEventCollector.
    PreserveObj obj_h(_thread, _obj_ptr);
    JvmtiSampledObjectAllocEventCollector collector;
    size_t size_in_bytes = _allocator._word_size * HeapWordSize;
    ThreadLocalAllocBuffer& tlab = _thread->tlab();
    size_t bytes_since_last = _allocated_outside_tlab ? 0 : tlab.bytes_since_last_sample_point();
    _thread->heap_sampler().check_for_sampling(obj_h(), size_in_bytes, bytes_since_last);
  }

  assert(JavaThread::current()->heap_sampler().remove_sampling_collector(), "Should never return false.");

  if (_tlab_end_reset_for_sample || _allocated_tlab_size != 0) {
    _thread->tlab().set_sample_end();
  }
}
AsyncGetCallTrace
- EXECUTION_SAMPLE relies on AsyncGetCallTrace to collect the call stack.
- Allocation samples, on HotSpot builds that are neither OpenJ9 nor Zing, use an internal JVM function (getJavaTraceInternal via VMStructs::_get_stack_trace).
- Everything else uses getJavaTraceJvmti.
In other words, as long as we don't use the cpu or wall event types, AsyncGetCallTrace (bugs.openjdk.org/browse/JDK-…) is never invoked, so it cannot crash the JVM.
if (event_type <= EXECUTION_SAMPLE) {
    // Async events
    int java_frames = getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
    if (java_frames > 0 && java_ctx.pc != NULL && VMStructs::hasMethodStructs()) {
        NMethod* nmethod = CodeHeap::findNMethod(java_ctx.pc);
        if (nmethod != NULL) {
            fillFrameTypes(frames + num_frames, java_frames, nmethod);
        }
    }
    num_frames += java_frames;
} else if (event_type >= ALLOC_SAMPLE && event_type <= ALLOC_OUTSIDE_TLAB && _alloc_engine == &alloc_tracer) {
    if (VMStructs::_get_stack_trace != NULL) {
        // Object allocation in HotSpot happens at known places where it is safe to call JVM TI,
        // but not directly, since the thread is in_vm rather than in_native
        num_frames += getJavaTraceInternal(jvmti_frames + num_frames, frames + num_frames, _max_stack_depth);
    } else {
        num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth, &java_ctx);
    }
} else {
    // Lock events and instrumentation events can safely call synchronous JVM TI stack walker.
    // Skip Instrument.recordSample() method
    int start_depth = event_type == INSTRUMENTED_METHOD ? 1 : 0;
    num_frames += getJavaTraceJvmti(jvmti_frames + num_frames, frames + num_frames, start_depth, _max_stack_depth);
}
void VMStructs::initJvmFunctions() {
    if (!VM::isOpenJ9() && !VM::isZing()) {
        _get_stack_trace = (GetStackTraceFunc)_libjvm->findSymbolByPrefix("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP");
    }
}
stack walker
Refer to the following two articles. The walker follows the fp chain; at each frame it loads the word at fp + 1 (FRAME_PC_SLOT, the slot above the saved frame pointer) to obtain the chain of return addresses.
int StackWalker::walkFP(void* ucontext, const void** callchain, int max_depth) {
    const void* pc;
    uintptr_t fp;
    uintptr_t prev_fp = (uintptr_t)&fp;
    uintptr_t bottom = prev_fp + MAX_WALK_SIZE;

    if (ucontext == NULL) {
        pc = __builtin_return_address(0);
        fp = (uintptr_t)__builtin_frame_address(1);
    } else {
        StackFrame frame(ucontext);
        pc = (const void*)frame.pc();
        fp = frame.fp();
    }

    int depth = 0;

    // Walk until the bottom of the stack or until the first Java frame
    while (depth < max_depth && !CodeHeap::contains(pc)) {
        callchain[depth++] = pc;

        // Check if the next frame is below on the current stack
        if (fp <= prev_fp || fp >= prev_fp + MAX_FRAME_SIZE || fp >= bottom) {
            break;
        }

        // Frame pointer must be word aligned
        if ((fp & (sizeof(uintptr_t) - 1)) != 0) {
            break;
        }

        pc = stripPointer(SafeAccess::load((void**)fp + FRAME_PC_SLOT));
        if (pc < (const void*)MIN_VALID_PC || pc > (const void*)-MIN_VALID_PC) {
            break;
        }

        prev_fp = fp;
        fp = *(uintptr_t*)fp;
    }

    return depth;
}
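An illustrative call site, assuming we are inside a signal handler that received the ucontext (the buffer size of 128 is arbitrary):

// Collect up to 128 native return addresses starting from the signal context.
const void* callchain[128];
int depth = StackWalker::walkFP(ucontext, callchain, 128);

// Each entry is a raw return address; it can be symbolized later,
// e.g. with dladdr() or the profiler's own symbol table.
for (int i = 0; i < depth; i++) {
    printf("#%d %p\n", i, callchain[i]);
}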