Car项目

61 阅读4分钟

1.coredump采集

linux接口实现:

  • backtrace:获取当前线程调用栈中各栈帧的返回地址(写入调用方提供的 void* 数组),返回实际获取到的栈帧数。
  • backtrace_symbols:将地址转换为可读字符串(需 free)。
  • 用途:在崩溃时打印调用栈,辅助调试。
  • 注意:链接时加 -rdynamic(让 backtrace_symbols 能解析出函数名),编译时加 -g(便于用 addr2line 定位到源码行);backtrace_symbols 返回的数组用完后需要 free。
#include <execinfo.h>
1. int backtrace(void **buffer, int size);
//获取当前线程的调用堆栈(call stack),即函数调用的层级关系。
//返回值是实际获取到的栈帧数量(不超过 `size`);`buffer` 由调用方提供,函数将其填充为指针数组,每个元素是一个栈帧(stack frame)的返回地址。
eg.
void* trace[10];
int trace_size = backtrace(trace, 10); // capture up to the 10 innermost stack frames

2. char **backtrace_symbols(void *const *buffer, int size);
//将 `backtrace` 返回的地址数组(`buffer`)转换成可读的字符串(如 `"main() [0x55a1b2c3d4e5]"`)。
//返回一个字符串数组,每个字符串代表一个栈帧的符号信息。该数组由函数内部 malloc 分配,用完后只需对返回的指针调用一次 `free`,数组里的各个字符串不要单独 free。

eg.
char **symbols = backtrace_symbols(trace, trace_size);
if (symbols) {
    for (int i = 0; i < trace_size; i++) {
        printf("[%d] %s\n", i, symbols[i]); // print one symbolized stack frame
    }
    free(symbols); // the array is allocated by backtrace_symbols; the caller must free it
}


demo实现:

namespace coredump_catch {
// Crash-signal catcher. In builds that define ZKOS_LINUX_COREDUMP_CATCH, a
// successful Init installs a handler for SIGSEGV, SIGABRT, SIGFPE, SIGILL and
// SIGBUS.
// Notes:
// 1. Not thread-safe; call it from the main thread during program start-up.
// 2. Output directory: the no-argument Init() reads it from the
//    ZKOS_COREDUMP_PATH environment variable; if that is not set, catching is
//    not enabled. The process needs write permission on the directory.
//
// Init(): no argument; enabled only when ZKOS_COREDUMP_PATH is set.
// Init(core_file_path): uses the given directory; an empty path leaves
// catching disabled.
void Init();
void Init(std::string_view core_file_path);
} // namespace coredump_catch

namespace coredump_catch {
// Directory in which the handler creates "<process name>.coreinfo"; set by Init().
static std::string g_core_file_path;
#ifdef ZKOS_LINUX_COREDUMP_CATCH
void traceInfo(const char* info, int fd)
{
if (!info) {
return;
}
int n = write(STDOUT_FILENO, info, strlen(info));
(void)n;//`告诉编译器这个变量 `n` 被故意忽略,从而避免产生“未使用变量”的警告
if (fd > 0) {
int w_ret = write(fd, info, strlen(info));
if (w_ret == -1) {
char tmp[COREDUMP_CATCH_TMP_BUFF_SIZE];
snprintf(tmp, COREDUMP_CATCH_TMP_BUFF_SIZE, "CoredumpCatch: write failed, %d\n", errno);
n = write(STDOUT_FILENO, tmp, strlen(tmp));
}}}

// Reads the short name of the current process from /proc/self/comm and appends
// it (without the trailing newline) to `name`.
//
// Returns true on success. If the file cannot be opened or read, a diagnostic
// carrying errno is printed through traceInfo(), `name` is left untouched and
// false is returned.
static bool getProcessName(std::string& name)
{
    FILE* file = fopen("/proc/self/comm", "r");
    if (!file) {
        char tmp[COREDUMP_CATCH_TMP_BUFF_SIZE] = {0};
        snprintf(tmp, COREDUMP_CATCH_TMP_BUFF_SIZE, "CoredumpCatch: fopen /proc/self/comm failed, %d\n", errno);
        traceInfo(tmp, 0);
        return false;
    }

    char buff[64] = {0};
    // fgets() already reserves one byte for the terminating NUL, so the full
    // buffer size is passed.
    const bool read_ok = fgets(buff, static_cast<int>(sizeof(buff)), file) != nullptr;
    const int read_errno = errno;  // captured before fclose() can overwrite it
    fclose(file);

    if (!read_ok) {
        char tmp[COREDUMP_CATCH_TMP_BUFF_SIZE] = {0};
        snprintf(tmp, COREDUMP_CATCH_TMP_BUFF_SIZE, "CoredumpCatch: fgets /proc/self/comm failed, %d\n", read_errno);
        traceInfo(tmp, 0);
        return false;
    }

    // Strip the trailing newline. The check is `n > 0` so that a line holding
    // only "\n" (empty process name) does not leak a newline into the name.
    const size_t n = strlen(buff);
    if (n > 0 && buff[n - 1] == '\n') {
        buff[n - 1] = 0;
    }
    name.append(buff);
    return true;
}

// Crash-signal handler installed by Init().
//
// Echoes a report to stdout and writes it to
// "<g_core_file_path>/<process name>.coreinfo": pid, process name, signal
// number and the symbolized backtrace of the current thread. If the report
// file cannot be opened, the report still goes to stdout.
//
// Once the report is finished (or collecting the backtrace failed) the handler
// restores the default disposition of `sig` and re-raises it. The process thus
// still terminates because of the original signal: the parent sees the real
// termination status and the system can still produce a core dump. Calling
// exit() here would report a crash as a normal exit and would run atexit
// handlers and static destructors in an already broken process.
//
// NOTE(review): std::string, std::stringstream, snprintf, fopen and
// backtrace_symbols are not async-signal-safe, so this handler is best effort.
// backtrace_symbols_fd() is the allocation-free alternative if it ever needs
// hardening.
static void signalHandler(int sig)
{
    char tmp[COREDUMP_CATCH_TMP_BUFF_SIZE] = {0};
    snprintf(tmp, COREDUMP_CATCH_TMP_BUFF_SIZE, "CoredumpCatch: sig=%d\n", sig);
    traceInfo(tmp, 0);

    std::string p_name;
    if (!getProcessName(p_name)) {
        p_name.append("NOT_FOUND_NAME");
    }

    // fd stays negative when the file cannot be opened; traceInfo() then only
    // echoes to stdout.
    std::string filename = g_core_file_path + "/" + p_name + ".coreinfo";
    int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) {
        snprintf(tmp, COREDUMP_CATCH_TMP_BUFF_SIZE, "CoredumpCatch: open %s failed, %d\n", filename.c_str(), errno);
        traceInfo(tmp, 0);
    }

    // base process info
    std::stringstream info;
    info << "=====coredump base info=====\n";
    info << "pid = " << getpid() << "\n";
    info << "bin name = " << p_name << "\n";
    info << "sig = " << sig << "\n";
    info << "=====helper info===== \n";
    info << "You can use the following information to indicate specific "
            "functions.\n";
    info << "You can also locate the specific line based on this information "
            "Coredump Match manual for details\n";
    traceInfo(info.str().c_str(), fd);

    // back trace info
    traceInfo("=====coredump back trace=====\n", fd);
    void* trace_array[COREDUMP_CATCH_ARRAY_SIZE];
    const int size = backtrace(trace_array, COREDUMP_CATCH_ARRAY_SIZE);
    if (size <= 0) {
        info.str("");
        info << "CoredumpCatch: backtrace failed, errno = " << errno << "\n";
        traceInfo(info.str().c_str(), fd);
    } else {
        char** buff = backtrace_symbols(trace_array, size);
        if (!buff) {
            info.str("");
            info << "CoredumpCatch: backtrace_symbols failed, errno = " << errno
                 << "\n";
            traceInfo(info.str().c_str(), fd);
        } else {
            for (int i = 0; i < size; ++i) {
                if (!buff[i]) {
                    traceInfo("backtrace_symbols result break\n", fd);
                    break;
                }
                snprintf(tmp, COREDUMP_CATCH_TMP_BUFF_SIZE, "[%02d] %s\n", i, buff[i]);
                traceInfo(tmp, fd);
            }
            // buff is intentionally not freed: the process terminates right
            // below, and touching the heap again after a crash adds risk.
        }
    }

    if (fd > 0) {
        close(fd);
    }

    // Die from the original signal instead of exiting normally. If `sig` is
    // blocked while this handler runs, the re-raised signal is delivered with
    // its default action as soon as the handler returns.
    signal(sig, SIG_DFL);
    raise(sig);
}
// Enables crash catching with the report directory taken from the
// ZKOS_COREDUMP_PATH environment variable.
//
// When the variable is unset or empty, only a notice is printed and no signal
// handler is installed. Otherwise the directory is stored in g_core_file_path
// and signalHandler is registered for SIGSEGV, SIGABRT, SIGFPE, SIGILL and
// SIGBUS.
void Init()
{
    // init coredump path
    const char* path = std::getenv("ZKOS_COREDUMP_PATH");
    if (!path || *path == '\0') {
        printf("CoredumpCatch::Init ignore, env path empty\n");
        return;
    }
    // Assign instead of append so that a repeated Init() call does not
    // concatenate the directory onto itself.
    g_core_file_path = path;

    signal(SIGSEGV, signalHandler); // segmentation fault: bad pointer, out-of-bounds access, ...
    signal(SIGABRT, signalHandler); // abort(), including unhandled exceptions
    signal(SIGFPE, signalHandler);  // floating point exception
    signal(SIGILL, signalHandler);  // illegal / unknown instruction
    signal(SIGBUS, signalHandler);  // address the MMU rejects, e.g. a misaligned access
    printf("CoredumpCatch::Init, will catch signal: SIGSEGV SIGABRT SIGFPE "
           "SIGILL SIGBUS. path: %s\n", g_core_file_path.c_str());
}

// Enables crash catching with an explicitly supplied report directory.
//
// core_file_path: directory in which "<process name>.coreinfo" is created.
//                 When it is empty, only a notice is printed and no signal
//                 handler is installed.
//
// Otherwise the directory is stored in g_core_file_path and signalHandler is
// registered for SIGSEGV, SIGABRT, SIGFPE, SIGILL and SIGBUS.
void Init(std::string_view core_file_path)
{
    // init coredump path
    if (core_file_path.empty()) {
        printf("CoredumpCatch::Init ignore, param path empty\n");
        return;
    }
    g_core_file_path = core_file_path;

    signal(SIGSEGV, signalHandler); // segmentation fault: bad pointer, out-of-bounds access, ...
    signal(SIGABRT, signalHandler); // abort(), including unhandled exceptions
    signal(SIGFPE, signalHandler);  // floating point exception
    signal(SIGILL, signalHandler);  // illegal / unknown instruction
    signal(SIGBUS, signalHandler);  // address the MMU rejects, e.g. a misaligned access
    // The message lists SIGBUS as well, matching the handlers registered above.
    printf("CoredumpCatch::Init, will catch signal: SIGSEGV SIGABRT SIGFPE "
           "SIGILL SIGBUS. path: %s\n", g_core_file_path.c_str());
}
#else
// Stubs for builds without ZKOS_LINUX_COREDUMP_CATCH: crash catching is not
// available, so both overloads only print a notice. The no-argument overload
// is defined here too, so callers of Init() still link in such builds.
void Init()
{
    printf("CoredumpCatch::Init, not support plat\n");
}

void Init([[maybe_unused]] std::string_view core_file_path)
{
    printf("CoredumpCatch::Init, not support plat\n");
}
#endif
}}

2.ebpf调度时延