69天探索操作系统-第23天:内存分配内部机制(malloc、free)

129 阅读7分钟

pro16.avif

1.介绍

内存分配是计算机系统中的一项基本操作,它依赖复杂的算法来高效、动态地管理内存。本文探讨内存分配的内部机制,重点是实现一个模拟 malloc 和 free 行为的自定义内存分配器。理解这些内部机制对于开发健壮且高性能的软件至关重要。

本文不停留在 malloc 和 free 的接口层面,而是深入探讨动态内存管理背后的核心概念与技术,包括构建自定义内存分配器所需的数据结构、算法和优化策略。

image.png

2.内存块结构

内存块结构是动态内存分配中的一个关键数据结构。它包含每个分配或空闲内存块的信息,包括其大小、分配状态以及链接块的指针。这种结构允许分配器跟踪可用内存空间。

结构定义包括块大小字段、指示块是空闲还是已分配的标志,以及把块链接到空闲链表或已用链表中的指针。该结构的末尾还有一个柔性数组成员(flexible array member),用来标记已分配块中实际数据的起始位置。

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// Alignment granularity in bytes; every block size is rounded to a multiple of it.
#define ALIGNMENT 8
// Round `size` up to the next multiple of ALIGNMENT using a bit mask, which
// only works while ALIGNMENT is a power of two. The addition can wrap when
// `size` is within ALIGNMENT-1 of the maximum value of its type.
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1))
// sizeof(block_t), used as the per-block header overhead. The macro expands
// at the point of use, so block_t must be declared before BLOCK_SIZE is used.
#define BLOCK_SIZE sizeof(block_t)

/*
 * Header placed at the start of every heap block, whether free or allocated.
 * The caller-visible memory begins at `data`, immediately after the header.
 *
 * `data` is a true C99 flexible array member, so sizeof(block_t) equals the
 * offset of `data`. A one-element array (`data[1]`) would add a byte plus
 * padding to sizeof(block_t), and any code that steps back from a user
 * pointer by sizeof(block_t) would then miss the start of the header.
 */
typedef struct block_t {
    size_t size;          // Size of the block in bytes, including this header
    bool is_free;         // true while the block is available for allocation
    struct block_t* next; // Next block in the list that currently holds this block
    struct block_t* prev; // Previous block in that list
    char data[];          // Start of the user data (flexible array member)
} block_t;

// Bookkeeping for the whole allocator: the two list heads plus byte counters.
typedef struct {
    block_t *free_list;   // First block on the free list, or NULL when empty
    block_t *used_list;   // First block on the used list, or NULL when empty
    size_t   total_size;  // Bytes obtained from the OS so far
    size_t   used_size;   // Sum of the sizes of all allocated blocks
} heap_t;

3.自定义内存分配器实现

下面可以看到自定义内存分配器的实现。它包括初始化堆、查找合适空块、分割块、分配内存(custom_malloc)、释放内存(custom_free)和合并相邻空块等功能。这个实现提供了内存分配器如何工作的具体例子。

custom_malloc 的实现采用最佳适应(best fit)分配策略:它遍历空闲链表,找出能够满足请求的最小空闲块。custom_free 函数释放已分配的内存,并合并相邻的空闲块以减少碎片。

// Global heap structure
static heap_t heap = {0};

void init_heap(size_t initial_size) {
    initial_size = ALIGN(initial_size);
    
    // Request memory from OS
    void* memory = sbrk(initial_size);
    if (memory == (void*)-1) {
        perror("Failed to initialize heap");
        return;
    }
    
    // Initialize first block
    block_t* initial_block = (block_t*)memory;
    initial_block->size = initial_size;
    initial_block->is_free = true;
    initial_block->next = NULL;
    initial_block->prev = NULL;
    
    // Initialize heap structure
    heap.free_list = initial_block;
    heap.used_list = NULL;
    heap.total_size = initial_size;
    heap.used_size = 0;
}

/*
 * Best-fit search over the free list.
 *
 * size: total number of bytes the block must provide (header included, as
 *       computed by the caller).
 *
 * Returns the free block whose size exceeds `size` by the least amount, or
 * NULL when no free block is large enough. When several blocks tie, the one
 * encountered first in list order wins. The search stops early on an exact
 * fit.
 *
 * The best candidate is tracked directly instead of seeding a "smallest
 * difference" variable with SIZE_MAX, so the function does not depend on
 * <stdint.h> being included.
 */
block_t* find_best_fit(size_t size) {
    block_t* best_fit = NULL;

    for (block_t* current = heap.free_list; current != NULL; current = current->next) {
        if (!current->is_free || current->size < size) {
            continue;  // in use or too small
        }

        if (best_fit == NULL || current->size < best_fit->size) {
            best_fit = current;

            // Perfect fit: nothing can beat it, stop searching
            if (current->size == size) {
                break;
            }
        }
    }

    return best_fit;
}

/*
 * Carve the tail off `block` so that `block` keeps exactly `size` bytes.
 *
 * The split happens only when the leftover is strictly larger than
 * BLOCK_SIZE + ALIGNMENT; otherwise the block is left untouched and keeps
 * its slack. The new tail block is marked free and linked directly after
 * `block` in the list `block` currently belongs to.
 *
 * The caller must ensure size <= block->size; the subtraction is unsigned.
 */
void split_block(block_t* block, size_t size) {
    const size_t leftover = block->size - size;

    // Too small to be worth a header of its own
    if (leftover <= BLOCK_SIZE + ALIGNMENT) {
        return;
    }

    block_t* tail = (block_t*)((char*)block + size);
    block_t* successor = block->next;

    tail->size = leftover;
    tail->is_free = true;
    tail->next = successor;
    tail->prev = block;

    if (successor != NULL) {
        successor->prev = tail;
    }

    block->next = tail;
    block->size = size;
}

void* custom_malloc(size_t size) {
    if (size == 0) return NULL;
    
    // Adjust size to include header and alignment
    size_t total_size = ALIGN(size + BLOCK_SIZE);
    
    // Find suitable block
    block_t* block = find_best_fit(total_size);
    
    // If no suitable block found, request more memory
    if (block == NULL) {
        size_t request_size = total_size > 4096 ? total_size : 4096;
        void* memory = sbrk(request_size);
        if (memory == (void*)-1) {
            return NULL;
        }
        
        block = (block_t*)memory;
        block->size = request_size;
        block->is_free = true;
        block->next = heap.free_list;
        block->prev = NULL;
        
        if (heap.free_list) {
            heap.free_list->prev = block;
        }
        
        heap.free_list = block;
        heap.total_size += request_size;
    }
    
    // Split block if necessary
    split_block(block, total_size);
    
    // Mark block as used
    block->is_free = false;
    
    // Remove from free list and add to used list
    if (block->prev) {
        block->prev->next = block->next;
    } else {
        heap.free_list = block->next;
    }
    
    if (block->next) {
        block->next->prev = block->prev;
    }
    
    block->next = heap.used_list;
    block->prev = NULL;
    if (heap.used_list) {
        heap.used_list->prev = block;
    }
    heap.used_list = block;
    
    heap.used_size += block->size;
    
    return block->data;
}

// Detach `node` from the free list, updating the list head when `node` is it.
static void coalesce_unlink_free(block_t* node) {
    if (node->prev) {
        node->prev->next = node->next;
    } else if (heap.free_list == node) {
        heap.free_list = node->next;
    }

    if (node->next) {
        node->next->prev = node->prev;
    }

    node->next = NULL;
    node->prev = NULL;
}

/*
 * Merge `block` with every free block that is physically contiguous with it.
 *
 * `block` must already be on the free list. The `next`/`prev` links express
 * list order, not memory order, so a list neighbour is not necessarily an
 * address neighbour. Merging by list position alone would make a block claim
 * memory that belongs to unrelated (possibly allocated) blocks. The free
 * list is therefore scanned for blocks whose address range ends exactly
 * where `block` starts, or starts exactly where `block` ends.
 *
 * The block with the lower address always survives a merge; the absorbed
 * block is unlinked from the free list.
 */
void coalesce_blocks(block_t* block) {
    if (block == NULL || !block->is_free) {
        return;
    }

    bool merged;
    do {
        merged = false;

        for (block_t* other = heap.free_list; other != NULL; other = other->next) {
            if (other == block || !other->is_free) {
                continue;
            }

            if ((char*)block + block->size == (char*)other) {
                // `other` starts where `block` ends: absorb it
                coalesce_unlink_free(other);
                block->size += other->size;
                merged = true;
                break;  // list changed, rescan from the head
            }

            if ((char*)other + other->size == (char*)block) {
                // `block` starts where `other` ends: `other` absorbs it
                coalesce_unlink_free(block);
                other->size += block->size;
                block = other;
                merged = true;
                break;  // list changed, rescan from the head
            }
        }
    } while (merged);
}

/*
 * Return a block obtained from custom_malloc() to the heap.
 *
 * ptr: pointer previously returned by custom_malloc(), or NULL (no-op).
 *
 * The block is moved from the used list to the head of the free list, the
 * used-size counter is reduced by the block size, and coalesce_blocks() is
 * then given the chance to merge it with other free blocks.
 *
 * Freeing a block whose header is already marked free is reported on stderr
 * and ignored. This is a best-effort check that relies on the old header
 * still being intact.
 */
void custom_free(void* ptr) {
    if (!ptr) return;

    // custom_malloc() hands out &block->data, so step back by the offset of
    // `data`. sizeof(block_t) is not a safe substitute: it can be larger
    // than that offset when the struct ends with padding.
    block_t* block = (block_t*)((char*)ptr - offsetof(block_t, data));

    if (block->is_free) {
        // Unlinking an already-free block from the used list would corrupt
        // both lists and the size accounting.
        fprintf(stderr, "custom_free: ignoring double free of %p\n", ptr);
        return;
    }

    // Mark block as free
    block->is_free = true;

    // Remove from used list
    if (block->prev) {
        block->prev->next = block->next;
    } else {
        heap.used_list = block->next;
    }

    if (block->next) {
        block->next->prev = block->prev;
    }

    // Push onto the head of the free list
    block->next = heap.free_list;
    block->prev = NULL;
    if (heap.free_list) {
        heap.free_list->prev = block;
    }
    heap.free_list = block;

    // Must happen before coalescing, while block->size is still the size
    // that was added by custom_malloc()
    heap.used_size -= block->size;

    // Coalesce adjacent free blocks
    coalesce_blocks(block);
}

/*
 * Print the heap counters followed by the address and size of every block
 * on the free list and then on the used list, all to stdout.
 */
void print_memory_stats() {
    const size_t free_bytes = heap.total_size - heap.used_size;

    printf("\nMemory Statistics:\n");
    printf("Total Heap Size: %zu bytes\n", heap.total_size);
    printf("Used Size: %zu bytes\n", heap.used_size);
    printf("Free Size: %zu bytes\n", free_bytes);

    printf("\nFree Blocks:\n");
    for (block_t* node = heap.free_list; node != NULL; node = node->next) {
        printf("Block at %p, size: %zu\n", (void*)node, node->size);
    }

    printf("\nUsed Blocks:\n");
    for (block_t* node = heap.used_list; node != NULL; node = node->next) {
        printf("Block at %p, size: %zu\n", (void*)node, node->size);
    }
}

4.内存分析和调试工具

调试和分析内存分配对于识别泄漏、碎片化和性能瓶颈至关重要。提供的代码包括用于检测内存泄漏、打印内存统计数据和执行简单调试任务的功能。

debug_malloc 和 debug_free 函数跟踪已分配的内存,从而可以检测内存泄漏。print_memory_stats 函数提供总内存、已用内存和空闲内存的统计信息,有助于理解内存使用模式。

// One entry of the leak-tracking table: what was allocated and from where.
typedef struct {
    void *ptr;         // Pointer handed back to the caller
    size_t size;       // Number of bytes the caller asked for
    const char *file;  // Source file name supplied by the caller
    int line;          // Source line number supplied by the caller
} allocation_info_t;

// Capacity of the tracking table; debug_malloc() does not record
// allocations made while the table is full.
#define MAX_ALLOCATIONS 1000
// Live allocations recorded by debug_malloc(), stored contiguously in
// slots [0, allocation_count).
static allocation_info_t allocations[MAX_ALLOCATIONS];
// Number of valid entries at the front of `allocations`.
static int allocation_count = 0;

void* debug_malloc(size_t size, const char* file, int line) {
    void* ptr = custom_malloc(size);
    if (ptr && allocation_count < MAX_ALLOCATIONS) {
        allocations[allocation_count].ptr = ptr;
        allocations[allocation_count].size = size;
        allocations[allocation_count].file = file;
        allocations[allocation_count].line = line;
        allocation_count++;
    }
    return ptr;
}

/*
 * Drop the tracking record for `ptr`, if there is one, then release the
 * memory through custom_free().
 *
 * Only the first matching record is removed; later records are shifted down
 * so the table stays contiguous and keeps its order. custom_free() is called
 * whether or not a record was found. `file` and `line` are accepted for
 * symmetry with debug_malloc() but are not used.
 */
void debug_free(void* ptr, const char* file, int line) {
    int found = -1;

    for (int i = 0; i < allocation_count && found < 0; i++) {
        if (allocations[i].ptr == ptr) {
            found = i;
        }
    }

    if (found >= 0) {
        // Close the gap left by the removed entry
        const size_t trailing = (size_t)(allocation_count - found - 1);
        memmove(&allocations[found], &allocations[found + 1],
                trailing * sizeof(allocation_info_t));
        allocation_count--;
    }

    custom_free(ptr);
}

void check_leaks() {
    if (allocation_count > 0) {
        printf("\nMemory Leaks Detected:\n");
        for (int i = 0; i < allocation_count; i++) {
            printf("Leak: %zu bytes at %p, allocated in %s:%d\n",
                   allocations[i].size, allocations[i].ptr,
                   allocations[i].file, allocations[i].line);
        }
    } else {
        printf("No memory leaks detected\n");
    }
}

5. 自定义分配器的用法示例

下面通过一个简单的 main 函数示例,演示如何使用自定义内存分配器:它分配并释放内存、打印内存统计信息,并检查内存泄漏。

/*
 * Demo: initialise the heap, make two allocations, fill them, print the
 * heap statistics, release both allocations and run the leak check.
 *
 * The allocations go through debug_malloc()/debug_free() so that
 * check_leaks() has records to inspect; calling custom_malloc() directly
 * would leave the tracking table empty and the leak check meaningless.
 *
 * Returns 0 on success and EXIT_FAILURE when an allocation fails.
 */
int main(void) {
    enum { NUMBER_COUNT = 10, STRING_CAPACITY = 100 };

    init_heap(1024 * 1024);

    int* numbers = (int*)debug_malloc(NUMBER_COUNT * sizeof(int), __FILE__, __LINE__);
    char* string = (char*)debug_malloc(STRING_CAPACITY, __FILE__, __LINE__);

    // Both allocators return NULL on failure; never write through it
    if (numbers == NULL || string == NULL) {
        fprintf(stderr, "Allocation failed\n");
        debug_free(numbers, __FILE__, __LINE__);  // freeing NULL is a no-op
        debug_free(string, __FILE__, __LINE__);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < NUMBER_COUNT; i++) {
        numbers[i] = i;
    }
    // Bounded copy: cannot write past the buffer even if the text grows
    snprintf(string, STRING_CAPACITY, "%s", "Hello, World!");

    print_memory_stats();

    debug_free(numbers, __FILE__, __LINE__);
    debug_free(string, __FILE__, __LINE__);

    check_leaks();

    return 0;
}

6. 性能优化技术

优化内存分配性能的技术包括内存对齐、缓存友好块放置以及内存预分配策略。这些技术旨在改进内存访问模式,减少碎片,并提高整体分配器的效率。

内存对齐

下面的代码示例根据目标架构选择对齐粒度,以保证内存访问的效率;随后的示例展示了缓存友好的块布局,用于改善数据局部性。这些做法在实际应用中可以显著提高性能。

// Optimize memory alignment for different architectures:
// 16 bytes when the compiler reports an x86-64 target (GCC/Clang define
// __x86_64__, MSVC defines _M_X64), 8 bytes everywhere else.
// NOTE(review): if an unconditional `#define ALIGNMENT 8` is still present
// earlier in the same translation unit, it must be removed or #undef'd
// first; redefining a macro with a different value is not allowed.
#if defined(__x86_64__) || defined(_M_X64)
    #define ALIGNMENT 16
#else
    #define ALIGNMENT 8
#endif

缓存友好块放置

// Place frequently accessed metadata at the start of the block
// Variant of the block header that uses a flexible array member for `data`
// and asks the compiler to align the struct to ALIGNMENT bytes.
// __attribute__((aligned(...))) is a GCC/Clang extension.
// NOTE(review): the access-frequency remarks on the fields are the author's
// stated intent; nothing in this snippet measures them. This is a second
// definition of block_t and cannot share a translation unit with another one.
typedef struct block_t {
    size_t size;          // Most frequently accessed
    bool is_free;         // Second most frequently accessed
    struct block_t* next; // Less frequently accessed
    struct block_t* prev; // Less frequently accessed
    char data[];         // Actual data
} __attribute__((aligned(ALIGNMENT))) block_t;

7.总结

一个定制内存分配器可以对内存管理进行精细控制,从而提高性能、调试能力并深入理解系统级内存分配。不过,它也引入了与碎片化、线程安全和缓存管理相关的复杂性。

8.参考资料和进一步阅读

  • "The C Programming Language" by Kernighan and Ritchie
  • "Advanced Programming in the UNIX Environment" by W. Richard Stevens
  • "Understanding and Using C Pointers" by Richard Reese
  • Doug Lea's Memory Allocator Documentation
  • "Memory Systems: Cache, DRAM, Disk" by Jacob, Ng, and Wang