本文基于Linux 5.0
include/linux/mmzone.h
include/linux/pageblock-flags.h
arch/arm/include/asm/pgtable-3level.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/pgtable-hwdef.h
mm/vmstat.c
mm/page_alloc.c
// Per the original note, buddyinfo, pagetypeinfo, vmstat and zoneinfo are all
// created under the proc filesystem during kernel_init. This excerpt shows the
// proc_create() calls that register them; they are compiled only when
// CONFIG_PROC_FS is set.
void __init init_mm_internals(void)
{
int ret __maybe_unused;
mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
// (part of the function body is elided in this excerpt)
...
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
// Create /proc/pagetypeinfo (mode 0400) and attach its file operations
proc_create("pagetypeinfo", 0400, NULL, &pagetypeinfo_file_operations);
proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
#endif
// seq_file iterator callbacks for the pagetypeinfo node. The node's contents
// are produced by pagetypeinfo_show [see section 1]; iteration uses the
// frag_start/frag_next/frag_stop helpers, which are not shown in this excerpt.
static const struct seq_operations pagetypeinfo_op = {
.start = frag_start,
.next = frag_next,
.stop = frag_stop,
.show = pagetypeinfo_show,
};
/*
 * .open handler for /proc/pagetypeinfo.
 *
 * @inode: inode of the proc entry (not used here)
 * @file:  file being opened
 *
 * Binds the file to the pagetypeinfo_op seq_file callbacks and returns
 * whatever seq_open() returns.
 */
static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	int rc;

	rc = seq_open(file, &pagetypeinfo_op);
	return rc;
}
// File operations registered for /proc/pagetypeinfo by proc_create().
// Only .open is specific to this node; read, llseek and release are the
// seq_read/seq_lseek/seq_release helpers.
static const struct file_operations pagetypeinfo_file_operations = {
.open = pagetypeinfo_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
1. pagetypeinfo_show
/*
 * seq_file .show callback for /proc/pagetypeinfo.
 *
 * @m:   seq_file that receives the output
 * @arg: the pg_data_t of the node being shown
 *
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 *
 * A node without memory produces no output. Otherwise the page block order
 * and page count are printed [see section 2], followed by the free page
 * counts per order and migrate type [see section 3], the page block counts
 * per migrate type [see section 4] and the mixed-count report. Always
 * returns 0.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *node = arg;

	/* Only nodes that actually have memory are reported. */
	if (node_state(node->node_id, N_MEMORY)) {
		/* Page block order and pages per block [see section 2] */
		seq_printf(m, "Page block order: %d\n", pageblock_order);
		seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
		seq_putc(m, '\n');

		/* Free pages per order and migrate type [see section 3] */
		pagetypeinfo_showfree(m, node);
		/* Page blocks per migrate type [see section 4] */
		pagetypeinfo_showblockcount(m, node);
		pagetypeinfo_showmixedcount(m, node);
	}

	return 0;
}
2. pageblock_order
/*
 * pageblock_order: the buddy order of one page block.
 *
 *  - CONFIG_HUGETLB_PAGE and CONFIG_HUGETLB_PAGE_SIZE_VARIABLE both set:
 *    huge page sizes are variable, so pageblock_order is a variable
 *    defined elsewhere.
 *  - CONFIG_HUGETLB_PAGE only: huge pages are a constant size and the order
 *    is the architecture constant HUGETLB_PAGE_ORDER [see section 2.1].
 *  - no huge page support: group by MAX_ORDER_NR_PAGES, i.e. order
 *    MAX_ORDER-1 (10 when MAX_ORDER is 11, its usual value).
 */
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)
extern unsigned int pageblock_order;
#elif defined(CONFIG_HUGETLB_PAGE)
#define pageblock_order HUGETLB_PAGE_ORDER
#else
#define pageblock_order (MAX_ORDER-1)
#endif

/*
 * Number of pages in one page block. With the arm32/arm64 value derived in
 * section 2.1 (order 9) this is 512 pages, i.e. 2 MB with 4 KB pages.
 */
#define pageblock_nr_pages (1UL << pageblock_order)
2.1 HUGETLB_PAGE_ORDER
/*
* Hugetlb definitions.
*/
// arm32
// HPAGE_SHIFT is defined as PMD_SHIFT [see section 2.2]
#define HPAGE_SHIFT PMD_SHIFT
// Size of one huge page in bytes
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
// Mask that clears the offset bits within a huge page
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
// HPAGE_SHIFT is 21 here (the arm32 PMD_SHIFT from section 2.2)
// PAGE_SHIFT is typically 12, i.e. a 4 KB page size
// HUGETLB_PAGE_ORDER = 21 - 12 = 9
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
/*
* Hugetlb definitions.
*/
// arm64
#define HUGE_MAX_HSTATE 4
// HPAGE_SHIFT is defined as PMD_SHIFT [see section 2.2]
#define HPAGE_SHIFT PMD_SHIFT
// Size of one huge page in bytes
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
// Mask that clears the offset bits within a huge page
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
// HPAGE_SHIFT is 21 when PAGE_SHIFT is 12 (see the arm64 formula in section 2.2)
// PAGE_SHIFT is typically 12, i.e. a 4 KB page size
// HUGETLB_PAGE_ORDER = 21 - 12 = 9
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
2.2 PMD_SHIFT
/*
* PMD_SHIFT determines the size a middle-level page table entry can map.
*/
// arm32: fixed at 21, so one middle-level entry maps 1 << 21 bytes = 2 MB
#define PMD_SHIFT 21
// arm64
// Address shift for page table level n.
// With PAGE_SHIFT = 12 and n = 2: (12 - 3) * (4 - 2) + 3 = 21
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
*/
#if CONFIG_PGTABLE_LEVELS > 2
// With three or more page table levels, PMD_SHIFT is computed by the
// ARM64_HW_PGTABLE_LEVEL_SHIFT macro for level 2
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
// Bytes mapped by one PMD entry, and the mask that clears the offset within it
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
// A PMD table has the same number of entries as a PTE table
#define PTRS_PER_PMD PTRS_PER_PTE
#endif
3. pagetypeinfo_showfree
/*
 * Print out the free pages at each order for each migratetype.
 *
 * @m:   seq_file that receives the output
 * @arg: the pg_data_t of the node being shown
 *
 * Writes a header row listing every order from 0 to MAX_ORDER-1, then hands
 * each zone of the node to pagetypeinfo_showfree_print() [see section 3.1]
 * via walk_zones_in_node(). The two boolean arguments are passed through
 * exactly as in the original; their meaning is defined by walk_zones_in_node,
 * which is outside this excerpt. Always returns 0.
 */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	pg_data_t *node = arg;
	int col = 0;

	/* Header: fixed-width title followed by one column per order */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	while (col < MAX_ORDER) {
		seq_printf(m, "%6d ", col);
		col++;
	}
	seq_putc(m, '\n');

	/* One group of rows per zone [see section 3.1] */
	walk_zones_in_node(m, node, true, false, pagetypeinfo_showfree_print);

	return 0;
}
3.1 pagetypeinfo_showfree_print
// Migrate types used when grouping pages by mobility.
// MIGRATE_TYPES, the last enumerator, equals the number of migrate types
// available under the enabled config options.
enum migratetype {
MIGRATE_UNMOVABLE,
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
// Shares the value of MIGRATE_PCPTYPES (3), so it lies outside the pcp-list types
MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
MIGRATE_ISOLATE, /* can't allocate from here */
#endif
MIGRATE_TYPES
};
// Printable name of each migrate type, indexed by enum migratetype.
// The entries and their #ifdef guards follow the enumerator order, so
// "HighAtomic" sits at index MIGRATE_HIGHATOMIC (3).
char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
"Movable",
"Reclaimable",
"HighAtomic",
#ifdef CONFIG_CMA
"CMA",
#endif
#ifdef CONFIG_MEMORY_ISOLATION
"Isolate",
#endif
};
/*
 * Per-zone callback used by pagetypeinfo_showfree().
 *
 * @m:     seq_file that receives the output
 * @pgdat: node the zone belongs to (only its node_id is printed)
 * @zone:  zone whose free lists are counted
 *
 * Emits one row per migrate type. Each row starts with the node id, zone
 * name and migrate type name, followed by one column per order holding the
 * number of entries on that order's free list for that migrate type.
 */
static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mt;

	for (mt = 0; mt < MIGRATE_TYPES; mt++) {
		int ord = 0;

		/* Row label: node id, zone name, migrate type name */
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
			   pgdat->node_id, zone->name, migratetype_names[mt]);

		while (ord < MAX_ORDER) {
			/* Free list of this order for the current migrate type */
			struct list_head *head = &zone->free_area[ord].free_list[mt];
			struct list_head *pos;
			unsigned long nr_free = 0;

			/* The count is obtained by walking the whole list */
			list_for_each(pos, head)
				nr_free++;

			seq_printf(m, "%6lu ", nr_free);
			ord++;
		}
		seq_putc(m, '\n');
	}
}
4. pagetypeinfo_showblockcount
/*
 * Print out the number of pageblocks for each migratetype.
 *
 * @m:   seq_file that receives the output
 * @arg: the pg_data_t of the node being shown
 *
 * Writes a header row containing every migrate type name, then hands each
 * zone of the node to pagetypeinfo_showblockcount_print() [see section 4.1]
 * via walk_zones_in_node(). The two boolean arguments are passed through
 * exactly as in the original; their meaning is defined by walk_zones_in_node,
 * which is outside this excerpt. Always returns 0.
 */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	pg_data_t *node = arg;
	int col = 0;

	/* Header: title followed by one column per migrate type name */
	seq_printf(m, "\n%-23s", "Number of blocks type ");
	while (col < MIGRATE_TYPES) {
		seq_printf(m, "%12s ", migratetype_names[col]);
		col++;
	}
	seq_putc(m, '\n');

	/* One row per zone [see section 4.1] */
	walk_zones_in_node(m, node, true, false,
			   pagetypeinfo_showblockcount_print);

	return 0;
}
4.1 pagetypeinfo_showblockcount_print
/*
 * Per-zone callback used by pagetypeinfo_showblockcount().
 *
 * @m:     seq_file that receives the output
 * @pgdat: node the zone belongs to (only its node_id is printed)
 * @zone:  zone whose page blocks are classified
 *
 * Steps through the zone's pfn range one page block at a time, looks up the
 * migrate type of each usable block [see section 4.2] and tallies it. The
 * row printed afterwards holds the node id, the zone name and one count per
 * migrate type.
 */
static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	/* Tally of page blocks per migrate type */
	unsigned long nr_blocks[MIGRATE_TYPES] = { 0 };
	/* First pfn of the zone and the pfn at which the walk stops */
	const unsigned long first = zone->zone_start_pfn;
	const unsigned long limit = zone_end_pfn(zone);
	unsigned long cur;
	int mt;

	for (cur = first; cur < limit; cur += pageblock_nr_pages) {
		struct page *pg = pfn_to_online_page(cur);

		/*
		 * Skip the block when there is no online page for the pfn,
		 * when it sits in an unexpected hole punched in the memmap,
		 * or when the page belongs to a different zone. The checks
		 * short-circuit in this order.
		 */
		if (!pg || !memmap_valid_within(cur, pg, zone) ||
		    page_zone(pg) != zone)
			continue;

		/* Migrate type of the block [see section 4.2] */
		mt = get_pageblock_migratetype(pg);
		if (mt < MIGRATE_TYPES)
			nr_blocks[mt]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mt = 0; mt < MIGRATE_TYPES; mt++)
		seq_printf(m, "%12lu ", nr_blocks[mt]);
	seq_putc(m, '\n');
}
4.2 get_pageblock_migratetype
/*
 * Bit indices that affect a whole block of pages.
 *
 * Each page block owns NR_PAGEBLOCK_BITS consecutive bits in the pageblock
 * flags bitmap; the enumerators below are offsets within that group.
 */
enum pageblock_bits {
	PB_migrate,				/* 0: first migrate type bit */
	PB_migrate_end = PB_migrate + 3 - 1,	/* 2: last migrate type bit */
			/* 3 bits required for migrate types */
	PB_migrate_skip,/* 3: If set the block is skipped by compaction */

	/*
	 * Assume the bits will always align on a word. If this assumption
	 * changes then get/set pageblock needs updating.
	 */
	NR_PAGEBLOCK_BITS			/* 4: bits per page block */
};

/* Number of bits that encode the migrate type: 2 - 0 + 1 = 3 */
#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)

/* Mask covering the migrate type bits: (1 << 3) - 1 = 8 - 1 = 7 = 0000 0111 */
#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)

/*
 * Read the migrate type of the page block containing @page by extracting
 * the bits that end at PB_migrate_end, masked with MIGRATETYPE_MASK
 * [see section 4.3].
 */
#define get_pageblock_migratetype(page)					\
	get_pfnblock_flags_mask(page, page_to_pfn(page),		\
			PB_migrate_end, MIGRATETYPE_MASK)
4.3 get_pfnblock_flags_mask
/*
 * Out-of-line entry point for reading page block flag bits.
 *
 * @page:       page inside the page block of interest
 * @pfn:        page frame number of @page
 * @end_bitidx: index of the last bit of the field within the block's bits
 * @mask:       mask applied to the extracted bits
 *
 * Forwards all arguments unchanged to __get_pfnblock_flags_mask() and
 * returns its result.
 */
unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
					unsigned long end_bitidx,
					unsigned long mask)
{
	unsigned long flags;

	flags = __get_pfnblock_flags_mask(page, pfn, end_bitidx, mask);
	return flags;
}
/* Width of a long in bits: 64 when CONFIG_64BIT is set, 32 otherwise. */
#ifndef CONFIG_64BIT
#define BITS_PER_LONG 32
#else
#define BITS_PER_LONG 64
#endif /* !CONFIG_64BIT */
/*
 * Extract a group of flag bits for the page block that contains @page.
 *
 * @page:       page to inspect
 * @pfn:        page frame number of @page
 * @end_bitidx: index of the field's last bit within the block's bits
 *              (PB_migrate_end, i.e. 2, when reached through
 *              get_pageblock_migratetype)
 * @mask:       mask applied after shifting (MIGRATETYPE_MASK, i.e.
 *              0000 0111, for the same caller)
 *
 * The bitmap is located with get_pageblock_bitmap() [see section 4.3.1] and
 * the block's first bit with pfn_to_bitidx() [see section 4.3.2]. The shift
 * by (BITS_PER_LONG - index - 1) shows that bit indices are counted from the
 * most significant end of each word.
 */
static __always_inline unsigned long __get_pfnblock_flags_mask(struct page *page,
					unsigned long pfn,
					unsigned long end_bitidx,
					unsigned long mask)
{
	/* Bitmap holding the flags [see section 4.3.1] */
	unsigned long *flags_map = get_pageblock_bitmap(page, pfn);
	/* Index of the block's first bit in that bitmap [see section 4.3.2] */
	unsigned long first_bit = pfn_to_bitidx(page, pfn);
	/* Word that holds the bits */
	unsigned long word_nr = first_bit / BITS_PER_LONG;
	/* Position of the field's last bit inside that word */
	unsigned long last_bit = (first_bit % BITS_PER_LONG) + end_bitidx;

	return (flags_map[word_nr] >> (BITS_PER_LONG - last_bit - 1)) & mask;
}
4.3.1 get_pageblock_bitmap
/*
 * Return a pointer to the bitmap storing bits affecting a block of pages.
 *
 * @page: page inside the block (consulted only without CONFIG_SPARSEMEM)
 * @pfn:  page frame number of @page (consulted only with CONFIG_SPARSEMEM)
 *
 * With CONFIG_SPARSEMEM the bitmap belongs to the memory section containing
 * @pfn; otherwise it is the pageblock_flags of the zone containing @page.
 */
static inline unsigned long *get_pageblock_bitmap(struct page *page,
							unsigned long pfn)
{
	unsigned long *bitmap;

#ifdef CONFIG_SPARSEMEM
	bitmap = __pfn_to_section(pfn)->pageblock_flags;
#else
	bitmap = page_zone(page)->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

	return bitmap;
}
4.3.2 pfn_to_bitidx
/*
 * Convert a pfn into the index of its page block's first bit in the
 * pageblock flags bitmap.
 *
 * @page: page for @pfn (consulted only without CONFIG_SPARSEMEM)
 * @pfn:  page frame number to convert
 *
 * The pfn is first made relative to the start of the bitmap's range: the
 * offset within the memory section under CONFIG_SPARSEMEM, otherwise the
 * offset from the zone start rounded down to a page block boundary. The
 * page block number (pfn >> pageblock_order) is then multiplied by
 * NR_PAGEBLOCK_BITS, the number of bits each block occupies.
 */
static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	pfn &= (PAGES_PER_SECTION-1);
#else
	pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
#endif /* CONFIG_SPARSEMEM */

	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}