Linux系统内存信息(一)/proc/pagetypeinfo

774 阅读2分钟
本文基于Linux 5.0

include/linux/mmzone.h
include/linux/pageblock-flags.h
arch/arm/include/asm/pgtable-3level.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/pgtable-hwdef.h

mm/vmstat.c
mm/page_alloc.c
// Excerpt of init_mm_internals(); "..." marks code elided by the article and
// the end of the function is not shown.
// The buddyinfo, pagetypeinfo, vmstat and zoneinfo entries are all created
// under the proc filesystem here (the article states this runs during kernel_init).
void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
    ...
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
	// Create /proc/pagetypeinfo and attach its file operations
	proc_create("pagetypeinfo", 0400, NULL, &pagetypeinfo_file_operations);
	proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
	proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
#endif
// seq_file operations used for the pagetypeinfo node: the frag_* callbacks
// are the start/next/stop hooks and pagetypeinfo_show is the .show callback
// that produces the node's content.
static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

// open handler for the pagetypeinfo file: passes the file to seq_open()
// together with the pagetypeinfo_op table and returns seq_open()'s result.
static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

// File operations table handed to proc_create() for "pagetypeinfo".
// Only .open is specific to this file; read, llseek and release are the
// seq_read / seq_lseek / seq_release helpers.
static const struct file_operations pagetypeinfo_file_operations = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

1. pagetypeinfo_show

/*
 * Emit the statistics related to grouping pages by mobility for one node.
 * Collecting them is expensive, so the file should not be read constantly.
 * Always returns 0.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *node = arg;

	/* A memoryless node produces no output at all. */
	if (node_state(node->node_id, N_MEMORY)) {
		/* Page block order, see section 2. */
		seq_printf(m, "Page block order: %d\n", pageblock_order);
		/* Number of pages in one page block, see section 2. */
		seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
		seq_putc(m, '\n');

		/* Free-list counts per order and migrate type, see section 3. */
		pagetypeinfo_showfree(m, node);
		/* Page block counts per migrate type, see section 4. */
		pagetypeinfo_showblockcount(m, node);
		pagetypeinfo_showmixedcount(m, node);
	}

	return 0;
}

2. pageblock_order

// Huge pages are supported
#ifdef CONFIG_HUGETLB_PAGE

// The huge page size is variable
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE

/* Huge page sizes are variable */
// pageblock_order is then a variable defined elsewhere
extern unsigned int pageblock_order;

#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

/* Huge pages are a constant size */
// pageblock_order is the architecture-specific constant HUGETLB_PAGE_ORDER, see section 2.1
#define pageblock_order		HUGETLB_PAGE_ORDER

#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

#else /* CONFIG_HUGETLB_PAGE */

/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
// Without huge pages the page block order is MAX_ORDER-1, i.e. 10 when
// MAX_ORDER has its usual value of 11
#define pageblock_order		(MAX_ORDER-1)

#endif /* CONFIG_HUGETLB_PAGE */

// Number of pages in one page block.
// Per sections 2.1 and 2.2, pageblock_order is 9 on arm32 and arm64 (huge pages
// enabled, 4 KiB pages), so a page block is 512 pages, i.e. 2 MiB.
#define pageblock_nr_pages	(1UL << pageblock_order)

2.1 HUGETLB_PAGE_ORDER

/*
 * Hugetlb definitions.
 */
// arm32
// HPAGE_SHIFT equals PMD_SHIFT, see section 2.2
#define HPAGE_SHIFT		PMD_SHIFT
// Size of a huge page in bytes
#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
// Mask that clears the offset bits within a huge page
#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
// HPAGE_SHIFT is 21
// PAGE_SHIFT is usually 12, i.e. a 4 KiB page size
// HUGETLB_PAGE_ORDER = 21 - 12 = 9
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
/*
 * Hugetlb definitions.
 */
// arm64
#define HUGE_MAX_HSTATE		4
// HPAGE_SHIFT equals PMD_SHIFT, see section 2.2
#define HPAGE_SHIFT		PMD_SHIFT
// Size of a huge page in bytes
#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
// Mask that clears the offset bits within a huge page
#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
// HPAGE_SHIFT is 21
// PAGE_SHIFT is usually 12, i.e. a 4 KiB page size
// HUGETLB_PAGE_ORDER = 21 - 12 = 9
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)

2.2 PMD_SHIFT

/*
 * PMD_SHIFT determines the size a middle-level page table entry can map.
 */
// arm32: fixed at 21, so one entry maps 1 << 21 bytes = 2 MiB
#define PMD_SHIFT		21
// arm64
// Shift of page table level n, derived from PAGE_SHIFT.
// Example for n = 2 with PAGE_SHIFT = 12: (12 - 3) * (4 - 2) + 3 = 21
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)	((PAGE_SHIFT - 3) * (4 - (n)) + 3)

/*
 * PMD_SHIFT determines the size a level 2 page table entry can map.
 */
#if CONFIG_PGTABLE_LEVELS > 2
// With three or more page table levels, PMD_SHIFT is computed by the
// ARM64_HW_PGTABLE_LEVEL_SHIFT macro for level 2
#define PMD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE		(_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK		(~(PMD_SIZE-1))
#define PTRS_PER_PMD		PTRS_PER_PTE
#endif

3. pagetypeinfo_showfree

/*
 * Print the free page counts at each order for each migratetype.
 * Writes a header row, then lets walk_zones_in_node() print one set of
 * rows per zone. Always returns 0.
 */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	pg_data_t *node = arg;
	int ord = 0;

	/* Header: caption followed by one column label per order. */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	while (ord < MAX_ORDER) {
		seq_printf(m, "%6d ", ord);
		ord++;
	}
	seq_putc(m, '\n');

	/* Visit every zone and print its free-list counts, see section 3.1. */
	walk_zones_in_node(m, node, true, false, pagetypeinfo_showfree_print);

	return 0;
}

3.1 pagetypeinfo_showfree_print

// Enumeration of migrate types.
// MIGRATE_HIGHATOMIC shares its value with MIGRATE_PCPTYPES; MIGRATE_CMA and
// MIGRATE_ISOLATE exist only when the corresponding config option is set.
// MIGRATE_TYPES is the last enumerator and is used as the number of types.
enum migratetype {
	MIGRATE_UNMOVABLE,
	MIGRATE_MOVABLE,
	MIGRATE_RECLAIMABLE,
	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
	MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	MIGRATE_ISOLATE,	/* can't allocate from here */
#endif
	MIGRATE_TYPES
};
// Printable names of the migrate types, indexed by enum migratetype value.
// The optional entries are guarded by the same config options as the
// matching enumerators.
char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Movable",
	"Reclaimable",
	"HighAtomic",
#ifdef CONFIG_CMA
	"CMA",
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	"Isolate",
#endif
};
/*
 * Per-zone callback of pagetypeinfo_showfree(): prints one row per migrate
 * type, each holding the number of free-list entries at every order.
 */
static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mt;

	/* One output row per migrate type. */
	for (mt = 0; mt < MIGRATE_TYPES; mt++) {
		int ord;

		/* Row label: node id, zone name, migrate type name. */
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
			   pgdat->node_id, zone->name, migratetype_names[mt]);

		/* One column per order of the free area array. */
		for (ord = 0; ord < MAX_ORDER; ord++) {
			/* Free list of this order for the current migrate type. */
			struct list_head *head = &zone->free_area[ord].free_list[mt];
			struct list_head *pos;
			unsigned long nr_entries = 0;

			/* The count is obtained by walking the whole list. */
			list_for_each(pos, head)
				nr_entries++;

			seq_printf(m, "%6lu ", nr_entries);
		}
		seq_putc(m, '\n');
	}
}

4. pagetypeinfo_showblockcount

/*
 * Print the number of pageblocks of each migratetype.
 * Writes a header row naming every migrate type, then lets
 * walk_zones_in_node() print one row per zone. Always returns 0.
 */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	pg_data_t *node = arg;
	int mt = 0;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	/* One column heading per migrate type name. */
	while (mt < MIGRATE_TYPES) {
		seq_printf(m, "%12s ", migratetype_names[mt]);
		mt++;
	}
	seq_putc(m, '\n');

	/* Visit every zone and print its page block counts, see section 4.1. */
	walk_zones_in_node(m, node, true, false,
			   pagetypeinfo_showblockcount_print);

	return 0;
}

4.1 pagetypeinfo_showblockcount_print

/*
 * Per-zone callback of pagetypeinfo_showblockcount(): steps through the zone
 * one page block at a time, tallies the blocks by migrate type and prints
 * the tallies as one row.
 */
static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	/* Number of page blocks seen so far for each migrate type. */
	unsigned long count[MIGRATE_TYPES] = { 0, };
	/* End page frame number of the zone, as given by zone_end_pfn(). */
	const unsigned long zone_end = zone_end_pfn(zone);
	unsigned long pfn;
	int mt;

	/* Start at the zone's first pfn and advance by one page block. */
	for (pfn = zone->zone_start_pfn; pfn < zone_end;
	     pfn += pageblock_nr_pages) {
		/* Translate the page frame number into its struct page. */
		struct page *page = pfn_to_online_page(pfn);

		/*
		 * Skip the block when there is no online page, when the pfn
		 * falls into an unexpected hole punched in the memmap, or
		 * when the page does not belong to this zone.
		 */
		if (!page ||
		    !memmap_valid_within(pfn, page, zone) ||
		    page_zone(page) != zone)
			continue;

		/* Migrate type of the page block, see section 4.2. */
		mt = get_pageblock_migratetype(page);

		/* Only values that index the tally array are counted. */
		if (mt < MIGRATE_TYPES)
			count[mt]++;
	}

	/* Print counts: node id and zone name, then one column per type. */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mt = 0; mt < MIGRATE_TYPES; mt++)
		seq_printf(m, "%12lu ", count[mt]);
	seq_putc(m, '\n');
}

4.2 get_pageblock_migratetype

/* Bit indices that affect a whole block of pages */
enum pageblock_bits {
	// value 0
	PB_migrate,
	// value 2
	PB_migrate_end = PB_migrate + 3 - 1,
			/* 3 bits required for migrate types */
	// value 3
	PB_migrate_skip,/* If set the block is skipped by compaction */

	/*
	 * Assume the bits will always align on a word. If this assumption
	 * changes then get/set pageblock needs updating.
	 */
	// value 4
	NR_PAGEBLOCK_BITS
};
// Number of bits used to store the migrate type:
// PB_migrate_end - PB_migrate + 1 = 2 - 0 + 1 = 3
#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
// Mask covering the migrate type bits:
// (1UL << 3) - 1 = 8 - 1 = 7 = 0000 0111 in binary
#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)

// Reads the migrate type of the page block containing page by calling
// get_pfnblock_flags_mask() with PB_migrate_end and MIGRATETYPE_MASK,
// see section 4.3
#define get_pageblock_migratetype(page)					\
	get_pfnblock_flags_mask(page, page_to_pfn(page),		\
			PB_migrate_end, MIGRATETYPE_MASK)

\

4.3 get_pfnblock_flags_mask

// Non-inline entry point: forwards all four arguments unchanged to
// __get_pfnblock_flags_mask() and returns its result.
unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
					unsigned long end_bitidx,
					unsigned long mask)
{
	return __get_pfnblock_flags_mask(page, pfn, end_bitidx, mask);
}
// Number of bits in a long: 64 when CONFIG_64BIT is defined, otherwise 32
#ifdef CONFIG_64BIT
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif /* CONFIG_64BIT */

// Extract a group of page block flag bits from the page block bitmap.
// page:       the page being examined
// pfn:        page frame number of that page
// end_bitidx: index of the last bit of the group within the block's bits;
//             2 (PB_migrate_end) when called via get_pageblock_migratetype
// mask:       applied to the result; 0000 0111 (MIGRATETYPE_MASK) in that case
// Returns the selected bits, shifted down to bit 0 and masked.
static __always_inline unsigned long __get_pfnblock_flags_mask(struct page *page,
					unsigned long pfn,
					unsigned long end_bitidx,
					unsigned long mask)
{
	unsigned long *bitmap;
	unsigned long bitidx, word_bitidx;
	unsigned long word;

	// Bitmap that stores the page block bits, see section 4.3.1
	bitmap = get_pageblock_bitmap(page, pfn);
	// Index of this page block's first bit in the bitmap, see section 4.3.2
	bitidx = pfn_to_bitidx(page, pfn);
	// Split the bit index into a word index and a bit offset inside that word
	word_bitidx = bitidx / BITS_PER_LONG;
	bitidx &= (BITS_PER_LONG-1);

	word = bitmap[word_bitidx];
	// Advance to the last bit of the requested group
	bitidx += end_bitidx;
	// The shift is BITS_PER_LONG - bitidx - 1, so bit indices are counted from
	// the most significant end of the word; the group's last bit lands on bit 0
	return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
}

4.3.1 get_pageblock_bitmap

/* Return a pointer to the bitmap storing bits affecting a block of pages */
static inline unsigned long *get_pageblock_bitmap(struct page *page,
							unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	return __pfn_to_section(pfn)->pageblock_flags;
#else
        // page block标志位默认为zone->pageblock_flags
	return page_zone(page)->pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
}

4.3.2 pfn_to_bitidx

/*
 * Convert a page frame number into the index of the first bit that belongs
 * to its page block inside the page block bitmap. The pfn is first made
 * relative (to its section with CONFIG_SPARSEMEM, otherwise to the zone
 * start rounded down to a page block boundary), then turned into a page
 * block number and scaled by NR_PAGEBLOCK_BITS.
 */
static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
	pfn &= (PAGES_PER_SECTION-1);
#else
	pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
#endif /* CONFIG_SPARSEMEM */

	/* Each page block owns NR_PAGEBLOCK_BITS consecutive bits. */
	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
}