1、问题现象
- 机器开机进行日常测试3小时
- USB wifi模块插入机器后,机器自动重启
2、kernel log文件
全部贴上来会很长,在下面一点点单独解释。
3、从log来看,重启原因是空指针解引用(NULL pointer dereference)导致
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
Mem abort info:
ESR = 0x96000006
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000006
CM = 0, WnR = 0
user pgtable: 4k pages, 39-bit VAs, pgdp=000000000d701000
[0000000000000000] pgd=000000000d702003, pud=000000000d702003, pmd=0000000000000000
Internal error: Oops: 96000006 [#1] PREEMPT SMP
4、可能造成空指针的原因
继续往上查看log,发现插入USB wifi模块后,存在内存分配失败的现象,可能是内存分配失败导致使用时出现空指针。
kworker/0:0: page allocation failure: order:4, mode:0xa20(GFP_ATOMIC), nodemask=(null),cpuset=/,mems_allowed=0
CPU: 0 PID: 5171 Comm: kworker/0:0 Tainted: P WC O 5.4.125-android11-2-g6d2ca40337f4-dirty-ab2800 #1
Hardware name: Amlogic (DT)
Workqueue: usb_hub_wq hub_event
Call trace:
[ffffffc0200eb240+ 64][<ffffffc0100c63b4>] dump_backtrace+0x0/0x18c
[ffffffc0200eb280+ 32][<ffffffc0100c6564>] show_stack+0x24/0x34
[ffffffc0200eb2a0+ 160][<ffffffc010dce124>] dump_stack+0xbc/0x108
[ffffffc0200eb340+ 224][<ffffffc0102c6624>] warn_alloc+0xd8/0x128
[ffffffc0200eb420+ 144][<ffffffc0102c78e0>] __alloc_pages_slowpath+0xb10/0xb3c
[ffffffc0200eb4b0+ 80][<ffffffc0102c69b8>] __alloc_pages_nodemask+0x2d4/0x35c
[ffffffc0200eb500+ 96][<ffffffc01029e8d8>] kmalloc_order+0x5c/0x1a8
[ffffffc0200eb560+ 96][<ffffffc01029ea64>] kmalloc_order_trace+0x40/0x108
[ffffffc0200eb5c0+ 144][<ffffffc00a72fd30>] glSetHifInfo+0x670/0x7c0 [wlan_mt7663_usb]
[ffffffc0200eb650+ 96][<ffffffc00a6c5e70>] cleanup_module+0x10bc/0x2438 [wlan_mt7663_usb]
[ffffffc0200eb6b0+ 48][<ffffffc00a72e434>] glRegisterBus+0x1b4/0x978 [wlan_mt7663_usb]
4.1、查看代码
根据打印的堆栈,问题出在函数glSetHifInfo()中,查看源码可知,glSetHifInfo中会使用kmalloc分配连续的物理内存。
prUsbReq->prBufCtrl->pucBuf = kmalloc(USB_TX_CMD_BUF_SIZE, GFP_ATOMIC);
5、kmalloc介绍
kmalloc用来在内核中分配在物理上连续的内存,虚拟地址自然也是连续的。
kmalloc使用GFP_ATOMIC、GFP_KERNEL分配内存的区别?
- GFP_ATOMIC:分配内存的过程是一个原子过程,分配内存的过程不会被(高优先级进程或中断)打断,可能返回失败;
- GFP_KERNEL:正常分配内存,可能会阻塞;
- GFP_DMA —— 从ZONE_DMA区域分配内存,供寻址范围受限的DMA控制器使用(注意:kmalloc分配的内存本身就是物理连续的,GFP_DMA只是进一步限定分配的物理地址范围)。
#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) //0xa20u
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) //0xcc0u
#define GFP_DMA __GFP_DMA //0x01u
#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) //表示更高优先级
#define __GFP_IO ((__force gfp_t)___GFP_IO) //表示调用者不可以回收页面或者睡眠
#define __GFP_FS ((__force gfp_t)___GFP_FS)
#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
#define __GFP_DMA ((__force gfp_t)___GFP_DMA)
#define ___GFP_DMA 0x01u
#define ___GFP_HIGHMEM 0x02u
#define ___GFP_DMA32 0x04u
#define ___GFP_MOVABLE 0x08u
#define ___GFP_RECLAIMABLE 0x10u
#define ___GFP_HIGH 0x20u
#define ___GFP_IO 0x40u
#define ___GFP_FS 0x80u
#define ___GFP_ZERO 0x100u
#define ___GFP_ATOMIC 0x200u
#define ___GFP_DIRECT_RECLAIM 0x400u
#define ___GFP_KSWAPD_RECLAIM 0x800u
为什么使用GFP_ATOMIC参数申请内存呢?
结合出现问题的场景:插入USB wifi模块后,hub中断只负责唤醒处理流程,hub_event()实际是由usb_hub_wq工作队列的worker线程回调执行的(log中"Workqueue: usb_hub_wq hub_event"以及kworker进程也印证了这一点),USB枚举运行在进程上下文而非中断上下文,是允许睡眠的。因此这里本可以使用GFP_KERNEL进行分配;驱动使用GFP_ATOMIC应是为了避免分配路径阻塞等待,但代价是内存紧张时分配更容易失败。
6、分析log
"kworker/0:0: page allocation failure......"是warn_alloc()的输出,表示无法满足分配2^order大小的页面。warn_alloc()被如下函数调用:__alloc_pages_slowpath()、__vmalloc_area_node()、__vmalloc_node_range。
warn_alloc()打印的含义
//path: mm/page_alloc.c
static inline struct page *
__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac)
{
......
alloc_flags = gfp_to_alloc_flags(gfp_mask);
......
fail:
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
return page;
}
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
return;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
//显示对应进程名称
pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl",
current->comm, &vaf, gfp_mask, &gfp_mask,
nodemask_pr_args(nodemask));
va_end(args);
cpuset_print_current_mems_allowed();
pr_cont("\n");
//显示栈信息
dump_stack();
warn_alloc_show_mem(gfp_mask, nodemask);
}
static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
if (!(gfp_mask & __GFP_NOMEMALLOC))
if (tsk_is_oom_victim(current) ||
(current->flags & (PF_MEMALLOC | PF_EXITING)))
filter &= ~SHOW_MEM_FILTER_NODES;
if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
//显示内存信息,这里是重点
show_mem(filter, nodemask);
}
//lib/show_mem.c
void show_mem(unsigned int filter, nodemask_t *nodemask)
{
pg_data_t *pgdat;
unsigned long total = 0, reserved = 0, highmem = 0;
printk("Mem-Info:\n");
show_free_areas(filter, nodemask);
for_each_online_pgdat(pgdat) {
int zoneid;
for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
struct zone *zone = &pgdat->node_zones[zoneid];
if (!populated_zone(zone))
continue;
total += zone->present_pages;
reserved += zone->present_pages - zone_managed_pages(zone);
if (is_highmem_idx(zoneid))
highmem += zone->present_pages;
}
}
//整个平台的页面统计信息:所有页面数、reserved、cma等等
printk("%lu pages RAM\n", total);
printk("%lu pages HighMem/MovableOnly\n", highmem);
printk("%lu pages reserved\n", reserved);
#ifdef CONFIG_CMA
printk("%lu pages cma reserved\n", totalcma_pages);
#endif
#ifdef CONFIG_MEMORY_FAILURE
printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
#endif
}
show_mem()打印内容详解
如下是出问题时,kernel中打印出来的内存信息:
//show_mem()打印内容详解:
Mem-Info:
//显示所有node的统计信息
active_anon:152878 inactive_anon:92312 isolated_anon:0
active_file:55513 inactive_file:50038 isolated_file:0
unevictable:585 dirty:46 writeback:0 unstable:0
slab_reclaimable:13087 slab_unreclaimable:23878 slab_unreclaimable_Order:2154
mapped:74780 shmem:6497 pagetables:11885 bounce:0
[cma] driver:46165 anon:36301 file:10015 isolate:1939456 total:242688
free:158325 free_pcp:1023 free_cma:145990
//分别显示不同node的统计信息,只有一个node
Node 0 active_anon:611512kB inactive_anon:369248kB active_file:222052kB inactive_file:200152kB
unevictable:2340kB isolated(anon):0kB isolated(file):0kB mapped:299120kB dirty:184kB writeback:0kB
shmem:25988kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
//分别显示所有zone的统计信息
DMA32 free:633300kB min:10240kB low:21996kB high:32740kB active_anon:611512kB inactive_anon:369248kB
active_file:222052kB inactive_file:200152kB unevictable:2340kB writepending:184kB present:3670016kB
managed:3582536kB mlocked:2340kB kernel_stack:32112kB shadow_call_stack:2008kB pagetables:47540kB
bounce:0kB free_pcp:4092kB local_pcp:860kB free_cma:583960kB
lowmem_reserve[]: 0 0 0
//显示所有zone下不同order空闲数目统计信息
//'U':不可移动
//'M':可移动
//'E':可回收
//'H':等同于MIGRATE_PCPTYPES
//'C':CMA区域页面
DMA32: 1587*4kB (UMEC) 1959*8kB (UMEC) 1803*16kB (UMEC) 676*32kB (UMEC) 305*64kB (C) 86*128kB (C)
19*256kB (C) 10*512kB (C) 26*1024kB (C) 5*2048kB (C) 118*4096kB (C) = 633204kB
//AML添加的各migratetype空闲的内存
Free_Unmovable:7440
Free_Movable:4775
Free_Reclaimable:96
Free_CMA:145990
Free_HighAtomic:0
Free_Isolate:0
//总的文件缓存页面数量
112537 total pagecache pages
//显示swap cache统计信息
52 pages in swap cache
Swap cache stats: add 17548, delete 17497, find 26/64
Free swap = 453884kB
Total swap = 524284kB
//整个平台的页面统计信息:所有页面数、reserved、cma等等
917504 pages RAM
0 pages HighMem/MovableOnly
21870 pages reserved
242688 pages cma reserved
7、内存分配失败原因
从log中可以看到:
kworker/0:0: page allocation failure: order:4, mode:0xa20(GFP_ATOMIC), nodemask=(null),cpuset=/,mems_allowed=0
表示在系统通过kmalloc分配内存时,小于等于8KB的内存,采用slab/slub进行分配;大于8KB的内存,直接走伙伴系统buddy进行分配(对应堆栈中的kmalloc_order)。在分配order:4(4KB × 2^4 = 64KB)的连续内存时,buddy分配失败。
查看zone下内存的空闲情况:
DMA32: 1587*4kB (UMEC) 1959*8kB (UMEC) 1803*16kB (UMEC) 676*32kB (UMEC) 305*64kB (C) 86*128kB (C) 19*256kB (C) 10*512kB (C) 26*1024kB (C) 5*2048kB (C) 118*4096kB (C) = 633204kB
在DMA32中,64KB的内存块还剩下305个,但是都标记为"C",表示全部属于CMA区域。
CMA还剩下这么大的内存,为什么申请还是失败呢?
在CMA存在的情况下根据migratetype决定是否可用CMA区域,而gfp_mask决定了申请页面的migratetype。在__alloc_pages_slowpath()中,gfp_to_alloc_flags()来进行gfp_mask和migrate转换。此问题中,
static inline unsigned int
gfp_to_alloc_flags(gfp_t gfp_mask)
{
unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
//__GFP_HIGH到ALLOC_HIGH转换
alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
if (gfp_mask & __GFP_ATOMIC) {
if (!(gfp_mask & __GFP_NOMEMALLOC))
alloc_flags |= ALLOC_HARDER;
alloc_flags &= ~ALLOC_CPUSET;
} else if (unlikely(rt_task(current)) && !in_interrupt())
alloc_flags |= ALLOC_HARDER;
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
alloc_flags |= ALLOC_KSWAPD;
#ifdef CONFIG_CMA
//将gfp_mask转换到migratetype,判断是否是MIGRATE_MOVABLE。
//如果是,则可以在CMA中去分配。
//也就是说必须gfp_flags中包含__GFP_MOVABLE才可以在CMA中分配。
//MIGRATE_MOVABLE值为1,此问题gfpflags_to_migratetype()返回为0
if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
#endif
return alloc_flags;
}
enum migratetype {
MIGRATE_UNMOVABLE,
MIGRATE_MOVABLE,
MIGRATE_RECLAIMABLE,
#ifdef CONFIG_CMA
MIGRATE_CMA,
#endif
MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_MEMORY_ISOLATION
MIGRATE_ISOLATE, /* can't allocate from here */
#endif
MIGRATE_TYPES
};
#define ___GFP_RECLAIMABLE 0x10u
#define ___GFP_MOVABLE 0x08u
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE)
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) //0x18u
#define GFP_MOVABLE_SHIFT 3
//gfp_flags = GFP_ATOMIC = 0xa20u
static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
{
VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
if (unlikely(page_group_by_mobility_disabled))
return MIGRATE_UNMOVABLE;
//(0xa20u & 0x18u) >> 3 = 0
return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
}
从DMA32区域空闲页面可以看出,有305个64kb块空闲,但是属于CMA区域。所以申请不成功。
那么GFP_KERNEL申请CMA的区域会成功吗?
如果要gfpflags_to_migratetype()返回为1,则gfp_flags需要包含___GFP_MOVABLE。GFP_KERNEL = (__GFP_RECLAIM | __GFP_IO | __GFP_FS) = 0xcc0u,不包含___GFP_MOVABLE,则也会返回失败。
故问题的根本原因为:虽然存在很多64kb大小的空闲内存,但是这些内存全是CMA区域,kmalloc()无法使用。
抓取正常开机时候的pagetypeinfo,可看到开机时order为4时,是有内存可供分配的。所以这里怀疑是内存泄漏,通过下面脚本跟踪MemFree。
console:/ # cat /proc/pagetypeinfo
Page block order: 10
Pages per block: 1024
Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10
Node 0, zone DMA32, type Unmovable 123 149 95 66 45 23 13 5 9 0 0
Node 0, zone DMA32, type Movable 14 10 23 9 1 2 6 2 2 0 0
Node 0, zone DMA32, type Reclaimable 0 0 1 27 30 16 7 0 0 0 0
Node 0, zone DMA32, type CMA 0 1 0 1 1 1 1 3 3 2 170
Node 0, zone DMA32, type HighAtomic 0 0 0 0 0 0 0 0 0 0 0
Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0
Number of blocks type Unmovable Movable Reclaimable CMA HighAtomic Isolate
Node 0, zone DMA32 150 495 14 237 0 0
针对此问题措施如下:
- 临时措施:可以在开机的时候预先分配内存,防止内存泄漏后再插入模块,导致申请不到内存;在申请不到内存时,判断内存是否为NULL,为NULL则return,防止系统崩溃。
- 永久措施:找到内存泄漏的地方。