2、同一bus下两次注册同一个device

479 阅读4分钟

在linux驱动开发过程中,经常会使用到platform总线来开发,通过platform_driver_register()注册驱动,platform_device_register()来注册设备。如果在同一个platform bus下两次注册同一个device,那么会出现什么情况呢?

1、创建一个platform_device

/* Name shared by the platform device and the platform driver below. */
#define SWLED_DEV       "sw_led"
/* Platform device used in this example; only its name is set here. */
static struct platform_device swled_plfdev = {
    .name   = SWLED_DEV,
};
/*
 * Platform driver carrying the same name as the device above.
 * swled_probe is defined outside this snippet.
 */
struct platform_driver swled_plfdrv = {
    .probe = swled_probe,
    .driver     = {
        .owner  = THIS_MODULE,
        .name   = SWLED_DEV,
    },
};

2、注册device与driver

/*
 * The same device object is registered twice on purpose, to reproduce the
 * crash analysed below. Each call overwrites ret without checking it.
 */
ret = platform_device_register(&swled_plfdev);
ret = platform_device_register(&swled_plfdev);
ret = platform_driver_register(&swled_plfdrv);

3、编译软件后烧录到板卡查看结果

系统启动后直接崩溃后重启,崩溃信息如下:

[    1.808981@2]  load module: swoled
[    1.809829@2]  kobject (00000000d497ec8e): tried to init an initialized object, something is seriously wrong.
[    1.810849@2]  CPU: 2 PID: 1 Comm: init Tainted: G        WC O      5.4.125-android11-2-g2ac4107d4ee9-dirty-ab1646 #1
[    1.812142@2]  Hardware name: Amlogic (DT)
[    1.812651@2]  Call trace:
[    1.812996@2]  [ffffffc020003980+  64][<ffffffc0100c63b4>] dump_backtrace+0x0/0x18c
[    1.813941@2]  [ffffffc0200039c0+  32][<ffffffc0100c6564>] show_stack+0x24/0x34
[    1.814855@2]  [ffffffc0200039e0+  64][<ffffffc010dcc6b8>] dump_stack+0xbc/0x108
[    1.815773@2]  [ffffffc020003a20+  32][<ffffffc010dd2774>] kobject_init+0x9c/0xa4
[    1.816707@2]  [ffffffc020003a40+  32][<ffffffc01076dd90>] device_initialize+0x34/0xe0
[    1.817692@2]  [ffffffc020003a60+  32][<ffffffc010777014>] platform_device_register+0x20/0x6c
[    1.818760@2]  [ffffffc020003a80+ 400][<ffffffc0099a0660>] swled_dev_init+0x58/0x9c [swoled]
[    1.819804@2]  [ffffffc020003c10+  64][<ffffffc01008c748>] do_one_initcall+0x130/0x294
[    1.820791@2]  [ffffffc020003c50+ 240][<ffffffc0101aed78>] do_init_module+0x5c/0x21c
[    1.821753@2]  [ffffffc020003d40+ 240][<ffffffc0101aec18>] load_module+0x3a3c/0x3aac
[    1.822716@2]  [ffffffc020003e30+  48][<ffffffc0101a9964>] __arm64_sys_finit_module+0xbc/0xd8
[    1.823780@2]  [ffffffc020003e60+  64][<ffffffc0100d1408>] el0_svc_common+0xb0/0x188
[    1.824743@2]  [ffffffc020003ea0+ 336][<ffffffc0100d150c>] el0_svc_compat_handler+0x2c/0x3c
[    1.825782@2]  [ffffffc020003ff0+   0][<ffffffc010083524>] el0_svc_compat+0x8/0x24
[    1.826788@2]  list_add corruption. prev->next should be next (ffffff80a9224000), but was ffffffc0099a4258. (prev=ffffffc0099a4258).
[    1.828220@2]  ------------[ cut here ]------------
[    1.828814@2]  kernel BUG at lib/list_debug.c:28!
[    1.829401@2]  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP

......

[    2.038039@2]  Call trace:
[    2.038378@2]  [ffffffc0200038d0+  16][<ffffffc0105bb6f8>] __list_add_valid+0x8c/0x90[ffffffc0200038e0+ 176][<ffffffc010dd34b0>] kobject_add_internal+0xd4/0x420
[    2.040380@2]  [ffffffc020003990+  64][<fffft_add+0x88/0xdc
[    2.041303@2]  [ffffffc0200039d0+  64][<ffffffc01076dfd8>] device_add+0x150/0xa48
[    2.042234@2]  [ff][<ffffffc010777440>] platform_device_add+0x88/0x228
[    2.043250@2]  [ffffffc020003a60+  32][<ffffffc01077702c>] platform_init+0x58/0x9c [swoled]
[    2.045366@2]  [ffffffc020003c10+  64][<ffffffc01008c748>] do_one_initcall+0x130/0x294
[    2.ffffc0101aed78>] do_init_module+0x5c/0x21c
[    2.047313@2]  [ffffffc020003d40+ 240][<ffffffc0101aec18>] load_module+0x3a3c [ffffffc020003e30+  48][<ffffffc0101a9964>] __arm64_sys_finit_module+0xbc/0xd8
[    2.049341@2]  [ffffffc020003e60+  64][<n+0xb0/0x188
[    2.050304@2]  [ffffffc020003ea0+ 336][<ffffffc0100d150c>] el0_svc_compat_handler+0x2c/0x3c
[    2.051343@03ff0+   0][<ffffffc010083524>] el0_svc_compat+0x8/0x24
[    2.052288@2]  Code: 9138fc00 aa0303e1 aa0803e3 97ee869a (d42100--[ end trace 18856db82685ee12 ]---
[    2.066232@2]  Kernel panic - not syncing: Fatal exception
[    2.066273@2]  SMP: stopping secondary CPUs
[    2.066629@3]  usb 3-1.1: new high-speed USB device number 3 using xhci-hcd
[    2.067611@2]  Kernel Offset: disabled
[    2.067617@2]  CPU features: 0x00010006,2a00aa10
[    2.067621@2]  Memory Limit: none
[    2.080919@2]  Rebooting in 5 seconds..

关键信息:

[    1.826788@2]  list_add corruption. prev->next should be next (ffffff80a9224000), but was ffffffc0099a4258. (prev=ffffffc0099a4258).
[    1.828220@2]  ------------[ cut here ]------------
[    1.828814@2]  kernel BUG at lib/list_debug.c:28!

lib/list_debug.c第28行如下:

image.png

可以看到是由于prev->next != next导致,此处涉及到内核双向list操作。因为device在注册到内核时,会将struct device中的struct kobject kobj成员的struct list_head entry,挂载到总线struct bus_type的struct subsys_private *p中的struct kset *devices_kset的struct list_head list中,便于系统统一管理。

关于kset、kobject的详细内容可参考:

跟着platform_device_register()找到整个过程的函数调用栈:

platform_device_register()
    |---device_initialize()
    |       kobject_init()
    |           kobject_init_internal()
    |               INIT_LIST_HEAD(&kobj->entry);
    |---platform_device_add()
            device_add()
                kobject_add()
                    kobject_add_varg()
                        kobject_add_internal()
                            kobj_kset_join()
                                list_add_tail(&kobj->entry, &kobj->kset->list);

在第一次进行platform_device_register()注册时,假设要加入到链表中的kobj为kobj_B,则将kobj_B->entry加入到kobj_B->kset->list中,得到链表如下:

image.png

在第二次进行platform_device_register()注册时,先调用device_initialize()初始化设备。在这个过程中,将kobj_B->entry进行初始化后:

/*
 * Initialize a list node so that it forms an empty list: both its next
 * and prev pointers are set to the node itself.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
	WRITE_ONCE(list->next, list);
	list->prev = list;
}

image.png

此时kobj_B->entry的next(以及prev)指向了本身,而kobj_B->kset->list的prev仍然指向kobj_B->entry。

然后接着在platform_device_add()中调用device_add()添加设备到系统。

|---device_add()
            kobject_add()
                kobject_add_varg()
                    kobject_add_internal()
                        kobj_kset_join()
                            list_add_tail(&kobj->entry, &kobj->kset->list);

在调用list_add_tail()时,参数new指向kobj_B->entry,head指向kobj_B->kset->list。

/*
 * Insert new at the tail of the list, i.e. between the current last
 * node (head->prev) and head.
 */
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
    __list_add(new, head->prev, head);
}

在调用__list_add()时,参数new指向kobj_B->entry,prev指向kobj_B->kset->list->prev(即kobj_B->entry),next指向kobj_B->kset->list。

/*
 * Link new between prev and next, which the caller passes as the two
 * nodes that should currently be adjacent.
 *
 * Nothing is modified when __list_add_valid() returns false.
 */
static inline void __list_add(struct list_head *new,
			      struct list_head *prev,
			      struct list_head *next)
{
	/* Bail out before touching any pointer if the sanity check fails. */
	if (!__list_add_valid(new, prev, next))
		return;

	next->prev = new;
	new->next = next;
	new->prev = prev;
	/* prev->next is written last, through WRITE_ONCE(). */
	WRITE_ONCE(prev->next, new);
}

则在__list_add_valid()中判断prev->next != next时,prev->next即kobj_B->kset->list.prev->next,也就是kobj_B->entry.next,它已被INIT_LIST_HEAD()重置为指向kobj_B->entry本身;而next指向kobj_B->kset->list,故二者不相等,触发"list_add corruption"检查。

/*
 * Sanity-check an insertion of new between prev and next.
 *
 * Three conditions are tested in order, each wrapped in
 * CHECK_DATA_CORRUPTION() (defined outside this snippet) together with a
 * diagnostic message:
 *   1. next->prev != prev
 *   2. prev->next != next
 *   3. new == prev || new == next  (the node is already one of its neighbours)
 *
 * Returns false if any wrapped check evaluates to true, true otherwise.
 */
bool __list_add_valid(struct list_head *new, struct list_head *prev,
		      struct list_head *next)
{
	if (CHECK_DATA_CORRUPTION(next->prev != prev,
			"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
			prev, next->prev, next) ||
	    CHECK_DATA_CORRUPTION(prev->next != next,
			"list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
			next, prev->next, prev) ||
	    CHECK_DATA_CORRUPTION(new == prev || new == next,
			"list_add double add: new=%px, prev=%px, next=%px.\n",
			new, prev, next))
		return false;

	return true;
}