1. Overview
Linux内核链表为双向循环链表,设计的巧妙之处在于将链“嵌入”表项而非表项“包含”自身指针,从而将链和表项解耦合,保证通用性。
先看下常规双向链表的典型定义:
/*
 * Element of a conventional doubly-linked list: the payload pointer and the
 * links to the neighbouring elements live in the same structure.
 */
typedef struct DListElmt_ {
    void *data;                     /* payload carried by this element */
    struct DListElmt_ *next, *prev; /* following / preceding element */
} DListElmt;
/*
 * Bookkeeping structure of a conventional doubly-linked list.
 * NOTE(review): field meanings below are inferred from their names only;
 * no code using them is visible here.
 */
typedef struct DList_ {
    int size;                                         /* presumably the element count */
    int (*match)(const void *key1, const void *key2); /* key comparison callback */
    void (*destroy)(void *data);                      /* callback handed an element's data */
    DListElmt *head, *tail;                           /* first / last element */
} DList;
以上定义DListElmt中“包含”了指向自己的指针,从而将每个Element(也常用Node命名)链接起来。虽然可以借助void指针屏蔽具体“表对象”差异,但实质上仍然没有将表项和链解耦。
再看下Linux内核链表结构的定义:
/*
 * Pure link structure: it holds only the forward and backward pointers and
 * carries no information about the object it will be embedded in.
 */
struct list_head {
    struct list_head *next;
    struct list_head *prev;
};
链就是链,不夹杂任何表项的信息。那么如何用链将表项串起来呢?就是用“嵌入”。定义如下一个简单的结构,将struct list_head放入其中:
/* Sample list item: the payload plus an embedded link. */
struct node {
    int id;                /* payload */
    struct list_head link; /* embedded link that chains nodes together */
};
如果将成员link展开,其实就是嵌入了struct list_head类型的两个指针,然后通过针对struct list_head结构的一系列接口操纵表结构。
2. Interface
2.1 初始化头节点
/*
 * LIST_HEAD_INIT - initializer for a list head whose next and prev both
 * point back at the head itself.
 * @name: the struct list_head variable being initialized.
 */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
/*
 * LIST_HEAD - define a struct list_head variable called @name and
 * initialize it with LIST_HEAD_INIT in the same statement.
 */
#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
/**
 * INIT_LIST_HEAD - make an existing list head point at itself
 * @list: the list head to initialize.
 *
 * Run-time counterpart of LIST_HEAD_INIT: both links of @list are set to
 * @list.
 */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
    list->prev = list->next = list;
}
构造链表前,需要初始化头节点。LIST_HEAD和INIT_LIST_HEAD功能一样,都是将前驱和后继指针指向自己,区别仅仅是后者是个函数,需要预先定义一个struct list_head类型变量,并将其地址作为参数传入。
// Two ways to define a list head named head
#if 1
/* Define and initialize in one step with the LIST_HEAD macro. */
LIST_HEAD(head);
#else
/* Declare the variable first, then initialize it through INIT_LIST_HEAD(). */
struct list_head head;
INIT_LIST_HEAD(&head);
#endif
2.2 添加节点
list_add
/*
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 *
 * @new:  entry to link in.
 * @prev: entry that will precede @new.
 * @next: entry that will follow @new.
 */
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
{
next->prev = new; /* successor now points back at the new entry */
new->next = next;
new->prev = prev;
prev->next = new; /* predecessor now points forward at the new entry */
}
/**
 * list_add - link a new entry in directly behind a list head
 * @new: entry to insert.
 * @head: list head that @new is placed after.
 *
 * The entry that used to follow @head becomes the successor of @new, so
 * repeated calls behave like pushing onto a stack.
 */
static inline void list_add(struct list_head *new, struct list_head *head)
{
    struct list_head *first = head->next;

    __list_add(new, head, first);
}
从注释中可看出,new节点是插入到head节点之后的。
for (int i = 0; i < 10; i++) {
    struct node *new = malloc(sizeof *new);

    assert(new != NULL);
    new->id = i;
    list_add(&new->link, &head);
    /*
     * list_add(new, head) links the element in right after head, so the
     * element inserted last ends up first in the list.
     *
     *   insert 0:  head                    =>  head <---> 0
     *   insert 1:  head <---> 0            =>  head <---> 1 <---> 0
     *   insert 2:  head <---> 1 <---> 0    =>  head <---> 2 <---> 1 <---> 0
     *   ...
     */
}
如注释所说,list_add特别适合实现栈操作,head.next即指向栈顶元素。
list_add_tail
/**
 * list_add_tail - link a new entry in directly in front of a list head
 * @new: entry to insert.
 * @head: list head that @new is placed before.
 *
 * The entry that used to precede @head becomes the predecessor of @new, so
 * repeated calls behave like appending to a queue.
 */
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
    struct list_head *last = head->prev;

    __list_add(new, last, head);
}
从注释中可看出,new节点是插入到head节点之前的。
for (int i = 0; i < 10; i++) {
    struct node *new = malloc(sizeof *new);

    assert(new != NULL);
    new->id = i;
    list_add_tail(&new->link, &head);
    /*
     * list_add_tail(new, head) links the element in right before head, so
     * elements keep their insertion order.
     *
     *   insert 0:  head                    =>  0 <---> head
     *   insert 1:  0 <---> head            =>  0 <---> 1 <---> head
     *   insert 2:  0 <---> 1 <---> head    =>  0 <---> 1 <---> 2 <---> head
     *   ...
     */
}
如注释所说,list_add_tail特别适合实现队列,由于是循环链表,head.next即指向队头元素,而head.prev指向队尾元素。
2.3 删除节点
/*
 * Drop whatever sits between two entries by linking them to each other.
 *
 * Internal helper: the caller must already know both neighbours.
 *
 * @prev: entry that ends up directly before @next.
 * @next: entry that ends up directly after @prev.
 */
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
    prev->next = next;
    next->prev = prev;
}
/**
 * list_del - unlink an entry from the list it is on
 * @entry: the element to remove.
 *
 * The neighbours of @entry are joined together and both links of @entry are
 * set to NULL. The entry is not re-initialized, so it must not be treated as
 * a list head or an empty list afterwards.
 */
static inline void list_del(struct list_head *entry)
{
    struct list_head *before = entry->prev;
    struct list_head *after = entry->next;

    __list_del(before, after);
    entry->prev = NULL;
    entry->next = NULL;
}
注意,在较新版本的内核中,list_del函数体中将entry->next和entry->prev置为NULL的两行(即函数体的第3、4行)被替换为:
/*
 * Variant of the last two statements of list_del(): instead of NULL, the
 * unlinked entry's links are set to the LIST_POISON1 / LIST_POISON2 markers
 * (defined elsewhere).
 */
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
目的是可以更方便地定位use-after-free这类错误。 see more:lists.kernelnewbies.org/pipermail/k…
2.4 遍历链表及container_of
遍历链表可以采用如下两种方式:
/*
 * Print the id of every node on the list anchored at head, one per line.
 * The two preprocessor branches show the two equivalent iteration styles.
 */
void travel()
{
#if 1
    /* Walk the raw links and map each one back to its node. */
    struct list_head *cur;

    list_for_each(cur, &head) {
        struct node *item = list_entry(cur, struct node, link);

        printf("%d\n", item->id);
    }
#else
    /* Let the macro hand out the containing nodes directly. */
    struct node *item;

    list_for_each_entry(item, &head, link) {
        printf("%d\n", item->id);
    }
#endif
}
list_for_each_entry的方式看起来更简洁,但看下内部实现,两种方式其实差别不大。
/**
 * list_for_each_entry - iterate over list of given type
 * @pos: the type * to use as a loop cursor.
 * @head: the head for your list.
 * @member: the name of the list_head within the struct.
 *
 * Expands to a for statement: @pos starts at list_first_entry() and is
 * advanced with list_next_entry(). typeof(*pos) recovers the entry type from
 * the cursor. The loop stops once the @member embedded in @pos is @head
 * itself, so after a complete pass @pos does not refer to a node of the list.
 */
#define list_for_each_entry(pos, head, member) \
for (pos = list_first_entry(head, typeof(*pos), member); \
&pos->member != (head); \
pos = list_next_entry(pos, member))
list_first_entry和list_next_entry都是调用list_entry这个宏。从而引出拥有“Linux内核第一美宏”赞誉的container_of。前面提过,“嵌入”带来了通用性,但如何通过嵌入的通用链结构,找到被嵌入的表项结构呢?就是靠container_of。
/**
 * list_entry - get the struct for this entry
 * @ptr: the &struct list_head pointer.
 * @type: the type of the struct this is embedded in.
 * @member: the name of the list_head within the struct.
 *
 * Thin alias: all three arguments are forwarded unchanged to container_of().
 */
#define list_entry(ptr, type, member) \
container_of(ptr, type, member)
/*
 * offsetof - byte offset of MEMBER inside TYPE.
 *
 * Any earlier definition is removed first. The offset is obtained by treating
 * address 0 as a pointer to TYPE, taking the address of its MEMBER and
 * converting that address to size_t.
 * NOTE(review): <stddef.h> already provides a standard offsetof; this
 * hand-written form relies on the compiler tolerating a member address formed
 * from a null pointer - consider using the standard macro instead.
 */
#undef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr: the pointer to the member.
 * @type: the type of the container struct this is embedded in.
 * @member: the name of the member within the struct.
 *
 * Written as a statement expression ({ ... }) whose value is its last
 * expression. The first statement copies @ptr into __mptr, a pointer declared
 * with the member's own type via typeof(), so a @ptr of a different pointer
 * type is visible to the compiler. The second statement steps back from the
 * member address by offsetof(type, member) bytes and casts the result to
 * (type *).
 */
#define container_of(ptr, type, member) ({ \
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type, member) );})
先看个container_of宏使用的例子:
/* Sample list item used by the container_of example: payload plus link. */
struct node {
    int id;                /* payload */
    struct list_head link; /* embedded link, located after id */
};
int main()
{
struct node n = {1024, NULL, NULL};
struct node *p = container_of(&n.link, struct node, link);
printf("id=%d\n", p->id);
return 0;
}
我们不能保证嵌入的struct list_head总是放入目标结构体的顶部,所以需要计算嵌入成员在目标结构体中的偏移,然后用已知的struct list_head的地址减去这个偏移量,即可获得目标结构体的地址。
先分析下offsetof宏,其作用是获取MEMBER成员在TYPE类型结构体中的偏移量。宏整体分三个部分:(TYPE *)0 将0地址强制转换为指向TYPE类型结构的指针;&((TYPE *)0)->MEMBER 通过该指针取得MEMBER成员的地址;最外层的(size_t)再把这个地址转换为整数。
将0地址强制转换为指向TYPE类型结构的指针后,通过该指针获取到的MEMBER成员的地址值,其大小在数值上等于MEMBER成员在TYPE类型结构体中的偏移量。在如下程序中验证:
/* Sample list item used by the offsetof example: payload plus link. */
struct node {
    int id;                /* payload, first member */
    struct list_head link; /* embedded link whose offset is printed */
};
/*
 * offsetof demo: print the member addresses obtained through a struct node
 * pointer with value 0, followed by the value of offsetof for link.
 */
int main(void)
{
    /* %p expects a void *, so the member addresses are converted explicitly. */
    printf("&id=%p\n", (void *)&((struct node *)0)->id);
    printf("&link=%p\n", (void *)&((struct node *)0)->link);
    /* offsetof yields a size_t, whose conversion specifier is %zu. */
    printf("offsetof link=%zu\n", offsetof(struct node, link));
    return 0;
}
./a.out
&id=(nil)
&link=0x8
offsetof link=8
再继续分析container_of宏:
整体为一个“语句表达式”(GNU C扩展),其值为内嵌语句中最后一个表达式的值,即第二条语句(原图中标记为圈2)。第一条语句(圈1)中的typeof为GNU C扩展的一个关键字,用于获取一个变量或表达式的类型;这条语句定义了一个与member成员同类型的临时指针变量__mptr,并赋值为ptr,即结构体成员member的地址。第二条语句用member成员的地址减去其在type结构体中的偏移量,即可获得type结构体的首地址,最后再转换成(type *)类型。
3. Practice
最后,以一道笔者亲身经历的面试题作为练习。
题目:
当前路径和相对路径拼接, 输出绝对路径.
需求如下:
- 输入: 一个当前路径和一个相对路径
- 输出: 拼接后输出一个绝对路径
要求:
- 对输入的当前路径和相对路径要做有效性检查
- 路径格式都是linux方式
例如:
输入: /usr/local/bin ../.././//share/./locale
输出: /usr/share/locale
输入: /usr/ .././../var/tmp//
输出: /var/tmp
输入: /usr/ .././.../var/tmp/
输出: 目录不存在
题目理解
/* Outline of the idea only: the calls are shown without their real arguments or error checks. */
chdir(pwd); /* enter the given current path */
chdir(relpath); /* follow the relative path from there */
getcwd(); /* the resulting working directory is the answer */
在给出的当前路径下,cd到相对路径,最后执行pwd即可获得到绝对路径。当然,如果你直接调用以上三个接口来实现,可能会被直接pass,但你的思路完全正确。
利用list_head结构实现
static char *abspath(char *pwd, char *relpath)
{
LIST_HEAD(abs);
LIST_HEAD(rel);
char *ptr, *p;
ptr = strtok_r(pwd, "/", &p);
while (ptr != NULL) {
struct dir *new = (struct dir *) malloc(sizeof(struct dir));
assert(new != NULL);
new->name = ptr;
list_add(&new->list, &abs); /* stack push */
ptr = strtok_r(NULL, "/", &p);
}
ptr = strtok_r(relpath, "/", &p);
while (ptr != NULL) {
struct dir *new = (struct dir *) malloc(sizeof(struct dir));
assert(new != NULL);
new->name = ptr;
list_add_tail(&new->list, &rel); /* queue enqueue */
ptr = strtok_r(NULL, "/", &p);
}
struct list_head *pos;
struct dir *d;
list_for_each(pos, &rel) {
d = list_entry(pos, struct dir, list);
if ( strcmp(d->name, ".") == 0 || strcmp(d->name, "") == 0)
continue;
else if (strcmp(d->name, "..") == 0) {
if (!list_empty(&abs))
list_del(abs.next);
} else {
struct dir *new = (struct dir *) malloc(sizeof(struct dir));
assert(new != NULL);
new->name = d->name;
list_add_tail(&new->list, &abs);
}
}
char *abspath = (char *) malloc(sizeof(char) * MAX_PATH_LEN);
assert(abspath != NULL);
strcat(abspath, "/");
list_for_each(pos, &abs) {
d = list_entry(pos, struct dir, list);
strcat(abspath, d->name);
strcat(abspath, "/");
}
return abspath;
}
如上,实现大概分为三个部分: 1)将当前路径入“栈” 2)将相对路径入“队” 3)遍历分析相对路径,“..”则pop退栈一个当前路径的目录节点,“.”和“/”则跳过,非这两种情况则追加到绝对路径。 注意,3)中的追加破坏了栈只允许在栈顶执行pop/push操作的严格定义,但我们也没说这就是栈。
附注
参考
《Mastering Algorithms with C》