Linux - 进程 - 进程初识

187 阅读5分钟

一. 程序 & 进程

  • 程序 (program) 是指编译过的, 可执行的二进制代码, 保存在存储介质(如磁盘)上, 不运行.

  • 进程 (process) 是指已经加载到内存中, 正在运行的程序.

二. 进程的定义

从硬件角度看, 进程可以视为由一组元素组成的实体, 两个基本元素是程序代码 (program code) 和与代码相关联的数据集合 (set of data), 当 CPU 开始执行这个程序代码的时候, 我们把这个执行实体称为进程.

从内核观点看, 进程的目的就是担当分配系统资源 (CPU 时间, 内存等) 的实体, 进程执行的任意时刻, 都可以由如下元素来表征:

  • 标识符: 描述本进程的唯一标识符, 用来与其他进程进行区分.

  • 状态: 进程状态.

  • 优先级: 相对于其他进程的优先顺序.

  • 程序计数器: 程序中即将被执行的下一条指令的地址.

  • 内存指针: 包括程序代码和进程相关数据的指针, 还有和其他进程共享的内存块的指针.

  • 上下文数据: 进程执行时处理器的寄存器中的数据.

  • I/O状态信息: 包括显式的I/O请求, 分配给进程的I/O设备和被进程使用的文件列表.

  • 记账信息: 包括处理器时间总和, 使用的时钟数总和, 时间限制, 记账号等.

image.png

上述列表信息存放在一个叫做进程控制块 (process control block) 的数据结构中 (如上图所示), PCB 由操作系统创建和管理. 比较有意义的一点是, PCB 包含了充分的信息, 因此可以中断一个进程的执行, 并在合适的时机恢复进程的执行, 就好像进程从未被中断过一样. 进程中断时, 操作系统会把程序计数器和 cpu 寄存器 (上下文数据) 保存到 PCB 的相应位置, 进程状态相应地改为其他值. 当需要恢复该进程时, 操作系统将进程置为运行态, 并把该进程的程序计数器和上下文数据加载到 cpu 寄存器中, 进而执行这一进程.

因此, 可以说进程由内核PCB数据结构对象及程序代码和相关数据组成 (如下图所示). 单处理器计算机在任何时刻最多都只能执行一个进程, 而正在运行的进程的状态称为运行态.

image.png

三. Linux 下如何描述并组织进程

进程描述符 (process descriptor)

Linux下的进程描述符都是 task_struct 类型结构, 它的字段包含了与一个进程相关的所有信息. 因为进程描述符中存放了大量的信息, 所以它是相当复杂的, 它不仅包含了很多进程属性的字段, 而且一些字段还包括了指向其他数据结构的指针. 下图示意性地描述了Linux的进程描述符.

image.png

如下为Linux 2.6.11版的内核中task_struct的实现.

/*
 * Process descriptor: one task_struct holds the per-process bookkeeping
 * (state, scheduling fields, links to other tasks, memory, credentials,
 * open files, signals, ...). Many members are pointers to separate
 * structures rather than embedded data.
 *
 * Members wrapped in #ifdef / #if defined(CONFIG_*) exist only when that
 * option is defined, so the structure layout depends on the build
 * configuration. Member order and types are part of that layout.
 */
struct task_struct {
	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
	struct thread_info *thread_info;
	atomic_t usage;
	unsigned long flags;	/* per process flags, defined below */
	unsigned long ptrace;

	int lock_depth;		/* Lock depth */

	/* NOTE(review): the members down to time_slice look like scheduler state - confirm */
	int prio, static_prio;
	struct list_head run_list;
	prio_array_t *array;

	unsigned long sleep_avg;
	unsigned long long timestamp, last_ran;
	int activated;

	unsigned long policy;
	cpumask_t cpus_allowed;
	unsigned int time_slice, first_time_slice;

	/* present only when CONFIG_SCHEDSTATS is defined */
#ifdef CONFIG_SCHEDSTATS
	struct sched_info sched_info;
#endif

	/* NOTE(review): presumably the node linking this descriptor into a list of tasks - confirm */
	struct list_head tasks;
	/*
	 * ptrace_list/ptrace_children forms the list of my children
	 * that were stolen by a ptracer.
	 */
	struct list_head ptrace_children;
	struct list_head ptrace_list;

	struct mm_struct *mm, *active_mm;

/* task state */
	struct linux_binfmt *binfmt;
	long exit_state;
	int exit_code, exit_signal;
	int pdeath_signal;  /*  The signal sent when the parent dies  */
	/* (unlabelled in the original source, which only had a "???" marker here) */
	unsigned long personality;
	unsigned did_exec:1;	/* 1-bit flag */
	pid_t pid;
	pid_t tgid;	/* NOTE(review): presumably the thread-group id, cf. group_leader - confirm */
	/*
	 * pointers to (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with
	 * p->parent->pid)
	 */
	struct task_struct *real_parent; /* real parent process (when being debugged) */
	struct task_struct *parent;	/* parent process */
	/*
	 * children/sibling forms the list of my children plus the
	 * tasks I'm ptracing.
	 */
	struct list_head children;	/* list of my children */
	struct list_head sibling;	/* linkage in my parent's children list */
	struct task_struct *group_leader;	/* threadgroup leader */

	/* PID/PID hash table linkage. */
	struct pid pids[PIDTYPE_MAX];	/* one entry per PID type (array of PIDTYPE_MAX) */

	struct completion *vfork_done;		/* for vfork() */
	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */

	unsigned long rt_priority;
	/* NOTE(review): it_* / real_timer look like interval-timer state - confirm */
	unsigned long it_real_value, it_real_incr;
	cputime_t it_virt_value, it_virt_incr;
	cputime_t it_prof_value, it_prof_incr;
	struct timer_list real_timer;
	cputime_t utime, stime;
	unsigned long nvcsw, nivcsw; /* context switch counts */
	struct timespec start_time;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
	unsigned long min_flt, maj_flt;
/* process credentials */
	uid_t uid,euid,suid,fsuid;
	gid_t gid,egid,sgid,fsgid;
	struct group_info *group_info;
	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
	unsigned keep_capabilities:1;	/* 1-bit flag */
	struct user_struct *user;
	/* keyring pointers: present only when CONFIG_KEYS is defined */
#ifdef CONFIG_KEYS
	struct key *session_keyring;	/* keyring inherited over fork */
	struct key *process_keyring;	/* keyring private to this process (CLONE_THREAD) */
	struct key *thread_keyring;	/* keyring private to this thread */
#endif
	int oomkilladj; /* OOM kill score adjustment (bit shift). */
	/* fixed-size buffer of TASK_COMM_LEN chars; presumably the command name - confirm */
	char comm[TASK_COMM_LEN];
/* file system info */
	int link_count, total_link_count;
/* ipc stuff */
	struct sysv_sem sysvsem;
/* CPU-specific state of this task */
	struct thread_struct thread;
/* filesystem information */
	struct fs_struct *fs;
/* open file information */
	struct files_struct *files;
/* namespace */
	struct namespace *namespace;
/* signal handlers */
	struct signal_struct *signal;
	struct sighand_struct *sighand;

	sigset_t blocked, real_blocked;
	struct sigpending pending;

	unsigned long sas_ss_sp;
	size_t sas_ss_size;
	/* callback taking an opaque pointer; notifier_data is presumably what gets passed - confirm */
	int (*notifier)(void *priv);
	void *notifier_data;
	sigset_t *notifier_mask;
	
	void *security;
	struct audit_context *audit_context;

/* Thread group tracking */
   	u32 parent_exec_id;
   	u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
	spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
	spinlock_t proc_lock;
/* context-switch lock */
	spinlock_t switch_lock;

/* journalling filesystem info */
	void *journal_info;

/* VM state */
	struct reclaim_state *reclaim_state;

	struct dentry *proc_dentry;
	struct backing_dev_info *backing_dev_info;

	struct io_context *io_context;

	unsigned long ptrace_message;
	siginfo_t *last_siginfo; /* For ptrace use.  */
/*
 * current io wait handle: wait queue entry to use for io waits
 * If this thread is processing aio, this points at the waitqueue
 * inside the currently handled kiocb. It may be NULL (i.e. default
 * to a stack based synchronous wait) if its doing sync IO.
 */
	wait_queue_t *io_wait;
/* i/o counters (bytes read/written, #syscalls) */
	u64 rchar, wchar, syscr, syscw;
	/* accounting fields: present only when CONFIG_BSD_PROCESS_ACCT is defined */
#if defined(CONFIG_BSD_PROCESS_ACCT)
	u64 acct_rss_mem1;	/* accumulated rss usage */
	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
	clock_t acct_stimexpd;	/* clock_t-converted stime since last update */
#endif
	/* memory-policy fields: present only when CONFIG_NUMA is defined */
#ifdef CONFIG_NUMA
  	struct mempolicy *mempolicy;
	short il_next;
#endif
};

双向链表 (doubly linked list)

Linux内核使用 list_head 双向链表这样的数据结构来对进程描述符进行组织, 字段 next 和 prev 分别是指向链表中后一个元素和前一个元素的指针. 值得特别关注的是, list_head 字段中的指针存放的是另一个 list_head 字段的地址, 而不是含有 list_head 结构的整个数据结构(如进程描述符 task_struct)的地址.

image.png