
1. memcg_stock_pcp
// Minimum number of pages try_charge() charges to the page counters in one
// go (batch = max(CHARGE_BATCH, nr_pages)). It is also the largest request
// consume_stock() will serve from the per-cpu stock, and the level above
// which refill_stock() drains the stock again.
#define CHARGE_BATCH 32U
// Per-cpu stock of pages that have already been charged to one memory cgroup.
struct memcg_stock_pcp {
// cgroup the stocked pages are charged to; reset to NULL by drain_stock()
struct mem_cgroup *cached;
// number of pre-charged pages currently held on behalf of `cached`
unsigned int nr_pages;
// work item that drain_all_stock() runs locally via drain_local_stock()
// or queues on the owning CPU via schedule_work_on()
struct work_struct work;
// bit field; the code shown here only uses bit FLUSHING_CACHED_CHARGE
unsigned long flags;
// bit number in `flags`: set by drain_all_stock() before a drain is started,
// cleared by drain_local_stock() once the stock has been drained
#define FLUSHING_CACHED_CHARGE 0
};
// One stock instance per CPU.
static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
// Taken with mutex_trylock() in drain_all_stock(): a caller that finds it
// held returns immediately instead of starting a second drain.
static DEFINE_MUTEX(percpu_charge_mutex);
2. mem_cgroup_try_charge
// Swap accounting switch tested by mem_cgroup_try_charge().
// With CONFIG_MEMCG_SWAP it is a real variable; without it the name expands
// to the constant 0, so every `if (do_swap_account)` branch is always false.
#ifdef CONFIG_MEMCG_SWAP
int do_swap_account __read_mostly;
#else
#define do_swap_account 0
#endif
/*
 * mem_cgroup_try_charge - try to charge a page to a memory cgroup
 * @page:     page about to be charged
 * @mm:       mm whose cgroup is charged when no swap owner is found
 * @gfp_mask: allocation flags forwarded to try_charge()
 * @memcgp:   out parameter; always written before returning
 * @compound: charge hpage_nr_pages(@page) pages instead of one
 *
 * Returns the result of try_charge(), or 0 without charging when the memory
 * controller is disabled or a swap-cache page already has a mem_cgroup set
 * (in both of those cases *@memcgp is NULL).
 *
 * The css reference obtained while picking the cgroup is dropped again right
 * after try_charge() returns.
 */
int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
			  bool compound)
{
	struct mem_cgroup *target = NULL;
	unsigned int npages = compound ? hpage_nr_pages(page) : 1;
	int err = 0;

	if (mem_cgroup_disabled())
		goto finish;

	if (PageSwapCache(page)) {
		VM_BUG_ON_PAGE(!PageLocked(page), page);

		/* The head page already carries a cgroup: nothing to charge. */
		if (compound_head(page)->mem_cgroup)
			goto finish;

		if (do_swap_account) {
			swp_entry_t entry = { .val = page_private(page), };
			unsigned short cgroup_id = lookup_swap_cgroup_id(entry);
			struct mem_cgroup *owner;

			/* Use the recorded swap owner only if it is still online. */
			rcu_read_lock();
			owner = mem_cgroup_from_id(cgroup_id);
			if (owner && css_tryget_online(&owner->css))
				target = owner;
			rcu_read_unlock();
		}
	}

	/* No usable swap owner: fall back to the cgroup of @mm. */
	if (target == NULL)
		target = get_mem_cgroup_from_mm(mm);

	err = try_charge(target, gfp_mask, npages);
	css_put(&target->css);
finish:
	*memcgp = target;
	return err;
}
2.1 try_charge
// Charge nr_pages pages to memcg.
//
// Returns 0 when the charge was satisfied (from the per-cpu stock, by
// charging the page counters, or by a forced charge) and -ENOMEM when it
// could not be satisfied and gfp_mask does not contain __GFP_NOFAIL.
// The root cgroup is never charged: the function returns 0 for it at once.
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages)
{
// Charge at least CHARGE_BATCH pages at a time; whatever exceeds nr_pages
// is handed to the per-cpu stock at done_restock.
unsigned int batch = max(CHARGE_BATCH, nr_pages);
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct page_counter *counter;
unsigned long nr_reclaimed;
bool may_swap = true;
bool drained = false;
if (mem_cgroup_is_root(memcg))
return 0;
retry:
// If this CPU's charge cache holds enough pages reserved from the target
// memory cgroup, take nr_pages from the cache (no need to charge the
// cgroup itself) and finish successfully [see section 2.2].
if (consume_stock(memcg, nr_pages))
return 0;
// With memsw accounting the memsw counter is charged first, then the memory
// counter. If the memory charge fails, the memsw charge is undone.
// mem_over_limit becomes the cgroup owning the counter that refused.
if (!do_memsw_account() ||
page_counter_try_charge(&memcg->memsw, batch, &counter)) {
if (page_counter_try_charge(&memcg->memory, batch, &counter))
goto done_restock;
if (do_memsw_account())
page_counter_uncharge(&memcg->memsw, batch);
mem_over_limit = mem_cgroup_from_counter(counter, memory);
} else {
mem_over_limit = mem_cgroup_from_counter(counter, memsw);
// The memsw counter refused: reclaim below is told not to swap.
may_swap = false;
}
// The batched charge failed; retry with exactly nr_pages before reclaiming.
if (batch > nr_pages) {
batch = nr_pages;
goto retry;
}
// OOM victims, tasks with a fatal signal pending and exiting tasks are
// charged by force instead of being made to reclaim.
if (unlikely(tsk_is_oom_victim(current) ||
fatal_signal_pending(current) ||
current->flags & PF_EXITING))
goto force;
// Same for tasks running with PF_MEMALLOC.
if (unlikely(current->flags & PF_MEMALLOC))
goto force;
// A task already in memcg OOM handling does not reclaim here.
if (unlikely(task_in_memcg_oom(current)))
goto nomem;
// Callers that may not block skip reclaim.
if (!gfpflags_allow_blocking(gfp_mask))
goto nomem;
// Record that the limit was hit, then try to reclaim nr_pages from the
// cgroup that is over its limit.
mem_cgroup_event(mem_over_limit, MEMCG_MAX);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap);
// Enough headroom now: retry the charge.
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
goto retry;
// Once per call, drain the per-cpu stocks of cgroups under mem_over_limit
// and retry.
if (!drained) {
drain_all_stock(mem_over_limit);
drained = true;
goto retry;
}
if (gfp_mask & __GFP_NORETRY)
goto nomem;
// Reclaim made progress and the request is at most
// 1 << PAGE_ALLOC_COSTLY_ORDER pages: retry without using up nr_retries.
if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER))
goto retry;
// NOTE(review): presumably waits for an in-progress charge move that
// involves mem_over_limit - confirm against mem_cgroup_wait_acct_move().
if (mem_cgroup_wait_acct_move(mem_over_limit))
goto retry;
// Bounded number of further attempts.
if (nr_retries--)
goto retry;
if (gfp_mask & __GFP_NOFAIL)
goto force;
if (fatal_signal_pending(current))
goto force;
// Out of options: record the OOM event and invoke the memcg OOM handling,
// then fall through to nomem.
mem_cgroup_event(mem_over_limit, MEMCG_OOM);
mem_cgroup_oom(mem_over_limit, gfp_mask,
get_order(nr_pages * PAGE_SIZE));
nomem:
// If the allocation is allowed to fail, return the error code -ENOMEM.
if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
force:
// If the allocation is not allowed to fail, charge unconditionally and let
// the memory usage temporarily exceed the hard limit.
page_counter_charge(&memcg->memory, nr_pages);
if (do_memsw_account())
page_counter_charge(&memcg->memsw, nr_pages);
// One css reference per charged page.
css_get_many(&memcg->css, nr_pages);
return 0;
done_restock:
// Add the number of charged pages to the cgroup's reference count.
css_get_many(&memcg->css, batch);
// Park the pages charged beyond nr_pages in this CPU's stock.
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
// Walk from memcg up through its ancestors and stop at the first cgroup
// whose memory usage exceeds its `high` value: in interrupt context its
// high_work is scheduled, otherwise the batch is added to
// current->memcg_nr_pages_over_high and a resume notification is requested.
do {
if (page_counter_read(&memcg->memory) > memcg->high) {
if (in_interrupt()) {
schedule_work(&memcg->high_work);
break;
}
current->memcg_nr_pages_over_high += batch;
set_notify_resume(current);
break;
}
} while ((memcg = parent_mem_cgroup(memcg)));
return 0;
}
2.2 consume_stock
static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock
unsigned long flags
bool ret = false
// 如果需要申请的页数大于记账缓存预留的最大页数32, 则直接返回, 准备从内存控制组中申请
if (nr_pages > CHARGE_BATCH)
return ret
local_irq_save(flags)
// 取出当前cpu的记账缓存
stock = this_cpu_ptr(&memcg_stock)
// 满足以下2个条件时, 从记账缓存中分配相应的页数
// 1. 准备记账的内存控制组与当前记账缓存保存的内存控制组是同一个
// 2. 记账缓存中预留的页数大于申请的页数
if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
// 记账缓存预留的页数减去记账页数
stock->nr_pages -= nr_pages
ret = true
}
local_irq_restore(flags)
return ret
}
2.3 page_counter_try_charge
/*
 * page_counter_try_charge - charge pages to a counter and all its ancestors
 * @counter:  counter to start from
 * @nr_pages: number of pages to charge
 * @fail:     set to the counter that refused the charge on failure
 *
 * Walks from @counter up the ->parent chain, adding @nr_pages to each level.
 * If a level would exceed its limit, that level's addition is reverted, its
 * failcnt is bumped, every level charged before it is cancelled again and
 * false is returned. Returns true when all levels accepted the charge.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *level = counter;
	struct page_counter *undo;

	while (level) {
		long total = atomic_long_add_return(nr_pages, &level->count);

		if (total > level->limit) {
			/* Back out this level and remember who refused. */
			atomic_long_sub(nr_pages, &level->count);
			level->failcnt++;
			*fail = level;

			/* Roll back every level that was charged before it. */
			for (undo = counter; undo != *fail; undo = undo->parent)
				page_counter_cancel(undo, nr_pages);
			return false;
		}

		/* Track the highest usage seen at this level. */
		if (total > level->watermark)
			level->watermark = total;

		level = level->parent;
	}

	return true;
}
2.4 mem_cgroup_margin
/*
 * mem_cgroup_margin - pages that can still be charged to @memcg
 * @memcg: memory cgroup to inspect
 *
 * Returns the distance between the memory counter and its limit (0 when the
 * usage is at or above the limit). With memsw accounting enabled the result
 * is additionally capped by the memsw headroom, and is 0 when memsw usage is
 * above its limit.
 */
static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
{
	unsigned long usage = page_counter_read(&memcg->memory);
	unsigned long cap = READ_ONCE(memcg->memory.limit);
	unsigned long room = (usage < cap) ? cap - usage : 0;

	if (!do_memsw_account())
		return room;

	usage = page_counter_read(&memcg->memsw);
	cap = READ_ONCE(memcg->memsw.limit);
	if (usage > cap)
		return 0;

	return min(room, cap - usage);
}
2.5 drain_all_stock
/*
 * drain_all_stock - drain the per-cpu stocks of @root_memcg's subtree
 * @root_memcg: only stocks cached for a descendant of this cgroup are drained
 *
 * Does nothing when percpu_charge_mutex is already held. Otherwise visits
 * every online CPU; a non-empty stock whose cgroup is a descendant of
 * @root_memcg is drained directly when it is the current CPU's stock, or has
 * its work item queued on the owning CPU. The FLUSHING_CACHED_CHARGE bit
 * keeps a stock from being drained or queued again while it is still set.
 */
static void drain_all_stock(struct mem_cgroup *root_memcg)
{
	int cpu, this_cpu;

	if (!mutex_trylock(&percpu_charge_mutex))
		return;

	this_cpu = get_cpu();
	for_each_online_cpu(cpu) {
		struct memcg_stock_pcp *pcp = &per_cpu(memcg_stock, cpu);
		struct mem_cgroup *owner = pcp->cached;

		/* Empty stock, or its cgroup can no longer be pinned. */
		if (!owner || !pcp->nr_pages)
			continue;
		if (!css_tryget(&owner->css))
			continue;

		if (mem_cgroup_is_descendant(owner, root_memcg) &&
		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &pcp->flags)) {
			if (cpu == this_cpu)
				drain_local_stock(&pcp->work);
			else
				schedule_work_on(cpu, &pcp->work);
		}

		css_put(&owner->css);
	}
	put_cpu();

	mutex_unlock(&percpu_charge_mutex);
}
2.5.1 drain_local_stock
/*
 * drain_local_stock - drain the stock of the CPU this runs on
 * @dummy: work item; not used by the body
 *
 * With local interrupts disabled, returns the current CPU's stocked pages
 * via drain_stock() and then clears FLUSHING_CACHED_CHARGE in its flags.
 */
static void drain_local_stock(struct work_struct *dummy)
{
	unsigned long irq_state;
	struct memcg_stock_pcp *pcp;

	local_irq_save(irq_state);

	pcp = this_cpu_ptr(&memcg_stock);
	drain_stock(pcp);
	clear_bit(FLUSHING_CACHED_CHARGE, &pcp->flags);

	local_irq_restore(irq_state);
}
2.5.2 drain_stock
/*
 * drain_stock - give all stocked pages back to their cgroup
 * @stock: per-cpu stock to empty
 *
 * If the stock holds pages, they are uncharged from the cached cgroup's
 * memory counter (and its memsw counter when memsw accounting is on), the
 * matching css references are dropped and the page count is reset to 0.
 * The cached cgroup pointer is cleared in every case.
 */
static void drain_stock(struct memcg_stock_pcp *stock)
{
	struct mem_cgroup *owner = stock->cached;

	if (stock->nr_pages) {
		unsigned int held = stock->nr_pages;

		page_counter_uncharge(&owner->memory, held);
		if (do_memsw_account())
			page_counter_uncharge(&owner->memsw, held);
		css_put_many(&owner->css, held);
		stock->nr_pages = 0;
	}

	stock->cached = NULL;
}
2.6 refill_stock
/*
 * refill_stock - park already-charged pages in this CPU's stock
 * @memcg:    cgroup the pages are charged to
 * @nr_pages: number of pages to add to the stock
 *
 * Runs with local interrupts disabled. A stock that currently belongs to a
 * different cgroup is drained first and re-targeted at @memcg. If the stock
 * ends up holding more than CHARGE_BATCH pages it is drained again.
 */
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
	unsigned long irq_state;
	struct memcg_stock_pcp *pcp;

	local_irq_save(irq_state);

	pcp = this_cpu_ptr(&memcg_stock);
	if (pcp->cached != memcg) {
		/* Stock belongs to someone else: flush it and take it over. */
		drain_stock(pcp);
		pcp->cached = memcg;
	}

	pcp->nr_pages += nr_pages;
	if (pcp->nr_pages > CHARGE_BATCH)
		drain_stock(pcp);

	local_irq_restore(irq_state);
}
3. mem_cgroup_commit_charge
/*
 * mem_cgroup_commit_charge - bind a charged page to its memory cgroup
 * @page:     page that was charged
 * @memcg:    cgroup to bind the page to; NULL makes this a no-op
 * @lrucare:  forwarded to commit_charge(); must be set if @page is on the LRU
 * @compound: account hpage_nr_pages(@page) pages instead of one
 *
 * Sets the page's cgroup, updates the charge statistics and checks events
 * with interrupts disabled, and, for a swap-cache page under memsw
 * accounting, uncharges the swap entry stored in the page's private field.
 */
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
			      bool lrucare, bool compound)
{
	unsigned int npages = compound ? hpage_nr_pages(page) : 1;

	VM_BUG_ON_PAGE(!page->mapping, page);
	VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);

	if (mem_cgroup_disabled() || !memcg)
		return;

	commit_charge(page, memcg, lrucare);

	local_irq_disable();
	mem_cgroup_charge_statistics(memcg, page, compound, npages);
	memcg_check_events(memcg, page);
	local_irq_enable();

	if (do_memsw_account() && PageSwapCache(page)) {
		swp_entry_t swap = { .val = page_private(page) };

		mem_cgroup_uncharge_swap(swap, npages);
	}
}
3.1 commit_charge
/*
 * commit_charge - record @memcg as the owner of @page
 * @page:    page whose mem_cgroup field is set; it must not have one yet
 * @memcg:   cgroup to store in the page
 * @lrucare: when set, the assignment is bracketed by lock_page_lru() and
 *           unlock_page_lru()
 */
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
			  bool lrucare)
{
	int was_on_lru;

	VM_BUG_ON_PAGE(page->mem_cgroup, page);

	if (lrucare) {
		lock_page_lru(page, &was_on_lru);
		page->mem_cgroup = memcg;
		unlock_page_lru(page, was_on_lru);
	} else {
		page->mem_cgroup = memcg;
	}
}
3.1.1 lock_page_lru
/*
 * lock_page_lru - take the zone's LRU lock and pull @page off the LRU
 * @page:     page to isolate
 * @isolated: set to 1 if the page was on an LRU list and has been removed
 *            from it, 0 otherwise
 *
 * Returns with the zone LRU lock held and interrupts disabled.
 */
static void lock_page_lru(struct page *page, int *isolated)
{
	struct zone *zone = page_zone(page);
	struct lruvec *vec;

	spin_lock_irq(zone_lru_lock(zone));

	if (!PageLRU(page)) {
		*isolated = 0;
		return;
	}

	vec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
	ClearPageLRU(page);
	del_page_from_lru_list(page, vec, page_lru(page));
	*isolated = 1;
}
3.1.2 unlock_page_lru
/*
 * unlock_page_lru - put @page back on the LRU and drop the zone's LRU lock
 * @page:     page handled by the matching lock_page_lru() call
 * @isolated: value reported by lock_page_lru(); when non-zero the page is
 *            added back to the LRU list of its lruvec
 */
static void unlock_page_lru(struct page *page, int isolated)
{
	struct zone *zone = page_zone(page);

	if (isolated) {
		struct lruvec *vec;

		vec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
		VM_BUG_ON_PAGE(PageLRU(page), page);
		SetPageLRU(page);
		add_page_to_lru_list(page, vec, page_lru(page));
	}

	spin_unlock_irq(zone_lru_lock(zone));
}
3.2 mem_cgroup_charge_statistics
/*
 * mem_cgroup_charge_statistics - update per-cpu counters for a (un)charge
 * @memcg:    cgroup whose statistics are updated
 * @page:     page being accounted
 * @compound: also account the pages as MEMCG_RSS_HUGE
 * @nr_pages: signed page delta; a positive value counts one PGPGIN event,
 *            anything else one PGPGOUT event
 *
 * Anonymous pages are counted as MEMCG_RSS, all other pages as MEMCG_CACHE
 * (plus NR_SHMEM when swap-backed). The absolute value of @nr_pages is added
 * to nr_page_events.
 */
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
					 struct page *page,
					 bool compound, int nr_pages)
{
	if (!PageAnon(page)) {
		__this_cpu_add(memcg->stat->count[MEMCG_CACHE], nr_pages);
		if (PageSwapBacked(page))
			__this_cpu_add(memcg->stat->count[NR_SHMEM], nr_pages);
	} else {
		__this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages);
	}

	if (compound) {
		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
		__this_cpu_add(memcg->stat->count[MEMCG_RSS_HUGE], nr_pages);
	}

	if (nr_pages <= 0) {
		__this_cpu_inc(memcg->stat->events[PGPGOUT]);
		/* The event counter below takes a magnitude. */
		nr_pages = -nr_pages;
	} else {
		__this_cpu_inc(memcg->stat->events[PGPGIN]);
	}

	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
}
4. mem_cgroup_cancel_charge
/*
 * mem_cgroup_cancel_charge - undo a charge that was tried but not committed
 * @page:     page the charge was made for
 * @memcg:    cgroup that was charged; NULL makes this a no-op
 * @compound: the charge covered hpage_nr_pages(@page) pages instead of one
 *
 * Does nothing when the memory controller is disabled.
 */
void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
			      bool compound)
{
	unsigned int npages = compound ? hpage_nr_pages(page) : 1;

	if (!mem_cgroup_disabled() && memcg)
		cancel_charge(memcg, npages);
}
4.1 cancel_charge
/*
 * cancel_charge - return charged pages to @memcg's counters
 * @memcg:    cgroup to uncharge; the root cgroup is left untouched
 * @nr_pages: number of pages to give back
 *
 * Uncharges the memory counter (and the memsw counter when memsw accounting
 * is on) and drops one css reference per page.
 */
static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
	if (!mem_cgroup_is_root(memcg)) {
		page_counter_uncharge(&memcg->memory, nr_pages);
		if (do_memsw_account())
			page_counter_uncharge(&memcg->memsw, nr_pages);

		css_put_many(&memcg->css, nr_pages);
	}
}