最近遇到一个irq中断风暴导致中断来不及处理,出现系统watchdog的问题,问题报错如下:
irq 69: nobody cared (try booting with the "irqpoll" option)
......
Disabling IRQ #69
1、查看log来源
irq 69: nobody cared (try booting with the "irqpoll" option)来自于:
//https://elixir.bootlin.com/linux/v4.12/source/kernel/irq/spurious.c#L193
/*
 * Excerpt of __report_bad_irq(): prints the diagnostic for an interrupt
 * whose handlers did not handle it. Elided code is marked with "......".
 *
 * @desc:       descriptor of the offending interrupt
 * @action_ret: combined return value of the interrupt's handlers
 */
static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	......
	if (bad_action_ret(action_ret)) {
		/* The return value itself is rejected by bad_action_ret(). */
		printk(KERN_ERR "irq event %d: bogus return value %x\n",
				irq, action_ret);
	} else {
		/*
		 * Source of the "irq 69: nobody cared ..." log line. The inner
		 * quotes must be escaped to stay inside the string literal.
		 */
		printk(KERN_ERR "irq %d: nobody cared (try booting with "
				"the \"irqpoll\" option)\n", irq);
	}
	dump_stack();
	......
}
Disabling IRQ #69来自于:
/*
 * Excerpt of note_interrupt(): only the part that emits the
 * "Disabling IRQ #N" message is shown; elided code is marked "......".
 */
void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
......
/* Start a new counting window. */
desc->irq_count = 0;
/* More than 99900 unhandled interrupts were counted: report and disable. */
if (unlikely(desc->irqs_unhandled > 99900)) {
/*
* The interrupt is stuck
*/
__report_bad_irq(desc, action_ret);
/*
* Now kill the IRQ
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
......
}
}
2、代码分析过程
在《linux中断子系统详解》一文中有介绍:中断处理过程中,把硬件中断号映射到linux中断号的时候,会根据硬件中断的类型设置中断描述符的成员handle_irq。以GIC v2为例(调用链:irq_create_mapping() -> irq_domain_associate() -> domain->ops->map() -> gic_irq_domain_map()):
- 硬件中断号小于32,说明是SGI或者PPI,handle_irq设置为handle_percpu_devid_irq
- 硬件中断号大于等于32,说明是SPI,handle_irq设置为handle_fasteoi_irq
上述中断号为69,故会执行handle_fasteoi_irq() -> handle_irq_event(desc),handle_irq_event()主要把工作委托给函数handle_irq_event_percpu()。
/*
 * Runs the handlers of @desc through __handle_irq_event_percpu(), passes
 * the flags collected there to add_interrupt_randomness(), and, unless
 * noirqdebug is set, hands the combined handler result to
 * note_interrupt() for spurious-interrupt accounting.
 *
 * Returns the value produced by __handle_irq_event_percpu().
 */
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval;
unsigned int flags = 0;
retval = __handle_irq_event_percpu(desc, &flags);
add_interrupt_randomness(desc->irq_data.irq, flags);
/* Spurious-interrupt detection is skipped when noirqdebug is set. */
if (!noirqdebug)
note_interrupt(desc, retval);
return retval;
}
__handle_irq_event_percpu去遍历中断描述符的中断处理链表,执行每个中断处理描述符的处理函数。
/*
 * Walks every irqaction attached to @desc, calls its handler and ORs the
 * individual results into the return value. The action flags of handlers
 * that returned IRQ_WAKE_THREAD or IRQ_HANDLED are ORed into *@flags.
 * Elided code is marked "......".
 */
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
{
irqreturn_t retval = IRQ_NONE;
unsigned int irq = desc->irq_data.irq;
struct irqaction *action;
for_each_action_of_desc(desc, action) {
irqreturn_t res;
trace_irq_handler_entry(irq, action);
// Invoke the interrupt handler of this action
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);
// Warn once and disable local interrupts again if the handler left them enabled
if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
irq, action->handler))
local_irq_disable();
switch (res) {
case IRQ_WAKE_THREAD:
......
// Wake the interrupt thread of this action
__irq_wake_thread(desc, action);
// Intentional fallthrough: action->flags also feeds the randomness flags
case IRQ_HANDLED:
*flags |= action->flags;
break;
default:
break;
}
retval |= res;
}
return retval;
}
由于项目中均采用中断线程化,所以request irq时,调用request_threaded_irq() ----> __setup_irq() ----> irq_setup_forced_threading()与setup_irq_thread(),action->handler被设置为irq_default_primary_handler(),返回res = IRQ_WAKE_THREAD。则__handle_irq_event_percpu()返回时,retval |= IRQ_WAKE_THREAD;
static int irq_setup_forced_threading(struct irqaction *new)
{
//使能了CONFIG_IRQ_FORCED_THREADING与CONFIG_PREEMPT_RT(RT patch)则force_irqthreads为true
//使能CONFIG_IRQ_FORCED_THREADING,没有CONFIG_PREEMPT_RT,如果cmdline有threadirqs则force_irqthreads为true
if (!force_irqthreads)
return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
return 0;
/*
* No further action required for interrupts which are requested as
* threaded interrupts already
*/
if (new->handler == irq_default_primary_handler)
return 0;
new->flags |= IRQF_ONESHOT;
/*
* Handle the case where we have a real primary handler and a
* thread handler. We force thread them as well by creating a
* secondary action.
*/
if (new->handler && new->thread_fn) {
/* Allocate the secondary action */
new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
if (!new->secondary)
return -ENOMEM;
//
new->secondary->handler = irq_forced_secondary_handler;
new->secondary->thread_fn = new->thread_fn;
new->secondary->dev_id = new->dev_id;
new->secondary->irq = new->irq;
new->secondary->name = new->name;
}
/* Deal with the primary handler */
//设置IRQTF_FORCED_THREAD标志位
set_bit(IRQTF_FORCED_THREAD标志位, &new->thread_flags);
//handler改为thread_fn线程化
new->thread_fn = new->handler;
//修改handler,仅仅返回IRQ_WAKE_THREAD,去唤醒线程
new->handler = irq_default_primary_handler;
return 0;
}
/*
 * Primary handler that does no work of its own: it ignores @irq and
 * @dev_id and always returns IRQ_WAKE_THREAD.
 */
static irqreturn_t irq_default_primary_handler(int irq, void *dev_id)
{
return IRQ_WAKE_THREAD;
}
接着调用note_interrupt(desc, retval);
/* Bit 31 of threads_handled_last: deferred spurious detection is active. */
#define SPURIOUS_DEFERRED 0x80000000
/*
 * Accounts for one interrupt on @desc given the combined handler result
 * @action_ret. Unhandled interrupts are counted in irqs_unhandled; once
 * irq_count reaches 100000, the interrupt is reported and disabled if
 * more than 99900 of them were unhandled, and the counters are reset.
 */
void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
unsigned int irq;
// With threaded interrupts the primary handler returns IRQ_WAKE_THREAD, so this test is true
if (action_ret & IRQ_WAKE_THREAD) {
/*
* There is a thread woken. Check whether one of the
* shared primary handlers returned IRQ_HANDLED. If
* not we defer the spurious detection to the next
* interrupt.
*/
if (action_ret == IRQ_WAKE_THREAD) {
int handled;
// Bit 31 of threads_handled_last (SPURIOUS_DEFERRED) marks that deferred spurious detection is active.
// On the first pass the bit is not set yet: set it and return.
if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
desc->threads_handled_last |= SPURIOUS_DEFERRED;
return;
}
// Check whether a thread handler returned IRQ_HANDLED since the previous interrupt.
// Author's note: threads_handled is incremented when a thread handler has handled the interrupt (the increment is not part of this excerpt).
handled = atomic_read(&desc->threads_handled);
handled |= SPURIOUS_DEFERRED;
// A changed counter means a thread handled an interrupt since the last check
if (handled != desc->threads_handled_last) {
action_ret = IRQ_HANDLED;
desc->threads_handled_last = handled;
} else {
// Counter unchanged: no thread handled anything, treat this interrupt as unhandled
action_ret = IRQ_NONE;
}
} else {
desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
}
}
// Unhandled interrupt: update the unhandled accounting
if (unlikely(action_ret == IRQ_NONE)) {
// HZ/10 jiffies is 100 ms (10 ticks of 10 ms when HZ=100): restart the count if the previous unhandled interrupt is older than that
if (time_after(jiffies, desc->last_unhandled + HZ/10))
desc->irqs_unhandled = 1;
else // within 100 ms of the previous unhandled interrupt: keep counting
desc->irqs_unhandled++;
desc->last_unhandled = jiffies;
}
irq = irq_desc_get_irq(desc);
if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
int ok = misrouted_irq(irq);
if (action_ret == IRQ_NONE)
desc->irqs_unhandled -= ok;
}
// If 99,900 of the previous 100,000 interrupts have not been handled, assume that the IRQ is stuck in some manner. Drop a diagnostic and try to turn the IRQ off.
// (The other 100 of 100,000 interrupts may have been a correctly functioning device sharing an IRQ with the failing one.)
desc->irq_count++;
if (likely(desc->irq_count < 100000))
return;
desc->irq_count = 0;
if (unlikely(desc->irqs_unhandled > 99900)) {
/*
* The interrupt is stuck
*/
__report_bad_irq(desc, action_ret);
/*
* Now kill the IRQ
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
desc->istate |= IRQS_SPURIOUS_DISABLED;
desc->depth++;
irq_disable(desc);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
desc->irqs_unhandled = 0;
}
//https://elixir.bootlin.com/linux/v4.12/source/kernel/irq/spurious.c#L193
/*
 * Print the diagnostic for an interrupt whose handlers did not handle
 * it: a one-line error, a stack dump, and the list of registered
 * handlers (primary and, where present, threaded).
 *
 * @desc:       descriptor of the offending interrupt
 * @action_ret: combined return value of the interrupt's handlers
 */
static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	unsigned int irq = irq_desc_get_irq(desc);
	struct irqaction *action;
	unsigned long flags;

	if (bad_action_ret(action_ret)) {
		printk(KERN_ERR "irq event %d: bogus return value %x\n",
				irq, action_ret);
	} else {
		/* Inner quotes are escaped so they stay inside the literal. */
		printk(KERN_ERR "irq %d: nobody cared (try booting with "
				"the \"irqpoll\" option)\n", irq);
	}
	dump_stack();
	printk(KERN_ERR "handlers:\n");

	/*
	 * We need to take desc->lock here. note_interrupt() is called
	 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
	 * with something else removing an action. It's ok to take
	 * desc->lock here. See synchronize_irq().
	 */
	raw_spin_lock_irqsave(&desc->lock, flags);
	for_each_action_of_desc(desc, action) {
		/* One line per action; the threaded part continues the same line. */
		printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
		if (action->thread_fn)
			printk(KERN_CONT " threaded [<%p>] %pf",
					action->thread_fn, action->thread_fn);
		printk(KERN_CONT "\n");
	}
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}
综上所述,出现问题时,69号中断在最近的100,000次触发中有超过99,900次未被处理(且相邻两次未处理中断的间隔都不超过100ms,否则未处理计数会重新从1开始),内核因此判定该中断卡死,打印“nobody cared”并关闭该中断。具体解决方案涉及到项目机密,无法记录,大概过程为针对出现问题的场景、irq号,结合中断子系统处理过程进行分析。可能原因为:
- 硬件错误频繁上报中断
- 中断处理函数设置错误(如参考文档)
- 中断处理函数没有清中断