

 waston 2015-11-26
来源:Linux内核基础-事件通知链(notifier chain)
  1. 概述
  2. 数据结构
    1. 向事件通知链注册的步骤
    2. 通知子系统有事件发生
    3. 事件列表
  3. 简单一例

1.1. 概述

       通知链只能用于内核各个子系统之间的事件通知,而不能用于内核与用户空间之间的事件通知。组成内核的核心系统代码均位于kernel目录下,通知链表位于kernel/notifier.c中,对应的头文件为include/linux/notifier.h。通知链表机制并不复杂,实现它的代码只有区区几百行。



 struct notifier_block {
     int (*notifier_call)(struct notifier_block *, unsigned long, void *);
     struct notifier_block *next;
     int priority;
  1. notifier_call:当相应事件发生时应该调用的函数,由被通知方提供,如other_subsys_1;
  2. notifier_block *next:用于链接成链表的指针;
  3. priority:回调函数的优先级,一般默认为0。
内核代码中一般把通知链命名为xxx_chain, xxx_notifier_chain这种形式的变量名。围绕核心数据结构notifier_block,内核定义了四种通知链类型:
1. 原子通知链(Atomic notifier chains):通知链元素的回调函数(当事件发生时要执行的函数)在中断或原子操作上下文中运行,不允许阻塞。对应的链表头结构:
 struct atomic_notifier_head {
     spinlock_t lock;
     struct notifier_block *head;
2. 可阻塞通知链(Blocking notifier chains):通知链元素的回调函数在进程上下文中运行,允许阻塞。对应的链表头:
 struct blocking_notifier_head {
     struct rw_semaphore rwsem;
     struct notifier_block *head;
3. 原始通知链(Raw notifier chains):对通知链元素的回调函数没有任何限制,所有锁和保护机制都由调用者维护。网络子系统使用的就是该类型,通过以下宏实现head的初始化:
 static RAW_NOTIFIER_HEAD(netdev_chain);
 #define RAW_NOTIFIER_INIT(name)  {  \
        .head= NULL }
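 /* 调用RAW_NOTIFIER_HEAD(name)即可定义并初始化一个原始通知链表头 */
 #define RAW_NOTIFIER_HEAD(name)    \
 struct raw_notifier_head name =    \
         RAW_NOTIFIER_INIT(name)
 /* 因此 static RAW_NOTIFIER_HEAD(netdev_chain); 展开后等价于: */
 struct raw_notifier_head netdev_chain = {
      .head = NULL
 };
 /* 对应的链表头结构: */
 struct raw_notifier_head {
     struct notifier_block *head;
 };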
4. SRCU 通知链(SRCU notifier chains):可阻塞通知链的一种变体。对应的链表头:
 struct srcu_notifier_head {
     struct mutex mutex;
     struct srcu_struct srcu;
     struct notifier_block *head;
1.3. 运行机理

1.3.1. 向事件通知链注册的步骤

1. 申明struct notifier_block结构
2. 编写notifier_call函数
3. 调用特定的事件通知链的注册函数,将notifier_block注册到通知链中

1.3.2. 通知子系统有事件发生

notifier_call_chain会按照通知链上各成员的优先级顺序依次执行回调函数(notifier_call_x);回调函数在调用notifier_call_chain的执行上下文中运行;其返回值是NOTIFY_XXX的形式,定义在include/linux/notifier.h中:
 #define NOTIFY_DONE             0x0000 /* 对事件视而不见 */
 #define NOTIFY_OK                   0x0001 /* 事件正确处理 */
 #define NOTIFY_STOP_MASK 0x8000 /*由notifier_call_chain检查,看继续调用回调函数,还是停止,_BAD和_STOP中包含该标志 */
 #define NOTIFY_BAD   (NOTIFY_STOP_MASK|0x0002) /*事件处理出错,不再继续调用回调函数 */
 /*
  * Clean way to return from the notifier and stop further calls.
  */
 #define NOTIFY_STOP   (NOTIFY_OK|NOTIFY_STOP_MASK) /* 事件已正确处理,但不再继续调用后续的回调函数 */

1.3.3. 事件列表

       对于网络子系统而言,其事件常以NETDEV_XXX命名;描述了网络设备状态(dev->flags)、传送队列状态 (dev->state)、设备注册状态(dev->reg_state),以及设备的硬件功能特性(dev->features):
 /* netdevice notifier chain */
 #define NETDEV_UP                        0x0001 /* 激活一个网络设备 */
 #define NETDEV_DOWN                 0x0002 /* 停止一个网络设备,所有对该设备的引用都应释放 */
 #define NETDEV_REBOOT       0x0003   /* 检查到网络设备接口硬件崩溃,硬件重启 */
 #define NETDEV_CHANGE            0x0004 /* 网络设备的数据包队列状态发生改变 */
 #define NETDEV_REGISTER         0x0005 /* 一个网络设备实例注册到系统中,但尚未激活 */
 #define NETDEV_UNREGISTER   0x0006   /* 网络设备驱动已卸载 */
 #define NETDEV_CHANGEMTU    0x0007 /* MTU发生了改变 */
 #define NETDEV_CHANGEADDR 0x0008 /* 硬件地址发生了改变 */
 #define NETDEV_GOING_DOWN   0x0009 /* 网络设备即将关闭,由dev->close报告,通知相关子系统处理 */
 #define NETDEV_CHANGENAME           0x000A /* 网络设备名改变 */
 #define NETDEV_FEAT_CHANGE           0x000B /* feature网络硬件功能改变 */
 #define NETDEV_PRE_UP                        0x000D

1.4. 简单一例:

       通过上面所述,notifier_chain机制只能在内核各子系统间使用,因此,这里使用3个模块:test_chain_0、test_chain_1、test_chain_2。
       当test_chain_2通过module_init初始化模块时发出事件TESTCHAIN_INIT;然后test_chain_1作出相应的处理:打印test_chain_2正在初始化。
 /* test_chain_0.c :0. 申明一个通知链;1. 向内核注册通知链;2. 定义事件; 3. 导出符号,因而必须最后退出 */

 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h> /* printk() */
 #include <linux/fs.h> /* everything() */

 #define TESTCHAIN_INIT 0x52U
 static RAW_NOTIFIER_HEAD(test_chain);

 /* define our own notifier_call_chain */
 int call_test_notifiers(unsigned long val, void *v)
      return raw_notifier_call_chain(&test_chain, val, v);

 /* define our own notifier_chain_register func */
 int register_test_notifier(struct notifier_block *nb)
     int err;
     err = raw_notifier_chain_register(&test_chain, nb);
     return err;

 static int __init test_chain_0_init(void)
     printk(KERN_DEBUG "I'm in test_chain_0\n");
     return 0;

 static void __exit test_chain_0_exit(void)
     printk(KERN_DEBUG "Goodbye to test_chain_0\n");
 //  call_test_notifiers(TESTCHAIN_EXIT, (int *)NULL);


 /* test_chain_1.c :1. 定义回调函数;2. 定义notifier_block;3. 向chain_0注册notifier_block;*/

 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h> /* printk() */
 #include <linux/fs.h> /* everything() */

 extern int register_test_notifier(struct notifier_block *nb);
 #define TESTCHAIN_INIT 0x52U

 /* realize the notifier_call func */
 int test_init_event(struct notifier_block *nb, unsigned long event, void *v)
     switch (event){
         case TESTCHAIN_INIT:
             printk(KERN_DEBUG "I got the chain event: test_chain_2 is on the way of init\n");
     return NOTIFY_DONE;
 /* define a notifier_block */
 static struct notifier_block test_init_notifier = {
     .notifier_call = test_init_event,
 static int __init test_chain_1_init(void)
     printk(KERN_DEBUG "I'm in test_chain_1\n");
     register_test_notifier(&test_init_notifier); // 由chain_0提供的设施
     return 0;

 static void __exit test_chain_1_exit(void)
     printk(KERN_DEBUG "Goodbye to test_clain_l\n");



 /* test_chain_2.c:发出通知链事件*/

 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h> /* printk() */
 #include <linux/fs.h> /* everything() */

 extern int call_test_notifiers(unsigned long val, void *v);
 #define TESTCHAIN_INIT 0x52U

 static int __init test_chain_2_init(void)
     printk(KERN_DEBUG "I'm in test_chain_2\n");
     call_test_notifiers(TESTCHAIN_INIT, "no_use");
     return 0;

 static void __exit test_chain_2_exit(void)
     printk(KERN_DEBUG "Goodbye to test_chain_2\n");



 # Makefile

 # Comment/uncomment the following line to disable/enable debugging
 # DEBUG = y

 # Add your debugging flag (or not) to CFLAGS
 ifeq ($(DEBUG),y)
  DEBFLAGS = -O -g -DSCULL_DEBUG # "-O" is needed to expand inlines

 ifneq ($(KERNELRELEASE),)
 # call from kernel build system

 obj-m := test_chain_0.o test_chain_1.o test_chain_2.o


 KERNELDIR ?= /lib/modules/$(shell uname -r)/build
 PWD   := $(shell pwd)

   $(MAKE) -C $(KERNELDIR) M=$(PWD) modules


   rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions

 depend .depend dep:
   $(CC) $(CFLAGS) -M *.c > .depend

 ifeq (.depend,$(wildcard .depend))
 include .depend

 [wang2@iwooing: notifier_chian]$ sudo insmod ./test_chain_0.ko
 [wang2@iwooing: notifier_chian]$ sudo insmod ./test_chain_1.ko
 [wang2@iwooing: notifier_chian]$ sudo insmod ./test_chain_2.ko

 [wang2@iwooing: notifier_chian]$ dmesg

 [ 5950.112649] I'm in test_chain_0
 [ 5956.766610] I'm in test_chain_1
 [ 5962.570003] I'm in test_chain_2
 [ 5962.570008] I got the chain event: test_chain_2 is on the way of init

 [ 6464.042975] Goodbye to test_chain_2
 [ 6466.368030] Goodbye to test_clain_l
 [ 6468.371479] Goodbye to test_chain_0

来源: 内核通知链原理及机制

   内核源码参考: include/linux/notifier.h   
 struct notifier_block {
  int (*notifier_call)(struct notifier_block *, unsigned long, void *);
  struct notifier_block __rcu *next;
  int priority;
 * Notifier chain core routines. The exported routines below
 * are layered on top of these, with appropriate locking added.
static int notifier_chain_register(struct notifier_block **nl,
        struct notifier_block *n)
    while ((*nl) != NULL) {
        if (n->priority > (*nl)->priority)
        nl = &((*nl)->next);
    n->next = *nl;
    rcu_assign_pointer(*nl, n);
    return 0;
 * notifier_call_chain - Informs the registered notifiers about an event.
 * @nl:       Pointer to head of the blocking notifier chain
 * @val:      Value passed unmodified to notifier function
 * @v:    Pointer passed unmodified to notifier function
 * @nr_to_call:   Number of notifier functions to be called. Don't care
 *         value of this parameter is -1.
 * @nr_calls: Records the number of notifications sent. Don't care
 *         value of this field is NULL.
 * @returns:  notifier_call_chain returns the value returned by the
 *         last notifier function called.
static int __kprobes notifier_call_chain(struct notifier_block **nl,
                    unsigned long val, void *v,
                    int nr_to_call, int *nr_calls)
    int ret = NOTIFY_DONE;
    struct notifier_block *nb, *next_nb;
    nb = rcu_dereference_raw(*nl);
    while (nb && nr_to_call) {
        next_nb = rcu_dereference_raw(nb->next);
        if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
            WARN(1, "Invalid notifier called!");
            nb = next_nb;
        ret = nb->notifier_call(nb, val, v);
        if (nr_calls)
        if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
        nb = next_nb;
    return ret;
 * Notifier chains are of four types:
 * Atomic notifier chains: Chain callbacks run in interrupt/atomic
 * context. Callouts are not allowed to block.
 * Blocking notifier chains: Chain callbacks run in process context.
 * Callouts are allowed to block.
 * Raw notifier chains: There are no restrictions on callbacks,
 * registration, or unregistration.  All locking and protection
 * must be provided by the caller.
 * SRCU notifier chains: A variant of blocking notifier chains, with
 * the same restrictions.
 * atomic_notifier_chain_register() may be called from an atomic context,
 * but blocking_notifier_chain_register() and srcu_notifier_chain_register()
 * must be called from a process context.  Ditto for the corresponding
 * _unregister() routines.
 * atomic_notifier_chain_unregister(), blocking_notifier_chain_unregister(),
 * and srcu_notifier_chain_unregister() _must not_ be called from within
 * the call chain.
 * SRCU notifier chains are an alternative form of blocking notifier chains.
 * They use SRCU (Sleepable Read-Copy Update) instead of rw-semaphores for
 * protection of the chain links.  This means there is _very_ low overhead
 * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
 * As compensation, srcu_notifier_chain_unregister() is rather expensive.
 * SRCU notifier chains should be used when the chain will be called very
 * often but notifier_blocks will seldom be removed.  Also, SRCU notifier
 * chains are slightly more difficult to use because they require special
 * runtime initialization.

1. 原子通知链( Atomic notifier chains):通知链元素的回调函数(当事件发生时要执行的函数)在中断或原子操作上下文中运行,不允许阻塞
   struct atomic_notifier_head {
     spinlock_t lock;
     struct notifier_block __rcu *head;
 2. 可阻塞通知链( Blocking notifier chains):通知链元素的回调函数在进程上下文中运行,允许阻塞
   struct blocking_notifier_head {
     struct rw_semaphore rwsem;
     struct notifier_block __rcu *head;
 3. 原始通知链( Raw notifier chains):对通知链元素的回调函数没有任何限制,所有锁和保护机制都由调用者维护
   struct raw_notifier_head {
     struct notifier_block __rcu *head;
4. SRCU 通知链( SRCU notifier chains ):可阻塞通知链的一种变体
   struct srcu_notifier_head {
     struct mutex mutex;
     struct srcu_struct srcu;
     struct notifier_block __rcu *head;
这里我们并不会逐个分析,只分析原始通知链( Raw notifier chains). 
  死亡提醒 通过register_die_notifier注册:当内核函数触发陷阱或异常(例如由oops、页错误或断点命中引发)时发送。例如,在为一块医疗级板卡编写设备驱动时,你可能希望把自己注册到死亡通知链上,以便在内核崩溃发生时关闭医疗电子设备。
  网络设备提醒  通过register_netdevice_notifier注册:网络接口启动或关闭时产生。
   CPU频率提醒  通过cpufreq_register_notifier注册:当处理器频率跃变时分发出去。
   因特网地址提醒 通过register_inetaddr_notifier注册:当网络接口的IP地址发生变化被检测时发送
 * netif_carrier_on - set carrier
 * @dev: network device
 * Device has detected that carrier.
void netif_carrier_on(struct net_device *dev)
    if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
        if (dev->reg_state == NETREG_UNINITIALIZED)
        if (netif_running(dev))
void linkwatch_fire_event(struct net_device *dev)
   bool urgent = linkwatch_urgent_event(dev);
   if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
   } else if (!urgent)
linkwatch_add_event(dev);和 linkwatch_schedule_work(urgent);
从上面的代码可以看到,在调用linkwatch_add_event(dev)之前,先做了一些有助于后续处理的准备工作:
bool urgent = linkwatch_urgent_event(dev);
static void linkwatch_add_event(struct net_device *dev)
    unsigned long flags;
    spin_lock_irqsave(&lweventlist_lock, flags);
    if (list_empty(&dev->link_watch_list)) {
        list_add_tail(&dev->link_watch_list, &lweventlist);
    spin_unlock_irqrestore(&lweventlist_lock, flags);
static void linkwatch_schedule_work(int urgent)
    unsigned long delay = linkwatch_nextevent - jiffies;
    if (test_bit(LW_URGENT, &linkwatch_flags))
    /* Minimise down-time: drop delay for up event. */
    if (urgent) {
        if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
        delay = 0;
    /* If we wrap around we'll delay it by at most HZ. */
    if (delay > HZ)
        delay = 0;
     * This is true if we've scheduled it immeditately or if we don't
     * need an immediate execution and it's already pending.
    if (schedule_delayed_work(&linkwatch_work, delay) == !delay)
    /* Don't bother if there is nothing urgent. */
    if (!test_bit(LW_URGENT, &linkwatch_flags))
    /* It's already running which is good enough. */
    if (!__cancel_delayed_work(&linkwatch_work))
    /* Otherwise we reschedule it again for immediate execution. */
    schedule_delayed_work(&linkwatch_work, 0);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); 
static void linkwatch_event(struct work_struct *dummy)
  __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
static void __linkwatch_run_queue(int urgent_only)
    struct net_device *dev;
     * Limit the number of linkwatch events to one
     * per second so that a runaway driver does not
     * cause a storm of messages on the netlink
     * socket. This limit does not apply to up events
     * while the device qdisc is down.
    if (!urgent_only)
        linkwatch_nextevent = jiffies + HZ;
    /* Limit wrap-around effect on delay. */
    else if (time_after(linkwatch_nextevent, jiffies + HZ))
        linkwatch_nextevent = jiffies;
    clear_bit(LW_URGENT, &linkwatch_flags);
    list_splice_init(&lweventlist, &wrk);
    while (!list_empty(&wrk)) {
        dev = list_first_entry(&wrk, struct net_device, link_watch_list);
        if (urgent_only && !linkwatch_urgent_event(dev)) {
            list_add_tail(&dev->link_watch_list, &lweventlist);
    if (!list_empty(&lweventlist))
 * netdev_state_change - device changes state
 * @dev: device to cause notification
 * Called to indicate a device has changed state. This function calls
 * the notifier chains for netdev_chain and sends a NEWLINK message
 * to the routing socket.

void netdev_state_change(struct net_device *dev)
     if (dev->flags & IFF_UP) {
         call_netdevice_notifiers(NETDEV_CHANGE, dev);
         rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
这里我们只关心call_netdevice_notifiers(NETDEV_CHANGE, dev);
 * call_netdevice_notifiers - call all network notifier blocks
 *    @val: value passed unmodified to notifier function
 *    @dev: net_device pointer passed unmodified to notifier function
 * Call all network notifier blocks.  Parameters and return value
 * are as for raw_notifier_call_chain().

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
   return raw_notifier_call_chain(&netdev_chain, val, dev);
 * dev_open - prepare an interface for use.
 * @dev: device to open
 * Takes a device from down to up state. The device's private open
 * function is invoked and then the multicast lists are loaded. Finally
 * the device is moved into the up state and a %NETDEV_UP message is
 * sent to the netdev notifier chain.
 * Calling this function on an active interface is a nop. On a failure
 * a negative errno code is returned.

int dev_open(struct net_device *dev)
     int ret;

     if (dev->flags & IFF_UP)
          return 0;

     ret = __dev_open(dev);
     if (ret < 0)
          return ret;

     rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
     call_netdevice_notifiers(NETDEV_UP, dev);

     return ret;

    转藏 分享 献花(0



    请遵守用户 评论公约

    类似文章 更多