分享

linux网桥代码分析

 A_Geek 2013-07-06

网桥?路由器的老爸是也。当然,一代要比一代强:路由器拥有多端口、自适应,以及各种其他更好的性能,但去掉这些形容词之后,它仍然只是个“网桥”。

网桥涉及多种协议,例如STP(生成树协议)。由网桥、终端等构成的网络是个闭合的拓扑图,自然会有很多回环。数据包当然不能无休止地转圈圈,所以需要在逻辑上把这个图修剪成无环路的树型网络。

首先从init开始:

/*
 * Bridge module init: performs the registration steps below in order and
 * installs the deviceless ioctl stub. Returns 0 on success; on the first
 * failing step it returns err (directly for the STP step, via goto otherwise).
 * NOTE(review): the err_out..err_out4 labels targeted by the gotos are not
 * shown in this listing; the unwind path is presumably elided - confirm
 * against the full source.
 */
01 static int __init br_init(void)
02 {
03 int err;
04 err = stp_proto_register(&br_stp_proto);
05 if (err < 0) {
06 pr_err("bridge: can't register sap for STP\n");
07 return err;
08 }
09 err = br_fdb_init();  //original note: kmem_cache_create
10 if (err)
11 goto err_out;
12 err = register_pernet_subsys(&br_net_ops);
13 if (err)
14 goto err_out1;
15 //bridge netfilter handling
16 err = br_netfilter_init();
17 if (err)
18 goto err_out2;
19 //register on the netdev_chain notifier list
20 err = register_netdevice_notifier(&br_device_notifier);
21 if (err)
22 goto err_out3;
23 err = br_netlink_init();
24 if (err)
25 goto err_out4;
26 //function invoked for ioctl calls coming from user space
27 brioctl_set(br_ioctl_deviceless_stub); //-->b:
28 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
29 br_fdb_test_addr_hook = br_fdb_test_addr;
30 #endif
31 return 0;
32 }

  <b>


/*
 * Handle bridge ioctls that are not tied to an existing bridge device.
 *
 * @net:  network namespace the request applies to
 * @cmd:  ioctl command number
 * @uarg: user-space argument pointer
 *
 * SIOCGIFBR/SIOCSIFBR are delegated to old_deviceless().
 * SIOCBRADDBR/SIOCBRDELBR copy an interface name from user space and
 * create or delete the bridge of that name.
 *
 * Returns the callee's result, -EPERM without CAP_NET_ADMIN, -EFAULT if the
 * name cannot be copied, or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user * uarg)
{
	switch (cmd) {
	case SIOCGIFBR:
	case SIOCSIFBR:
		return old_deviceless(net, uarg);

	/* create a bridge */
	case SIOCBRADDBR:
	/* delete a bridge */
	case SIOCBRDELBR:
	{
		char buf[IFNAMSIZ];

		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(buf, uarg, IFNAMSIZ))
			return -EFAULT;

		/* user data is not trusted to be NUL-terminated */
		buf[IFNAMSIZ-1] = 0;
		if (cmd == SIOCBRADDBR)
			return br_add_bridge(net, buf); //-->c:

		return br_del_bridge(net, buf);
	}
	}	/* end of switch: this brace was missing, leaving the function unclosed */

	return -EOPNOTSUPP;
}

  <c>

/*
 * Create and register a new bridge device called @name in namespace @net.
 * Returns 0 on success, -ENOMEM if new_bridge_dev() fails, or the error from
 * dev_alloc_name() / register_netdevice() / br_sysfs_addbr().
 * Registration is done with the RTNL lock held.
 * NOTE(review): the out_free path calls free_netdev() only; whether that also
 * releases br->stats allocated in new_bridge_dev() is not visible here -
 * confirm.
 */
int br_add_bridge(struct net *net, const char *name)
{
struct net_device *dev;
int ret;

//allocate a net_device for the virtual bridge
dev = new_bridge_dev(net, name); //-->d:
if (!dev)
return -ENOMEM;

rtnl_lock();
if (strchr(dev->name, '%')) {
ret
= dev_alloc_name(dev, dev->name); //name is a template: let the kernel pick a concrete name
if (ret < 0)
goto out_free;
}

SET_NETDEV_DEVTYPE(dev,
&br_type);

ret
= register_netdevice(dev); //then register the network device
if (ret)
goto out_free;

ret
= br_sysfs_addbr(dev); //create the related sysfs entries
if (ret)
unregister_netdevice(dev);
out:
rtnl_unlock();
return ret;

out_free:
free_netdev(dev);
goto out;
}

  <d>

/*
 * Allocate a net_device whose private area is a struct net_bridge and fill in
 * the bridge's initial state (locks, port list, STP defaults, timers,
 * multicast). Returns the new device, or NULL if either the device or the
 * per-cpu stats allocation fails.
 */
static struct net_device *new_bridge_dev(struct net *net, const char *name)
{
struct net_bridge *br;
struct net_device *dev;

dev
= alloc_netdev(sizeof(struct net_bridge), name,
br_dev_setup);
//-->e:

if (!dev)
return NULL;
dev_net_set(dev, net);

br
= netdev_priv(dev); //get the device's private area
br->dev = dev;

br
->stats = alloc_percpu(struct br_cpu_netstats);
if (!br->stats) {
free_netdev(dev);
return NULL;
}

spin_lock_init(
&br->lock);

//list initialisation: port_list holds the ports attached to this bridge
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(
&br->hash_lock);

//STP-related defaults
br->bridge_id.prio[0] = 0x80;
br
->bridge_id.prio[1] = 0x00;

memcpy(br
->group_addr, br_group_address, ETH_ALEN);

br
->feature_mask = dev->features;
br
->stp_enabled = BR_NO_STP;
br
->designated_root = br->bridge_id;
br
->root_path_cost = 0;
br
->root_port = 0;
br
->bridge_max_age = br->max_age = 20 * HZ;
br
->bridge_hello_time = br->hello_time = 2 * HZ;
br
->bridge_forward_delay = br->forward_delay = 15 * HZ;
br
->topology_change = 0;
br
->topology_change_detected = 0;
br
->ageing_time = 300 * HZ;

br_netfilter_rtable_init(br);

br_stp_timer_init(br);
br_multicast_init(br);

return dev;
}

  该函数主要是为*br (struct net_bridge) 赋值,但首先要初始化 dev (struct net_device)。

  <e>

/*
 * Setup callback passed to alloc_netdev() for bridge devices: assigns an
 * address, applies the generic Ethernet setup and installs the bridge's ops,
 * destructor, flags and feature bits.
 */
void br_dev_setup(struct net_device *dev)
{
//give the bridge a random MAC address (the original note said "set to zero",
//which does not match the random_ether_addr() call)
random_ether_addr(dev->dev_addr);
//initialise the Ethernet-specific part of dev
ether_setup(dev);
dev
->netdev_ops = &br_netdev_ops;  //***

dev
->destructor = br_dev_free;
SET_ETHTOOL_OPS(dev,
&br_ethtool_ops);
dev
->tx_queue_len = 0;
dev
->priv_flags = IFF_EBRIDGE;

dev
->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK
| NETIF_F_NO_CSUM | NETIF_F_LLTX |
NETIF_F_NETNS_LOCAL
| NETIF_F_GSO | NETIF_F_HW_VLAN_TX;
}

代码中的网络处理函数部分:

接口添进网桥时,用户空间调用ioctl(br_socket_fd, SIOCBRADDIF, &ifr)

dev->netdev_ops = &br_netdev_ops中,回调函数:

/*
 * Excerpt of the bridge's net_device_ops table: only the ioctl hook is shown,
 * the other members are elided in this listing ("... ...").
 */
static const struct net_device_ops br_netdev_ops = {
... ...
.ndo_do_ioctl
= br_dev_ioctl,
... ...
}

  具体的网桥ioctl :

/*
 * Per-bridge ioctl entry point (installed as .ndo_do_ioctl).
 *
 * SIOCDEVPRIVATE is handed to old_dev_ioctl(); SIOCBRADDIF / SIOCBRDELIF add
 * or remove the interface identified by rq->ifr_ifindex. Every other command
 * is logged and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	/* add or delete a port, depending on which of the two commands it is */
	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF); //-->f:

	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}

  <f>

/*
 * Add (isadd != 0) or remove (isadd == 0) the device with index @ifindex
 * to/from bridge @br. Called with RTNL.
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no device has that index
 * in the bridge's namespace, otherwise the result of br_add_if()/br_del_if().
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	port_dev = __dev_get_by_index(dev_net(br->dev), ifindex);
	if (!port_dev)
		return -EINVAL;

	/* isadd is true when the ioctl command was SIOCBRADDIF */
	return isadd ? br_add_if(br, port_dev)	//-->g:
		     : br_del_if(br, port_dev);
}

  <g>

/*
 * Attach device @dev to bridge @br as a new port.
 * Returns 0 on success. Rejects unsuitable devices with -EINVAL (loopback /
 * non-Ethernet), -ELOOP (another bridge), -EBUSY (already a port) or
 * -EOPNOTSUPP (IFF_DONT_BRIDGE); later failures return the failing step's
 * error after unwinding through the err4..put_back labels.
 * NOTE(review): err4 calls netdev_set_master(dev, NULL) but no matching
 * netdev_set_master(dev, br->dev) appears in this listing - presumably elided;
 * confirm against the full source.
 */
01 int br_add_if(struct net_bridge *br, struct net_device *dev)
02 {
03 struct net_bridge_port *p;
04 int err = 0;
05 bool changed_addr;
06 /* Don't allow bridging non-ethernet like devices */
07 if ((dev->flags & IFF_LOOPBACK) ||
08 dev->type != ARPHRD_ETHER ||
09 dev->addr_len != ETH_ALEN)
10 return -EINVAL;
11 /* No bridging of bridges */
12 if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
13 return -ELOOP;
14 /* Device is already being bridged */
15 if (br_port_exists(dev))
16 return -EBUSY;
17 /* No bridging devices that dislike that (e.g. wireless) */
18 if (dev->priv_flags & IFF_DONT_BRIDGE)
19 return -EOPNOTSUPP;
20 //create the net_bridge_port for this interface
21 p = new_nbp(br, dev); //-->h:
22 if (IS_ERR(p))
23 return PTR_ERR(p);
24 //put the interface into promiscuous mode
25 err = dev_set_promiscuity(dev, 1);
26 if (err)
27 goto put_back;
28 err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
29 SYSFS_BRIDGE_PORT_ATTR);
30 if (err)
31 goto err0;
32 //update the port->MAC table with the interface's own address
33 err = br_fdb_insert(br, p, dev->dev_addr); //-->i:
34 if (err)
35 goto err1;
36 err = br_sysfs_addif(p);
37 if (err)
38 goto err2;
39 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
40 goto err3;
41 err = netdev_rx_handler_register(dev, br_handle_frame, p); //-->k:
42 if (err)
43 goto err4;
44 dev->priv_flags |= IFF_BRIDGE_PORT;
45 dev_disable_lro(dev);
46 list_add_rcu(&p->list, &br->port_list);
47 spin_lock_bh(&br->lock);
48 changed_addr = br_stp_recalculate_bridge_id(br); //-->j:
49 br_features_recompute(br);
50 if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
51 (br->dev->flags & IFF_UP))
52 br_stp_enable_port(p);
53 spin_unlock_bh(&br->lock);
54 br_ifinfo_notify(RTM_NEWLINK, p);
55 if (changed_addr)
56 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
57 dev_set_mtu(br->dev, br_min_mtu(br));
58 kobject_uevent(&p->kobj, KOBJ_ADD);
59 return 0;
60 err4:
61 netdev_set_master(dev, NULL);
62 err3:
63 sysfs_remove_link(br->ifobj, p->dev->name);
64 err2:
65 br_fdb_delete_by_port(br, p, 1);
66 err1:
67 kobject_put(&p->kobj);
68 p = NULL; /* kobject_put frees */
69 err0:
70 dev_set_promiscuity(dev, -1);
71 put_back:
72 dev_put(dev);
73 kfree(p);
74 return err;
75 }

  <h>

/*
 * Create the net_bridge_port for an interface.
 * Picks a port number with find_portno(), allocates a zeroed port structure,
 * takes a reference on @dev and fills in the port's initial fields.
 * Returns the new port, or an ERR_PTR carrying find_portno()'s error or
 * -ENOMEM.
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br,
struct net_device *dev)
{
int index;
struct net_bridge_port *p;

index
= find_portno(br);
if (index < 0)
return ERR_PTR(index);

p
= kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return ERR_PTR(-ENOMEM);

p
->br = br;
dev_hold(dev);
p
->dev = dev;
p
->path_cost = port_cost(dev);
p
->priority = 0x8000 >> BR_PORT_BITS;
p
->port_no = index;
p
->flags = 0;
br_init_port(p);
/* the port starts out disabled */
p
->state = BR_STATE_DISABLED;
br_stp_port_timer_init(p);
br_multicast_add_port(p);

return p;
}

之后,把要加入的接口及其对应的MAC地址,作为本机静态项加入到port—MAC对应表。

<i>

/*
 * Locked wrapper around fdb_insert(): takes br->hash_lock (bottom halves
 * disabled) for the duration of the insert and returns fdb_insert()'s result.
 */
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
		  const unsigned char *addr)
{
	int status;

	spin_lock_bh(&br->hash_lock);
	status = fdb_insert(br, source, addr);	//-->
	spin_unlock_bh(&br->hash_lock);

	return status;
}

/*
 * Insert @addr as a local entry for port @source.
 * First checks whether an entry for the address already exists:
 *  - if it exists and is local, nothing is done and 0 is returned;
 *  - if it exists and is not local, a warning is logged and the entry is
 *    deleted, then a fresh entry is created below.
 * If no entry exists, a new one is created with fdb_create() and placed in
 * the hash bucket (the "CAM table" in the article's terms).
 * Returns -EINVAL for an invalid address, -ENOMEM if fdb_create() fails.
 */
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr)];
struct net_bridge_fdb_entry *fdb;

//check that this is a valid MAC address
if (!is_valid_ether_addr(addr))
return -EINVAL;

fdb
= fdb_find(head, addr);
if (fdb) {
/* it is okay to have multiple ports with same
* address, just use the first one.
*/
if (fdb->is_local)
return 0;
br_warn(br,
"adding interface %s with same address "
"as a received packet\n",
source
->dev->name);
fdb_delete(fdb);
}

if (!fdb_create(head, source, addr, 1))
return -ENOMEM;

return 0;
}

  <j>

/*
 * Recompute the bridge address as the lowest (by memcmp) MAC address among
 * the bridge's ports and apply it if it differs from the current one.
 * Returns true if the bridge id was changed, false if the user pinned the
 * address (BR_SET_MAC_ADDR) or the result equals the current address.
 * With an empty port list addr stays at the br_mac_zero sentinel.
 */
bool br_stp_recalculate_bridge_id(struct net_bridge *br)
{
const unsigned char *br_mac_zero =
(
const unsigned char *)br_mac_zero_aligned;
const unsigned char *addr = br_mac_zero;
struct net_bridge_port *p;

/* user has chosen a value so keep it */
if (br->flags & BR_SET_MAC_ADDR)
return false;

//walk every port attached to the bridge, keeping the smallest address seen
list_for_each_entry(p, &br->port_list, list) {
if (addr == br_mac_zero ||
memcmp(p
->dev->dev_addr, addr, ETH_ALEN) < 0)
addr
= p->dev->dev_addr;

}

//if the chosen address equals the bridge's current MAC there is nothing to do
if (compare_ether_addr(br->bridge_id.addr, addr) == 0)
return false; /* no change */

br_stp_change_bridge_id(br, addr);  //-->
return true;
}

  遍历桥对应的所有接口,然后取最小的MAC。然后判断最小MAC跟现在的MAC是否相同。

/*
 * Switch the bridge to address @addr: updates bridge_id.addr and the bridge
 * device's dev_addr, rewrites any per-port designated bridge/root address
 * that still equals the old address, then re-runs the STP configuration
 * update and port state selection. If the bridge is the root afterwards but
 * was not before, br_become_root_bridge() is called.
 */
void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
{
/* should be aligned on 2 bytes for compare_ether_addr() */
unsigned
short oldaddr_aligned[ETH_ALEN >> 1];
unsigned
char *oldaddr = (unsigned char *)oldaddr_aligned;
struct net_bridge_port *p;
int wasroot;

wasroot
= br_is_root_bridge(br);

/* remember the old address before overwriting it */
memcpy(oldaddr, br
->bridge_id.addr, ETH_ALEN);
memcpy(br
->bridge_id.addr, addr, ETH_ALEN);

//at this point the bridge device's MAC address is updated
memcpy(br->dev->dev_addr, addr, ETH_ALEN);

list_for_each_entry(p,
&br->port_list, list) {
if (!compare_ether_addr(p->designated_bridge.addr, oldaddr))
memcpy(p
->designated_bridge.addr, addr, ETH_ALEN);

if (!compare_ether_addr(p->designated_root.addr, oldaddr))
memcpy(p
->designated_root.addr, addr, ETH_ALEN);

}

br_configuration_update(br);
br_port_state_selection(br);
if (br_is_root_bridge(br) && !wasroot)
br_become_root_bridge(br);
}

  以上是大致的网桥配置过程。配置好之后,便是发送和接收数据,这里先瞧一眼网桥接收数据的实现。

<k>

/*
 * Install @rx_handler (with its private @rx_handler_data) on @dev.
 * Asserts that RTNL is held. Returns -EBUSY if the device already has a
 * handler, 0 otherwise. The data pointer is published before the handler
 * pointer.
 */
int netdev_rx_handler_register(struct net_device *dev,
rx_handler_func_t *rx_handler,
void *rx_handler_data)
{
ASSERT_RTNL();
if (dev->rx_handler)
return -EBUSY;
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler); //the callback invoked on receive
return 0;
}

  利用回调,实际的数据处理函数便是:br_handle_frame

/*
 * Receive-path handler for frames arriving on a bridge port.
 * Returns RX_HANDLER_PASS when the frame should continue through normal
 * processing (loopback packets, link-local frames not consumed by the
 * NF_BR_LOCAL_IN hook, frames claimed by br_should_route_hook) and
 * RX_HANDLER_CONSUMED when the bridge took or dropped the frame.
 * NOTE(review): p is dereferenced without a NULL check; presumably this
 * handler is only installed on devices that are bridge ports - confirm.
 */
rx_handler_result_t br_handle_frame(
struct sk_buff **pskb)
{
struct net_bridge_port *p;
struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest; //destination MAC address
br_should_route_hook_t *rhook;

if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
return RX_HANDLER_PASS;

if (!is_valid_ether_addr(eth_hdr(skb)->h_source))//drop frames with an invalid source MAC (original note: multicast or broadcast source)
goto drop;

skb
= skb_share_check(skb, GFP_ATOMIC);
if (!skb)
return RX_HANDLER_CONSUMED;

p
= br_port_get_rcu(skb->dev);

if (unlikely(is_link_local(dest))) {
/* Pause frames shouldn't be passed up by driver anyway */
if (skb->protocol == htons(ETH_P_PAUSE))
goto drop;

/* If STP is turned off, then forward */
if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
goto forward;

if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
NULL, br_handle_local_finish)) {
return RX_HANDLER_CONSUMED; /* consumed by filter */
}
else {
*pskb = skb;
return RX_HANDLER_PASS; /* continue processing */
}
}

forward:
switch (p->state) {
case BR_STATE_FORWARDING: //port is in the forwarding state
rhook = rcu_dereference(br_should_route_hook);
if (rhook) {
if ((*rhook)(skb)) {
*pskb = skb;
return RX_HANDLER_PASS;
}
/* re-read the destination after the hook has run */
dest
= eth_hdr(skb)->h_dest;
}
/* fall through */
case BR_STATE_LEARNING: //port is in the learning state
/* frames addressed to the bridge device itself are marked PACKET_HOST */
if (!compare_ether_addr(p->br->dev->dev_addr, dest))
skb
->pkt_type = PACKET_HOST;

NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb
->dev, NULL,
br_handle_frame_finish);
//-->
break;
default:
/* also the target of the "goto drop" statements above */
drop:
kfree_skb(skb);
}
return RX_HANDLER_CONSUMED;
}

  br_handle_frame_finish: 正常的数据包会流进br_handle_frame_finish()进行处理 :

/*
 * Second stage of bridge input, invoked via the NF_BR_PRE_ROUTING hook.
 * Learns the source address, then decides whether the frame is delivered
 * locally (skb2), forwarded to a known port, or flooded. Always returns 0
 * except when a frame is passed up, in which case it returns
 * br_pass_frame_up()'s result.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
//fetch the destination MAC address
const unsigned char *dest = eth_hdr(skb)->h_dest;
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
struct net_bridge *br;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
struct sk_buff *skb2;

if (!p || p->state == BR_STATE_DISABLED)
goto drop;

/* insert into forwarding database after filtering to avoid spoofing */
br
= p->br;
br_fdb_update(br, p, eth_hdr(skb)
->h_source);

if (is_multicast_ether_addr(dest) &&
br_multicast_rcv(br, p, skb))
goto drop;

/* a learning port updates the fdb (above) but does not forward */
if (p->state == BR_STATE_LEARNING)
goto drop;

BR_INPUT_SKB_CB(skb)
->brdev = br->dev;

/* The packet skb2 goes to the local host (NULL to skip). */
skb2
= NULL;

/* If the bridge's virtual device is in promiscuous mode, every received
 * frame is also delivered locally. (Original note: so that AF_PACKET
 * handlers on the ptype_all chain in net_rx_action see it.) This function
 * only aliases skb2 to skb; any cloning presumably happens in the callees
 * - TODO confirm. */
if (br->dev->flags & IFF_PROMISC)
skb2
= skb;

dst
= NULL;

if (is_multicast_ether_addr(dest)) { //multicast or broadcast destination: may also be passed up to the local stack
mdst = br_mdb_get(br, skb);
if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
if ((mdst && mdst->mglist) ||
br_multicast_is_router(br))
skb2
= skb;
br_multicast_forward(mdst, skb, skb2);
skb
= NULL;
if (!skb2)
goto out;
}
else
skb2
= skb;

br
->dev->stats.multicast++;
}
else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { //forwarding-table (CAM) lookup: a local entry means the frame goes to the upper-layer protocols
skb2 = skb;
/* Do not forward the packet since it's local. */
skb
= NULL;
}

if (skb) {
if (dst)
br_forward(dst
->dst, skb, skb2); //not for the local host: forward to the port recorded in the entry
else
br_flood_forward(br, skb, skb2);
//no entry found: flood (original note: send on all the other ports)
}

if (skb2)
return br_pass_frame_up(skb2);

out:
return 0;
drop:
kfree_skb(skb);
goto out;
}

  该函数,通过查找CAM表,取得发送端口,如果当前CAM表里没有到目的MAC的端口,则在其它端口上都发送此数据包。

在这个函数里,我们看到,查询CAM表的函数为:__br_fdb_get()

/*
 * Look up @addr in the bridge's forwarding database.
 *
 * Scans the hash bucket selected by br_mac_hash(addr) and stops at the first
 * entry whose address matches. Returns that entry, or NULL if it has expired
 * or if no entry matches.
 */
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
					  const unsigned char *addr)
{
	struct hlist_head *bucket = &br->hash[br_mac_hash(addr)];
	struct hlist_node *node;
	struct net_bridge_fdb_entry *entry;

	hlist_for_each_entry_rcu(entry, node, bucket, hlist) {
		/* compare_ether_addr() is non-zero on mismatch: keep scanning */
		if (compare_ether_addr(entry->addr.addr, addr))
			continue;

		/* first match decides the result; an expired entry counts as a miss */
		if (unlikely(has_expired(br, entry)))
			return NULL;
		return entry;
	}

	return NULL;
}

首先取得目的MAC对应的哈希项。

然后再遍历其中的表项,查看是否含有目的地址的项;若找到且未过期,则返回该fdb项,否则返回NULL。
如果是送给本机的数据包,则传至上层协议,
如不是,则需要转发

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多