网桥?路由器的老爸是也。当然,一代要比一代强,路由器是一种多端口,自适应,再加上各种其他更好的性能,但这些形容词之后,仍然只是个“网桥”。
网桥涉及的主要协议是 STP(生成树协议)。网桥、终端等构成的网络是一个闭合的拓扑图,其中自然会存在很多环路。数据包当然不能无休止地在环路里转圈,所以需要在这个图上建立一个逻辑拓扑,也就是把它修剪成无环路的树型网络。
首先从init开始:
01 |
static int __init br_init( void ) |
04 |
err = stp_proto_register(&br_stp_proto); |
06 |
pr_err( "bridge: can't register sap for STP\n" ); |
12 |
err = register_pernet_subsys(&br_net_ops); |
16 |
err = br_netfilter_init(); |
20 |
err = register_netdevice_notifier(&br_device_notifier); |
23 |
err = br_netlink_init(); |
27 |
brioctl_set(br_ioctl_deviceless_stub); |
28 |
#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) |
29 |
br_fdb_test_addr_hook = br_fdb_test_addr; |
<b>
/*
 * Handle bridge ioctls that are issued without a bridge device.
 *
 * SIOCGIFBR / SIOCSIFBR are delegated to old_deviceless().
 * SIOCBRADDBR / SIOCBRDELBR create or delete the bridge whose name is
 * copied from user space (uarg); both require CAP_NET_ADMIN.
 *
 * Returns the delegate's result, -EPERM when the caller lacks
 * CAP_NET_ADMIN, -EFAULT when the name cannot be copied from user space,
 * or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	switch (cmd) {
	case SIOCGIFBR:
	case SIOCSIFBR:
		return old_deviceless(net, uarg);

	case SIOCBRADDBR:	/* create a bridge */
	case SIOCBRDELBR:	/* delete a bridge */
	{
		char buf[IFNAMSIZ];

		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(buf, uarg, IFNAMSIZ))
			return -EFAULT;

		/* The copied name is not guaranteed to be NUL-terminated. */
		buf[IFNAMSIZ - 1] = 0;

		if (cmd == SIOCBRADDBR)
			return br_add_bridge(net, buf);

		return br_del_bridge(net, buf);
	}
	}	/* end of switch: this brace was missing in the original listing */

	return -EOPNOTSUPP;
}
<c>
int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; int ret;
//为虚拟桥新建一个net_device dev = new_bridge_dev(net, name); //-->d: if (!dev) return -ENOMEM;
rtnl_lock(); if (strchr(dev->name, '%')) { ret = dev_alloc_name(dev, dev->name); //内核给分配个名字 if (ret < 0) goto out_free; }
SET_NETDEV_DEVTYPE(dev, &br_type);
ret = register_netdevice(dev); //然后注册该网络设备 if (ret) goto out_free;
ret = br_sysfs_addbr(dev); //sysfs中建立相关信息 if (ret) unregister_netdevice(dev); out: rtnl_unlock(); return ret;
out_free: free_netdev(dev); goto out; }
<d>
static struct net_device *new_bridge_dev(struct net *net, const char *name) { struct net_bridge *br; struct net_device *dev;
dev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup); //-->e:
if (!dev) return NULL; dev_net_set(dev, net);
br = netdev_priv(dev); //获得私有区间 br->dev = dev;
br->stats = alloc_percpu(struct br_cpu_netstats); if (!br->stats) { free_netdev(dev); return NULL; }
spin_lock_init(&br->lock);
//队列初始化。在port_list中保存了这个桥上的端口列表 INIT_LIST_HEAD(&br->port_list); spin_lock_init(&br->hash_lock);
//stp协议相关 br->bridge_id.prio[0] = 0x80; br->bridge_id.prio[1] = 0x00;
memcpy(br->group_addr, br_group_address, ETH_ALEN);
br->feature_mask = dev->features; br->stp_enabled = BR_NO_STP; br->designated_root = br->bridge_id; br->root_path_cost = 0; br->root_port = 0; br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; br->topology_change = 0; br->topology_change_detected = 0; br->ageing_time = 300 * HZ;
br_netfilter_rtable_init(br);
br_stp_timer_init(br); br_multicast_init(br);
return dev; }
该函数主要是为*br (struct net_bridge) 赋值,但首先要初始化 dev (struct net_device)。
<e>
void br_dev_setup(struct net_device *dev) { //将桥的MAC地址设为零 random_ether_addr(dev->dev_addr); //dev以太网部分初始化 ether_setup(dev); dev->netdev_ops = &br_netdev_ops; //***
dev->destructor = br_dev_free; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX; }
代码中的网络处理函数部分:
将接口添进网桥时,用户空间调用ioctl(br_socket_fd, SIOCBRADDIF,& ifr)
在 dev->netdev_ops = &br_netdev_ops中,回调函数:
static const struct net_device_ops br_netdev_ops = { ... ... .ndo_do_ioctl = br_dev_ioctl, ... ... }
具体的网桥ioctl :
int br_dev_ioctl( struct net_device *dev, struct ifreq *rq, int cmd) |
struct net_bridge *br = netdev_priv(dev); |
return old_dev_ioctl(dev, rq, cmd); |
return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF); |
br_debug(br, "Bridge does not support ioctl 0x%x\n" , cmd); |
<f>
/*
 * Add the interface with index @ifindex to bridge @br, or remove it.
 * Called with RTNL held.
 *
 * @isadd is non-zero for SIOCBRADDIF (add) and zero for removal.
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no device has that index
 * in the bridge's namespace, otherwise the result of br_add_if() or
 * br_del_if().
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	port_dev = __dev_get_by_index(dev_net(br->dev), ifindex);
	if (!port_dev)
		return -EINVAL;

	return isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);
}
<g>
01 |
int br_add_if( struct net_bridge *br, struct net_device *dev) |
03 |
struct net_bridge_port *p; |
07 |
if ((dev->flags & IFF_LOOPBACK) || |
08 |
dev->type != ARPHRD_ETHER || |
09 |
dev->addr_len != ETH_ALEN) |
12 |
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) |
15 |
if (br_port_exists(dev)) |
18 |
if (dev->priv_flags & IFF_DONT_BRIDGE) |
25 |
err = dev_set_promiscuity(dev, 1); |
28 |
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), |
29 |
SYSFS_BRIDGE_PORT_ATTR); |
33 |
err = br_fdb_insert(br, p, dev->dev_addr); |
36 |
err = br_sysfs_addif(p); |
39 |
if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) |
41 |
err = netdev_rx_handler_register(dev, br_handle_frame, p); |
44 |
dev->priv_flags |= IFF_BRIDGE_PORT; |
46 |
list_add_rcu(&p->list, &br->port_list); |
47 |
spin_lock_bh(&br->lock); |
48 |
changed_addr = br_stp_recalculate_bridge_id(br); |
49 |
br_features_recompute(br); |
50 |
if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && |
51 |
(br->dev->flags & IFF_UP)) |
52 |
br_stp_enable_port(p); |
53 |
spin_unlock_bh(&br->lock); |
54 |
br_ifinfo_notify(RTM_NEWLINK, p); |
56 |
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
57 |
dev_set_mtu(br->dev, br_min_mtu(br)); |
58 |
kobject_uevent(&p->kobj, KOBJ_ADD); |
61 |
netdev_set_master(dev, NULL); |
63 |
sysfs_remove_link(br->ifobj, p->dev->name); |
65 |
br_fdb_delete_by_port(br, p, 1); |
67 |
kobject_put(&p->kobj); |
70 |
dev_set_promiscuity(dev, -1); |
<h>
/*
 * Create the net_bridge_port that represents @dev as a port of @br.
 *
 * Takes a reference on @dev (dev_hold) and leaves the port in
 * BR_STATE_DISABLED with its STP timers and multicast state initialised.
 *
 * Returns the new port, ERR_PTR(err) when find_portno() fails with err,
 * or ERR_PTR(-ENOMEM) when the allocation fails.
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *port;
	int portno;

	portno = find_portno(br);
	if (portno < 0)
		return ERR_PTR(portno);

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	if (!port)
		return ERR_PTR(-ENOMEM);

	/* The port keeps a reference on the underlying device. */
	dev_hold(dev);
	port->br = br;
	port->dev = dev;

	port->path_cost = port_cost(dev);
	port->priority = 0x8000 >> BR_PORT_BITS;
	port->port_no = portno;
	port->flags = 0;

	/* Runs after priority and port number have been assigned. */
	br_init_port(port);
	port->state = BR_STATE_DISABLED;

	br_stp_port_timer_init(port);
	br_multicast_add_port(port);

	return port;
}
之后,把要加入的接口及其对应的 MAC 地址作为本机静态项,加入到端口—MAC 对应表(即 CAM 表)中。
<i>
/*
 * Insert @addr as an entry for port @source into the forwarding database
 * of @br. This is a locked wrapper: it runs fdb_insert() with
 * br->hash_lock held (bottom halves disabled) and returns its result.
 */
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr)
{
	int err;

	spin_lock_bh(&br->hash_lock);
	err = fdb_insert(br, source, addr);
	spin_unlock_bh(&br->hash_lock);

	return err;
}
/* * 此函数先判断要插入项是否存在, * 若是已存在,且不为静态项,具更新对应项。 * 若不存在该项,则分配一个net_bridge_fdb_entry,插入到CAM表 */ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr) { struct hlist_head *head = &br->hash[br_mac_hash(addr)]; struct net_bridge_fdb_entry *fdb;
//判断是否为有效的mac 地址 if (!is_valid_ether_addr(addr)) return -EINVAL;
fdb = fdb_find(head, addr); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. */ if (fdb->is_local) return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", source->dev->name); fdb_delete(fdb); }
if (!fdb_create(head, source, addr, 1)) return -ENOMEM;
return 0; }
<j>
bool br_stp_recalculate_bridge_id(struct net_bridge *br) { const unsigned char *br_mac_zero = (const unsigned char *)br_mac_zero_aligned; const unsigned char *addr = br_mac_zero; struct net_bridge_port *p;
/* user has chosen a value so keep it */ if (br->flags & BR_SET_MAC_ADDR) return false;
//遍历桥中所有的端口 list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) addr = p->dev->dev_addr;
}
//如果不与现在桥的MAC相同 if (compare_ether_addr(br->bridge_id.addr, addr) == 0) return false; /* no change */
br_stp_change_bridge_id(br, addr); //--> return true; }
遍历桥对应的所有接口,取其中最小的MAC,然后判断这个最小MAC与桥当前的MAC是否相同:相同则无需改动,返回false;不同则调用br_stp_change_bridge_id()更新桥的MAC,返回true。
/*
 * Change the bridge's address to @addr and propagate the change.
 *
 * Updates both the bridge id and the bridge device's MAC, rewrites every
 * port's designated bridge / designated root address that still equals the
 * old address, then re-runs the configuration update and port state
 * selection. If the bridge becomes root as a result, br_become_root_bridge()
 * is called.
 */
void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
{
	/* Should be aligned on 2 bytes for compare_ether_addr(). */
	unsigned short prev_aligned[ETH_ALEN >> 1];
	unsigned char *prev_addr = (unsigned char *)prev_aligned;
	struct net_bridge_port *port;
	int was_root;

	was_root = br_is_root_bridge(br);

	/* Remember the old address, then install the new one. */
	memcpy(prev_addr, br->bridge_id.addr, ETH_ALEN);
	memcpy(br->bridge_id.addr, addr, ETH_ALEN);

	/* From here on the bridge device itself carries the new MAC. */
	memcpy(br->dev->dev_addr, addr, ETH_ALEN);

	list_for_each_entry(port, &br->port_list, list) {
		if (compare_ether_addr(port->designated_bridge.addr, prev_addr) == 0)
			memcpy(port->designated_bridge.addr, addr, ETH_ALEN);

		if (compare_ether_addr(port->designated_root.addr, prev_addr) == 0)
			memcpy(port->designated_root.addr, addr, ETH_ALEN);
	}

	br_configuration_update(br);
	br_port_state_selection(br);

	if (!was_root && br_is_root_bridge(br))
		br_become_root_bridge(br);
}
以上是大致的网桥配置过程。配置好之后,便是收发数据,这里先瞧一眼网桥接收数据的实现。
<k>
int netdev_rx_handler_register( struct net_device *dev, |
rx_handler_func_t *rx_handler, |
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); |
rcu_assign_pointer(dev->rx_handler, rx_handler); |
利用回调,实际的数据处理函数便是:br_handle_frame
/*进行接收数据的处理*/ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { struct net_bridge_port *p; struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; //目的mac地址 br_should_route_hook_t *rhook;
if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS;
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))//源mac 为多播或者广播,丢弃 goto drop;
skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return RX_HANDLER_CONSUMED;
p = br_port_get_rcu(skb->dev);
if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) goto drop;
/* If STP is turned off, then forward */ if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) goto forward;
if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; return RX_HANDLER_PASS; /* continue processing */ } }
forward: switch (p->state) { case BR_STATE_FORWARDING: //状态为转发 rhook = rcu_dereference(br_should_route_hook); if (rhook) { if ((*rhook)(skb)) { *pskb = skb; return RX_HANDLER_PASS; } dest = eth_hdr(skb)->h_dest; } /* fall through */ case BR_STATE_LEARNING: //状态为学习 if (!compare_ether_addr(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST;
NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish); //--> break; default: drop: kfree_skb(skb); } return RX_HANDLER_CONSUMED; }
br_handle_frame_finish: 正常的数据包会流进br_handle_frame_finish()进行处理 :
int br_handle_frame_finish(struct sk_buff *skb) { //取得目的MAC地址 const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct net_bridge *br; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2;
if (!p || p->state == BR_STATE_DISABLED) goto drop;
/* insert into forwarding database after filtering to avoid spoofing */ br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source);
if (is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) goto drop;
if (p->state == BR_STATE_LEARNING) goto drop;
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
/* The packet skb2 goes to the local host (NULL to skip). */ skb2 = NULL;
/*如果网桥的虚拟网卡处于混杂模式,那么每个接收到的数据包都需要克隆一份,送到AF_PACKET协议处理体(网络软中断函数net_rx_action中ptype_all链的处理)*/ if (br->dev->flags & IFF_PROMISC) skb2 = skb;
dst = NULL;
if (is_multicast_ether_addr(dest)) { //目的mac为多播或者广播,则需要传至上层进行处理 mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; br_multicast_forward(mdst, skb, skb2); skb = NULL; if (!skb2) goto out; } else skb2 = skb;
br->dev->stats.multicast++; } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { //查询CAM 表,到本机的则传至上层协议处理 skb2 = skb; /* Do not forward the packet since it's local. */ skb = NULL; }
if (skb) { if (dst) br_forward(dst->dst, skb, skb2); //不是本机的数据,则转发 else br_flood_forward(br, skb, skb2); //如果查询不到,在其它端口上都发送此包 }
if (skb2) return br_pass_frame_up(skb2);
out: return 0; drop: kfree_skb(skb); goto out; }
该函数,通过查找CAM表,取得发送端口,如果当前CAM表里没有到目的MAC的端口,则在其它端口上都发送此数据包。
在这个函数里,我们看到,查询CAM表的函数为:__br_fdb_get()
/*
 * Look up @addr in the forwarding database of @br.
 *
 * Walks the hash bucket selected by br_mac_hash(addr) and stops at the
 * first entry whose address equals @addr.
 *
 * Returns that entry, or NULL if there is no match or the matching entry
 * has expired.
 */
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr)
{
	struct hlist_node *node;
	struct net_bridge_fdb_entry *entry;

	hlist_for_each_entry_rcu(entry, node, &br->hash[br_mac_hash(addr)], hlist) {
		/* Not this address: keep walking the bucket. */
		if (compare_ether_addr(entry->addr.addr, addr))
			continue;

		/* First match decides the result; an expired entry is a miss. */
		return unlikely(has_expired(br, entry)) ? NULL : entry;
	}

	return NULL;
}
首先根据目的MAC计算哈希值,取得对应的哈希桶。
然后遍历桶里的表项,查找地址与目的MAC相同的项:若找到且未过期,则返回该fdb项,否则返回NULL。 如果是送给本机的数据包,则传至上层协议;如果不是,则需要转发。
|