1. 前言
本文简要介绍数据包进入桥网卡后在Linux网络协议栈中的处理流程,并描述netfilter各hook点的挂接处理情况,各部分的详细处理留待后续文章说明。
以下内核代码版本为2.6.19.2。

2. 函数处理流程
bridge入口点handle_bridge()
- /* net/core/dev.c */
- int netif_receive_skb(struct sk_buff *skb)
- {
- //......
- if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
- //......
- }
bridge基本挂接点处理函数:br_handle_frame_hook()
- static __inline__ int handle_bridge(struct sk_buff **pskb, // offers a received frame to the bridge hook; a nonzero result makes netif_receive_skb() jump to its out label
- struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
- {
- struct net_bridge_port *port;
- if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
- (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
- return 0; // loopback packet, or the receiving device has no br_port: not handled by the bridge
- if (*pt_prev) { // a packet_type handler is still pending: deliver to it first, then clear it
- *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- return br_handle_frame_hook(port, pskb); // br_init() points this hook at br_handle_frame
- }
br_handle_frame_hook()的实际实现:
- /* net/bridge/br.c */
- static int __init br_init(void)
- {
- //......
- br_handle_frame_hook = br_handle_frame;
- //......
- }
br_handle_frame: PF_BRIDGE的prerouting点
- /* net/bridge/br_input.c
- * br_handle_frame: receive-side entry of the bridge for a frame arriving on
- * port p; runs the PF_BRIDGE LOCAL_IN or PRE_ROUTING netfilter hooks.
- * Returns 0 only when br_should_route_hook claims the frame; for link-local
- * destinations it returns whether the LOCAL_IN hook result was nonzero; in
- * every other case (including the error path, where the skb is freed) it
- * returns 1.
- */
- int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
- {
- struct sk_buff *skb = *pskb;
- const unsigned char *dest = eth_hdr(skb)->h_dest;
- if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
- goto err;
- if (unlikely(is_link_local(dest))) {
- // Frames for the bridge itself enter the PF_BRIDGE INPUT point; usually few packets take this path
- skb->pkt_type = PACKET_HOST;
- // Per the original note NF_HOOK normally yields 1 here, so 1 is returned: the bridge module has taken full charge of the packet
- return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
- NULL, br_handle_local_finish) != 0;
- }
- if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
- // br_should_route_hook is usually not set
if (br_should_route_hook) {
- if (br_should_route_hook(pskb))
- return 0;
- skb = *pskb;
- dest = eth_hdr(skb)->h_dest;
- }
- if (!compare_ether_addr(p->br->dev->dev_addr, dest))
- skb->pkt_type = PACKET_HOST;
- // Once the PF_BRIDGE PRE_ROUTING hooks finish, processing continues in br_handle_frame_finish
- NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
- br_handle_frame_finish);
- // Always return 1 afterwards: no other protocol family processes the packet, the bridge has handled it completely
- return 1;
- } // any other port state falls through to err below and the frame is freed
- err:
- kfree_skb(skb);
- // Always return 1 afterwards: no other protocol family processes the packet, the bridge has handled it completely
- return 1;
- }
通过br_handle_frame_finish进入bridge的转发:
- /* br_handle_frame_finish: okfn passed to the PF_BRIDGE PRE_ROUTING hook in
- * br_handle_frame. Records the source address in the forwarding database,
- * then passes the frame up, forwards it to a single port, or floods it.
- * Always returns 0.
- * note: already called with rcu_read_lock (preempt_disabled) */
- int br_handle_frame_finish(struct sk_buff *skb)
- {
- const unsigned char *dest = eth_hdr(skb)->h_dest;
- struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
- struct net_bridge *br;
- struct net_bridge_fdb_entry *dst;
- int passedup = 0;
- if (!p || p->state == BR_STATE_DISABLED)
- goto drop;
- /* insert into forwarding database after filtering to avoid spoofing */
- br = p->br;
- br_fdb_update(br, p, eth_hdr(skb)->h_source);
- if (p->state == BR_STATE_LEARNING)
- goto drop;
- if (br->dev->flags & IFF_PROMISC) {
- struct sk_buff *skb2;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 != NULL) {
- passedup = 1;
- br_pass_frame_up(br, skb2);
- }
- }
- if (is_multicast_ether_addr(dest)) {
- // Multicast forwarding: uses the same flood path as broadcast
- br->statistics.multicast++;
- br_flood_forward(br, skb, !passedup);
- if (!passedup)
- br_pass_frame_up(br, skb);
- goto out;
- }
- // Look up the egress port by destination MAC
- dst = __br_fdb_get(br, dest);
- if (dst != NULL && dst->is_local) {
- if (!passedup)
- br_pass_frame_up(br, skb);
- else
- kfree_skb(skb);
- goto out;
- }
- if (dst != NULL) {
- // Unicast forwarding
br_forward(dst->dst, skb);
- goto out;
- }
- // Flood forwarding: reached only when the FDB lookup returned no entry for dest
- br_flood_forward(br, skb, 0);
- out:
- return 0;
- drop:
- kfree_skb(skb);
- goto out;
- }
广播/多播转发: br_flood_forward/br_flood
- /* br_flood_forward: floods skb via br_flood() using __br_forward as the
- * per-port send routine; clone is passed through unchanged.
- * called under bridge lock */
- void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
- {
- br_flood(br, skb, clone, __br_forward);
- }
- /* br_flood: hands skb to __packet_hook for every port on br->port_list for
- * which should_deliver() is true. Every qualifying port except the last gets
- * a clone; the last one gets skb itself. If no port qualifies, skb is freed.
- * When clone is nonzero, a clone is flooded and the caller's skb is left
- * untouched. Clone failures bump tx_dropped and abort the flood.
- * called under bridge lock */
- static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
- void (*__packet_hook)(const struct net_bridge_port *p,
- struct sk_buff *skb))
- {
- struct net_bridge_port *p;
- struct net_bridge_port *prev;
- if (clone) {
- struct sk_buff *skb2;
- if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- br->statistics.tx_dropped++;
- return;
- }
- skb = skb2;
- }
- prev = NULL; // prev trails one qualifying port behind, so the final port can consume skb without a clone
- list_for_each_entry_rcu(p, &br->port_list, list) {
- if (should_deliver(p, skb)) {
- if (prev != NULL) {
- struct sk_buff *skb2;
- if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- br->statistics.tx_dropped++;
- kfree_skb(skb);
- return;
- }
- // On the br_flood_forward path this hook is __br_forward
- __packet_hook(prev, skb2);
- }
- prev = p;
- }
- }
- if (prev != NULL) {
- // On the br_flood_forward path this hook is __br_forward
- __packet_hook(prev, skb);
- return;
- }
- kfree_skb(skb);
- }
单播转发: br_forward
- /* net/bridge/br_forward.c
- * br_forward: unicast forwarding of skb to port `to`; the skb is freed
- * instead when should_deliver() rejects the port. */
- /* called with rcu_read_lock */
- void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
- {
- if (should_deliver(to, skb)) {
- // This path also ends up in __br_forward
- __br_forward(to, skb);
- return;
- }
- kfree_skb(skb);
- }
FORWARD点:
- static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) // sends skb towards port `to` through the PF_BRIDGE FORWARD hook
- {
- struct net_device *indev;
- indev = skb->dev; // keep the ingress device; skb->dev is overwritten on the next line
- skb->dev = to->dev;
- skb->ip_summed = CHECKSUM_NONE;
- // Enter the PF_BRIDGE FORWARD hook; when it finishes, processing continues in br_forward_finish()
- NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
- br_forward_finish);
- }
POSTROUTING点:
- // After the FORWARD point, processing moves straight on to the POSTROUTING point
- int br_forward_finish(struct sk_buff *skb)
- {
- // Enter the PF_BRIDGE POSTROUTING hook; when it finishes, processing continues in br_dev_queue_push_xmit()
- return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
- br_dev_queue_push_xmit);
- }
数据包发出:
- int br_dev_queue_push_xmit(struct sk_buff *skb) // okfn of the POST_ROUTING hook: drops or queues skb for transmission; always returns 0
- {
- /* drop mtu oversized packets except gso */
- if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
- kfree_skb(skb);
- else {
- /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
- if (nf_bridge_maybe_copy_header(skb))
- kfree_skb(skb);
- else {
- skb_push(skb, ETH_HLEN);
- // Per the original note, this is where the device's hard_start_xmit() ends up being called (dev_queue_xmit itself is not shown here)
- dev_queue_xmit(skb);
- }
- }
- return 0;
- }
桥网卡设备的hard_start_xmit()函数定义为:
- /* net/bridge/br_device.c
- * br_dev_setup (excerpt): installs br_dev_xmit as the bridge device's
- * hard_start_xmit handler; the rest of the function is elided. */
- void br_dev_setup(struct net_device *dev)
- {
- //......
- dev->hard_start_xmit = br_dev_xmit;
- //......
- }
- /* br_dev_xmit: transmit handler of the bridge device itself. Updates the tx
- * counters, strips the Ethernet header, then delivers the frame to one port
- * or floods it depending on the destination address. Always returns 0.
- * net device transmit always called with no BH (preempt_disabled) */
- int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- struct net_bridge *br = netdev_priv(dev);
- const unsigned char *dest = skb->data;
- struct net_bridge_fdb_entry *dst;
- br->statistics.tx_packets++;
- br->statistics.tx_bytes += skb->len;
- skb->mac.raw = skb->data;
- skb_pull(skb, ETH_HLEN);
- if (dest[0] & 1)
- // Multicast send (low bit of the first destination byte is set)
- br_flood_deliver(br, skb, 0);
- else if ((dst = __br_fdb_get(br, dest)) != NULL)
- // Unicast send
- br_deliver(dst->dst, skb);
- else
- // Flood send: no FDB entry was found for dest
- br_flood_deliver(br, skb, 0);
- // Per the original note, all of these send functions eventually call __br_deliver() (the callees are not shown here)
- return 0;
- }
-
- /* net/bridge/br_forward.c
- * __br_deliver: points skb at the egress port's device and runs the
- * PF_BRIDGE LOCAL_OUT hook, with br_forward_finish as the continuation. */
- static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
- {
- skb->dev = to->dev;
- // This is the PF_BRIDGE OUTPUT (NF_BR_LOCAL_OUT) hook point
- NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
- br_forward_finish);
- }
总结: PF_BRIDGE中的各个hook点和PF_INET不同, 可用下面的图表示:

PREROUTING --+--FORWARD-----POSTROUTING------+----OUTPUT
             |                               |
             |                               |
           INPUT

注: 由__br_deliver()可见, OUTPUT点处理结束后进入br_forward_finish(), 即本机发出的包同样要经过POSTROUTING点。

3. PF_BRIDGE的hook点
在net/bridge/br_netfilter.c中定义了以下hook点,注意这些hook点主要是PF_BRIDGE协议族的。
- /* net/bridge/br_netfilter.c */
- /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
- * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
- * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
- * ip_refrag() can return NF_STOLEN. */
- static struct nf_hook_ops br_nf_ops[] = {
- // Hook points registered for PF_BRIDGE
- // PREROUTING point
- { .hook = br_nf_pre_routing,
- .owner = THIS_MODULE,
- .pf = PF_BRIDGE,
- .hooknum = NF_BR_PRE_ROUTING,
- .priority = NF_BR_PRI_BRNF, },
- // INPUT point
- { .hook = br_nf_local_in,
- .owner = THIS_MODULE,
- .pf = PF_BRIDGE,
- .hooknum = NF_BR_LOCAL_IN,
- .priority = NF_BR_PRI_BRNF, },
- // FORWARD point (IP handler, priority NF_BR_PRI_BRNF - 1)
- { .hook = br_nf_forward_ip,
- .owner = THIS_MODULE,
- .pf = PF_BRIDGE,
- .hooknum = NF_BR_FORWARD,
- .priority = NF_BR_PRI_BRNF - 1, },
- // FORWARD point (ARP handler, priority NF_BR_PRI_BRNF)
- { .hook = br_nf_forward_arp,
- .owner = THIS_MODULE,
- .pf = PF_BRIDGE,
- .hooknum = NF_BR_FORWARD,
- .priority = NF_BR_PRI_BRNF, },
- // OUTPUT point
- { .hook = br_nf_local_out,
- .owner = THIS_MODULE,
- .pf = PF_BRIDGE,
- .hooknum = NF_BR_LOCAL_OUT,
- .priority = NF_BR_PRI_FIRST, },
- // POSTROUTING point
- { .hook = br_nf_post_routing,
- .owner = THIS_MODULE,
- .pf = PF_BRIDGE,
- .hooknum = NF_BR_POST_ROUTING,
- .priority = NF_BR_PRI_LAST, },
- // The remaining entries hook into PF_INET/PF_INET6. Per the original note they do not really manipulate packets:
- // they short-circuit the host's own input/output packets so that these bypass the bridge processing
- { .hook = ip_sabotage_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_FIRST, },
- { .hook = ip_sabotage_in,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_PRE_ROUTING,
- .priority = NF_IP6_PRI_FIRST, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_FORWARD,
- .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_FORWARD,
- .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_LOCAL_OUT,
- .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_FIRST, },
- { .hook = ip_sabotage_out,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_IP6_POST_ROUTING,
- .priority = NF_IP6_PRI_FIRST, },
- };
-
- // Handler for the PF_BRIDGE PREROUTING hook point (excerpt; the elided part is marked by ......)
- static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- ......
- // Continue by running the PF_INET PREROUTING hooks; NF_STOLEN is then returned to the PF_BRIDGE hook chain
- NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
- br_nf_pre_routing_finish);
- return NF_STOLEN;
- inhdr_error:
- // IP_INC_STATS_BH(IpInHdrErrors);
- out:
- return NF_DROP;
- }
-
- // Handler for the PF_BRIDGE FORWARD hook point (excerpt; the elided part is marked by ......)
- static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- ......
- // Continue by running the FORWARD hooks of the PF_INET/PF_INET6 family selected by pf
- NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
- br_nf_forward_finish);
- return NF_STOLEN;
- }
- // Handler for the PF_BRIDGE OUTPUT hook point (excerpt; the elided part is marked by ......).
- // It re-enters the PF_INET/PF_INET6 hooks (FORWARD for IP-forwarded traffic, LOCAL_OUT for
- // locally generated traffic) and returns NF_STOLEN.
- static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- ......
- /* IP forwarded traffic has a physindev, locally
- * generated traffic hasn't. */
- if (realindev != NULL) {
- if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
- struct net_device *parent = bridge_parent(realindev);
- if (parent)
- realindev = parent;
- }
- // Continue with the PF_INET/PF_INET6 FORWARD hooks, restricted to hooks whose priority is
- // NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1 or above
- NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
- realoutdev, br_nf_local_out_finish,
- NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
- } else {
- // Continue with the PF_INET/PF_INET6 LOCAL_OUT hooks, restricted to hooks whose priority is
- // NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1 or above
- NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
- realoutdev, br_nf_local_out_finish,
- NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
- }
- out:
- return NF_STOLEN;
- }
-
- // Handler for the PF_BRIDGE POSTROUTING hook point (excerpt; the elided part is marked by ......)
- static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- ......
- // Continue by running the POSTROUTING hooks of the PF_INET/PF_INET6 family selected by pf
- NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
- br_nf_dev_queue_xmit);
- return NF_STOLEN;
- #ifdef CONFIG_NETFILTER_DEBUG
- print_error:
- if (skb->dev != NULL) {
- printk("[%s]", skb->dev->name);
- if (realoutdev)
- printk("[%s]", realoutdev->name);
- }
- printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
- skb->data);
- dump_stack();
- return NF_ACCEPT;
- #endif
- }
由此可见, PF_INET的各个hook点也被PF_BRIDGE的各个hook点调用,因此可以在桥网卡中进行过滤或NAT等操作。

4. 结论
BRIDGE的数据处理流程是一个独立的处理过程, 如果处理正常的话就不再返回到其他协议处理。
在桥的处理层次也和IP协议一样,可以挂接多个PF_BRIDGE的挂接点,这些挂接点中又调用了PF_INET族的挂接点,从而实现了桥下的过滤、NAT等功能。