【原】思科VPP系列砖题三：VPP节点注册

方小米 2023-11-21 发布于浙江

展开全文

在前面的章节中，我们讲述了如何编译思科开源的fdio/VPP

Ubuntu22.04如何编译vpp-master版本

并介绍了VPP的启动流程

思科VPP系列砖题二：VPP启动流程分析

本章节将要介绍VPP node的注册机制。在介绍VPP的node机制之前，我们首先介绍一下VPP的软件架构和设计思想。

软件架构描述：（图片来自腾讯大佬，公众号：Flowlet）

工作原理描述：

矢量图节点的数据处理架构：

矢量处理逻辑描述：

二、VPP节点注册流程

函数名称：	void vlib_register_all_static_nodes (vlib_main_t * vm)
调用关系	vlib_main()初始化时候调用；

函数调用关系描述：

函数vlib_register_all_static_nodes 处理逻辑如下所示：

/*
 * Register every statically declared graph node.
 *
 * A placeholder "null-node" is registered first, then the registration
 * list reachable from the global main (node_registrations) is walked and
 * each entry is handed to register_node().
 *
 * @param vm  vlib main passed through to register_node().
 */
void
vlib_register_all_static_nodes (vlib_main_t * vm)
{
  vlib_global_main_t *vgm = vlib_get_global_main ();
  vlib_node_registration_t *r;

  static char *null_node_error_strings[] = {
    "blackholed packets",
  };

  /* The null node is registered before anything else, so it is the one
     that ends up with node index 0. */
  static vlib_node_registration_t null_node_reg = {
    .function = null_node_fn,
    .vector_size = sizeof (u32),
    .name = "null-node",
    .n_errors = 1,
    .error_strings = null_node_error_strings,
  };

  /* make sure that node index 0 is not used by
     real node */
  register_node (vm, &null_node_reg);

  /* Walk the linked list of static registrations starting at
     vlib_global_main_t -> node_registrations and register each entry. */
  r = vgm->node_registrations;
  while (r)
    {
      register_node (vm, r);
      r = r->next_registration;
    }
}

如下所示：

函数register_node（在下面贴出的代码中，该函数名为vlib_register_node）的处理逻辑如下所示：


u32vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt,        ...){  vlib_node_main_t *nm = &vm->node_main;  vlib_node_t *n;  va_list va;  u32 size;  int i;
  if (CLIB_DEBUG > 0)    {      /* Default (0) type should match INTERNAL. */      vlib_node_t zero = { 0 };      ASSERT (VLIB_NODE_TYPE_INTERNAL == zero.type);    }
  if (r->node_fn_registrations)    {      /* to avoid confusion, please remove ".function " statiement from         CLIB_NODE_REGISTRATION() if using function function candidates */      ASSERT (r->function == 0);      /* 注册节点的处理函数，按照优先级选择处理函数 */      r->function =  vlib_node_get_preferred_node_fn_variant (vm, r->node_fn_registrations);    }
  ASSERT (r->function != 0);
  /* 分配节点需要的空间，并且填充节点vlib_node_t 的字段 */  n = clib_mem_alloc_no_fail (sizeof (n[0]));  clib_memset (n, 0, sizeof (n[0]));  n->index = vec_len (nm->nodes);  n->node_fn_registrations = r->node_fn_registrations;  n->protocol_hint = r->protocol_hint;
  vec_add1 (nm->nodes, n);
  va_start (va, fmt);  n->name = va_format (0, fmt, &va);  va_end (va);
  /* 创建hash查找关系，需要注意的是node的名字不能重复 */  if (!nm->node_by_name)    nm->node_by_name = hash_create_vec ( /* size */ 32,          sizeof (n->name[0]), sizeof (uword));
  /* Node names must be unique. */  {    /* vlib_get_node_by_name() expects NULL-terminated strings      * 此处需要注意的是format格式化的字符串结尾不带0 */    u8 *name = format (0, "%v%c", n->name, 0);    vlib_node_t *o = vlib_get_node_by_name (vm, name);    /* 释放name */    vec_free (name);    if (o)      clib_error ("more than one node named `%v'", n->name);  }  /* 哈希绑定 */  hash_set (nm->node_by_name, n->name, n->index);
  /* 节点index和节点名称 */  r->index = n->index;    /* save index in registration */  n->function = r->function;
  /* Node index of next sibling will be filled in by vlib_node_main_init. */  n->sibling_of = r->sibling_of;  if (r->sibling_of && r->n_next_nodes > 0)    clib_error ("sibling node should not have any next nodes `%v'", n->name);
  if (r->type == VLIB_NODE_TYPE_INTERNAL)    ASSERT (r->vector_size > 0);
#define _(f) n->f = r->f
  _(type);  _(flags);  _(state);  _(format_buffer);  _(unformat_buffer);  _(format_trace);  _(validate_frame);
  size = round_pow2 (sizeof (vlib_frame_t), VLIB_FRAME_DATA_ALIGN);
  /* scalar data size */  if (r->scalar_size)    {      n->scalar_offset = size;      size += round_pow2 (r->scalar_size, VLIB_FRAME_DATA_ALIGN);    }  else    n->scalar_offset = 0;
  /* Vecor data size */  n->vector_offset = size;  size += r->vector_size * VLIB_FRAME_SIZE;
  /* Allocate a few extra slots of vector data to support     speculative vector enqueues which overflow vector data in next frame. */  size += r->vector_size * VLIB_FRAME_SIZE_EXTRA;
  /* space for VLIB_FRAME_MAGIC */  n->magic_offset = size;  size += sizeof (u32);
  /* round size to VLIB_FRAME_DATA_ALIGN */  size = round_pow2 (size, VLIB_FRAME_DATA_ALIGN);
  if (r->aux_size)    {      n->aux_offset = size;      size += r->aux_size * VLIB_FRAME_SIZE;    }  else    n->aux_offset = 0;
  /* final size */  n->frame_size = size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);  ASSERT (size <= __UINT16_MAX__);
  vlib_frame_size_t *fs = 0;
  n->frame_size_index = (u16) ~0;  vec_foreach (fs, nm->frame_sizes)    if (fs->frame_size == size)      {  n->frame_size_index = fs - nm->frame_sizes;  break;      }
  if (n->frame_size_index == (u16) ~0)    {      vec_add2 (nm->frame_sizes, fs, 1);      fs->frame_size = size;      n->frame_size_index = fs - nm->frame_sizes;    }
  /* Register error counters. */  vlib_register_errors (vm, n->index, r->n_errors, r->error_strings,      r->error_counters);  node_elog_init (vm, n->index);
  _(runtime_data_bytes);  if (r->runtime_data_bytes > 0)    {      vec_resize (n->runtime_data, r->runtime_data_bytes);      if (r->runtime_data)  clib_memcpy (n->runtime_data, r->runtime_data, r->runtime_data_bytes);    }
  vec_resize (n->next_node_names, r->n_next_nodes);  for (i = 0; i < r->n_next_nodes; i++)    n->next_node_names[i] = r->next_nodes[i];
  vec_validate_init_empty (n->next_nodes, r->n_next_nodes - 1, ~0);  vec_validate (n->n_vectors_by_next_node, r->n_next_nodes - 1);
  n->owner_node_index = n->owner_next_index = ~0;
  /* Initialize node runtime. */  {    vlib_node_runtime_t *rt;    u32 i;
    if (n->type == VLIB_NODE_TYPE_PROCESS)      {  vlib_process_t *p;  uword log2_n_stack_bytes;
  log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes,               VLIB_PROCESS_LOG2_STACK_SIZE);  log2_n_stack_bytes = clib_max (log2_n_stack_bytes,               clib_mem_get_log2_page_size ());
  p = clib_mem_alloc_aligned (sizeof (p[0]), CLIB_CACHE_LINE_BYTES);  clib_memset (p, 0, sizeof (p[0]));  p->log2_n_stack_bytes = log2_n_stack_bytes;
  p->stack = clib_mem_vm_map_stack (1ULL << log2_n_stack_bytes,            CLIB_MEM_PAGE_SZ_DEFAULT,            "process stack: %U",            format_vlib_node_name, vm,            n->index);
  if (p->stack == CLIB_MEM_VM_MAP_FAILED)    clib_panic ("failed to allocate process stack (%d bytes)",          1ULL << log2_n_stack_bytes);
  /* Process node's runtime index is really index into process     pointer vector. */  n->runtime_index = vec_len (nm->processes);  /* 将注册的添加到 vlib_main_t 的process */  vec_add1 (nm->processes, p);
  /* Paint first stack word with magic number so we can at least     detect process stack overruns. */  p->stack[0] = VLIB_PROCESS_STACK_MAGIC;
  /* Node runtime is stored inside of process. */  rt = &p->node_runtime;      }    else      {  vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,        /* align */ CLIB_CACHE_LINE_BYTES);  if (n->type == VLIB_NODE_TYPE_INPUT)    clib_interrupt_resize (&nm->interrupts,         vec_len (nm->nodes_by_type[n->type]));  n->runtime_index = rt - nm->nodes_by_type[n->type];      }
    if (n->type == VLIB_NODE_TYPE_INPUT)      nm->input_node_counts_by_state[n->state] += 1;
    rt->function = n->function;    rt->flags = n->flags;    rt->state = n->state;    rt->node_index = n->index;
    rt->n_next_nodes = r->n_next_nodes;    rt->next_frame_index = vec_len (nm->next_frames);
    vec_resize (nm->next_frames, rt->n_next_nodes);    for (i = 0; i < rt->n_next_nodes; i++)      vlib_next_frame_init (nm->next_frames + rt->next_frame_index + i);
    vec_resize (rt->errors, r->n_errors);    for (i = 0; i < vec_len (rt->errors); i++)      rt->errors[i] = n->error_heap_index + i;
    STATIC_ASSERT_SIZEOF (vlib_node_runtime_t, 128);    ASSERT (vec_len (n->runtime_data) <= VLIB_NODE_RUNTIME_DATA_SIZE);
    if (vec_len (n->runtime_data) > 0)      clib_memcpy (rt->runtime_data, n->runtime_data,       vec_len (n->runtime_data));    else      clib_memset (rt->runtime_data, 0, VLIB_NODE_RUNTIME_DATA_SIZE);
    vec_free (n->runtime_data);  }#undef _  return r->index;}

下面对节点注册函数vlib_register_node流程中的关键逻辑片段进行分析：

node示例：以vxlan节点为例

/* *INDENT-OFF* */VLIB_REGISTER_NODE (vxlan4_input_node) ={  .name = "vxlan4-input",  .vector_size = sizeof (u32),  .n_errors = VXLAN_N_ERROR,  .error_strings = vxlan_error_strings,  .n_next_nodes = VXLAN_INPUT_N_NEXT,  .format_trace = format_vxlan_rx_trace,  .next_nodes = {#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,    foreach_vxlan_input_next#undef _  },};

流量在VPP上的具体报文分发过程，请参考上文矢量处理逻辑的图片描述。

三、Node的初始化

请参考如上图描述的node节点的注册过程的具体逻辑实现。其调用关系如下所示：vlib_main()->vlib_register_all_static_nodes()->register_node()

  /* Excerpt: allocate a vlib_node_t, zero it, and fill in its index, node function registrations and protocol hint. */  n = clib_mem_alloc_no_fail (sizeof (n[0]));  clib_memset (n, 0, sizeof (n[0]));  n->index = vec_len (nm->nodes);  n->node_fn_registrations = r->node_fn_registrations;  n->protocol_hint = r->protocol_hint;
  /* Append the node pointer to nm->nodes; n->index was taken from vec_len() just before, so it matches the new slot. */
  vec_add1 (nm->nodes, n);
  /* Build the node name from fmt and the variadic arguments. */
  va_start (va, fmt);  n->name = va_format (0, fmt, &va);  va_end (va);