Netfilter源码分析（8）

2020-05-27 00:00:00 函数用户规则调用内核

本节前面大部分为解决问题的思路，和前面的帖子有重复，可以直接跳到后半截看^o^。

内核中的match：

1、表与规则
内核中，表用struct ipt_table表示，其成员struct ipt_table_info *private;表示表的数据区，而private的成员
char entries[0] __attribute__((aligned(SMP_CACHE_BYTES)));表示每个CPU的规则的入口。

2、规则
/*
 * One rule as stored in a table: a fixed header followed, in elems[], by
 * the rule's matches (if any) and then its target.
 */
struct ipt_entry
{
struct ipt_ip ip; /* The standard match part: addresses, network interfaces, etc. */

/* Flags marking which parts of the packet this rule cares about;
some matches use it, some do not */
unsigned int nfcache;

/* Offset of the target area. The target area normally follows the match
area, and the match area sits right after the end of ipt_entry;
initialised to sizeof(struct ipt_entry), i.e. assuming no matches */
u_int16_t target_offset;
/* Offset of the next rule relative to this one, i.e. the total space this
rule occupies; initialised to
sizeof(struct ipt_entry)+sizeof(struct ipt_target), i.e. no matches */
u_int16_t next_offset;

/* Bit vector that provides a way to detect "loops" in the rules */
unsigned int comefrom;

/* Packet and byte counters. */
struct ipt_counters counters;

/* Start of the matches (if any) or, otherwise, of the target */
unsigned char elems[0];
};

3、match的表示

A、用户态
/*
 * Userspace (iptables program) description of one match extension:
 * its name, data sizes, and the callbacks iptables invokes to parse,
 * validate, print and save the extension's options.
 */
struct iptables_match
{
      /* Match chain link; initially NULL */
struct iptables_match *next;

      /* Match name. Similar to kernel module loading, iptables extensions
         exist as shared libraries named libipt_'name'.so */
      ipt_chainlabel name;

      /* Version information, normally set to NETFILTER_VERSION */
      const char *version;

      /* Size of the match data; must be given aligned via the IPT_ALIGN() macro */
      size_t size;

      /* The kernel may modify some fields, so size can differ from the exact
         user data. In that case the data that is never modified should be
         placed at the front of the data area; normally this value equals size.
         NOTE(review): the original comment says this holds the size of the
         part that DOES change; it reads more plausibly as the size of the
         leading, unmodified part - confirm against iptables.h. */
      size_t userspacesize;

      /* Called when iptables is asked to show this match's help
         (e.g. iptables -m ip_ext -h); output follows the generic iptables help. */
      void (*help)(void);

      /* Initialisation; called before parse. */
      void (*init)(struct ipt_entry_match *m, unsigned int *nfcache);

      /* Scans and accepts this match's command-line arguments; returns
         non-zero when an argument was accepted. flags holds state information */
      int (*parse)(int c, char **argv, int invert, unsigned int *flags,
                  const struct ipt_entry *entry,
                  unsigned int *nfcache,
                  struct ipt_entry_match **match);

      /* Called once all command-line arguments have been processed; if they
are not valid it should exit (exit_error()) */
      void (*final_check)(unsigned int flags);

      /* Used when listing the rules of a table: prints a rule that uses this match */
      void (*print)(const struct ipt_ip *ip,
                  const struct ipt_entry_match *match, int numeric);

      /* Writes this match's arguments to standard output in the format that
         parse accepts; used by the iptables-save command. */
      void (*save)(const struct ipt_ip *ip,
                  const struct ipt_entry_match *match);

      /* NULL-terminated option list; struct option is the same structure
         used by getopt(3) */
      const struct option *extra_opts;

      /* Ignore these men behind the curtain: */
      unsigned int option_offset;
      struct ipt_entry_match *m;
      unsigned int mflags;
      unsigned int used;
#ifdef NO_SHARED_LIBS
      unsigned int loaded; /* simulate loading so options are merged properly */
#endif
};

成员指针m是一个struct ipt_entry_match类型，这个东东后面再分析。

B、内核中，核心用struct ipt_match表征一个Match数据结构：
/*
 * Kernel-side description of one match: its name plus the callbacks used
 * to test a packet, to validate a rule that uses the match, and to clean
 * up when such a rule is removed.
 */
struct ipt_match
{
/* List linkage; normally initialised to {NULL,NULL} and used by the core */
struct list_head list;

/* Name of the match */
const char name[IPT_FUNCTION_MAXNAMELEN];

/* The match function. Returns non-zero when the packet matches; if it
   returns 0 and sets hotdrop to 1, the packet should be dropped immediately */
int (*match)(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      const void *hdr,
      u_int16_t datalen,
      int *hotdrop);

/* Called before a rule using this match is inserted into a table, to
   validate it; if it returns 0 the rule is not added to iptables */
int (*checkentry)(const char *tablename,
   const struct ipt_ip *ip,
   void *matchinfo,
   unsigned int matchinfosize,
   unsigned int hook_mask);

/* Called when a rule containing this match is deleted from a table;
   together with checkentry it can be used to allocate and free dynamic memory */
void (*destroy)(void *matchinfo, unsigned int matchinfosize);

/* Indicates whether this match is a module (NULL means it is not) */
struct module *me;

};

因为Match都是以模块的形式存在，这两个结构分别在iptables/Netfilter的模块初始化函数注册时被使用。

struct ipt_entry_match结构非常重要，它把内核态与用户态关联起来。如果说前面两个关于match的结构用来做
match的抽象层面的处理的话，那么struct ipt_entry_match则表示了规则中具体的每个match，即一条规则在内存中的存储形式是：
ipt_entry+ipt_entry_match1+ipt_entry_match2+ipt_entry_match3……

/*
 * Header of one match as stored inside a rule. The union gives three
 * overlaid views of the same storage, each beginning with match_size:
 * a userspace view carrying the match name, a kernel view carrying a
 * pointer to the struct ipt_match, and the bare size.
 */
struct ipt_entry_match
{
union {
struct {
u_int16_t match_size;

/* Used by userspace */
char name[IPT_FUNCTION_MAXNAMELEN];
} user;
struct {
u_int16_t match_size;

/* Used inside the kernel */
struct ipt_match *match;
} kernel;

/* Total length */
u_int16_t match_size;
} u;

/* Zero-length array marking where the bytes following this header begin */
unsigned char data[0];
};

而在匹配每一条规则时：
/*
 * Call fn(match, args...) for each struct ipt_entry_match stored in rule e.
 *
 * The walk starts at byte offset sizeof(struct ipt_entry) from e, advances
 * by each match's u.match_size, and ends once the offset reaches
 * e->target_offset.
 *
 * fn returns 0 to continue iteration; the first non-zero return stops the
 * walk. The macro evaluates to that non-zero value, or to 0 if every call
 * returned 0 (or there were no matches).
 */
#define IPT_MATCH_ITERATE(e, fn, args...) \
({ \
unsigned int __i; \
int __ret = 0; \
struct ipt_entry_match *__match; \
\
for (__i = sizeof(struct ipt_entry); \
      __i < (e)->target_offset; \
      __i += __match->u.match_size) { \
__match = (void *)(e) + __i; \
\
__ret = fn(__match , ## args); \
if (__ret != 0) \
break; \
} \
__ret; \
})
宏IPT_MATCH_ITERATE用来遍历规则中的每一个match，__i用来做循环变量。
struct ipt_entry用来表示一条规则，其最后一个成员unsigned char elems[0];后面紧跟着
match和target，那么match的起始位置自然是：
__i = sizeof(struct ipt_entry);即跳过ipt_entry

target_offset表示规则中target的偏移位，即match的结束，所以i的结束自然为：
__i < (e)->target_offset;

而每次步增的空间大小为match的大小，结构struct ipt_entry_match中以成员u.match_size表示当前match的大小,即：
__i += __match->u.match_size

i是每个match的偏移量，那么(void *)(e) + __i;则为每个match的地址。

问题又回到起始点上来了：规则匹配的时候，每条规则的__match->u.kernel.match 是如何与内核模块中每个match初始化
注册时建立链表的struct ipt_match东东关联起来的？

先来看看用户空间添加一条规则（虽然觉得极不可能是用户空间来做这件事情，抱着侥幸的心理）
用户空间调用(iptables1.2.7 源码，Iptables.c:1653)：
switch (command) {
case CMD_APPEND:
ret = append_entry(……);

append_entry
iptc_append_entry
insert_rules一路下来直接处理规则了，并没有单独的match的处理，看来不是用户态完成这一工作了。再来看看内核中：

发现在内核添加规则之前，会调用函数translate_table来检查并转换用户空间传递过来的规则：
其中有一句：
/*
 * Excerpt of translate_table(): only the statement relevant to the
 * discussion is shown. The declarations of `ret` and `i` and the rest of
 * the body are omitted from this excerpt.
 */
static int
translate_table(const char *name,
unsigned int valid_hooks,
struct ipt_table_info *newinfo,
unsigned int size,
unsigned int number,
const unsigned int *hook_entries,
const unsigned int *underflows)
{
/* linux 2.4.20, ip_tables.c, line 318 */
/* Hands check_entry, together with name, size and &i, to
   IPT_ENTRY_ITERATE over newinfo->entries / newinfo->size.
   IPT_ENTRY_ITERATE is defined elsewhere; presumably it invokes
   check_entry once per rule - confirm against its definition. */
ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
check_entry, name, size, &i);
}

IPT_ENTRY_ITERATE这个宏前面分析过，用来遍历每一条规则，而check_entry为处理函数，在
check_entry函数中，又有如下语句：
/*
 * Excerpt of check_entry(): only the statement relevant to the discussion
 * is shown. The declarations of `ret` and `j` and the rest of the body are
 * omitted from this excerpt.
 */
static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
      unsigned int *i)
{
/* Run check_match() on every match stored in rule e, passing the name,
   the rule's ip part, its comefrom value and &j as extra arguments. */
ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
}

IPT_MATCH_ITERATE宏用来遍历某一条规则中的各个Match，check_match为遍历到后的进一步处理函数。
再来看check_match：
static inline int
check_match(struct ipt_entry_match *m,
      const char *name,
      const struct ipt_ip *ip,
      unsigned int hookmask,
      unsigned int *i)
{
int ret;
struct ipt_match *match;

/*根据规则中Match的名称，在已注册好的ipt_match双向链表中查找对应接点——已经接近要找的目标了*/
match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
if (!match) {
duprintf("check_match: `%s' not found\n", m->u.user.name);
return ret;
}
if (match->me)
__MOD_INC_USE_COUNT(match->me);
/*晕，找的就是它了，找了大半天*/
m->u.kernel.match = match;
up(&ipt_mutex);

if (m->u.kernel.match->checkentry
      && !m->u.kernel.match->checkentry(name, ip, m->data,
      m->u.match_size - sizeof(*m),
      hookmask)) {
if (m->u.kernel.match->me)
__MOD_DEC_USE_COUNT(m->u.kernel.match->me);
duprintf("ip_tables: check failed for `%s'.\n",
   m->u.kernel.match->name);
return -EINVAL;
}

(*i)++;
return 0;
}

知道了关联，理解do_match函数就不再是问题了：
/*
 * Run one match of a rule against a packet.
 *
 * Calls the match function reached through m->u.kernel.match, passing it
 * m->data as the match info along with the remaining arguments unchanged.
 *
 * Returns 1 when the match function returns 0 (the packet does not match,
 * which stops iteration), and 0 when it returns non-zero.
 */
static inline
int do_match(struct ipt_entry_match *m,
      const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      int offset,
      const void *hdr,
      u_int16_t datalen,
      int *hotdrop)
{
    int matched = m->u.kernel.match->match(skb, in, out, m->data,
                                           offset, hdr, datalen, hotdrop);

    /* Non-zero tells the caller to stop iterating. */
    return matched ? 0 : 1;
}

文章来源CU社区：[原创]Netfilter源码分析-我来抛砖，望能引玉

相关文章