Commit d78c6266893b44d3bd9662a4cf387d79fdb7153f

GH Cao 2022-12-08T21:37:10

Add Windows ARM64 support

diff --git a/src/dlfcn.c b/src/dlfcn.c
index 3a7ca64..1402df2 100644
--- a/src/dlfcn.c
+++ b/src/dlfcn.c
@@ -680,16 +680,54 @@ static BOOL is_valid_address( const void *addr )
     return TRUE;
 }
 
+#if defined(_M_ARM64) || defined(__aarch64__)
+static INT64 sign_extend(UINT64 value, UINT bits)
+{
+    const UINT left = 64 - bits;
+    const INT64 m1 = -1;
+    const INT64 wide = (INT64) (value << left);
+    const INT64 sign = ( wide < 0 ) ? ( m1 << left ) : 0;
+
+    return value | sign;
+}
+#endif
+
 /* Return state if address points to an import thunk
  *
- * An import thunk is setup with a 'jmp' instruction followed by an
+ * On x86, an import thunk is setup with a 'jmp' instruction followed by an
  * absolute address (32bit) or relative offset (64bit) pointing into
  * the import address table (iat), which is partially maintained by
  * the runtime linker.
+ *
+ * On ARM64, an import thunk is also a relative jump pointing into the
+ * import address table, implemented by the following three instructions:
+ * 
+ *      adrp x16, [page_offset]
+ * Calculates the page address (aligned to 4KB) the IAT is at, based 
+ * on the value of x16, with page_offset. 
+ *
+ *      ldr  x16, [x16, offset]
+ * Calculates the final IAT address, x16 <- x16 + offset.
+ * 
+ *      br   x16
+ * Jump to the address in x16.
+ * 
+ * The register used here is hardcoded to be x16.
  */
 static BOOL is_import_thunk( const void *addr )
 {
+#if defined(_M_ARM64) || defined(__aarch64__)
+    ULONG opCode1 = * (ULONG *) ( (BYTE *) addr );
+    ULONG opCode2 = * (ULONG *) ( (BYTE *) addr + 4 );
+    ULONG opCode3 = * (ULONG *) ( (BYTE *) addr + 8 );
+
+    return (opCode1 & 0x9f00001f) == 0x90000010    /* adrp x16, [page_offset] */
+        && (opCode2 & 0xffe003ff) == 0xf9400210    /* ldr  x16, [x16, offset] */
+        && opCode3 == 0xd61f0200                   /* br   x16 */
+        ? TRUE : FALSE;
+#else
     return *(short *) addr == 0x25ff ? TRUE : FALSE;
+#endif
 }
 
 /* Return adress from the import address table (iat),
@@ -698,11 +736,32 @@ static BOOL is_import_thunk( const void *addr )
 static void *get_address_from_import_address_table( void *iat, DWORD iat_size, const void *addr )
 {
     BYTE *thkp = (BYTE *) addr;
+#if defined(_M_ARM64) || defined(__aarch64__)
+    /*
+     *  typical import thunk in ARM64:
+     *  0x7ff772ae78c0 <+25760>: adrp   x16, 1
+     *  0x7ff772ae78c4 <+25764>: ldr    x16, [x16, #0xdc0]
+     *  0x7ff772ae78c8 <+25768>: br     x16
+     */
+    ULONG opCode1 = * (ULONG *) ( (BYTE *) addr );
+    ULONG opCode2 = * (ULONG *) ( (BYTE *) addr + 4 );
+
+    /* Extract the offset from adrp instruction */
+    UINT64 pageLow2 = (opCode1 >> 29) & 3;
+    UINT64 pageHigh19 = (opCode1 >> 5) & ~(~0ull << 19);
+    INT64 page = sign_extend((pageHigh19 << 2) | pageLow2, 21) << 12;
+
+    /* Extract the offset from ldr instruction */
+    UINT64 offset = ((opCode2 >> 10) & ~(~0ull << 12)) << 3;
+
+    /* Calculate the final address */
+    BYTE *ptr = (BYTE *) ( (ULONG64) thkp & ~0xfffull ) + page + offset;
+#else
     /* Get offset from thunk table (after instruction 0xff 0x25)
      *   4018c8 <_VirtualQuery>: ff 25 4a 8a 00 00
      */
     ULONG offset = *(ULONG *)( thkp + 2 );
-#ifdef _WIN64
+#if defined(_M_AMD64) || defined(__x86_64__)
     /* On 64 bit the offset is relative
      *      4018c8:   ff 25 4a 8a 00 00    jmpq    *0x8a4a(%rip)    # 40a318 <__imp_VirtualQuery>
      * And can be also negative (MSVC in WDK)
@@ -716,6 +775,7 @@ static void *get_address_from_import_address_table( void *iat, DWORD iat_size, c
      */
     BYTE *ptr = (BYTE *) offset;
 #endif
+#endif
 
     if( !is_valid_address( ptr ) || ptr < (BYTE *) iat || ptr > (BYTE *) iat + iat_size )
         return NULL;
diff --git a/tests/test-dladdr.c b/tests/test-dladdr.c
index e2f6c01..5568f23 100644
--- a/tests/test-dladdr.c
+++ b/tests/test-dladdr.c
@@ -140,12 +140,21 @@ __declspec(dllexport)
 #endif
 int main(int argc, char **argv)
 {
+#if defined(_M_ARM64) || defined(__aarch64__)
+    /* points to non reachable address */
+    unsigned char zero_thunk_address[12] = { 0x10, 0x00, 0x00, 0x90, 0x10, 0x02, 0x40, 0xF9, 0x00, 0x02, 0x1F, 0xD6 };
+    /* points to executable base */
+    unsigned char invalid_thunk_address[12] = { 0x10, 0x00, 0x00, 0xb0, 0x10, 0x06, 0x47, 0xF9, 0x00, 0x02, 0x1F, 0xD6 };
+    /* no import thunk */
+    unsigned char no_import_thunk[12] = { 0x11, 0x00, 0x00, 0xb0, 0x31, 0x06, 0x47, 0xF9, 0x20, 0x02, 0x1F, 0xD6 };
+#else
     /* points to non reachable address */
     unsigned char zero_thunk_address[6] = { 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00 };
     /* points to executable base */
     unsigned char invalid_thunk_address[6] = { 0xFF, 0x25, 0x00, 0x00, 0x40, 0x00 };
     /* no import thunk */
     unsigned char no_import_thunk[6] = { 0xFF, 0x26, 0x00, 0x00, 0x40, 0x00 };
+#endif
     int  result = 0;
     UNUSED(argv);