[Openmcl-devel] Progress toward a CCL-to-ARM64 compiler

Sun Feb 25 12:43:14 PST 2024

I have made some progress toward a CCL-to-ARM64 compiler by taking
code from the existing CCL-to-PPC64 compiler and modifying it to emit
ARM64 instruction sequences that resemble the ARM64 assembly code
checked in by Gary Byers before 2013-10-22.

It compiles the body of this function:

  ;; Iterative Fibonacci on fixnums: returns the Nth Fibonacci number
  ;; (0 for N = 0, 1 for N = 1, ...).  N is declared to be in [0, 24).
  (defun fixnum-fibonacci (n)
    (declare (type (mod 24) n)
             (optimize (safety 0) (speed 3)))
    ;; All three variables are stepped in parallel; the loop has no body
    ;; other than the declaration.
    (do ((a 1 b)                        ; a takes the previous b
         (b 0 (the fixnum (+ a b)))     ; b takes the sum of the previous a and b
         (n n (1- n)))                  ; count n down
        ((zerop n) b)                   ; when n reaches zero, return b
      (declare (fixnum a b n))))

to this ARM64 machine code:

  aa1e03f8 a9bf7bf9 f9402f80 eb2063ff 5400004a d4207d00 f81f8f2f
  f81f8f30 f81f8f31 f81f8f32 d2800112 d2800010 f9400f31 14000008
  f81f8f30 8b10024f f81f8f2f d1002231 f9400732 f9400330 91004339
  f100023f 54ffff01 aa1003ef f9400332 f9400731 f9400b30 a9407bf9
  aa1803fe 910043ff d65f03c0

(hand-disassembled here [1]), which, when pasted into this test
program [3], calculates "fibonacci(23) = 28657".

If you have an Apple Silicon device with Linux and GCC, I think you
should be able to run the test program on it.  (Darwin might also
work, with some tweaking.)  Paste the program's code [3] into a text
file named test-fib.s, then enter "gcc test-fib.s" and "./a.out".

For comparison, here is the result of running the existing PPC64
compiler on the same Fibonacci source code: [2].

Forge resources (source code, wiki, issue tracker, mailing
lists) are available at https://ccl-arm64-2023-07.srht.site .

Some disclaimers...

I have not yet made any effort to make the compiled code thread-safe
or signal-safe or garbage-collection-safe, so I wouldn't expect it to
work correctly in a real CCL kernel.

I mostly have implemented only enough of the compiler for the Fibonacci
function above, so I wouldn't expect other functions to work correctly.

I don't fully understand how Gary Byers intended ARM64 register
assignments and stack discipline to work, so feedback in those areas
would be especially welcome.

-- Robert Munyer

[1] --------

;; Hand disassembly of the machine code above.  Labels are byte offsets from
;; fib; a quoted number such as '1 denotes a fixnum, which the machine code
;; encodes shifted left by 3 (so '1 is the immediate 8).
;; In the loop, save2 holds a, save0 holds b and save1 holds n.
fib     (mov    loc-pc lr)              ; copy the return address into loc-pc
        (stp    vsp lr (:-@! sp 16))    ; push vsp and lr on the sp stack
        (ldr    imm0 (:+@ rcontext 88)) ; load the word at rcontext+88 (used as a lower bound for sp)
        (cmp    sp imm0)
        (b.ge   l24)                    ; sp >= bound: skip the trap
                                        ; NOTE(review): b.ge is a signed test; the PPC64 code [2] uses the unsigned tdllt -- confirm
        (brk    1000)
l24     (str    arg_z (:-@! vsp 8))     ; push the argument n on the vsp stack
        (str    save0 (:-@! vsp 8))     ; push the incoming save0..save2
        (str    save1 (:-@! vsp 8))
        (str    save2 (:-@! vsp 8))
        (mov    save2 '1)               ; a = 1
        (mov    save0 '0)               ; b = 0
        (ldr    save1 (:+@ vsp 24))     ; n = the argument pushed at l24
        (b      l84)                    ; enter the loop at its test
l56     (str    save0 (:-@! vsp 8))     ; push b (the next value of a)
        (add    arg_z save2 save0)      ; arg_z = a + b
        (str    arg_z (:-@! vsp 8))     ; push a + b (the next value of b)
        (sub    save1 save1 '1)         ; n = n - 1
        (ldr    save2 (:+@ vsp 8))      ; a = old b
        (ldr    save0 (:@ vsp))         ; b = old a + old b
        (add    vsp vsp 16)             ; pop both temporaries
l84     (cmp    save1 '0)
        (b.ne   l56)                    ; loop until n is zero
        (mov    arg_z save0)            ; result = b
        (ldr    save2 (:@ vsp))         ; restore the incoming save2..save0
        (ldr    save1 (:+@ vsp 8))
        (ldr    save0 (:+@ vsp 16))
        (ldp    vsp lr (:@ sp))         ; reload vsp and lr from the pair pushed at entry
        (mov    lr loc-pc)              ; lr = loc-pc (replaces the value just loaded)
        (add    sp sp 16)               ; pop that pair
        (ret)

[2] --------

0000000000000000 <fib>:
  00:   7d c8 02 a6     mflr    loc_pc
  04:   f8 21 ff e1     stdu    sp,-32(sp)
  08:   fa 01 00 08     std     fn,8(sp)
  0c:   f9 c1 00 10     std     loc_pc,16(sp)
  10:   f9 e1 00 18     std     vsp,24(sp)
  14:   7e 50 93 78     mr      fn,nfn
  18:   e8 62 00 58     ld      imm0,88(rcontext)
  1c:   7c 41 18 88     tdllt   sp,imm0
  20:   fa ef ff f9     stdu    arg_z,-8(vsp)
  24:   fb ef ff f9     stdu    save0,-8(vsp)
  28:   fb cf ff f9     stdu    save1,-8(vsp)
  2c:   fb af ff f9     stdu    save2,-8(vsp)
  30:   3b a0 00 08     li      save2,8
  34:   3b e0 00 00     li      save0,0
  38:   eb cf 00 18     ld      save1,24(vsp)
  3c:   48 00 00 20     b       5c <fib+0x5c>
  40:   fb ef ff f9     stdu    save0,-8(vsp)
  44:   7e fd fa 14     add     arg_z,save2,save0
  48:   fa ef ff f9     stdu    arg_z,-8(vsp)
  4c:   3b de ff f8     addi    save1,save1,-8
  50:   eb af 00 08     ld      save2,8(vsp)
  54:   eb ef 00 00     ld      save0,0(vsp)
  58:   39 ef 00 10     addi    vsp,vsp,16
  5c:   2c 3e 00 00     cmpdi   save1,0
  60:   40 82 ff e0     bne     40 <fib+0x40>
  64:   7f f7 fb 78     mr      arg_z,save0
  68:   eb af 00 00     ld      save2,0(vsp)
  6c:   eb cf 00 08     ld      save1,8(vsp)
  70:   eb ef 00 10     ld      save0,16(vsp)
  74:   e9 c1 00 10     ld      loc_pc,16(sp)
  78:   e9 e1 00 18     ld      vsp,24(sp)
  7c:   ea 01 00 08     ld      fn,8(sp)
  80:   7d c8 03 a6     mtlr    loc_pc
  84:   38 21 00 20     addi    sp,sp,32
  88:   4e 80 00 20     blr
  8c:   83 a9 ff e0     lwz     save2,-32(allocptr)

[3] --------

        // Test harness for the compiler-generated fib code (GNU as syntax, ARM64).
        .global main
        .extern printf
        .text

        // printf format string; main supplies the %ld argument in x1.
fmt:    .asciz  "fibonacci(23) = %ld\n"

        .balign 4                       // realign to 4 bytes after the string, before the instruction words

// Compiler output for fixnum-fibonacci, emitted as raw 32-bit instruction
// words (the same 31 words listed earlier in this message).
// main passes 23 << 3 in x15 and reads the result back from x15.
fib:    .inst   0xAA1E03F8, 0xA9BF7BF9, 0xF9402F80, 0xEB2063FF, 0x5400004A
        .inst   0xD4207D00, 0xF81F8F2F, 0xF81F8F30, 0xF81F8F31, 0xF81F8F32
        .inst   0xD2800112, 0xD2800010, 0xF9400F31, 0x14000008, 0xF81F8F30
        .inst   0x8B10024F, 0xF81F8F2F, 0xD1002231, 0xF9400732, 0xF9400330
        .inst   0x91004339, 0xF100023F, 0x54FFFF01, 0xAA1003EF, 0xF9400332
        .inst   0xF9400731, 0xF9400B30, 0xA9407BF9, 0xAA1803FE, 0x910043FF
        .inst   0xD65F03C0

// main: set up the registers and stack areas that fib reads, call fib with
// 23 << 3 in x15, then print the result shifted back right by 3.
// x25 and x28 are the registers the fib listing [1] calls vsp and rcontext;
// x24 is the one it calls loc-pc.
main:   mov     x0, sp                  // x0 = sp on entry (becomes x25 below)
        stp     fp, lr, [sp, -64]!      // save fp/lr; the 64-byte frame leaves 48 spare bytes above them
        mov     fp, sp
        stp     x24, x25, [sp, -16]!    // preserve x24 and x25, which are overwritten below and by fib
        mov     x25, x0                 // x25 = entry sp, so pushes made through x25 land in the 48 spare bytes
        sub     x0, sp, 32              // bound = sp after the next 16-byte push here plus the 16-byte push in fib
        stp     x0, x28, [sp, -16]!     // store the bound at [sp] and preserve x28 at [sp, 8]
        sub     x28, sp, 88             // make [x28, 88] address the bound just stored at [sp]
        mov     x15, 23 << 3            // argument: 23 shifted left by 3
        bl      fib
        asr     x1, x15, 3              // undo the shift; the value becomes the second printf argument
        adr     x0, fmt                 // first printf argument: address of the format string
        bl      printf
        ldp     x0, x28, [sp], 16       // restore x28 (x0 receives the no-longer-needed bound word)
        ldp     x24, x25, [sp], 16      // restore x24 and x25
        ldp     fp, lr, [sp], 64        // restore fp/lr and release the 64-byte frame
        mov     x0, 0                   // return value 0
        ret