[Openmcl-devel] Bignums using GMP.

Fri Feb 12 15:22:12 PST 2010

Attached are two files which together cause Closure CL to use GMP
for bignum arithmetic.  The routines are specific to 64-bit Amd
(and Intel) architecture.  For relatively small bignums current
Closure CL multiplication is quite competitive and the routines
use it, however for really large bignums GMP is faster.  For
division (truncate), gcd and integer square root GMP is faster
even for smallest bignums so the routines just use GMP.  In fact,
fixnum isqrt is slower than isqrt for small bignums via GMP.

I use GMP "mpn" routines.  Because the "mpn" interface is quite
restrictive I use two helper C routines.  On Lisp side there
are LAP routines to do reasonably fact copy and replacements
for Closuse CL bignums routines.  Like originals the replacements
are careful to stack allocate temporaries.

To try the routines compile the C file to get shared library
(as indicated at the top of the file), then compile the lisp
file.  After starting Closure CL load GMP library, then the
helper library and the fasl.  After that Closure CL will use
new routines.

To give you some idea of speed, on 2.4 GHz Core 2 Quadro,
gcd of 90949470177292823791503906250000000 and
7178979876918525887702490000000 takes 11us using current
version and 1us using GMP (I used GMP 5.0, GMP 4.2
is slower).

Comments welcame.  In particular what is best method to
make sure that multiplication works OK after starting
from saved image.  For my purpose I modified Closure CL
kernel makefile to link in my C routines and GMP.  But
without linking GMP to Lisp kernel there is risk that
replacement bignum routines will try to use GMP before
it is loaded (IIUC saved core image will contain
replacement bignum routines, but have no access to GMP
before it is explicitely loaded).

I wonder if there is interst to include such routines in
Closure CL (possibly as optional feature, if depending on
GMP is considered undesirable).
-- 
                              Waldek Hebisch
hebisch at math.uni.wroc.pl 
-------------- next part --------------
;;; First do:
;;; (ccl::open-shared-library "/usr/lib64/libgmp.so.3.6.0")
;;; (ccl::open-shared-library "./mulp.so")

(locally
  (declare (optimize (speed 3) (safety 0)))

(CCL::defx86lapfunction my-bignum-copy-from-lisp
    ((x arg_x) (y arg_y) (l arg_z))
    (movq ($ 0) (% imm1))
    (movq (@ x8664::misc-data-offset (% y)) (% imm2))
   @loop
    (movq (@ x8664::misc-data-offset (% x) (% imm1)) (% imm0))
    (movq (% imm0) (@ (% imm2) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop)
    (single-value-return))

(CCL::defx86lapfunction my-bignum-copy-negate-from-lisp
    ((x arg_x) (y arg_y) (l arg_z))
    (movq ($ 8) (% temp0))
    (andq (% l) (% temp0))
    (xorq (% temp0) (% l))
    (shrq ($ 1) (% l))
    (xorq (% imm1) (% imm1))
    (movq (@ x8664::misc-data-offset (% y)) (% imm2))
    (clc)
   @loop1
    (movq (@ x8664::misc-data-offset (% x) (% imm1)) (% imm0))
    (cmpq ($ 0) (% imm0))
    (jnz @negate)
    (movq ($ 0) (@ (% imm2) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop1)
    (cmpq ($ 0) (% temp0))
    (jz @return)
    (movslq (@ x8664::misc-data-offset (% x) (% imm1)) (% imm0))
    (xorq (% temp0) (% temp0))
   @negate
    (neg (% imm0))
    (movq (% imm0) (@ (% imm2) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jle @finish) 
   @loop2
    (movq (@ x8664::misc-data-offset (% x) (% imm1)) (% imm0))
    (notq (% imm0))
    (movq (% imm0) (@ (% imm2) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop2)
   @finish
    (cmpq ($ 0) (% temp0))
    (jz @return)
    (movslq (@ x8664::misc-data-offset (% x) (% imm1)) (% imm0))
    (notq (% imm0))
    (movq (% imm0) (@ (% imm2) (% imm1)))
   @return
    (single-value-return))

(CCL::defx86lapfunction my-bignum-copy-to-lisp 
    ((x arg_x) (y arg_y) (l arg_z)) 
    (movq ($ 0) (% imm1))
    (movq (@ x8664::misc-data-offset (% x)) (% imm2))
   @loop
    (movq (@ (% imm2) (% imm1)) (% imm0))
    (movq (% imm0) (@ x8664::misc-data-offset (% y) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop)
    (single-value-return))

(CCL::defx86lapfunction my-bignum-copy-negate-to-lisp
    ((x arg_x) (y arg_y) (l arg_z))
    (xorq (% imm1) (% imm1))
    (movq (@ x8664::misc-data-offset (% x)) (% imm2))
   @loop1
    (movq (@ (% imm2) (% imm1)) (% imm0))
    (cmpq ($ 0) (% imm0))
    (jnz @negate)
    (movq ($ 0) (@ x8664::misc-data-offset (% y) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop1)
    (jmp @return)
   @negate
    (neg (% imm0))
    (movq (% imm0) (@ x8664::misc-data-offset (% y) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jz @return)
   @loop2
    (movq (@ (% imm2) (% imm1)) (% imm0))
    (notq (% imm0))
    (movq (% imm0) (@ x8664::misc-data-offset (% y) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop2)
   @return
    (single-value-return))

(CCL::defx86lapfunction my-bignum-copy
    ((x arg_x) (y arg_y) (l arg_z))
    (movq ($ 0) (% imm1))
   @loop
    (movq (@ x8664::misc-data-offset (% x) (% imm1)) (% imm0))
    (movq (% imm0) (@ x8664::misc-data-offset (% y) (% imm1)))
    (addq ($ 8) (% imm1))
    (cmpq (% imm1) (% l))
    (jnz @loop)
    (single-value-return))

(eval-when (:load-toplevel)
    (SET-DEVELOPMENT-ENVIRONMENT T))

;;; Tests
;;; (bignum-isqrt (expt 10 50))
;;; (bignum-isqrt (expt 2 127))
(defun bignum-isqrt (x)
  (let* ((xl (ceiling (ccl::%bignum-length x) 2))
         (rl (ceiling xl 2))
         (xlb (ash xl 3))
         (rlb (ash rl 3))
         (rl2 (+ rl rl))
         (res (ccl::%alloc-misc rl2 X8664::SUBTAG-BIGNUM)))
        (declare (type fixnum xl rl xlb rlb rl2))
      (ccl::%stack-block ((tx xlb)
                          (tr rlb))
         (my-bignum-copy-from-lisp x tx xl)
         (ccl::external-call "gmp_isqrt"
             :address tr :long rl 
             :address tx :long xl)
         (my-bignum-copy-to-lisp tr res rl)
         (ccl::%normalize-bignum-2 t res))))

;;; based on version from sbcl "From discussion on comp.lang.lisp
;;; and Akira Kurihara."
(defun ccl::isqrt (n)
    (if (or (not (integerp n))(minusp n))
        (ccl::report-bad-arg n '(integer 0)))
    (if (not (fixnump n)) (return-from ccl::isqrt (bignum-isqrt n)))
    (locally
        (declare (type fixnum n))
        (if (<= n 24)
            (cond ((> n 15) 4)
                  ((> n  8) 3)
                  ((> n  3) 2)
                  ((> n  0) 1)
                  (t 0))
        (let* ((n-len-quarter (ash (integer-length n) -2))
               (n-half (ash n (- (ash n-len-quarter 1))))
               (n-half-isqrt (isqrt n-half))
               (init-value (ash (1+ n-half-isqrt) n-len-quarter)))
            (declare (type fixnum n-len-quarter n-half 
                              n-half-isqrt init-value))
            (loop
                (let ((iterated-value
                        (ash (+ init-value (truncate n init-value)) -1)))
                    (unless (< iterated-value init-value)
                         (return init-value))
                    (setq init-value iterated-value)))))))

(if (not (fboundp 'orig-multiply-bignums))
    (setf (symbol-function 'orig-multiply-bignums)
          (symbol-function 'ccl::multiply-bignums)))

(defun ccl::multiply-bignums(x y)
  (let ((xl0 (ccl::%bignum-length x))
        (yl0 (ccl::%bignum-length y)))
       (declare (type fixnum xl0 yl0))
  (if (< xl0 30)
      (return-from ccl::multiply-bignums (orig-multiply-bignums x y)))
  (if (< yl0 30)
      (return-from ccl::multiply-bignums (orig-multiply-bignums y x)))
  (if (< (+ xl0 yl0) 120)
      (return-from ccl::multiply-bignums (orig-multiply-bignums x y)))
  (let* ((xl (ceiling xl0 2))
         (yl (ceiling yl0 2))
         (x-plusp nil)
         (y-plusp nil)
         (rl (+ xl yl))
         (rl2 (+ rl rl))
         (xlb 0)
         (ylb 0)
         (itmp 0)
         (tmp nil)
         (rlb 0)
         (res (ccl::%alloc-misc rl2 X8664::SUBTAG-BIGNUM)))
        (declare (type fixnum xl yl rl rl2 xlb ylb rlb itmp))
        ;;; XXX Does not work
        ;;; (declare (dynamic-extent res))
      (if (< xl yl)
          (progn
              (setf itmp xl)
              (setf xl yl)
              (setf yl itmp)
              (setf itmp xl0)
              (setf xl0 yl0)
              (setf yl0 itmp)
              (setf tmp x)
              (setf x y)
              (setf y tmp)))
      (setf xlb (ash xl 3))
      (setf ylb (ash yl 3))
      (setf rlb (+ xlb ylb))
      (setf x-plusp (ccl::%bignum-0-or-plusp x xl0))
      (setf y-plusp (ccl::%bignum-0-or-plusp y yl0))
      (ccl::%stack-block ((tx xlb)
                          (ty ylb)
                          (tr rlb))
         (if x-plusp
             (my-bignum-copy-from-lisp x tx xl)
             (my-bignum-copy-negate-from-lisp x tx xl0))
         (if y-plusp
             (my-bignum-copy-from-lisp y ty yl)
             (my-bignum-copy-negate-from-lisp y ty yl0))
         (ccl::external-call "__gmpn_mul"
                  :address tr
                  :address tx :long xl
                  :address ty :long yl)
         (if (eq x-plusp y-plusp)
             (my-bignum-copy-to-lisp tr res rl)
             (my-bignum-copy-negate-to-lisp tr res rl))
         (ccl::%normalize-bignum-2 t res)))))

(defun ccl::%positive-bignum-bignum-gcd(x y)
  (let* ((xl (ceiling (ccl::%bignum-length x) 2))
         (yl (ceiling (ccl::%bignum-length y) 2))
         (rl (if (< xl yl) xl yl))
         (xlb (ash xl 3))
         (ylb (ash yl 3))
         (rlb (+ xlb ylb))
         (res nil))
        (declare (type fixnum xl yl rl xlb ylb rlb))
      (ccl::%stack-block ((tx xlb)
                          (ty ylb)
                          (tr rlb))
         (my-bignum-copy-from-lisp x tx xl)
         (my-bignum-copy-from-lisp y ty yl)
         (setf rl (ccl::external-call "gmp_gcd"
                     :address tr
                     :address tx :long xl
                     :address ty :long yl
                     :long))
         (setf res (ccl::%alloc-misc (+ rl rl) X8664::SUBTAG-BIGNUM))
         (my-bignum-copy-to-lisp tr res rl)
         (ccl::%normalize-bignum-2 t res))))
;;; Tests
;;;   (truncate -51520943106947801344 17339521378867071488)
;;;
(defun ccl::bignum-truncate (x y &optional norem)
    (declare (ignore norem))
    (let* ((x-plusp (ccl::%bignum-0-or-plusp x (ccl::%bignum-length x)))
           (y-plusp (ccl::%bignum-0-or-plusp y (ccl::%bignum-length y)))
           (x (if x-plusp x (ccl::negate-bignum x nil)))
           (y (if y-plusp y (ccl::negate-bignum y nil)))
           (yl0 (ccl::%bignum-length y))
           (xl (ceiling (ccl::%bignum-length x) 2))
           (yl1 (ceiling yl0 2))
           (yl (if (eq 0 (ccl::%typed-miscref :bignum y (- yl0 1)))
                   (ash yl0 -1)
                   yl1))
           (ql (max (+ 1 (- xl yl)) 1))
           (q nil)
           (r nil)
           (xlb (ash xl 3))
           (ylb (ash yl 3))
           (qlb (ash ql 3)))
      (declare (type fixnum xl yl yl0 yl1 ql xlb ylb qlb))
      (if (plusp (ccl::bignum-compare y x))
        (progn
               (setf r (ccl::%alloc-misc (+ xl xl) X8664::SUBTAG-BIGNUM))
               (my-bignum-copy x r xl)
               (setf q 0))
        (ccl::%stack-block ((tx xlb)
                          (ty ylb)
                          (tq qlb)
                          (tr ylb))
         (my-bignum-copy-from-lisp x tx xl)
         (my-bignum-copy-from-lisp y ty yl)
         (ccl::external-call "__gmpn_tdiv_qr"
                     :address tq :address tr :long 0
                     :address tx :long xl
                     :address ty :long yl)
         (setf q (ccl::%alloc-misc (+ ql ql) X8664::SUBTAG-BIGNUM))
         (setf r (ccl::%alloc-misc (+ yl1 yl) X8664::SUBTAG-BIGNUM))
         (my-bignum-copy-to-lisp tq q ql)
         (my-bignum-copy-to-lisp tr r yl)
         (if (> yl1 yl)
             (setf (ccl::%typed-miscref :bignum r (- yl0 1)) 0))
         (setf q (ccl::%normalize-bignum-2 t q))
         (setf r (ccl::%normalize-bignum-2 t r))))
      (let ((quotient (cond ((eq x-plusp y-plusp) q)
                            ((typep q 'fixnum) (the fixnum (- q)))
                            (t (ccl::negate-bignum-in-place q))))
            (rem (cond (x-plusp r)
                       ((typep r 'fixnum) (the fixnum (- r)))
                       (t (ccl::negate-bignum-in-place r)))))
            (values (if (typep quotient 'fixnum)
                        quotient
                        (ccl::%normalize-bignum-2 t quotient))
                    (if (typep rem 'fixnum)
                        rem
                        (ccl::%normalize-bignum-2 t rem))))))
(eval-when (:load-toplevel)
   (SET-USER-ENVIRONMENT T))
)
-------------- next part --------------
/*
Compilation:

 gcc -fPIC -O3 -Wall -shared mulp.c -o mulp.so

*/

#include <gmp.h>
#include <stdio.h>

void
gmp_isqrt(mp_limb_t *rp, mp_size_t rn, /* const */ mp_limb_t *sp, mp_size_t sn)
{
    if (sp[sn - 1] == 0) {
        sn--;
        rp[rn - 1] = 0;
    }
    mpn_sqrtrem(rp, 0, sp, sn);
}

int
count_zeros(mp_limb_t x)
{
    int res = 0;
    if (!(x & ((1l<<32) - 1))) {
        res = 32;
        x >>= 32;
    }
    if (!(x & ((1l<<16) - 1))) {
        res += 16;
        x >>= 16;
    }
    if (!(x & ((1l<<8) - 1))) {
        res += 8;
        x >>= 8;
    }
    if (!(x & ((1l<<4) - 1))) {
        res += 4 ;
        x >>= 4;
    }
    if (!(x & ((1l<<2) - 1))) {
        res += 2;
        x >>= 2;
    }
    if (!(x & 1)) {
        res += 1;
    }
    return res;
}

mp_size_t
gmp_gcd(mp_limb_t * rp, mp_limb_t *s1p,
       mp_size_t s1n, mp_limb_t *s2p, mp_size_t s2n)
{
    mp_limb_t rc;
    mp_size_t res;
    mp_size_t k1;
    mp_size_t k;
    mp_size_t z1l;
    mp_size_t z2l;
    mp_size_t zp;
    int z1;
    int z2;
    k1 = 1;
    while(k1 <= s1n && s1p[s1n - k1] == 0) k1++;
    s1n -= (k1 - 1);
    k1 = 0;
    while(k1 < s1n && s1p[k1] == 0) k1++;
    s1p += k1;
    s1n -= k1;
    z1l = k1*64;
    if (s1n == 0) {
        *rp = 0;
        return 0;
    }
    k1 = 1;
    while(k1 <= s2n && s2p[s2n - k1] == 0) k1++;
    s2n -= (k1 - 1);
    k1 = 0;
    while(k1 < s2n && s2p[k1] == 0) k1++;
    s2p += k1;
    s2n -= k1;
    z2l = k1*64;
    if (s2n == 0) {
        *rp = 0;
        return 0;
    }
    z2 = count_zeros(*s2p);
    mpn_rshift(s2p, s2p, s2n, z2);
    if (s2p[s2n - 1] == 0) {
        s2n--;
    }
    z1 = count_zeros(*s1p);
    mpn_rshift(s1p, s1p, s1n, z1);
    if (s1p[s1n - 1] == 0) {
        s1n--;
    }
    z1l += z1;
    z2l += z2;
    if (s2n > s1n || ((s2n == s1n) && s2p[s2n - 1] > s1p[s1n -1])) {
        mp_size_t tmp = s2n;
        mp_limb_t * ptmp = s2p;
        s2n = s1n;
        s2p = s1p;
        s1n = tmp;
        s1p = ptmp;
    }
    zp = (z1l < z2l)? z1l : z2l;
    k = zp / 64;
    zp = zp % 64;
    for(k1 = 0; k1 < k; k1++) {
        rp[k1] = 0;
    }
    rp += k;
    res = mpn_gcd(rp, s1p, s1n, s2p, s2n);
    rc = mpn_lshift(rp, rp, res, zp);
    if (rc) {
        rp[res] = rc;
        res++;
    }
    if (rp[res - 1] & (1ul << 63)) {
        rp[res] = 0;
        res++;
    }
    return res+k;
}

#if 0
int
main(void)
{
    int i;
    mp_limb_t num1[3] = {0};
    mp_limb_t num2[3] = {0};
    mp_limb_t res[3] = {0};
    num2[1] = 15;
    num1[0] = 20;
    gmp_gcd(res, num1, 3, num2, 3);
    printf("%lu\n", res[0]);
    return 0;
}
#endif