suckless.org

       zmul.c - libzahl - big integer library
 (HTM) git clone git://git.suckless.org/libzahl
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       zmul.c (1628B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include "internals.h"
            3 
            4 
            5 static inline void
            6 zmul_ll_single_char(z_t a, z_t b, z_t c)
            7 {
            8         ENSURE_SIZE(a, 1);
            9         a->used = 1;
           10         a->chars[0] = b->chars[0] * c->chars[0];
           11         SET_SIGNUM(a, 1);
           12 }
           13 
           14 void
           15 zmul_ll(z_t a, z_t b, z_t c)
           16 {
           17         /*
           18          * Karatsuba algorithm
           19          * 
           20          * Basically, this is how you were taught to multiply large numbers
           21          * by hand in school: 4010⋅3020 = (4000 + 10)(3000 + 20) =
           22          * = 40⋅30⋅10⁴ + (40⋅20 + 30⋅10)⋅10² + 10⋅20, but the middle is
           23          * optimised to only one multiplication:
           24          * 40⋅20 + 30⋅10 = (40 + 10)(30 + 20) − 40⋅30 − 10⋅20.
           25          * This optimisation is crucial. Without it, the algorithm with
           26          * run in O(n²).
           27          */
           28 
           29 #define z2 c_low
           30 #define z1 b_low
           31 #define z0 a
           32         size_t m, m2;
           33         z_t b_high, b_low, c_high, c_low;
           34 
           35         if (unlikely(zzero1(b, c))) {
           36                 SET_SIGNUM(a, 0);
           37                 return;
           38         }
           39 
           40         m = zbits(b);
           41         m2 = b == c ? m : zbits(c);
           42 
           43         if (m + m2 <= BITS_PER_CHAR) {
           44                 zmul_ll_single_char(a, b, c);
           45                 return;
           46         }
           47 
           48         m = MAX(m, m2);
           49         m2 = m >> 1;
           50 
           51         zinit_temp(b_high);
           52         zinit_temp(b_low);
           53         zinit_temp(c_high);
           54         zinit_temp(c_low);
           55 
           56         zsplit_pz(b_high, b_low, b, m2);
           57         zsplit_pz(c_high, c_low, c, m2);
           58 
           59 
           60         zmul_ll(z0, b_low, c_low);
           61         zadd_unsigned_assign(b_low, b_high);
           62         zadd_unsigned_assign(c_low, c_high);
           63         zmul_ll(z1, b_low, c_low);
           64         zmul_ll(z2, b_high, c_high);
           65 
           66         zsub_nonnegative_assign(z1, z0);
           67         zsub_nonnegative_assign(z1, z2);
           68 
           69         zlsh(z1, z1, m2);
           70         m2 <<= 1;
           71         zlsh(z2, z2, m2);
           72         zadd_unsigned_assign(a, z1);
           73         zadd_unsigned_assign(a, z2);
           74 
           75 
           76         zfree_temp(c_low);
           77         zfree_temp(c_high);
           78         zfree_temp(b_low);
           79         zfree_temp(b_high);
           80 }