#ifndef HEADER_BN_LCL_H
#define HEADER_BN_LCL_H

#include <openssl/bn.h>
/*
 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp
 * functions
 *
 * For window size 'w' (w >= 2) and a random 'b'-bit exponent, the number
 * of multiplications is a constant plus on average 2^(w-1) + (b-w)/(w+1);
 * 2^(w-1) is the cost of precomputing the table (we actually need entries
 * only for windows that have the lowest bit set), and (b-w)/(w+1) is an
 * approximation for the expected number of w-bit windows, not counting
 * the first one.
 */
#if 1
#define BN_window_bits_for_exponent_size(b) \
                ((b) > 671 ? 6 : \
                 (b) > 239 ? 5 : \
                 (b) >  79 ? 4 : \
                 (b) >  23 ? 3 : 1)
#else
/*
 * Old SSLeay/OpenSSL table: maximum window size was 5, so this table
 * differs for b==1024, but coincides for other interesting values
 * (b==160, b==512).
 */
#define BN_window_bits_for_exponent_size(b) \
                ((b) > 255 ? 5 : \
                 (b) > 127 ? 4 : \
                 (b) >  16 ? 3 : 1)
#endif
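/*
 * Illustration (not part of the original header, disabled): the thresholds
 * above are the break-even points of the cost model in the comment -- for a
 * b-bit exponent and window size w, roughly 2^(w-1) + (b-w)/(w+1)
 * multiplications.  Solving cost(b,5) == cost(b,6) gives b == 671, and
 * cost(b,4) == cost(b,5) gives b == 239, matching the table.
 */
#if 0
#include <stdio.h>
static double sliding_window_cost(int b, int w)
{
        /* 2^(w-1) table entries to precompute, plus ~(b-w)/(w+1) windows */
        return (double)(1 << (w - 1)) + (double)(b - w) / (double)(w + 1);
}
static void window_demo(void)
{
        /* 160-, 512- and 1024-bit exponents pick w = 4, 5 and 6 */
        printf("%d %d %d\n",
               BN_window_bits_for_exponent_size(160),
               BN_window_bits_for_exponent_size(512),
               BN_window_bits_for_exponent_size(1024));
}
#endif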
/*
 * BN_mod_exp_mont_consttime is based on the assumption that the L1 data
 * cache line width of the target processor is at least the following value.
 */
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH      ( 64 )
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK       (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
/*
 * Window sizes optimized for fixed window size modular exponentiation
 * algorithm (BN_mod_exp_mont_consttime).  To achieve the security goals
 * of BN_mod_exp_mont_consttime, the maximum size of the window must not
 * exceed log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
 */
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

# define BN_window_bits_for_ctime_exponent_size(b) \
                ((b) > 937 ? 6 : \
                 (b) > 306 ? 5 : \
                 (b) >  89 ? 4 : \
                 (b) >  22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE     (6)

#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

# define BN_window_bits_for_ctime_exponent_size(b) \
                ((b) > 306 ? 5 : \
                 (b) >  89 ? 4 : \
                 (b) >  22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE     (5)

#endif
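/*
 * Illustration (not part of the original header, disabled): the fixed-window
 * table holds 2^w entries, and the constant-time code lays them out so every
 * lookup touches the same cache lines whatever the (secret) index; that only
 * works while 2^w <= MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, i.e.
 * w <= log2(cache line width): 6 for 64-byte lines, 5 for 32-byte lines.
 */
#if 0
/* hypothetical compile-time check of the constraint above */
typedef char ctime_window_fits_cache_line
        [(1 << BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE)
          <= MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ? 1 : -1];
#endif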
#define BN_MULL_SIZE_NORMAL                     (16)
#define BN_MUL_RECURSIVE_SIZE_NORMAL            (16)
#define BN_SQR_RECURSIVE_SIZE_NORMAL            (16)
#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL        (32)
#define BN_MONT_CTX_SET_SIZE_WORD               (64)
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
 * BN_UMULT_HIGH section.
 *
 * The product of two N-bit numbers is 2*N bits wide.  If the compiler has
 * no 2*N integer type, every N*N multiplication has to be replaced by four
 * (N/2)*(N/2) multiplications plus shifts and additions, which is a severe
 * performance penalty.  Some CPUs (most notably Alpha, PowerPC and x86_64)
 * provide a separate instruction that computes the upper half of the
 * product; where the compiler supports inline assembler, BN_UMULT_HIGH
 * exposes that instruction to C.
 */
# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
#  if defined(__DECC)
#   include <c_asm.h>
#   define BN_UMULT_HIGH(a,b)   (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
#  elif defined(__GNUC__)
#   define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret;          \
        asm ("umulh %1,%2,%0"           \
             : "=r"(ret)                \
             : "r"(a), "r"(b));         \
        ret;                    })
#  endif                        /* compiler */
# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
#  if defined(__GNUC__)
#   define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret;          \
        asm ("mulhdu %0,%1,%2"          \
             : "=r"(ret)                \
             : "r"(a), "r"(b));         \
        ret;                    })
#  endif                        /* compiler */
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
#  if defined(__GNUC__)
#   define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret,discard;  \
        asm ("mulq %3"                  \
             : "=a"(discard),"=d"(ret)  \
             : "a"(a), "g"(b)           \
             : "cc");                   \
        ret;                    })
#   define BN_UMULT_LOHI(low,high,a,b)  \
        asm ("mulq %3"                  \
             : "=a"(low),"=d"(high)     \
             : "a"(a),"g"(b)            \
             : "cc");
#  endif
# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
#  if defined(_MSC_VER) && _MSC_VER>=1400
unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
                          unsigned __int64 *h);
#   pragma intrinsic(__umulh,_umul128)
#   define BN_UMULT_HIGH(a,b)           __umulh((a),(b))
#   define BN_UMULT_LOHI(low,high,a,b)  ((low)=_umul128((a),(b),&(high)))
#  endif
# endif                         /* cpu */
#endif                          /* OPENSSL_NO_ASM */
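/*
 * Illustration (not part of the original header, disabled): what
 * BN_UMULT_HIGH replaces.  Without a 2*N-bit type or an umulh-style
 * instruction, the high word of a product has to be assembled from four
 * (N/2)*(N/2) products, as the comment above describes.  A hypothetical
 * portable sketch (the function name is ours):
 */
#if 0
static BN_ULONG umult_high_portable(BN_ULONG a, BN_ULONG b)
{
        BN_ULONG al = a & BN_MASK2l, ah = a >> BN_BITS4;
        BN_ULONG bl = b & BN_MASK2l, bh = b >> BN_BITS4;
        BN_ULONG lo = al * bl;
        BN_ULONG m1 = ah * bl + (lo >> BN_BITS4);       /* cannot overflow */
        BN_ULONG m2 = al * bh + (m1 & BN_MASK2l);       /* cannot overflow */

        return ah * bh + (m1 >> BN_BITS4) + (m2 >> BN_BITS4);
}
#endif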
#define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
#define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
#ifdef BN_DEBUG_RAND
/* Scrub the unused words between top and dmax (zero fill assumed here). */
#define bn_clear_top2max(a) \
        { \
        int ind = (a)->dmax - (a)->top; \
        BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
        for (; ind != 0; ind--) \
                *(++ftl) = 0; \
        }
#else
#define bn_clear_top2max(a)
#endif
#ifdef BN_LLONG
/* r and c are updated in place: r gets the low word of a*w + r + c, c the high word */
#define mul_add(r,a,w,c) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)w * (a) + (r) + (c); \
        (r)= Lw(t); \
        (c)= Hw(t); \
        }

#define mul(r,a,w,c) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)w * (a) + (c); \
        (r)= Lw(t); \
        (c)= Hw(t); \
        }

#define sqr(r0,r1,a) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)(a)*(a); \
        (r0)=Lw(t); \
        (r1)=Hw(t); \
        }
#elif defined(BN_UMULT_LOHI)
#define mul_add(r,a,w,c) {              \
        BN_ULONG high,low,ret,tmp=(a);  \
        ret =  (r);                     \
        BN_UMULT_LOHI(low,high,w,tmp);  \
        ret += (c);                     \
        (c) =  (ret<(c))?1:0;           \
        (c) += high;                    \
        ret += low;                     \
        (c) += (ret<low)?1:0;           \
        (r) =  ret;                     \
        }

#define mul(r,a,w,c)    {               \
        BN_ULONG high,low,ret,ta=(a);   \
        BN_UMULT_LOHI(low,high,w,ta);   \
        ret =  low + (c);               \
        (c) =  high;                    \
        (c) += (ret<low)?1:0;           \
        (r) =  ret;                     \
        }

#define sqr(r0,r1,a)    {               \
        BN_ULONG tmp=(a);               \
        BN_UMULT_LOHI(r0,r1,tmp,tmp);   \
        }
#elif defined(BN_UMULT_HIGH)
#define mul_add(r,a,w,c) {              \
        BN_ULONG high,low,ret,tmp=(a);  \
        ret =  (r);                     \
        high=  BN_UMULT_HIGH(w,tmp);    \
        ret += (c);                     \
        low =  (w) * tmp;               \
        (c) =  (ret<(c))?1:0;           \
        (c) += high;                    \
        ret += low;                     \
        (c) += (ret<low)?1:0;           \
        (r) =  ret;                     \
        }

#define mul(r,a,w,c)    {               \
        BN_ULONG high,low,ret,ta=(a);   \
        low =  (w) * ta;                \
        high=  BN_UMULT_HIGH(w,ta);     \
        ret =  low + (c);               \
        (c) =  high;                    \
        (c) += (ret<low)?1:0;           \
        (r) =  ret;                     \
        }

#define sqr(r0,r1,a)    {               \
        BN_ULONG tmp=(a);               \
        (r0) = tmp * tmp;               \
        (r1) = BN_UMULT_HIGH(tmp,tmp);  \
        }
#else
/*************************************************************
 * No long long type
 */

#define LBITS(a)        ((a)&BN_MASK2l)
#define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l)
#define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2)

#define LLBITS(a)       ((a)&BN_MASKl)
#define LHBITS(a)       (((a)>>BN_BITS2)&BN_MASKl)
#define LL2HBITS(a)     ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
#define mul64(l,h,bl,bh) \
        { \
        BN_ULONG m,m1,lt,ht; \
 \
        lt=l; \
        ht=h; \
        m =(bh)*(lt); \
        lt=(bl)*(lt); \
        m1=(bl)*(ht); \
        ht =(bh)*(ht); \
        m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
        ht+=HBITS(m); \
        m1=L2HBITS(m); \
        lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
        (l)=lt; \
        (h)=ht; \
        }
#define sqr64(lo,ho,in) \
        { \
        BN_ULONG l,h,m; \
 \
        h=(in); \
        l=LBITS(h); \
        h=HBITS(h); \
        m =(l)*(h); \
        l*=l; \
        h*=h; \
        h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
        m =(m&BN_MASK2l)<<(BN_BITS4+1); \
        l=(l+m)&BN_MASK2; if (l < m) h++; \
        (lo)=l; \
        (ho)=h; \
        }
#define mul_add(r,a,bl,bh,c) { \
        BN_ULONG l,h; \
 \
        h= (a); \
        l=LBITS(h); \
        h=HBITS(h); \
        mul64(l,h,(bl),(bh)); \
 \
        /* non-multiply part */ \
        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
        (c)=(r); \
        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
        (c)=h&BN_MASK2; \
        (r)=l; \
        }
#define mul(r,a,bl,bh,c) { \
        BN_ULONG l,h; \
 \
        h= (a); \
        l=LBITS(h); \
        h=HBITS(h); \
        mul64(l,h,(bl),(bh)); \
 \
        /* non-multiply part */ \
        l+=(c); if ((l&BN_MASK2) < (c)) h++; \
        (c)=h&BN_MASK2; \
        (r)=l&BN_MASK2; \
        }
#endif                          /* !BN_LLONG */
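/*
 * Illustration (not part of the original header, disabled): whichever
 * variant is selected, mul_add is the inner step of schoolbook
 * multiplication.  A hypothetical loop in the style of bn_mul_add_words,
 * using the four-argument form from the BN_LLONG / BN_UMULT_* branches:
 */
#if 0
static BN_ULONG mul_add_words_sketch(BN_ULONG *rp, const BN_ULONG *ap,
                                     int num, BN_ULONG w)
{
        BN_ULONG c = 0;
        int i;

        for (i = 0; i < num; i++)
                mul_add(rp[i], ap[i], w, c);    /* rp[i] += ap[i]*w + carry */
        return c;                               /* carry out of the top word */
}
#endif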
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
                      int cl, int dl);
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                      int dna, int dnb, BN_ULONG *t);
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
                           int n, int tna, int tnb, BN_ULONG *t);
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                          BN_ULONG *t);
void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
                 BN_ULONG *t);
BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                           int cl, int dl);
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                           int cl, int dl);
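/*
 * Illustration (not part of the original header, disabled): the comba and
 * *_words routines operate on raw word vectors, least significant word
 * first.  A hypothetical sketch multiplying two 4-word values into an
 * 8-word product:
 */
#if 0
static void comba4_demo(void)
{
        BN_ULONG a[4] = { 7, 0, 0, 0 };         /* the value 7 */
        BN_ULONG b[4] = { 6, 0, 0, 0 };         /* the value 6 */
        BN_ULONG r[8];                          /* 2n words for the product */

        bn_mul_comba4(r, a, b);                 /* r[0] == 42, r[1..7] == 0 */
}
#endif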