fe51_invert.c 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
/*
 * This file is adapted from amd64-51/fe25519_invert.c:
 * loops of repeated squarings are replaced by fe51_nsquare calls
 * for better performance.
 */
  5. #include "fe51.h"
  6. #ifdef HAVE_AVX_ASM
  7. #define fe51_square(x, y) fe51_nsquare(x, y, 1)
  8. void
  9. fe51_invert(fe51 *r, const fe51 *x)
  10. {
  11. fe51 z2;
  12. fe51 z9;
  13. fe51 z11;
  14. fe51 z2_5_0;
  15. fe51 z2_10_0;
  16. fe51 z2_20_0;
  17. fe51 z2_50_0;
  18. fe51 z2_100_0;
  19. fe51 t;
  20. /* 2 */ fe51_square(&z2,x);
  21. /* 4 */ fe51_square(&t,&z2);
  22. /* 8 */ fe51_square(&t,&t);
  23. /* 9 */ fe51_mul(&z9,&t,x);
  24. /* 11 */ fe51_mul(&z11,&z9,&z2);
  25. /* 22 */ fe51_square(&t,&z11);
  26. /* 2^5 - 2^0 = 31 */ fe51_mul(&z2_5_0,&t,&z9);
  27. /* 2^10 - 2^5 */ fe51_nsquare(&t,&z2_5_0, 5);
  28. /* 2^10 - 2^0 */ fe51_mul(&z2_10_0,&t,&z2_5_0);
  29. /* 2^20 - 2^10 */ fe51_nsquare(&t,&z2_10_0, 10);
  30. /* 2^20 - 2^0 */ fe51_mul(&z2_20_0,&t,&z2_10_0);
  31. /* 2^40 - 2^20 */ fe51_nsquare(&t,&z2_20_0, 20);
  32. /* 2^40 - 2^0 */ fe51_mul(&t,&t,&z2_20_0);
  33. /* 2^50 - 2^10 */ fe51_nsquare(&t,&t,10);
  34. /* 2^50 - 2^0 */ fe51_mul(&z2_50_0,&t,&z2_10_0);
  35. /* 2^100 - 2^50 */ fe51_nsquare(&t,&z2_50_0, 50);
  36. /* 2^100 - 2^0 */ fe51_mul(&z2_100_0,&t,&z2_50_0);
  37. /* 2^200 - 2^100 */ fe51_nsquare(&t,&z2_100_0, 100);
  38. /* 2^200 - 2^0 */ fe51_mul(&t,&t,&z2_100_0);
  39. /* 2^250 - 2^50 */ fe51_nsquare(&t,&t, 50);
  40. /* 2^250 - 2^0 */ fe51_mul(&t,&t,&z2_50_0);
  41. /* 2^255 - 2^5 */ fe51_nsquare(&t,&t,5);
  42. /* 2^255 - 21 */ fe51_mul(r,&t,&z11);
  43. }
  44. #endif