poly1305_donna64.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*
  2. poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication
  3. and 128 bit addition
  4. */
  5. #include "private/common.h"
  /*
   * 128-bit arithmetic helpers used by the block function.
   *
   *   MUL(out, x, y)  out = x * y, computed as a full 128-bit product
   *   ADD(out, in)    out += in   (128-bit accumulator += 128-bit value)
   *   ADDLO(out, in)  out += in   (128-bit accumulator += 64-bit value)
   *   SHR(in, shift)  (in >> shift), truncated to unsigned long long
   *   LO(in)          in, truncated to unsigned long long
   *
   * Every parameter and every expansion is parenthesized so that the macros
   * stay correct when they are handed compound expressions or are used inside
   * a larger expression.
   */
  #define MUL(out, x, y) ((out) = ((uint128_t) (x) * (y)))
  #define ADD(out, in) ((out) += (in))
  #define ADDLO(out, in) ((out) += (in))
  #define SHR(in, shift) ((unsigned long long) ((in) >> (shift)))
  #define LO(in) ((unsigned long long) (in))
  /*
   * POLY1305_NOINLINE: function attribute asking the compiler not to inline.
   * MSVC uses __declspec, clang/GCC use __attribute__; on any other compiler
   * the macro expands to nothing.
   */
  #ifdef _MSC_VER
  # define POLY1305_NOINLINE __declspec(noinline)
  #elif defined(__GNUC__) || defined(__clang__)
  # define POLY1305_NOINLINE __attribute__((noinline))
  #else
  # define POLY1305_NOINLINE
  #endif
  /* Input is absorbed in blocks of this many bytes. */
  #define poly1305_block_size 16

  /*
   * Internal Poly1305 state.
   *
   * Size: 17 + sizeof(unsigned long long) + 8*sizeof(unsigned long long)
   * (16 buffer bytes + 1 flag byte, the leftover counter, and the eight
   * limbs/words of r, h and pad), not counting any trailing padding.
   */
  struct poly1305_state_internal_t {
      /* key-derived multiplier r as three limbs, set by poly1305_init() */
      unsigned long long r[3];
      /* running accumulator h as three limbs (44, 44 and 42 bits once reduced) */
      unsigned long long h[3];
      /* key bytes 16..31 as two 64-bit words, added to h in poly1305_finish() */
      unsigned long long pad[2];
      /* index of the first unused byte in buffer, as read by poly1305_finish() */
      unsigned long long leftover;
      /* holding area for a partial block */
      unsigned char buffer[poly1305_block_size];
      /* non-zero: poly1305_blocks() does not add the 2^128 bit to the block */
      unsigned char final;
  };

  typedef struct poly1305_state_internal_t poly1305_state_internal_t;
  28. static void
  29. poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
  30. {
  31. unsigned long long t0, t1;
  32. /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
  33. t0 = LOAD64_LE(&key[0]);
  34. t1 = LOAD64_LE(&key[8]);
  35. /* wiped after finalization */
  36. st->r[0] = (t0) & 0xffc0fffffff;
  37. st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
  38. st->r[2] = ((t1 >> 24)) & 0x00ffffffc0f;
  39. /* h = 0 */
  40. st->h[0] = 0;
  41. st->h[1] = 0;
  42. st->h[2] = 0;
  43. /* save pad for later */
  44. st->pad[0] = LOAD64_LE(&key[16]);
  45. st->pad[1] = LOAD64_LE(&key[24]);
  46. st->leftover = 0;
  47. st->final = 0;
  48. }
  49. static void
  50. poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
  51. unsigned long long bytes)
  52. {
  53. const unsigned long long hibit =
  54. (st->final) ? 0ULL : (1ULL << 40); /* 1 << 128 */
  55. unsigned long long r0, r1, r2;
  56. unsigned long long s1, s2;
  57. unsigned long long h0, h1, h2;
  58. unsigned long long c;
  59. uint128_t d0, d1, d2, d;
  60. r0 = st->r[0];
  61. r1 = st->r[1];
  62. r2 = st->r[2];
  63. h0 = st->h[0];
  64. h1 = st->h[1];
  65. h2 = st->h[2];
  66. s1 = r1 * (5 << 2);
  67. s2 = r2 * (5 << 2);
  68. while (bytes >= poly1305_block_size) {
  69. unsigned long long t0, t1;
  70. /* h += m[i] */
  71. t0 = LOAD64_LE(&m[0]);
  72. t1 = LOAD64_LE(&m[8]);
  73. h0 += t0 & 0xfffffffffff;
  74. h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffff;
  75. h2 += (((t1 >> 24)) & 0x3ffffffffff) | hibit;
  76. /* h *= r */
  77. MUL(d0, h0, r0);
  78. MUL(d, h1, s2);
  79. ADD(d0, d);
  80. MUL(d, h2, s1);
  81. ADD(d0, d);
  82. MUL(d1, h0, r1);
  83. MUL(d, h1, r0);
  84. ADD(d1, d);
  85. MUL(d, h2, s2);
  86. ADD(d1, d);
  87. MUL(d2, h0, r2);
  88. MUL(d, h1, r1);
  89. ADD(d2, d);
  90. MUL(d, h2, r0);
  91. ADD(d2, d);
  92. /* (partial) h %= p */
  93. c = SHR(d0, 44);
  94. h0 = LO(d0) & 0xfffffffffff;
  95. ADDLO(d1, c);
  96. c = SHR(d1, 44);
  97. h1 = LO(d1) & 0xfffffffffff;
  98. ADDLO(d2, c);
  99. c = SHR(d2, 42);
  100. h2 = LO(d2) & 0x3ffffffffff;
  101. h0 += c * 5;
  102. c = (h0 >> 44);
  103. h0 = h0 & 0xfffffffffff;
  104. h1 += c;
  105. m += poly1305_block_size;
  106. bytes -= poly1305_block_size;
  107. }
  108. st->h[0] = h0;
  109. st->h[1] = h1;
  110. st->h[2] = h2;
  111. }
  112. static POLY1305_NOINLINE void
  113. poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
  114. {
  115. unsigned long long h0, h1, h2, c;
  116. unsigned long long g0, g1, g2;
  117. unsigned long long t0, t1;
  118. unsigned long long mask;
  119. /* process the remaining block */
  120. if (st->leftover) {
  121. unsigned long long i = st->leftover;
  122. st->buffer[i] = 1;
  123. for (i = i + 1; i < poly1305_block_size; i++) {
  124. st->buffer[i] = 0;
  125. }
  126. st->final = 1;
  127. poly1305_blocks(st, st->buffer, poly1305_block_size);
  128. }
  129. /* fully carry h */
  130. h0 = st->h[0];
  131. h1 = st->h[1];
  132. h2 = st->h[2];
  133. c = h1 >> 44;
  134. h1 &= 0xfffffffffff;
  135. h2 += c;
  136. c = h2 >> 42;
  137. h2 &= 0x3ffffffffff;
  138. h0 += c * 5;
  139. c = h0 >> 44;
  140. h0 &= 0xfffffffffff;
  141. h1 += c;
  142. c = h1 >> 44;
  143. h1 &= 0xfffffffffff;
  144. h2 += c;
  145. c = h2 >> 42;
  146. h2 &= 0x3ffffffffff;
  147. h0 += c * 5;
  148. c = h0 >> 44;
  149. h0 &= 0xfffffffffff;
  150. h1 += c;
  151. /* compute h + -p */
  152. g0 = h0 + 5;
  153. c = g0 >> 44;
  154. g0 &= 0xfffffffffff;
  155. g1 = h1 + c;
  156. c = g1 >> 44;
  157. g1 &= 0xfffffffffff;
  158. g2 = h2 + c - (1ULL << 42);
  159. /* select h if h < p, or h + -p if h >= p */
  160. mask = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
  161. g0 &= mask;
  162. g1 &= mask;
  163. g2 &= mask;
  164. mask = ~mask;
  165. h0 = (h0 & mask) | g0;
  166. h1 = (h1 & mask) | g1;
  167. h2 = (h2 & mask) | g2;
  168. /* h = (h + pad) */
  169. t0 = st->pad[0];
  170. t1 = st->pad[1];
  171. h0 += ((t0) &0xfffffffffff);
  172. c = (h0 >> 44);
  173. h0 &= 0xfffffffffff;
  174. h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
  175. c = (h1 >> 44);
  176. h1 &= 0xfffffffffff;
  177. h2 += (((t1 >> 24)) & 0x3ffffffffff) + c;
  178. h2 &= 0x3ffffffffff;
  179. /* mac = h % (2^128) */
  180. h0 = (h0) | (h1 << 44);
  181. h1 = (h1 >> 20) | (h2 << 24);
  182. STORE64_LE(&mac[0], h0);
  183. STORE64_LE(&mac[8], h1);
  184. /* zero out the state */
  185. sodium_memzero((void *) st, sizeof *st);
  186. }