/*
   poly1305_donna32.h

   Poly1305 implementation using 32-bit * 32-bit = 64-bit multiplication
   and 64-bit addition.
*/
  5. #if defined(_MSC_VER)
  6. # define POLY1305_NOINLINE __declspec(noinline)
  7. #elif defined(__clang__) || defined(__GNUC__)
  8. # define POLY1305_NOINLINE __attribute__((noinline))
  9. #else
  10. # define POLY1305_NOINLINE
  11. #endif
  12. #include "private/common.h"
  13. #define poly1305_block_size 16
  14. /* 17 + sizeof(unsigned long long) + 14*sizeof(unsigned long) */
  15. typedef struct poly1305_state_internal_t {
  16. unsigned long r[5];
  17. unsigned long h[5];
  18. unsigned long pad[4];
  19. unsigned long long leftover;
  20. unsigned char buffer[poly1305_block_size];
  21. unsigned char final;
  22. } poly1305_state_internal_t;
  23. static void
  24. poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
  25. {
  26. /* r &= 0xffffffc0ffffffc0ffffffc0fffffff - wiped after finalization */
  27. st->r[0] = (LOAD32_LE(&key[0])) & 0x3ffffff;
  28. st->r[1] = (LOAD32_LE(&key[3]) >> 2) & 0x3ffff03;
  29. st->r[2] = (LOAD32_LE(&key[6]) >> 4) & 0x3ffc0ff;
  30. st->r[3] = (LOAD32_LE(&key[9]) >> 6) & 0x3f03fff;
  31. st->r[4] = (LOAD32_LE(&key[12]) >> 8) & 0x00fffff;
  32. /* h = 0 */
  33. st->h[0] = 0;
  34. st->h[1] = 0;
  35. st->h[2] = 0;
  36. st->h[3] = 0;
  37. st->h[4] = 0;
  38. /* save pad for later */
  39. st->pad[0] = LOAD32_LE(&key[16]);
  40. st->pad[1] = LOAD32_LE(&key[20]);
  41. st->pad[2] = LOAD32_LE(&key[24]);
  42. st->pad[3] = LOAD32_LE(&key[28]);
  43. st->leftover = 0;
  44. st->final = 0;
  45. }
  46. static void
  47. poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
  48. unsigned long long bytes)
  49. {
  50. const unsigned long hibit = (st->final) ? 0UL : (1UL << 24); /* 1 << 128 */
  51. unsigned long r0, r1, r2, r3, r4;
  52. unsigned long s1, s2, s3, s4;
  53. unsigned long h0, h1, h2, h3, h4;
  54. unsigned long long d0, d1, d2, d3, d4;
  55. unsigned long c;
  56. r0 = st->r[0];
  57. r1 = st->r[1];
  58. r2 = st->r[2];
  59. r3 = st->r[3];
  60. r4 = st->r[4];
  61. s1 = r1 * 5;
  62. s2 = r2 * 5;
  63. s3 = r3 * 5;
  64. s4 = r4 * 5;
  65. h0 = st->h[0];
  66. h1 = st->h[1];
  67. h2 = st->h[2];
  68. h3 = st->h[3];
  69. h4 = st->h[4];
  70. while (bytes >= poly1305_block_size) {
  71. /* h += m[i] */
  72. h0 += (LOAD32_LE(m + 0)) & 0x3ffffff;
  73. h1 += (LOAD32_LE(m + 3) >> 2) & 0x3ffffff;
  74. h2 += (LOAD32_LE(m + 6) >> 4) & 0x3ffffff;
  75. h3 += (LOAD32_LE(m + 9) >> 6) & 0x3ffffff;
  76. h4 += (LOAD32_LE(m + 12) >> 8) | hibit;
  77. /* h *= r */
  78. d0 = ((unsigned long long) h0 * r0) + ((unsigned long long) h1 * s4) +
  79. ((unsigned long long) h2 * s3) + ((unsigned long long) h3 * s2) +
  80. ((unsigned long long) h4 * s1);
  81. d1 = ((unsigned long long) h0 * r1) + ((unsigned long long) h1 * r0) +
  82. ((unsigned long long) h2 * s4) + ((unsigned long long) h3 * s3) +
  83. ((unsigned long long) h4 * s2);
  84. d2 = ((unsigned long long) h0 * r2) + ((unsigned long long) h1 * r1) +
  85. ((unsigned long long) h2 * r0) + ((unsigned long long) h3 * s4) +
  86. ((unsigned long long) h4 * s3);
  87. d3 = ((unsigned long long) h0 * r3) + ((unsigned long long) h1 * r2) +
  88. ((unsigned long long) h2 * r1) + ((unsigned long long) h3 * r0) +
  89. ((unsigned long long) h4 * s4);
  90. d4 = ((unsigned long long) h0 * r4) + ((unsigned long long) h1 * r3) +
  91. ((unsigned long long) h2 * r2) + ((unsigned long long) h3 * r1) +
  92. ((unsigned long long) h4 * r0);
  93. /* (partial) h %= p */
  94. c = (unsigned long) (d0 >> 26);
  95. h0 = (unsigned long) d0 & 0x3ffffff;
  96. d1 += c;
  97. c = (unsigned long) (d1 >> 26);
  98. h1 = (unsigned long) d1 & 0x3ffffff;
  99. d2 += c;
  100. c = (unsigned long) (d2 >> 26);
  101. h2 = (unsigned long) d2 & 0x3ffffff;
  102. d3 += c;
  103. c = (unsigned long) (d3 >> 26);
  104. h3 = (unsigned long) d3 & 0x3ffffff;
  105. d4 += c;
  106. c = (unsigned long) (d4 >> 26);
  107. h4 = (unsigned long) d4 & 0x3ffffff;
  108. h0 += c * 5;
  109. c = (h0 >> 26);
  110. h0 = h0 & 0x3ffffff;
  111. h1 += c;
  112. m += poly1305_block_size;
  113. bytes -= poly1305_block_size;
  114. }
  115. st->h[0] = h0;
  116. st->h[1] = h1;
  117. st->h[2] = h2;
  118. st->h[3] = h3;
  119. st->h[4] = h4;
  120. }
  121. static POLY1305_NOINLINE void
  122. poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
  123. {
  124. unsigned long h0, h1, h2, h3, h4, c;
  125. unsigned long g0, g1, g2, g3, g4;
  126. unsigned long long f;
  127. unsigned long mask;
  128. /* process the remaining block */
  129. if (st->leftover) {
  130. unsigned long long i = st->leftover;
  131. st->buffer[i++] = 1;
  132. for (; i < poly1305_block_size; i++) {
  133. st->buffer[i] = 0;
  134. }
  135. st->final = 1;
  136. poly1305_blocks(st, st->buffer, poly1305_block_size);
  137. }
  138. /* fully carry h */
  139. h0 = st->h[0];
  140. h1 = st->h[1];
  141. h2 = st->h[2];
  142. h3 = st->h[3];
  143. h4 = st->h[4];
  144. c = h1 >> 26;
  145. h1 = h1 & 0x3ffffff;
  146. h2 += c;
  147. c = h2 >> 26;
  148. h2 = h2 & 0x3ffffff;
  149. h3 += c;
  150. c = h3 >> 26;
  151. h3 = h3 & 0x3ffffff;
  152. h4 += c;
  153. c = h4 >> 26;
  154. h4 = h4 & 0x3ffffff;
  155. h0 += c * 5;
  156. c = h0 >> 26;
  157. h0 = h0 & 0x3ffffff;
  158. h1 += c;
  159. /* compute h + -p */
  160. g0 = h0 + 5;
  161. c = g0 >> 26;
  162. g0 &= 0x3ffffff;
  163. g1 = h1 + c;
  164. c = g1 >> 26;
  165. g1 &= 0x3ffffff;
  166. g2 = h2 + c;
  167. c = g2 >> 26;
  168. g2 &= 0x3ffffff;
  169. g3 = h3 + c;
  170. c = g3 >> 26;
  171. g3 &= 0x3ffffff;
  172. g4 = h4 + c - (1UL << 26);
  173. /* select h if h < p, or h + -p if h >= p */
  174. mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
  175. g0 &= mask;
  176. g1 &= mask;
  177. g2 &= mask;
  178. g3 &= mask;
  179. g4 &= mask;
  180. mask = ~mask;
  181. h0 = (h0 & mask) | g0;
  182. h1 = (h1 & mask) | g1;
  183. h2 = (h2 & mask) | g2;
  184. h3 = (h3 & mask) | g3;
  185. h4 = (h4 & mask) | g4;
  186. /* h = h % (2^128) */
  187. h0 = ((h0) | (h1 << 26)) & 0xffffffff;
  188. h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
  189. h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
  190. h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
  191. /* mac = (h + pad) % (2^128) */
  192. f = (unsigned long long) h0 + st->pad[0];
  193. h0 = (unsigned long) f;
  194. f = (unsigned long long) h1 + st->pad[1] + (f >> 32);
  195. h1 = (unsigned long) f;
  196. f = (unsigned long long) h2 + st->pad[2] + (f >> 32);
  197. h2 = (unsigned long) f;
  198. f = (unsigned long long) h3 + st->pad[3] + (f >> 32);
  199. h3 = (unsigned long) f;
  200. STORE32_LE(mac + 0, (uint32_t) h0);
  201. STORE32_LE(mac + 4, (uint32_t) h1);
  202. STORE32_LE(mac + 8, (uint32_t) h2);
  203. STORE32_LE(mac + 12, (uint32_t) h3);
  204. /* zero out the state */
  205. sodium_memzero((void *) st, sizeof *st);
  206. }