/* ed25519_ref10_fe_25_5.h */
  1. #include <string.h>
  2. #include "private/common.h"
  3. #include "utils.h"
  4. /*
  5. h = 0
  6. */
  7. static inline void
  8. fe25519_0(fe25519 h)
  9. {
  10. memset(&h[0], 0, 10 * sizeof h[0]);
  11. }
  12. /*
  13. h = 1
  14. */
  15. static inline void
  16. fe25519_1(fe25519 h)
  17. {
  18. h[0] = 1;
  19. h[1] = 0;
  20. memset(&h[2], 0, 8 * sizeof h[0]);
  21. }
  22. /*
  23. h = f + g
  24. Can overlap h with f or g.
  25. *
  26. Preconditions:
  27. |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  28. |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  29. *
  30. Postconditions:
  31. |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  32. */
  33. static inline void
  34. fe25519_add(fe25519 h, const fe25519 f, const fe25519 g)
  35. {
  36. int32_t h0 = f[0] + g[0];
  37. int32_t h1 = f[1] + g[1];
  38. int32_t h2 = f[2] + g[2];
  39. int32_t h3 = f[3] + g[3];
  40. int32_t h4 = f[4] + g[4];
  41. int32_t h5 = f[5] + g[5];
  42. int32_t h6 = f[6] + g[6];
  43. int32_t h7 = f[7] + g[7];
  44. int32_t h8 = f[8] + g[8];
  45. int32_t h9 = f[9] + g[9];
  46. h[0] = h0;
  47. h[1] = h1;
  48. h[2] = h2;
  49. h[3] = h3;
  50. h[4] = h4;
  51. h[5] = h5;
  52. h[6] = h6;
  53. h[7] = h7;
  54. h[8] = h8;
  55. h[9] = h9;
  56. }
  57. /*
  58. h = f - g
  59. Can overlap h with f or g.
  60. *
  61. Preconditions:
  62. |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  63. |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  64. *
  65. Postconditions:
  66. |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  67. */
  68. static void
  69. fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g)
  70. {
  71. int32_t h0 = f[0] - g[0];
  72. int32_t h1 = f[1] - g[1];
  73. int32_t h2 = f[2] - g[2];
  74. int32_t h3 = f[3] - g[3];
  75. int32_t h4 = f[4] - g[4];
  76. int32_t h5 = f[5] - g[5];
  77. int32_t h6 = f[6] - g[6];
  78. int32_t h7 = f[7] - g[7];
  79. int32_t h8 = f[8] - g[8];
  80. int32_t h9 = f[9] - g[9];
  81. h[0] = h0;
  82. h[1] = h1;
  83. h[2] = h2;
  84. h[3] = h3;
  85. h[4] = h4;
  86. h[5] = h5;
  87. h[6] = h6;
  88. h[7] = h7;
  89. h[8] = h8;
  90. h[9] = h9;
  91. }
  92. /*
  93. h = -f
  94. *
  95. Preconditions:
  96. |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  97. *
  98. Postconditions:
  99. |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  100. */
  101. static inline void
  102. fe25519_neg(fe25519 h, const fe25519 f)
  103. {
  104. int32_t h0 = -f[0];
  105. int32_t h1 = -f[1];
  106. int32_t h2 = -f[2];
  107. int32_t h3 = -f[3];
  108. int32_t h4 = -f[4];
  109. int32_t h5 = -f[5];
  110. int32_t h6 = -f[6];
  111. int32_t h7 = -f[7];
  112. int32_t h8 = -f[8];
  113. int32_t h9 = -f[9];
  114. h[0] = h0;
  115. h[1] = h1;
  116. h[2] = h2;
  117. h[3] = h3;
  118. h[4] = h4;
  119. h[5] = h5;
  120. h[6] = h6;
  121. h[7] = h7;
  122. h[8] = h8;
  123. h[9] = h9;
  124. }
  125. /*
  126. Replace (f,g) with (g,g) if b == 1;
  127. replace (f,g) with (f,g) if b == 0.
  128. *
  129. Preconditions: b in {0,1}.
  130. */
  131. static void
  132. fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b)
  133. {
  134. const uint32_t mask = (uint32_t) (-(int32_t) b);
  135. int32_t f0 = f[0];
  136. int32_t f1 = f[1];
  137. int32_t f2 = f[2];
  138. int32_t f3 = f[3];
  139. int32_t f4 = f[4];
  140. int32_t f5 = f[5];
  141. int32_t f6 = f[6];
  142. int32_t f7 = f[7];
  143. int32_t f8 = f[8];
  144. int32_t f9 = f[9];
  145. int32_t x0 = f0 ^ g[0];
  146. int32_t x1 = f1 ^ g[1];
  147. int32_t x2 = f2 ^ g[2];
  148. int32_t x3 = f3 ^ g[3];
  149. int32_t x4 = f4 ^ g[4];
  150. int32_t x5 = f5 ^ g[5];
  151. int32_t x6 = f6 ^ g[6];
  152. int32_t x7 = f7 ^ g[7];
  153. int32_t x8 = f8 ^ g[8];
  154. int32_t x9 = f9 ^ g[9];
  155. x0 &= mask;
  156. x1 &= mask;
  157. x2 &= mask;
  158. x3 &= mask;
  159. x4 &= mask;
  160. x5 &= mask;
  161. x6 &= mask;
  162. x7 &= mask;
  163. x8 &= mask;
  164. x9 &= mask;
  165. f[0] = f0 ^ x0;
  166. f[1] = f1 ^ x1;
  167. f[2] = f2 ^ x2;
  168. f[3] = f3 ^ x3;
  169. f[4] = f4 ^ x4;
  170. f[5] = f5 ^ x5;
  171. f[6] = f6 ^ x6;
  172. f[7] = f7 ^ x7;
  173. f[8] = f8 ^ x8;
  174. f[9] = f9 ^ x9;
  175. }
  176. static void
  177. fe25519_cswap(fe25519 f, fe25519 g, unsigned int b)
  178. {
  179. const uint32_t mask = (uint32_t) (-(int64_t) b);
  180. int32_t f0 = f[0];
  181. int32_t f1 = f[1];
  182. int32_t f2 = f[2];
  183. int32_t f3 = f[3];
  184. int32_t f4 = f[4];
  185. int32_t f5 = f[5];
  186. int32_t f6 = f[6];
  187. int32_t f7 = f[7];
  188. int32_t f8 = f[8];
  189. int32_t f9 = f[9];
  190. int32_t g0 = g[0];
  191. int32_t g1 = g[1];
  192. int32_t g2 = g[2];
  193. int32_t g3 = g[3];
  194. int32_t g4 = g[4];
  195. int32_t g5 = g[5];
  196. int32_t g6 = g[6];
  197. int32_t g7 = g[7];
  198. int32_t g8 = g[8];
  199. int32_t g9 = g[9];
  200. int32_t x0 = f0 ^ g0;
  201. int32_t x1 = f1 ^ g1;
  202. int32_t x2 = f2 ^ g2;
  203. int32_t x3 = f3 ^ g3;
  204. int32_t x4 = f4 ^ g4;
  205. int32_t x5 = f5 ^ g5;
  206. int32_t x6 = f6 ^ g6;
  207. int32_t x7 = f7 ^ g7;
  208. int32_t x8 = f8 ^ g8;
  209. int32_t x9 = f9 ^ g9;
  210. x0 &= mask;
  211. x1 &= mask;
  212. x2 &= mask;
  213. x3 &= mask;
  214. x4 &= mask;
  215. x5 &= mask;
  216. x6 &= mask;
  217. x7 &= mask;
  218. x8 &= mask;
  219. x9 &= mask;
  220. f[0] = f0 ^ x0;
  221. f[1] = f1 ^ x1;
  222. f[2] = f2 ^ x2;
  223. f[3] = f3 ^ x3;
  224. f[4] = f4 ^ x4;
  225. f[5] = f5 ^ x5;
  226. f[6] = f6 ^ x6;
  227. f[7] = f7 ^ x7;
  228. f[8] = f8 ^ x8;
  229. f[9] = f9 ^ x9;
  230. g[0] = g0 ^ x0;
  231. g[1] = g1 ^ x1;
  232. g[2] = g2 ^ x2;
  233. g[3] = g3 ^ x3;
  234. g[4] = g4 ^ x4;
  235. g[5] = g5 ^ x5;
  236. g[6] = g6 ^ x6;
  237. g[7] = g7 ^ x7;
  238. g[8] = g8 ^ x8;
  239. g[9] = g9 ^ x9;
  240. }
  241. /*
  242. h = f
  243. */
  244. static inline void
  245. fe25519_copy(fe25519 h, const fe25519 f)
  246. {
  247. int32_t f0 = f[0];
  248. int32_t f1 = f[1];
  249. int32_t f2 = f[2];
  250. int32_t f3 = f[3];
  251. int32_t f4 = f[4];
  252. int32_t f5 = f[5];
  253. int32_t f6 = f[6];
  254. int32_t f7 = f[7];
  255. int32_t f8 = f[8];
  256. int32_t f9 = f[9];
  257. h[0] = f0;
  258. h[1] = f1;
  259. h[2] = f2;
  260. h[3] = f3;
  261. h[4] = f4;
  262. h[5] = f5;
  263. h[6] = f6;
  264. h[7] = f7;
  265. h[8] = f8;
  266. h[9] = f9;
  267. }
  268. /*
  269. return 1 if f is in {1,3,5,...,q-2}
  270. return 0 if f is in {0,2,4,...,q-1}
  271. Preconditions:
  272. |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  273. */
  274. static inline int
  275. fe25519_isnegative(const fe25519 f)
  276. {
  277. unsigned char s[32];
  278. fe25519_tobytes(s, f);
  279. return s[0] & 1;
  280. }
  281. /*
  282. return 1 if f == 0
  283. return 0 if f != 0
  284. Preconditions:
  285. |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  286. */
  287. static inline int
  288. fe25519_iszero(const fe25519 f)
  289. {
  290. unsigned char s[32];
  291. fe25519_tobytes(s, f);
  292. return sodium_is_zero(s, 32);
  293. }
  294. /*
  295. h = f * g
  296. Can overlap h with f or g.
  297. *
  298. Preconditions:
  299. |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  300. |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  301. *
  302. Postconditions:
  303. |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  304. */
  305. /*
  306. Notes on implementation strategy:
  307. *
  308. Using schoolbook multiplication.
  309. Karatsuba would save a little in some cost models.
  310. *
  311. Most multiplications by 2 and 19 are 32-bit precomputations;
  312. cheaper than 64-bit postcomputations.
  313. *
  314. There is one remaining multiplication by 19 in the carry chain;
  315. one *19 precomputation can be merged into this,
  316. but the resulting data flow is considerably less clean.
  317. *
  318. There are 12 carries below.
  319. 10 of them are 2-way parallelizable and vectorizable.
  320. Can get away with 11 carries, but then data flow is much deeper.
  321. *
  322. With tighter constraints on inputs can squeeze carries into int32.
  323. */
  324. static void
  325. fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g)
  326. {
  327. int32_t f0 = f[0];
  328. int32_t f1 = f[1];
  329. int32_t f2 = f[2];
  330. int32_t f3 = f[3];
  331. int32_t f4 = f[4];
  332. int32_t f5 = f[5];
  333. int32_t f6 = f[6];
  334. int32_t f7 = f[7];
  335. int32_t f8 = f[8];
  336. int32_t f9 = f[9];
  337. int32_t g0 = g[0];
  338. int32_t g1 = g[1];
  339. int32_t g2 = g[2];
  340. int32_t g3 = g[3];
  341. int32_t g4 = g[4];
  342. int32_t g5 = g[5];
  343. int32_t g6 = g[6];
  344. int32_t g7 = g[7];
  345. int32_t g8 = g[8];
  346. int32_t g9 = g[9];
  347. int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
  348. int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
  349. int32_t g3_19 = 19 * g3;
  350. int32_t g4_19 = 19 * g4;
  351. int32_t g5_19 = 19 * g5;
  352. int32_t g6_19 = 19 * g6;
  353. int32_t g7_19 = 19 * g7;
  354. int32_t g8_19 = 19 * g8;
  355. int32_t g9_19 = 19 * g9;
  356. int32_t f1_2 = 2 * f1;
  357. int32_t f3_2 = 2 * f3;
  358. int32_t f5_2 = 2 * f5;
  359. int32_t f7_2 = 2 * f7;
  360. int32_t f9_2 = 2 * f9;
  361. int64_t f0g0 = f0 * (int64_t) g0;
  362. int64_t f0g1 = f0 * (int64_t) g1;
  363. int64_t f0g2 = f0 * (int64_t) g2;
  364. int64_t f0g3 = f0 * (int64_t) g3;
  365. int64_t f0g4 = f0 * (int64_t) g4;
  366. int64_t f0g5 = f0 * (int64_t) g5;
  367. int64_t f0g6 = f0 * (int64_t) g6;
  368. int64_t f0g7 = f0 * (int64_t) g7;
  369. int64_t f0g8 = f0 * (int64_t) g8;
  370. int64_t f0g9 = f0 * (int64_t) g9;
  371. int64_t f1g0 = f1 * (int64_t) g0;
  372. int64_t f1g1_2 = f1_2 * (int64_t) g1;
  373. int64_t f1g2 = f1 * (int64_t) g2;
  374. int64_t f1g3_2 = f1_2 * (int64_t) g3;
  375. int64_t f1g4 = f1 * (int64_t) g4;
  376. int64_t f1g5_2 = f1_2 * (int64_t) g5;
  377. int64_t f1g6 = f1 * (int64_t) g6;
  378. int64_t f1g7_2 = f1_2 * (int64_t) g7;
  379. int64_t f1g8 = f1 * (int64_t) g8;
  380. int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
  381. int64_t f2g0 = f2 * (int64_t) g0;
  382. int64_t f2g1 = f2 * (int64_t) g1;
  383. int64_t f2g2 = f2 * (int64_t) g2;
  384. int64_t f2g3 = f2 * (int64_t) g3;
  385. int64_t f2g4 = f2 * (int64_t) g4;
  386. int64_t f2g5 = f2 * (int64_t) g5;
  387. int64_t f2g6 = f2 * (int64_t) g6;
  388. int64_t f2g7 = f2 * (int64_t) g7;
  389. int64_t f2g8_19 = f2 * (int64_t) g8_19;
  390. int64_t f2g9_19 = f2 * (int64_t) g9_19;
  391. int64_t f3g0 = f3 * (int64_t) g0;
  392. int64_t f3g1_2 = f3_2 * (int64_t) g1;
  393. int64_t f3g2 = f3 * (int64_t) g2;
  394. int64_t f3g3_2 = f3_2 * (int64_t) g3;
  395. int64_t f3g4 = f3 * (int64_t) g4;
  396. int64_t f3g5_2 = f3_2 * (int64_t) g5;
  397. int64_t f3g6 = f3 * (int64_t) g6;
  398. int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
  399. int64_t f3g8_19 = f3 * (int64_t) g8_19;
  400. int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
  401. int64_t f4g0 = f4 * (int64_t) g0;
  402. int64_t f4g1 = f4 * (int64_t) g1;
  403. int64_t f4g2 = f4 * (int64_t) g2;
  404. int64_t f4g3 = f4 * (int64_t) g3;
  405. int64_t f4g4 = f4 * (int64_t) g4;
  406. int64_t f4g5 = f4 * (int64_t) g5;
  407. int64_t f4g6_19 = f4 * (int64_t) g6_19;
  408. int64_t f4g7_19 = f4 * (int64_t) g7_19;
  409. int64_t f4g8_19 = f4 * (int64_t) g8_19;
  410. int64_t f4g9_19 = f4 * (int64_t) g9_19;
  411. int64_t f5g0 = f5 * (int64_t) g0;
  412. int64_t f5g1_2 = f5_2 * (int64_t) g1;
  413. int64_t f5g2 = f5 * (int64_t) g2;
  414. int64_t f5g3_2 = f5_2 * (int64_t) g3;
  415. int64_t f5g4 = f5 * (int64_t) g4;
  416. int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
  417. int64_t f5g6_19 = f5 * (int64_t) g6_19;
  418. int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
  419. int64_t f5g8_19 = f5 * (int64_t) g8_19;
  420. int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
  421. int64_t f6g0 = f6 * (int64_t) g0;
  422. int64_t f6g1 = f6 * (int64_t) g1;
  423. int64_t f6g2 = f6 * (int64_t) g2;
  424. int64_t f6g3 = f6 * (int64_t) g3;
  425. int64_t f6g4_19 = f6 * (int64_t) g4_19;
  426. int64_t f6g5_19 = f6 * (int64_t) g5_19;
  427. int64_t f6g6_19 = f6 * (int64_t) g6_19;
  428. int64_t f6g7_19 = f6 * (int64_t) g7_19;
  429. int64_t f6g8_19 = f6 * (int64_t) g8_19;
  430. int64_t f6g9_19 = f6 * (int64_t) g9_19;
  431. int64_t f7g0 = f7 * (int64_t) g0;
  432. int64_t f7g1_2 = f7_2 * (int64_t) g1;
  433. int64_t f7g2 = f7 * (int64_t) g2;
  434. int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
  435. int64_t f7g4_19 = f7 * (int64_t) g4_19;
  436. int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
  437. int64_t f7g6_19 = f7 * (int64_t) g6_19;
  438. int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
  439. int64_t f7g8_19 = f7 * (int64_t) g8_19;
  440. int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
  441. int64_t f8g0 = f8 * (int64_t) g0;
  442. int64_t f8g1 = f8 * (int64_t) g1;
  443. int64_t f8g2_19 = f8 * (int64_t) g2_19;
  444. int64_t f8g3_19 = f8 * (int64_t) g3_19;
  445. int64_t f8g4_19 = f8 * (int64_t) g4_19;
  446. int64_t f8g5_19 = f8 * (int64_t) g5_19;
  447. int64_t f8g6_19 = f8 * (int64_t) g6_19;
  448. int64_t f8g7_19 = f8 * (int64_t) g7_19;
  449. int64_t f8g8_19 = f8 * (int64_t) g8_19;
  450. int64_t f8g9_19 = f8 * (int64_t) g9_19;
  451. int64_t f9g0 = f9 * (int64_t) g0;
  452. int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
  453. int64_t f9g2_19 = f9 * (int64_t) g2_19;
  454. int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
  455. int64_t f9g4_19 = f9 * (int64_t) g4_19;
  456. int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
  457. int64_t f9g6_19 = f9 * (int64_t) g6_19;
  458. int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
  459. int64_t f9g8_19 = f9 * (int64_t) g8_19;
  460. int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
  461. int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 +
  462. f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
  463. int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 +
  464. f7g4_19 + f8g3_19 + f9g2_19;
  465. int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 +
  466. f7g5_38 + f8g4_19 + f9g3_38;
  467. int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 +
  468. f7g6_19 + f8g5_19 + f9g4_19;
  469. int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 +
  470. f7g7_38 + f8g6_19 + f9g5_38;
  471. int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 +
  472. f8g7_19 + f9g6_19;
  473. int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 +
  474. f7g9_38 + f8g8_19 + f9g7_38;
  475. int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 +
  476. f8g9_19 + f9g8_19;
  477. int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 +
  478. f8g0 + f9g9_38;
  479. int64_t h9 =
  480. f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
  481. int64_t carry0;
  482. int64_t carry1;
  483. int64_t carry2;
  484. int64_t carry3;
  485. int64_t carry4;
  486. int64_t carry5;
  487. int64_t carry6;
  488. int64_t carry7;
  489. int64_t carry8;
  490. int64_t carry9;
  491. /*
  492. |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
  493. i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
  494. |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
  495. i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
  496. */
  497. carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
  498. h1 += carry0;
  499. h0 -= carry0 * ((uint64_t) 1L << 26);
  500. carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
  501. h5 += carry4;
  502. h4 -= carry4 * ((uint64_t) 1L << 26);
  503. /* |h0| <= 2^25 */
  504. /* |h4| <= 2^25 */
  505. /* |h1| <= 1.71*2^59 */
  506. /* |h5| <= 1.71*2^59 */
  507. carry1 = (h1 + (int64_t)(1L << 24)) >> 25;
  508. h2 += carry1;
  509. h1 -= carry1 * ((uint64_t) 1L << 25);
  510. carry5 = (h5 + (int64_t)(1L << 24)) >> 25;
  511. h6 += carry5;
  512. h5 -= carry5 * ((uint64_t) 1L << 25);
  513. /* |h1| <= 2^24; from now on fits into int32 */
  514. /* |h5| <= 2^24; from now on fits into int32 */
  515. /* |h2| <= 1.41*2^60 */
  516. /* |h6| <= 1.41*2^60 */
  517. carry2 = (h2 + (int64_t)(1L << 25)) >> 26;
  518. h3 += carry2;
  519. h2 -= carry2 * ((uint64_t) 1L << 26);
  520. carry6 = (h6 + (int64_t)(1L << 25)) >> 26;
  521. h7 += carry6;
  522. h6 -= carry6 * ((uint64_t) 1L << 26);
  523. /* |h2| <= 2^25; from now on fits into int32 unchanged */
  524. /* |h6| <= 2^25; from now on fits into int32 unchanged */
  525. /* |h3| <= 1.71*2^59 */
  526. /* |h7| <= 1.71*2^59 */
  527. carry3 = (h3 + (int64_t)(1L << 24)) >> 25;
  528. h4 += carry3;
  529. h3 -= carry3 * ((uint64_t) 1L << 25);
  530. carry7 = (h7 + (int64_t)(1L << 24)) >> 25;
  531. h8 += carry7;
  532. h7 -= carry7 * ((uint64_t) 1L << 25);
  533. /* |h3| <= 2^24; from now on fits into int32 unchanged */
  534. /* |h7| <= 2^24; from now on fits into int32 unchanged */
  535. /* |h4| <= 1.72*2^34 */
  536. /* |h8| <= 1.41*2^60 */
  537. carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
  538. h5 += carry4;
  539. h4 -= carry4 * ((uint64_t) 1L << 26);
  540. carry8 = (h8 + (int64_t)(1L << 25)) >> 26;
  541. h9 += carry8;
  542. h8 -= carry8 * ((uint64_t) 1L << 26);
  543. /* |h4| <= 2^25; from now on fits into int32 unchanged */
  544. /* |h8| <= 2^25; from now on fits into int32 unchanged */
  545. /* |h5| <= 1.01*2^24 */
  546. /* |h9| <= 1.71*2^59 */
  547. carry9 = (h9 + (int64_t)(1L << 24)) >> 25;
  548. h0 += carry9 * 19;
  549. h9 -= carry9 * ((uint64_t) 1L << 25);
  550. /* |h9| <= 2^24; from now on fits into int32 unchanged */
  551. /* |h0| <= 1.1*2^39 */
  552. carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
  553. h1 += carry0;
  554. h0 -= carry0 * ((uint64_t) 1L << 26);
  555. /* |h0| <= 2^25; from now on fits into int32 unchanged */
  556. /* |h1| <= 1.01*2^24 */
  557. h[0] = (int32_t) h0;
  558. h[1] = (int32_t) h1;
  559. h[2] = (int32_t) h2;
  560. h[3] = (int32_t) h3;
  561. h[4] = (int32_t) h4;
  562. h[5] = (int32_t) h5;
  563. h[6] = (int32_t) h6;
  564. h[7] = (int32_t) h7;
  565. h[8] = (int32_t) h8;
  566. h[9] = (int32_t) h9;
  567. }
  568. /*
  569. h = f * f
  570. Can overlap h with f.
  571. *
  572. Preconditions:
  573. |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  574. *
  575. Postconditions:
  576. |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  577. */
  578. static void
  579. fe25519_sq(fe25519 h, const fe25519 f)
  580. {
  581. int32_t f0 = f[0];
  582. int32_t f1 = f[1];
  583. int32_t f2 = f[2];
  584. int32_t f3 = f[3];
  585. int32_t f4 = f[4];
  586. int32_t f5 = f[5];
  587. int32_t f6 = f[6];
  588. int32_t f7 = f[7];
  589. int32_t f8 = f[8];
  590. int32_t f9 = f[9];
  591. int32_t f0_2 = 2 * f0;
  592. int32_t f1_2 = 2 * f1;
  593. int32_t f2_2 = 2 * f2;
  594. int32_t f3_2 = 2 * f3;
  595. int32_t f4_2 = 2 * f4;
  596. int32_t f5_2 = 2 * f5;
  597. int32_t f6_2 = 2 * f6;
  598. int32_t f7_2 = 2 * f7;
  599. int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
  600. int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
  601. int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
  602. int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
  603. int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
  604. int64_t f0f0 = f0 * (int64_t) f0;
  605. int64_t f0f1_2 = f0_2 * (int64_t) f1;
  606. int64_t f0f2_2 = f0_2 * (int64_t) f2;
  607. int64_t f0f3_2 = f0_2 * (int64_t) f3;
  608. int64_t f0f4_2 = f0_2 * (int64_t) f4;
  609. int64_t f0f5_2 = f0_2 * (int64_t) f5;
  610. int64_t f0f6_2 = f0_2 * (int64_t) f6;
  611. int64_t f0f7_2 = f0_2 * (int64_t) f7;
  612. int64_t f0f8_2 = f0_2 * (int64_t) f8;
  613. int64_t f0f9_2 = f0_2 * (int64_t) f9;
  614. int64_t f1f1_2 = f1_2 * (int64_t) f1;
  615. int64_t f1f2_2 = f1_2 * (int64_t) f2;
  616. int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
  617. int64_t f1f4_2 = f1_2 * (int64_t) f4;
  618. int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
  619. int64_t f1f6_2 = f1_2 * (int64_t) f6;
  620. int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
  621. int64_t f1f8_2 = f1_2 * (int64_t) f8;
  622. int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
  623. int64_t f2f2 = f2 * (int64_t) f2;
  624. int64_t f2f3_2 = f2_2 * (int64_t) f3;
  625. int64_t f2f4_2 = f2_2 * (int64_t) f4;
  626. int64_t f2f5_2 = f2_2 * (int64_t) f5;
  627. int64_t f2f6_2 = f2_2 * (int64_t) f6;
  628. int64_t f2f7_2 = f2_2 * (int64_t) f7;
  629. int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
  630. int64_t f2f9_38 = f2 * (int64_t) f9_38;
  631. int64_t f3f3_2 = f3_2 * (int64_t) f3;
  632. int64_t f3f4_2 = f3_2 * (int64_t) f4;
  633. int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
  634. int64_t f3f6_2 = f3_2 * (int64_t) f6;
  635. int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
  636. int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
  637. int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
  638. int64_t f4f4 = f4 * (int64_t) f4;
  639. int64_t f4f5_2 = f4_2 * (int64_t) f5;
  640. int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
  641. int64_t f4f7_38 = f4 * (int64_t) f7_38;
  642. int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
  643. int64_t f4f9_38 = f4 * (int64_t) f9_38;
  644. int64_t f5f5_38 = f5 * (int64_t) f5_38;
  645. int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
  646. int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
  647. int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
  648. int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
  649. int64_t f6f6_19 = f6 * (int64_t) f6_19;
  650. int64_t f6f7_38 = f6 * (int64_t) f7_38;
  651. int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
  652. int64_t f6f9_38 = f6 * (int64_t) f9_38;
  653. int64_t f7f7_38 = f7 * (int64_t) f7_38;
  654. int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
  655. int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
  656. int64_t f8f8_19 = f8 * (int64_t) f8_19;
  657. int64_t f8f9_38 = f8 * (int64_t) f9_38;
  658. int64_t f9f9_38 = f9 * (int64_t) f9_38;
  659. int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
  660. int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
  661. int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
  662. int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
  663. int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
  664. int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
  665. int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
  666. int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
  667. int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
  668. int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
  669. int64_t carry0;
  670. int64_t carry1;
  671. int64_t carry2;
  672. int64_t carry3;
  673. int64_t carry4;
  674. int64_t carry5;
  675. int64_t carry6;
  676. int64_t carry7;
  677. int64_t carry8;
  678. int64_t carry9;
  679. carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
  680. h1 += carry0;
  681. h0 -= carry0 * ((uint64_t) 1L << 26);
  682. carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
  683. h5 += carry4;
  684. h4 -= carry4 * ((uint64_t) 1L << 26);
  685. carry1 = (h1 + (int64_t)(1L << 24)) >> 25;
  686. h2 += carry1;
  687. h1 -= carry1 * ((uint64_t) 1L << 25);
  688. carry5 = (h5 + (int64_t)(1L << 24)) >> 25;
  689. h6 += carry5;
  690. h5 -= carry5 * ((uint64_t) 1L << 25);
  691. carry2 = (h2 + (int64_t)(1L << 25)) >> 26;
  692. h3 += carry2;
  693. h2 -= carry2 * ((uint64_t) 1L << 26);
  694. carry6 = (h6 + (int64_t)(1L << 25)) >> 26;
  695. h7 += carry6;
  696. h6 -= carry6 * ((uint64_t) 1L << 26);
  697. carry3 = (h3 + (int64_t)(1L << 24)) >> 25;
  698. h4 += carry3;
  699. h3 -= carry3 * ((uint64_t) 1L << 25);
  700. carry7 = (h7 + (int64_t)(1L << 24)) >> 25;
  701. h8 += carry7;
  702. h7 -= carry7 * ((uint64_t) 1L << 25);
  703. carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
  704. h5 += carry4;
  705. h4 -= carry4 * ((uint64_t) 1L << 26);
  706. carry8 = (h8 + (int64_t)(1L << 25)) >> 26;
  707. h9 += carry8;
  708. h8 -= carry8 * ((uint64_t) 1L << 26);
  709. carry9 = (h9 + (int64_t)(1L << 24)) >> 25;
  710. h0 += carry9 * 19;
  711. h9 -= carry9 * ((uint64_t) 1L << 25);
  712. carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
  713. h1 += carry0;
  714. h0 -= carry0 * ((uint64_t) 1L << 26);
  715. h[0] = (int32_t) h0;
  716. h[1] = (int32_t) h1;
  717. h[2] = (int32_t) h2;
  718. h[3] = (int32_t) h3;
  719. h[4] = (int32_t) h4;
  720. h[5] = (int32_t) h5;
  721. h[6] = (int32_t) h6;
  722. h[7] = (int32_t) h7;
  723. h[8] = (int32_t) h8;
  724. h[9] = (int32_t) h9;
  725. }
  726. /*
  727. h = 2 * f * f
  728. Can overlap h with f.
  729. *
  730. Preconditions:
  731. |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  732. *
  733. Postconditions:
  734. |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  735. */
  736. static void
  737. fe25519_sq2(fe25519 h, const fe25519 f)
  738. {
  739. int32_t f0 = f[0];
  740. int32_t f1 = f[1];
  741. int32_t f2 = f[2];
  742. int32_t f3 = f[3];
  743. int32_t f4 = f[4];
  744. int32_t f5 = f[5];
  745. int32_t f6 = f[6];
  746. int32_t f7 = f[7];
  747. int32_t f8 = f[8];
  748. int32_t f9 = f[9];
  749. int32_t f0_2 = 2 * f0;
  750. int32_t f1_2 = 2 * f1;
  751. int32_t f2_2 = 2 * f2;
  752. int32_t f3_2 = 2 * f3;
  753. int32_t f4_2 = 2 * f4;
  754. int32_t f5_2 = 2 * f5;
  755. int32_t f6_2 = 2 * f6;
  756. int32_t f7_2 = 2 * f7;
  757. int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
  758. int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
  759. int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
  760. int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
  761. int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
  762. int64_t f0f0 = f0 * (int64_t) f0;
  763. int64_t f0f1_2 = f0_2 * (int64_t) f1;
  764. int64_t f0f2_2 = f0_2 * (int64_t) f2;
  765. int64_t f0f3_2 = f0_2 * (int64_t) f3;
  766. int64_t f0f4_2 = f0_2 * (int64_t) f4;
  767. int64_t f0f5_2 = f0_2 * (int64_t) f5;
  768. int64_t f0f6_2 = f0_2 * (int64_t) f6;
  769. int64_t f0f7_2 = f0_2 * (int64_t) f7;
  770. int64_t f0f8_2 = f0_2 * (int64_t) f8;
  771. int64_t f0f9_2 = f0_2 * (int64_t) f9;
  772. int64_t f1f1_2 = f1_2 * (int64_t) f1;
  773. int64_t f1f2_2 = f1_2 * (int64_t) f2;
  774. int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
  775. int64_t f1f4_2 = f1_2 * (int64_t) f4;
  776. int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
  777. int64_t f1f6_2 = f1_2 * (int64_t) f6;
  778. int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
  779. int64_t f1f8_2 = f1_2 * (int64_t) f8;
  780. int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
  781. int64_t f2f2 = f2 * (int64_t) f2;
  782. int64_t f2f3_2 = f2_2 * (int64_t) f3;
  783. int64_t f2f4_2 = f2_2 * (int64_t) f4;
  784. int64_t f2f5_2 = f2_2 * (int64_t) f5;
  785. int64_t f2f6_2 = f2_2 * (int64_t) f6;
  786. int64_t f2f7_2 = f2_2 * (int64_t) f7;
  787. int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
  788. int64_t f2f9_38 = f2 * (int64_t) f9_38;
  789. int64_t f3f3_2 = f3_2 * (int64_t) f3;
  790. int64_t f3f4_2 = f3_2 * (int64_t) f4;
  791. int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
  792. int64_t f3f6_2 = f3_2 * (int64_t) f6;
  793. int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
  794. int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
  795. int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
  796. int64_t f4f4 = f4 * (int64_t) f4;
  797. int64_t f4f5_2 = f4_2 * (int64_t) f5;
  798. int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
  799. int64_t f4f7_38 = f4 * (int64_t) f7_38;
  800. int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
  801. int64_t f4f9_38 = f4 * (int64_t) f9_38;
  802. int64_t f5f5_38 = f5 * (int64_t) f5_38;
  803. int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
  804. int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
  805. int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
  806. int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
  807. int64_t f6f6_19 = f6 * (int64_t) f6_19;
  808. int64_t f6f7_38 = f6 * (int64_t) f7_38;
  809. int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
  810. int64_t f6f9_38 = f6 * (int64_t) f9_38;
  811. int64_t f7f7_38 = f7 * (int64_t) f7_38;
  812. int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
  813. int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
  814. int64_t f8f8_19 = f8 * (int64_t) f8_19;
  815. int64_t f8f9_38 = f8 * (int64_t) f9_38;
  816. int64_t f9f9_38 = f9 * (int64_t) f9_38;
  817. int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
  818. int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
  819. int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
  820. int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
  821. int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
  822. int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
  823. int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
  824. int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
  825. int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
  826. int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
  827. int64_t carry0;
  828. int64_t carry1;
  829. int64_t carry2;
  830. int64_t carry3;
  831. int64_t carry4;
  832. int64_t carry5;
  833. int64_t carry6;
  834. int64_t carry7;
  835. int64_t carry8;
  836. int64_t carry9;
  837. h0 += h0;
  838. h1 += h1;
  839. h2 += h2;
  840. h3 += h3;
  841. h4 += h4;
  842. h5 += h5;
  843. h6 += h6;
  844. h7 += h7;
  845. h8 += h8;
  846. h9 += h9;
  847. carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
  848. h1 += carry0;
  849. h0 -= carry0 * ((uint64_t) 1L << 26);
  850. carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
  851. h5 += carry4;
  852. h4 -= carry4 * ((uint64_t) 1L << 26);
  853. carry1 = (h1 + (int64_t)(1L << 24)) >> 25;
  854. h2 += carry1;
  855. h1 -= carry1 * ((uint64_t) 1L << 25);
  856. carry5 = (h5 + (int64_t)(1L << 24)) >> 25;
  857. h6 += carry5;
  858. h5 -= carry5 * ((uint64_t) 1L << 25);
  859. carry2 = (h2 + (int64_t)(1L << 25)) >> 26;
  860. h3 += carry2;
  861. h2 -= carry2 * ((uint64_t) 1L << 26);
  862. carry6 = (h6 + (int64_t)(1L << 25)) >> 26;
  863. h7 += carry6;
  864. h6 -= carry6 * ((uint64_t) 1L << 26);
  865. carry3 = (h3 + (int64_t)(1L << 24)) >> 25;
  866. h4 += carry3;
  867. h3 -= carry3 * ((uint64_t) 1L << 25);
  868. carry7 = (h7 + (int64_t)(1L << 24)) >> 25;
  869. h8 += carry7;
  870. h7 -= carry7 * ((uint64_t) 1L << 25);
  871. carry4 = (h4 + (int64_t)(1L << 25)) >> 26;
  872. h5 += carry4;
  873. h4 -= carry4 * ((uint64_t) 1L << 26);
  874. carry8 = (h8 + (int64_t)(1L << 25)) >> 26;
  875. h9 += carry8;
  876. h8 -= carry8 * ((uint64_t) 1L << 26);
  877. carry9 = (h9 + (int64_t)(1L << 24)) >> 25;
  878. h0 += carry9 * 19;
  879. h9 -= carry9 * ((uint64_t) 1L << 25);
  880. carry0 = (h0 + (int64_t)(1L << 25)) >> 26;
  881. h1 += carry0;
  882. h0 -= carry0 * ((uint64_t) 1L << 26);
  883. h[0] = (int32_t) h0;
  884. h[1] = (int32_t) h1;
  885. h[2] = (int32_t) h2;
  886. h[3] = (int32_t) h3;
  887. h[4] = (int32_t) h4;
  888. h[5] = (int32_t) h5;
  889. h[6] = (int32_t) h6;
  890. h[7] = (int32_t) h7;
  891. h[8] = (int32_t) h8;
  892. h[9] = (int32_t) h9;
  893. }
  894. static void
  895. fe25519_scalar_product(fe25519 h, const fe25519 f, uint32_t n)
  896. {
  897. int64_t sn = (int64_t) n;
  898. int32_t f0 = f[0];
  899. int32_t f1 = f[1];
  900. int32_t f2 = f[2];
  901. int32_t f3 = f[3];
  902. int32_t f4 = f[4];
  903. int32_t f5 = f[5];
  904. int32_t f6 = f[6];
  905. int32_t f7 = f[7];
  906. int32_t f8 = f[8];
  907. int32_t f9 = f[9];
  908. int64_t h0 = f0 * sn;
  909. int64_t h1 = f1 * sn;
  910. int64_t h2 = f2 * sn;
  911. int64_t h3 = f3 * sn;
  912. int64_t h4 = f4 * sn;
  913. int64_t h5 = f5 * sn;
  914. int64_t h6 = f6 * sn;
  915. int64_t h7 = f7 * sn;
  916. int64_t h8 = f8 * sn;
  917. int64_t h9 = f9 * sn;
  918. int64_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7,
  919. carry8, carry9;
  920. carry9 = (h9 + ((int64_t) 1 << 24)) >> 25;
  921. h0 += carry9 * 19;
  922. h9 -= carry9 * ((int64_t) 1 << 25);
  923. carry1 = (h1 + ((int64_t) 1 << 24)) >> 25;
  924. h2 += carry1;
  925. h1 -= carry1 * ((int64_t) 1 << 25);
  926. carry3 = (h3 + ((int64_t) 1 << 24)) >> 25;
  927. h4 += carry3;
  928. h3 -= carry3 * ((int64_t) 1 << 25);
  929. carry5 = (h5 + ((int64_t) 1 << 24)) >> 25;
  930. h6 += carry5;
  931. h5 -= carry5 * ((int64_t) 1 << 25);
  932. carry7 = (h7 + ((int64_t) 1 << 24)) >> 25;
  933. h8 += carry7;
  934. h7 -= carry7 * ((int64_t) 1 << 25);
  935. carry0 = (h0 + ((int64_t) 1 << 25)) >> 26;
  936. h1 += carry0;
  937. h0 -= carry0 * ((int64_t) 1 << 26);
  938. carry2 = (h2 + ((int64_t) 1 << 25)) >> 26;
  939. h3 += carry2;
  940. h2 -= carry2 * ((int64_t) 1 << 26);
  941. carry4 = (h4 + ((int64_t) 1 << 25)) >> 26;
  942. h5 += carry4;
  943. h4 -= carry4 * ((int64_t) 1 << 26);
  944. carry6 = (h6 + ((int64_t) 1 << 25)) >> 26;
  945. h7 += carry6;
  946. h6 -= carry6 * ((int64_t) 1 << 26);
  947. carry8 = (h8 + ((int64_t) 1 << 25)) >> 26;
  948. h9 += carry8;
  949. h8 -= carry8 * ((int64_t) 1 << 26);
  950. h[0] = (int32_t) h0;
  951. h[1] = (int32_t) h1;
  952. h[2] = (int32_t) h2;
  953. h[3] = (int32_t) h3;
  954. h[4] = (int32_t) h4;
  955. h[5] = (int32_t) h5;
  956. h[6] = (int32_t) h6;
  957. h[7] = (int32_t) h7;
  958. h[8] = (int32_t) h8;
  959. h[9] = (int32_t) h9;
  960. }