/* fe51_pack.S */
  #ifdef IN_SANDY2X
  /*
     This file is the result of merging
     amd64-51/fe25519_pack.c and amd64-51/fe25519_freeze.s.
  */
  #include "fe51_namespace.h"
  #include "consts_namespace.h"

  /*
   * fe51_pack
   *
   * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   *
   * Carries and reduces a field element held as five 64-bit limbs of
   * 51 significant bits each, then serialises the resulting 255 bits as
   * 32 little-endian bytes.
   *
   * In:    rdi = destination; bytes 0..31 are written
   *        rsi = source; five quadwords are read at offsets 0, 8, 16, 24, 32
   *        (register assignment matches the first two pointer arguments of
   *        the System V AMD64 convention)
   * Out:   nothing is returned on purpose; rax holds leftover scratch data
   * Keeps: rdi, r12 (saved and restored), rsp (restored)
   * Clobb: rax, rcx, rdx, rsi, r8, r9, r10, r11, flags
   * Stack: 32..63 bytes, with rsp aligned down to a 32-byte boundary
   *
   * The only conditional branch is the fixed three-pass loop counter; the
   * final subtraction is selected with cmov and an all-ones/zero mask, not
   * with a branch on limb values.
   *
   * NOTE(review): REDMASK51 is defined outside this file. The 51-bit shifts
   * below imply it equals 2^51 - 1, which would make the constant subtracted
   * at the end 2^255 - 19 -- confirm against its definition.
   */
  .p2align 5
  #ifdef ASM_HIDE_SYMBOL
  ASM_HIDE_SYMBOL fe51_pack
  ASM_HIDE_SYMBOL _fe51_pack
  #endif
  .globl fe51_pack
  .globl _fe51_pack
  #ifdef __ELF__
  .type fe51_pack, @function
  .type _fe51_pack, @function
  #endif
  fe51_pack:
  _fe51_pack:
          /* Frame: r11 = (rsp & 31) + 32, so rsp - r11 is 32-byte aligned. */
          movq    %rsp, %r11
          andq    $31, %r11
          addq    $32, %r11
          subq    %r11, %rsp
          movq    %r11, 0(%rsp)           /* frame size, needed by the epilogue */
          movq    %r12, 8(%rsp)           /* r12 is used as scratch below */

          /* Limbs 0..4 live in rdx, rcx, r8, r9, rsi from here on. */
          movq    0(%rsi), %rdx
          movq    8(%rsi), %rcx
          movq    16(%rsi), %r8
          movq    24(%rsi), %r9
          movq    32(%rsi), %rsi          /* last use of rsi as a pointer */

          movq    REDMASK51(%rip), %rax   /* rax = limb mask */
          leaq    -18(%rax), %r10         /* r10 = mask - 18, bound for limb 0 */
          movq    $3, %r11                /* number of carry passes */

          /*
           * One pass: push the bits above bit 50 of each limb into the next
           * limb; the overflow of limb 4 re-enters limb 0 multiplied by 19.
           */
  .p2align 4
  ._reduceloop:
          movq    %rdx, %r12
          shrq    $51, %r12
          andq    %rax, %rdx
          addq    %r12, %rcx

          movq    %rcx, %r12
          shrq    $51, %r12
          andq    %rax, %rcx
          addq    %r12, %r8

          movq    %r8, %r12
          shrq    $51, %r12
          andq    %rax, %r8
          addq    %r12, %r9

          movq    %r9, %r12
          shrq    $51, %r12
          andq    %rax, %r9
          addq    %r12, %rsi

          movq    %rsi, %r12
          shrq    $51, %r12
          andq    %rax, %rsi
          imulq   $19, %r12, %r12
          addq    %r12, %rdx

          /* The counter runs 3 -> 0 and never borrows, so ZF alone decides. */
          subq    $1, %r11
          jnz     ._reduceloop

          /*
           * r11 is zero here and serves as the cmov source. r12 stays 1 only
           * if limb 0 >= r10 and limbs 1..4 all equal the mask.
           */
          movq    $1, %r12
          cmpq    %r10, %rdx
          cmovl   %r11, %r12
          cmpq    %rax, %rcx
          cmovne  %r11, %r12
          cmpq    %rax, %r8
          cmovne  %r11, %r12
          cmpq    %rax, %r9
          cmovne  %r11, %r12
          cmpq    %rax, %rsi
          cmovne  %r11, %r12

          /* Turn the 0/1 flag into a 0/all-ones mask and subtract under it. */
          negq    %r12
          andq    %r12, %rax
          andq    %r12, %r10
          subq    %r10, %rdx
          subq    %rax, %rcx
          subq    %rax, %r8
          subq    %rax, %r9
          subq    %rax, %rsi

          /*
           * Serialise. A byte store writes only the low 8 bits of its source
           * register, so shifted copies are stored without a 0xFF mask.
           */

          /* Bytes 0..5: limb 0 bits 0..47. */
          movb    %dl, 0(%rdi)
          movq    %rdx, %rax
          shrq    $8, %rax
          movb    %al, 1(%rdi)
          movq    %rdx, %rax
          shrq    $16, %rax
          movb    %al, 2(%rdi)
          movq    %rdx, %rax
          shrq    $24, %rax
          movb    %al, 3(%rdi)
          movq    %rdx, %rax
          shrq    $32, %rax
          movb    %al, 4(%rdi)
          movq    %rdx, %rax
          shrq    $40, %rax
          movb    %al, 5(%rdi)

          /* Byte 6: limb 0 bits 48.. in the low bits, limb 1 bits 0..4 above. */
          shrq    $48, %rdx               /* limb 0 is not needed afterwards */
          movq    %rcx, %rax
          shlq    $3, %rax
          andl    $0xF8, %eax             /* keep only the bits from limb 1 */
          xorq    %rdx, %rax
          movb    %al, 6(%rdi)

          /* Bytes 7..11: limb 1 bits 5..44. */
          movq    %rcx, %rdx
          shrq    $5, %rdx
          movb    %dl, 7(%rdi)
          movq    %rcx, %rdx
          shrq    $13, %rdx
          movb    %dl, 8(%rdi)
          movq    %rcx, %rdx
          shrq    $21, %rdx
          movb    %dl, 9(%rdi)
          movq    %rcx, %rdx
          shrq    $29, %rdx
          movb    %dl, 10(%rdi)
          movq    %rcx, %rdx
          shrq    $37, %rdx
          movb    %dl, 11(%rdi)

          /* Byte 12: limb 1 bits 45.. in the low bits, limb 2 bits 0..1 above. */
          movq    %rcx, %rdx
          shrq    $45, %rdx
          movq    %r8, %rcx               /* rcx is reused as scratch from here */
          shlq    $6, %rcx
          andl    $0xC0, %ecx             /* keep only the bits from limb 2 */
          xorq    %rdx, %rcx
          movb    %cl, 12(%rdi)

          /* Bytes 13..18: limb 2 bits 2..49. */
          movq    %r8, %rdx
          shrq    $2, %rdx
          movb    %dl, 13(%rdi)
          movq    %r8, %rdx
          shrq    $10, %rdx
          movb    %dl, 14(%rdi)
          movq    %r8, %rdx
          shrq    $18, %rdx
          movb    %dl, 15(%rdi)
          movq    %r8, %rdx
          shrq    $26, %rdx
          movb    %dl, 16(%rdi)
          movq    %r8, %rdx
          shrq    $34, %rdx
          movb    %dl, 17(%rdi)
          movq    %r8, %rdx
          shrq    $42, %rdx
          movb    %dl, 18(%rdi)

          /* Byte 19: limb 2 bit 50 in bit 0, limb 3 bits 0..6 above. */
          movq    %r8, %rdx
          shrq    $50, %rdx
          movq    %r9, %rcx
          shlq    $1, %rcx
          andl    $0xFE, %ecx             /* keep only the bits from limb 3 */
          xorq    %rdx, %rcx
          movb    %cl, 19(%rdi)

          /* Bytes 20..24: limb 3 bits 7..46. */
          movq    %r9, %rdx
          shrq    $7, %rdx
          movb    %dl, 20(%rdi)
          movq    %r9, %rdx
          shrq    $15, %rdx
          movb    %dl, 21(%rdi)
          movq    %r9, %rdx
          shrq    $23, %rdx
          movb    %dl, 22(%rdi)
          movq    %r9, %rdx
          shrq    $31, %rdx
          movb    %dl, 23(%rdi)
          movq    %r9, %rdx
          shrq    $39, %rdx
          movb    %dl, 24(%rdi)

          /* Byte 25: limb 3 bits 47.. in the low bits, limb 4 bits 0..3 above. */
          movq    %r9, %rdx
          shrq    $47, %rdx
          movq    %rsi, %rcx
          shlq    $4, %rcx
          andl    $0xF0, %ecx             /* keep only the bits from limb 4 */
          xorq    %rdx, %rcx
          movb    %cl, 25(%rdi)

          /* Bytes 26..30: limb 4 bits 4..43. */
          movq    %rsi, %rdx
          shrq    $4, %rdx
          movb    %dl, 26(%rdi)
          movq    %rsi, %rdx
          shrq    $12, %rdx
          movb    %dl, 27(%rdi)
          movq    %rsi, %rdx
          shrq    $20, %rdx
          movb    %dl, 28(%rdi)
          movq    %rsi, %rdx
          shrq    $28, %rdx
          movb    %dl, 29(%rdi)
          movq    %rsi, %rdx
          shrq    $36, %rdx
          movb    %dl, 30(%rdi)

          /* Byte 31: limb 4 bits 44 and up. */
          shrq    $44, %rsi
          movb    %sil, 31(%rdi)

          /* Restore r12 and release the frame using the saved size. */
          movq    0(%rsp), %r11
          movq    8(%rsp), %r12
          addq    %r11, %rsp
          ret
  #endif