fe51_mul.S 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. #ifdef IN_SANDY2X
  2. /*
  3. This file is basically amd64-51/fe25519_mul.s.
  4. */
  5. #include "fe51_namespace.h"
  6. #include "consts_namespace.h"
  /* Emit the routine into the code section, entry aligned to 2^5 = 32 bytes. */
  7. .text
  8. .p2align 5
  /*
   * ASM_HIDE_SYMBOL is not defined in this file. When it is defined, it is
   * applied to both spellings of the entry point.
   * NOTE(review): presumably it restricts symbol visibility; confirm against
   * its definition.
   */
  9. #ifdef ASM_HIDE_SYMBOL
  10. ASM_HIDE_SYMBOL fe51_mul
  11. ASM_HIDE_SYMBOL _fe51_mul
  12. #endif
  /* Export the entry point under both the plain and the underscore-prefixed name. */
  13. .globl fe51_mul
  14. .globl _fe51_mul
  /* On ELF targets, tag both symbols as function symbols. */
  15. #ifdef __ELF__
  16. .type fe51_mul, @function
  17. .type _fe51_mul, @function
  18. #endif
  /*
   * fe51_mul: multiply two 5-limb values and store a 5-limb result.
   *
   * Registers on entry, as used by the code below:
   *   rdi  pointer to the output, five 64-bit limbs written at 0..32(%rdi)
   *   rsi  pointer to the first input,  limbs f0..f4 read at 0..32(%rsi)
   *   rdx  pointer to the second input, limbs g0..g4 read at 0..32(%rcx)
   *        (copied to rcx first, because mulq overwrites rdx)
   *
   * Limbs are handled as radix-2^51 digits: every 128-bit column sum is split
   * at bit 51 (shld by 13 = 64 - 51, then a mask). Partial products fi*gj with
   * i + j >= 5 are multiplied by 19 and added to column i + j - 5.
   * NOTE(review): this is the shape of arithmetic modulo 2^255 - 19, where
   * 2^255 is congruent to 19; the modulus itself is not stated in this file.
   * NOTE(review): assumes the input limbs are small enough that 19 * limb fits
   * in 64 bits and the column sums fit in 128 bits; confirm bounds with callers.
   *
   * 128-bit column accumulators (high:low):
   *   h0 = r9:r8   h1 = r11:r10   h2 = r13:r12   h3 = r15:r14   h4 = rbp:rbx
   *
   * Stack frame (rsp after the prologue is a multiple of 32):
   *    0(%rsp)      frame size (r11), needed to undo the adjustment
   *    8..48(%rsp)  saved r12, r13, r14, r15, rbx, rbp
   *   56(%rsp)      copy of rdi (stored, never reloaded in this routine)
   *   64(%rsp)      19 * f3
   *   72(%rsp)      19 * f4
   *
   * The code is straight-line: no branches or loops. All loads from the inputs
   * happen before the first store to the output.
   * On return rax holds the output pointer and rdx holds the value loaded from
   * REDMASK51; r12-r15, rbx and rbp are restored to their entry values.
   */
  19. fe51_mul:
  20. _fe51_mul:
  /* Prologue: r11 = (rsp mod 32) + 96, so the new rsp is 32-byte aligned with at least 96 bytes reserved. */
  21. mov %rsp,%r11
  22. and $31,%r11
  23. add $96,%r11
  24. sub %r11,%rsp
  /* Keep the frame size and the registers that are restored before returning. */
  25. movq %r11,0(%rsp)
  26. movq %r12,8(%rsp)
  27. movq %r13,16(%rsp)
  28. movq %r14,24(%rsp)
  29. movq %r15,32(%rsp)
  30. movq %rbx,40(%rsp)
  31. movq %rbp,48(%rsp)
  32. movq %rdi,56(%rsp)
  /* mulq writes rdx:rax, so the second input pointer moves to rcx. */
  33. mov %rdx,%rcx
  /* h0 = (19*f3) * g2; 19*f3 is kept at 64(%rsp) for reuse. */
  34. movq 24(%rsi),%rdx
  35. imulq $19,%rdx,%rax
  36. movq %rax,64(%rsp)
  37. mulq 16(%rcx)
  38. mov %rax,%r8
  39. mov %rdx,%r9
  /* h0 += (19*f4) * g1; 19*f4 is kept at 72(%rsp) for reuse. */
  40. movq 32(%rsi),%rdx
  41. imulq $19,%rdx,%rax
  42. movq %rax,72(%rsp)
  43. mulq 8(%rcx)
  44. add %rax,%r8
  45. adc %rdx,%r9
  /* h0 += f0 * g0 */
  46. movq 0(%rsi),%rax
  47. mulq 0(%rcx)
  48. add %rax,%r8
  49. adc %rdx,%r9
  /* h1 = f0 * g1 */
  50. movq 0(%rsi),%rax
  51. mulq 8(%rcx)
  52. mov %rax,%r10
  53. mov %rdx,%r11
  /* h2 = f0 * g2 */
  54. movq 0(%rsi),%rax
  55. mulq 16(%rcx)
  56. mov %rax,%r12
  57. mov %rdx,%r13
  /* h3 = f0 * g3 */
  58. movq 0(%rsi),%rax
  59. mulq 24(%rcx)
  60. mov %rax,%r14
  61. mov %rdx,%r15
  /* h4 = f0 * g4 */
  62. movq 0(%rsi),%rax
  63. mulq 32(%rcx)
  64. mov %rax,%rbx
  65. mov %rdx,%rbp
  /* h1 += f1 * g0 */
  66. movq 8(%rsi),%rax
  67. mulq 0(%rcx)
  68. add %rax,%r10
  69. adc %rdx,%r11
  /* h2 += f1 * g1 */
  70. movq 8(%rsi),%rax
  71. mulq 8(%rcx)
  72. add %rax,%r12
  73. adc %rdx,%r13
  /* h3 += f1 * g2 */
  74. movq 8(%rsi),%rax
  75. mulq 16(%rcx)
  76. add %rax,%r14
  77. adc %rdx,%r15
  /* h4 += f1 * g3 */
  78. movq 8(%rsi),%rax
  79. mulq 24(%rcx)
  80. add %rax,%rbx
  81. adc %rdx,%rbp
  /* h0 += (19*f1) * g4 */
  82. movq 8(%rsi),%rdx
  83. imulq $19,%rdx,%rax
  84. mulq 32(%rcx)
  85. add %rax,%r8
  86. adc %rdx,%r9
  /* h2 += f2 * g0 */
  87. movq 16(%rsi),%rax
  88. mulq 0(%rcx)
  89. add %rax,%r12
  90. adc %rdx,%r13
  /* h3 += f2 * g1 */
  91. movq 16(%rsi),%rax
  92. mulq 8(%rcx)
  93. add %rax,%r14
  94. adc %rdx,%r15
  /* h4 += f2 * g2 */
  95. movq 16(%rsi),%rax
  96. mulq 16(%rcx)
  97. add %rax,%rbx
  98. adc %rdx,%rbp
  /* h0 += (19*f2) * g3 */
  99. movq 16(%rsi),%rdx
  100. imulq $19,%rdx,%rax
  101. mulq 24(%rcx)
  102. add %rax,%r8
  103. adc %rdx,%r9
  /* h1 += (19*f2) * g4 */
  104. movq 16(%rsi),%rdx
  105. imulq $19,%rdx,%rax
  106. mulq 32(%rcx)
  107. add %rax,%r10
  108. adc %rdx,%r11
  /* h3 += f3 * g0 */
  109. movq 24(%rsi),%rax
  110. mulq 0(%rcx)
  111. add %rax,%r14
  112. adc %rdx,%r15
  /* h4 += f3 * g1 */
  113. movq 24(%rsi),%rax
  114. mulq 8(%rcx)
  115. add %rax,%rbx
  116. adc %rdx,%rbp
  /* h1 += (19*f3) * g3, reusing the value saved at 64(%rsp) */
  117. movq 64(%rsp),%rax
  118. mulq 24(%rcx)
  119. add %rax,%r10
  120. adc %rdx,%r11
  /* h2 += (19*f3) * g4 */
  121. movq 64(%rsp),%rax
  122. mulq 32(%rcx)
  123. add %rax,%r12
  124. adc %rdx,%r13
  /* h4 += f4 * g0 */
  125. movq 32(%rsi),%rax
  126. mulq 0(%rcx)
  127. add %rax,%rbx
  128. adc %rdx,%rbp
  /* h1 += (19*f4) * g2, reusing the value saved at 72(%rsp) */
  129. movq 72(%rsp),%rax
  130. mulq 16(%rcx)
  131. add %rax,%r10
  132. adc %rdx,%r11
  /* h2 += (19*f4) * g3 */
  133. movq 72(%rsp),%rax
  134. mulq 24(%rcx)
  135. add %rax,%r12
  136. adc %rdx,%r13
  /* h3 += (19*f4) * g4 */
  137. movq 72(%rsp),%rax
  138. mulq 32(%rcx)
  139. add %rax,%r14
  140. adc %rdx,%r15
  /*
   * All products are accumulated; rsi is no longer needed as a pointer and is
   * reused for the limb mask. REDMASK51 is defined outside this file.
   * NOTE(review): presumably 2^51 - 1, matching the 51-bit shifts; confirm.
   */
  141. movq REDMASK51(%rip),%rsi
  /*
   * Split each 128-bit column at bit 51: shld leaves bits 51 and above of
   * high:low in the high register, the mask keeps the low part, and the high
   * part is added to the low part of the next column.
   */
  142. shld $13,%r8,%r9
  143. and %rsi,%r8
  144. shld $13,%r10,%r11
  145. and %rsi,%r10
  146. add %r9,%r10
  147. shld $13,%r12,%r13
  148. and %rsi,%r12
  149. add %r11,%r12
  150. shld $13,%r14,%r15
  151. and %rsi,%r14
  152. add %r13,%r14
  153. shld $13,%rbx,%rbp
  154. and %rsi,%rbx
  155. add %r15,%rbx
  /* The part carried out of h4 wraps around into column 0, multiplied by 19. */
  156. imulq $19,%rbp,%rdx
  157. add %rdx,%r8
  /*
   * Second pass: a 64-bit carry chain through columns 0..4. rdx carries
   * (value >> 51) into the next column; the masked limbs end up in
   * r8, rcx, r9, rax and r10 (rcx is free now, all reads of g are done).
   */
  158. mov %r8,%rdx
  159. shr $51,%rdx
  160. add %r10,%rdx
  161. mov %rdx,%rcx
  162. shr $51,%rdx
  163. and %rsi,%r8
  164. add %r12,%rdx
  165. mov %rdx,%r9
  166. shr $51,%rdx
  167. and %rsi,%rcx
  168. add %r14,%rdx
  169. mov %rdx,%rax
  170. shr $51,%rdx
  171. and %rsi,%r9
  172. add %rbx,%rdx
  173. mov %rdx,%r10
  174. shr $51,%rdx
  175. and %rsi,%rax
  /* The carry out of limb 4 wraps into limb 0 times 19; r8 is not masked again after this addition. */
  176. imulq $19,%rdx,%rdx
  177. add %rdx,%r8
  178. and %rsi,%r10
  /* Write the five result limbs to the output. */
  179. movq %r8,0(%rdi)
  180. movq %rcx,8(%rdi)
  181. movq %r9,16(%rdi)
  182. movq %rax,24(%rdi)
  183. movq %r10,32(%rdi)
  /* Epilogue: reload the frame size and the saved registers, then release the frame. */
  184. movq 0(%rsp),%r11
  185. movq 8(%rsp),%r12
  186. movq 16(%rsp),%r13
  187. movq 24(%rsp),%r14
  188. movq 32(%rsp),%r15
  189. movq 40(%rsp),%rbx
  190. movq 48(%rsp),%rbp
  191. add %r11,%rsp
  /*
   * rax = output pointer, rdx = mask value still held in rsi.
   * NOTE(review): whether any caller reads these is not visible here.
   */
  192. mov %rdi,%rax
  193. mov %rsi,%rdx
  194. ret
  195. #endif