/*
 * salsa20_xmm6-asm.S
 */
  /*
   * Salsa20 (20 rounds) keystream generation and XOR for x86-64, using
   * SSE2 integer instructions on xmm registers.  AT&T syntax, run through
   * the C preprocessor.  Arguments are taken from rdi, rsi, rdx, rcx, r8, r9.
   *
   * Entry points (each is also exported with a leading underscore):
   *
   *   stream_salsa20_xmm6(c = rdi, clen = rsi, n = rdx, k = rcx)
   *       Zero-fills c[0..clen) and then runs the shared XOR path with the
   *       message pointer set to c and the block counter set to 0, so c
   *       ends up holding raw keystream.
   *
   *   stream_salsa20_xmm6_xor_ic(c = rdi, m = rsi, mlen = rdx, n = rcx,
   *                              ic = r8, k = r9)
   *       c[i] = m[i] ^ keystream[i], with the 64-bit block counter
   *       starting at ic.
   *
   *   n is read as 8 bytes, k as 32 bytes.  A length of 0 returns at once
   *   without touching c.  Both return with rax = 0.
   *
   * Register roles after the entry shuffles:
   *   rdi = output cursor, rsi = input cursor, r9 = bytes remaining,
   *   rdx = nonce pointer (later reused as scratch), r10 = key pointer.
   *
   * Stack frame.  rsp is lowered by (rsp & 31) + 512, which leaves it
   * 32-byte aligned, so every movdqa below hits an aligned slot:
   *     0.. 63  bounce buffer for a final block shorter than 64 bytes
   *    64..127  initial state as four 16-byte rows, Salsa20 word indices:
   *                64: x12 x1  x6  x11
   *                80: x8  x13 x2  x7      (x8  = counter, low  32 bits)
   *                96: x4  x9  x14 x3      (x9  = counter, high 32 bits)
   *               112: x0  x5  x10 x15     (the four constants)
   *   128..351  each non-counter state word broadcast to four lanes
   *   352, 368  counter low / high words for four consecutive blocks
   *   384, 400  spill slots used inside ._mainloop1
   *   416       frame size, added back to rsp on exit
   *   424..464  saved r12, r13, r14, r15, rbx, rbp
   *   472       64-bit block counter
   *   480       bytes remaining (r9 is reused as scratch in between)
   *
   * Local labels keep their original "._name" spelling because the two
   * entry points share ._start and ._done.
   */
#ifdef HAVE_AMD64_ASM
.text
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL stream_salsa20_xmm6
ASM_HIDE_SYMBOL _stream_salsa20_xmm6
#endif
.globl stream_salsa20_xmm6
.globl _stream_salsa20_xmm6
#ifdef __ELF__
.type stream_salsa20_xmm6, @function
.type _stream_salsa20_xmm6, @function
#endif
stream_salsa20_xmm6:
_stream_salsa20_xmm6:
        /* Build the 32-byte aligned frame; r11 = number of bytes taken. */
        mov %rsp,%r11
        and $31,%r11
        add $512,%r11
        sub %r11,%rsp
        movq %r11,416(%rsp)
        movq %r12,424(%rsp)
        movq %r13,432(%rsp)
        movq %r14,440(%rsp)
        movq %r15,448(%rsp)
        movq %rbx,456(%rsp)
        movq %rbp,464(%rsp)
        /* r9 = length, rsi = rdi = output, rdx = nonce, r10 = key. */
        mov %rsi,%r9
        mov %rdi,%rdi
        mov %rdi,%rsi
        mov %rdx,%rdx
        mov %rcx,%r10
        cmp $0,%r9
        jbe ._done
        /* Zero the output so that XORing keystream into it yields keystream. */
        mov $0,%rax
        mov %r9,%rcx
        rep stosb
        sub %r9,%rdi                    /* rep stosb advanced rdi; rewind it */
        movq $0,472(%rsp)               /* block counter starts at 0 */
        jmp ._start
.text
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL stream_salsa20_xmm6_xor_ic
ASM_HIDE_SYMBOL _stream_salsa20_xmm6_xor_ic
#endif
.globl stream_salsa20_xmm6_xor_ic
.globl _stream_salsa20_xmm6_xor_ic
#ifdef __ELF__
.type stream_salsa20_xmm6_xor_ic, @function
.type _stream_salsa20_xmm6_xor_ic, @function
#endif
stream_salsa20_xmm6_xor_ic:
_stream_salsa20_xmm6_xor_ic:
        /* Same frame setup as stream_salsa20_xmm6. */
        mov %rsp,%r11
        and $31,%r11
        add $512,%r11
        sub %r11,%rsp
        movq %r11,416(%rsp)
        movq %r12,424(%rsp)
        movq %r13,432(%rsp)
        movq %r14,440(%rsp)
        movq %r15,448(%rsp)
        movq %rbx,456(%rsp)
        movq %rbp,464(%rsp)
        /* r10 = key, counter slot = ic, r9 = length, rdx = nonce. */
        mov %rdi,%rdi
        mov %rsi,%rsi
        mov %r9,%r10
        movq %r8,472(%rsp)
        mov %rdx,%r9
        mov %rcx,%rdx
        cmp $0,%r9
        jbe ._done
._start:
        /* Row at 64(%rsp): x12 x1 x6 x11. */
        movl 20(%r10),%ecx
        movl 0(%r10),%r8d
        movl 0(%rdx),%eax
        movl 16(%r10),%r11d
        movl %ecx,64(%rsp)
        movl %r8d,4+64(%rsp)
        movl %eax,8+64(%rsp)
        movl %r11d,12+64(%rsp)
        /* Row at 80(%rsp): x8 (counter low) x13 x2 x7. */
        movl 24(%r10),%r8d
        movl 4(%r10),%eax
        movl 4(%rdx),%edx
        movq 472(%rsp),%rcx
        movl %ecx,80(%rsp)
        movl %r8d,4+80(%rsp)
        movl %eax,8+80(%rsp)
        movl %edx,12+80(%rsp)
        /* Row at 96(%rsp): x4 x9 (counter high) x14 x3. */
        movl 12(%r10),%edx
        shr $32,%rcx
        movl 28(%r10),%r8d
        movl 8(%r10),%eax
        movl %edx,96(%rsp)
        movl %ecx,4+96(%rsp)
        movl %r8d,8+96(%rsp)
        movl %eax,12+96(%rsp)
        /* Row at 112(%rsp): the constants, ASCII "expand 32-byte k". */
        mov $1634760805,%rdx
        mov $857760878,%rcx
        mov $2036477234,%r8
        mov $1797285236,%rax
        movl %edx,112(%rsp)
        movl %ecx,4+112(%rsp)
        movl %r8d,8+112(%rsp)
        movl %eax,12+112(%rsp)
        cmp $256,%r9
        jb ._bytesbetween1and255
        /*
         * At least 256 bytes: prepare the four-blocks-at-a-time path by
         * broadcasting every non-counter state word to all four lanes.
         */
        movdqa 112(%rsp),%xmm0
        pshufd $0x55,%xmm0,%xmm1
        pshufd $0xaa,%xmm0,%xmm2
        pshufd $0xff,%xmm0,%xmm3
        pshufd $0x00,%xmm0,%xmm0
        movdqa %xmm1,128(%rsp)
        movdqa %xmm2,144(%rsp)
        movdqa %xmm3,160(%rsp)
        movdqa %xmm0,176(%rsp)
        movdqa 64(%rsp),%xmm0
        pshufd $0xaa,%xmm0,%xmm1
        pshufd $0xff,%xmm0,%xmm2
        pshufd $0x00,%xmm0,%xmm3
        pshufd $0x55,%xmm0,%xmm0
        movdqa %xmm1,192(%rsp)
        movdqa %xmm2,208(%rsp)
        movdqa %xmm3,224(%rsp)
        movdqa %xmm0,240(%rsp)
        movdqa 80(%rsp),%xmm0
        pshufd $0xff,%xmm0,%xmm1
        pshufd $0x55,%xmm0,%xmm2
        pshufd $0xaa,%xmm0,%xmm0
        movdqa %xmm1,256(%rsp)
        movdqa %xmm2,272(%rsp)
        movdqa %xmm0,288(%rsp)
        movdqa 96(%rsp),%xmm0
        pshufd $0x00,%xmm0,%xmm1
        pshufd $0xaa,%xmm0,%xmm2
        pshufd $0xff,%xmm0,%xmm0
        movdqa %xmm1,304(%rsp)
        movdqa %xmm2,320(%rsp)
        movdqa %xmm0,336(%rsp)
.p2align 4
._bytesatleast256:
        /*
         * Lane i of 352(%rsp) / 368(%rsp) receives the low / high half of
         * counter + i.  The counter advanced by 4 is then written back to
         * the single-block state rows and to 472(%rsp).
         */
        movq 472(%rsp),%rdx
        mov %rdx,%rcx
        shr $32,%rcx
        movl %edx,352(%rsp)
        movl %ecx,368(%rsp)
        add $1,%rdx
        mov %rdx,%rcx
        shr $32,%rcx
        movl %edx,4+352(%rsp)
        movl %ecx,4+368(%rsp)
        add $1,%rdx
        mov %rdx,%rcx
        shr $32,%rcx
        movl %edx,8+352(%rsp)
        movl %ecx,8+368(%rsp)
        add $1,%rdx
        mov %rdx,%rcx
        shr $32,%rcx
        movl %edx,12+352(%rsp)
        movl %ecx,12+368(%rsp)
        add $1,%rdx
        mov %rdx,%rcx
        shr $32,%rcx
        movl %edx,80(%rsp)
        movl %ecx,4+96(%rsp)
        movq %rdx,472(%rsp)
        movq %r9,480(%rsp)              /* r9 is scratch until reloaded */
        mov $20,%rdx                    /* rounds left */
        /*
         * Register map on loop entry (each register = one state word for
         * four blocks):
         *   xmm12=x0  xmm7=x1   xmm10=x2  xmm4=x3
         *   xmm14=x4  xmm0=x5   xmm5=x6   xmm8=x7
         *   xmm15=x8  xmm11=x9  xmm1=x10  xmm6=x11
         *   xmm13=x12 xmm9=x13  xmm3=x14  xmm2=x15
         */
        movdqa 128(%rsp),%xmm0
        movdqa 144(%rsp),%xmm1
        movdqa 160(%rsp),%xmm2
        movdqa 320(%rsp),%xmm3
        movdqa 336(%rsp),%xmm4
        movdqa 192(%rsp),%xmm5
        movdqa 208(%rsp),%xmm6
        movdqa 240(%rsp),%xmm7
        movdqa 256(%rsp),%xmm8
        movdqa 272(%rsp),%xmm9
        movdqa 288(%rsp),%xmm10
        movdqa 368(%rsp),%xmm11
        movdqa 176(%rsp),%xmm12
        movdqa 224(%rsp),%xmm13
        movdqa 304(%rsp),%xmm14
        movdqa 352(%rsp),%xmm15
.p2align 4
._mainloop1:
        /*
         * Two rounds per iteration.  All 16 xmm registers hold state, so
         * two words are parked in 384/400(%rsp) to free temporaries; which
         * words sit in the spill slots changes as the rounds progress.
         * Every rotate-left by r is built as (v << r) ^ (v >> (32 - r)).
         */
        movdqa %xmm1,384(%rsp)
        movdqa %xmm2,400(%rsp)
        movdqa %xmm13,%xmm1
        paddd %xmm12,%xmm1
        movdqa %xmm1,%xmm2
        pslld $7,%xmm1
        pxor %xmm1,%xmm14
        psrld $25,%xmm2
        pxor %xmm2,%xmm14
        movdqa %xmm7,%xmm1
        paddd %xmm0,%xmm1
        movdqa %xmm1,%xmm2
        pslld $7,%xmm1
        pxor %xmm1,%xmm11
        psrld $25,%xmm2
        pxor %xmm2,%xmm11
        movdqa %xmm12,%xmm1
        paddd %xmm14,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm15
        psrld $23,%xmm2
        pxor %xmm2,%xmm15
        movdqa %xmm0,%xmm1
        paddd %xmm11,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm9
        psrld $23,%xmm2
        pxor %xmm2,%xmm9
        movdqa %xmm14,%xmm1
        paddd %xmm15,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm13
        psrld $19,%xmm2
        pxor %xmm2,%xmm13
        movdqa %xmm11,%xmm1
        paddd %xmm9,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm7
        psrld $19,%xmm2
        pxor %xmm2,%xmm7
        movdqa %xmm15,%xmm1
        paddd %xmm13,%xmm1
        movdqa %xmm1,%xmm2
        pslld $18,%xmm1
        pxor %xmm1,%xmm12
        psrld $14,%xmm2
        pxor %xmm2,%xmm12
        movdqa 384(%rsp),%xmm1
        movdqa %xmm12,384(%rsp)
        movdqa %xmm9,%xmm2
        paddd %xmm7,%xmm2
        movdqa %xmm2,%xmm12
        pslld $18,%xmm2
        pxor %xmm2,%xmm0
        psrld $14,%xmm12
        pxor %xmm12,%xmm0
        movdqa %xmm5,%xmm2
        paddd %xmm1,%xmm2
        movdqa %xmm2,%xmm12
        pslld $7,%xmm2
        pxor %xmm2,%xmm3
        psrld $25,%xmm12
        pxor %xmm12,%xmm3
        movdqa 400(%rsp),%xmm2
        movdqa %xmm0,400(%rsp)
        movdqa %xmm6,%xmm0
        paddd %xmm2,%xmm0
        movdqa %xmm0,%xmm12
        pslld $7,%xmm0
        pxor %xmm0,%xmm4
        psrld $25,%xmm12
        pxor %xmm12,%xmm4
        movdqa %xmm1,%xmm0
        paddd %xmm3,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm10
        psrld $23,%xmm12
        pxor %xmm12,%xmm10
        movdqa %xmm2,%xmm0
        paddd %xmm4,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm8
        psrld $23,%xmm12
        pxor %xmm12,%xmm8
        movdqa %xmm3,%xmm0
        paddd %xmm10,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm5
        psrld $19,%xmm12
        pxor %xmm12,%xmm5
        movdqa %xmm4,%xmm0
        paddd %xmm8,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm6
        psrld $19,%xmm12
        pxor %xmm12,%xmm6
        movdqa %xmm10,%xmm0
        paddd %xmm5,%xmm0
        movdqa %xmm0,%xmm12
        pslld $18,%xmm0
        pxor %xmm0,%xmm1
        psrld $14,%xmm12
        pxor %xmm12,%xmm1
        movdqa 384(%rsp),%xmm0
        movdqa %xmm1,384(%rsp)
        movdqa %xmm4,%xmm1
        paddd %xmm0,%xmm1
        movdqa %xmm1,%xmm12
        pslld $7,%xmm1
        pxor %xmm1,%xmm7
        psrld $25,%xmm12
        pxor %xmm12,%xmm7
        movdqa %xmm8,%xmm1
        paddd %xmm6,%xmm1
        movdqa %xmm1,%xmm12
        pslld $18,%xmm1
        pxor %xmm1,%xmm2
        psrld $14,%xmm12
        pxor %xmm12,%xmm2
        movdqa 400(%rsp),%xmm12
        movdqa %xmm2,400(%rsp)
        movdqa %xmm14,%xmm1
        paddd %xmm12,%xmm1
        movdqa %xmm1,%xmm2
        pslld $7,%xmm1
        pxor %xmm1,%xmm5
        psrld $25,%xmm2
        pxor %xmm2,%xmm5
        movdqa %xmm0,%xmm1
        paddd %xmm7,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm10
        psrld $23,%xmm2
        pxor %xmm2,%xmm10
        movdqa %xmm12,%xmm1
        paddd %xmm5,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm8
        psrld $23,%xmm2
        pxor %xmm2,%xmm8
        movdqa %xmm7,%xmm1
        paddd %xmm10,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm4
        psrld $19,%xmm2
        pxor %xmm2,%xmm4
        movdqa %xmm5,%xmm1
        paddd %xmm8,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm14
        psrld $19,%xmm2
        pxor %xmm2,%xmm14
        movdqa %xmm10,%xmm1
        paddd %xmm4,%xmm1
        movdqa %xmm1,%xmm2
        pslld $18,%xmm1
        pxor %xmm1,%xmm0
        psrld $14,%xmm2
        pxor %xmm2,%xmm0
        movdqa 384(%rsp),%xmm1
        movdqa %xmm0,384(%rsp)
        movdqa %xmm8,%xmm0
        paddd %xmm14,%xmm0
        movdqa %xmm0,%xmm2
        pslld $18,%xmm0
        pxor %xmm0,%xmm12
        psrld $14,%xmm2
        pxor %xmm2,%xmm12
        movdqa %xmm11,%xmm0
        paddd %xmm1,%xmm0
        movdqa %xmm0,%xmm2
        pslld $7,%xmm0
        pxor %xmm0,%xmm6
        psrld $25,%xmm2
        pxor %xmm2,%xmm6
        movdqa 400(%rsp),%xmm2
        movdqa %xmm12,400(%rsp)
        movdqa %xmm3,%xmm0
        paddd %xmm2,%xmm0
        movdqa %xmm0,%xmm12
        pslld $7,%xmm0
        pxor %xmm0,%xmm13
        psrld $25,%xmm12
        pxor %xmm12,%xmm13
        movdqa %xmm1,%xmm0
        paddd %xmm6,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm15
        psrld $23,%xmm12
        pxor %xmm12,%xmm15
        movdqa %xmm2,%xmm0
        paddd %xmm13,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm9
        psrld $23,%xmm12
        pxor %xmm12,%xmm9
        movdqa %xmm6,%xmm0
        paddd %xmm15,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm11
        psrld $19,%xmm12
        pxor %xmm12,%xmm11
        movdqa %xmm13,%xmm0
        paddd %xmm9,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm3
        psrld $19,%xmm12
        pxor %xmm12,%xmm3
        movdqa %xmm15,%xmm0
        paddd %xmm11,%xmm0
        movdqa %xmm0,%xmm12
        pslld $18,%xmm0
        pxor %xmm0,%xmm1
        psrld $14,%xmm12
        pxor %xmm12,%xmm1
        movdqa %xmm9,%xmm0
        paddd %xmm3,%xmm0
        movdqa %xmm0,%xmm12
        pslld $18,%xmm0
        pxor %xmm0,%xmm2
        psrld $14,%xmm12
        pxor %xmm12,%xmm2
        /* Bring x0 and x5 back so the register map matches loop entry. */
        movdqa 384(%rsp),%xmm12
        movdqa 400(%rsp),%xmm0
        sub $2,%rdx
        ja ._mainloop1
        /*
         * Feed-forward and output.  For each group of four consecutive
         * state words: add the initial value, then peel lanes 0..3 off
         * with movd + pshufd $0x39 (lane rotation); lane i belongs to the
         * block at byte offset 64*i.
         */
        /* x0..x3 -> bytes 0..15 of each block */
        paddd 176(%rsp),%xmm12
        paddd 240(%rsp),%xmm7
        paddd 288(%rsp),%xmm10
        paddd 336(%rsp),%xmm4
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r8
        movd %xmm4,%r9
        pshufd $0x39,%xmm12,%xmm12
        pshufd $0x39,%xmm7,%xmm7
        pshufd $0x39,%xmm10,%xmm10
        pshufd $0x39,%xmm4,%xmm4
        xorl 0(%rsi),%edx
        xorl 4(%rsi),%ecx
        xorl 8(%rsi),%r8d
        xorl 12(%rsi),%r9d
        movl %edx,0(%rdi)
        movl %ecx,4(%rdi)
        movl %r8d,8(%rdi)
        movl %r9d,12(%rdi)
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r8
        movd %xmm4,%r9
        pshufd $0x39,%xmm12,%xmm12
        pshufd $0x39,%xmm7,%xmm7
        pshufd $0x39,%xmm10,%xmm10
        pshufd $0x39,%xmm4,%xmm4
        xorl 64(%rsi),%edx
        xorl 68(%rsi),%ecx
        xorl 72(%rsi),%r8d
        xorl 76(%rsi),%r9d
        movl %edx,64(%rdi)
        movl %ecx,68(%rdi)
        movl %r8d,72(%rdi)
        movl %r9d,76(%rdi)
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r8
        movd %xmm4,%r9
        pshufd $0x39,%xmm12,%xmm12
        pshufd $0x39,%xmm7,%xmm7
        pshufd $0x39,%xmm10,%xmm10
        pshufd $0x39,%xmm4,%xmm4
        xorl 128(%rsi),%edx
        xorl 132(%rsi),%ecx
        xorl 136(%rsi),%r8d
        xorl 140(%rsi),%r9d
        movl %edx,128(%rdi)
        movl %ecx,132(%rdi)
        movl %r8d,136(%rdi)
        movl %r9d,140(%rdi)
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r8
        movd %xmm4,%r9
        xorl 192(%rsi),%edx
        xorl 196(%rsi),%ecx
        xorl 200(%rsi),%r8d
        xorl 204(%rsi),%r9d
        movl %edx,192(%rdi)
        movl %ecx,196(%rdi)
        movl %r8d,200(%rdi)
        movl %r9d,204(%rdi)
        /* x4..x7 -> bytes 16..31 of each block */
        paddd 304(%rsp),%xmm14
        paddd 128(%rsp),%xmm0
        paddd 192(%rsp),%xmm5
        paddd 256(%rsp),%xmm8
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r8
        movd %xmm8,%r9
        pshufd $0x39,%xmm14,%xmm14
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm5,%xmm5
        pshufd $0x39,%xmm8,%xmm8
        xorl 16(%rsi),%edx
        xorl 20(%rsi),%ecx
        xorl 24(%rsi),%r8d
        xorl 28(%rsi),%r9d
        movl %edx,16(%rdi)
        movl %ecx,20(%rdi)
        movl %r8d,24(%rdi)
        movl %r9d,28(%rdi)
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r8
        movd %xmm8,%r9
        pshufd $0x39,%xmm14,%xmm14
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm5,%xmm5
        pshufd $0x39,%xmm8,%xmm8
        xorl 80(%rsi),%edx
        xorl 84(%rsi),%ecx
        xorl 88(%rsi),%r8d
        xorl 92(%rsi),%r9d
        movl %edx,80(%rdi)
        movl %ecx,84(%rdi)
        movl %r8d,88(%rdi)
        movl %r9d,92(%rdi)
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r8
        movd %xmm8,%r9
        pshufd $0x39,%xmm14,%xmm14
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm5,%xmm5
        pshufd $0x39,%xmm8,%xmm8
        xorl 144(%rsi),%edx
        xorl 148(%rsi),%ecx
        xorl 152(%rsi),%r8d
        xorl 156(%rsi),%r9d
        movl %edx,144(%rdi)
        movl %ecx,148(%rdi)
        movl %r8d,152(%rdi)
        movl %r9d,156(%rdi)
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r8
        movd %xmm8,%r9
        xorl 208(%rsi),%edx
        xorl 212(%rsi),%ecx
        xorl 216(%rsi),%r8d
        xorl 220(%rsi),%r9d
        movl %edx,208(%rdi)
        movl %ecx,212(%rdi)
        movl %r8d,216(%rdi)
        movl %r9d,220(%rdi)
        /* x8..x11 -> bytes 32..47 of each block */
        paddd 352(%rsp),%xmm15
        paddd 368(%rsp),%xmm11
        paddd 144(%rsp),%xmm1
        paddd 208(%rsp),%xmm6
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r8
        movd %xmm6,%r9
        pshufd $0x39,%xmm15,%xmm15
        pshufd $0x39,%xmm11,%xmm11
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm6,%xmm6
        xorl 32(%rsi),%edx
        xorl 36(%rsi),%ecx
        xorl 40(%rsi),%r8d
        xorl 44(%rsi),%r9d
        movl %edx,32(%rdi)
        movl %ecx,36(%rdi)
        movl %r8d,40(%rdi)
        movl %r9d,44(%rdi)
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r8
        movd %xmm6,%r9
        pshufd $0x39,%xmm15,%xmm15
        pshufd $0x39,%xmm11,%xmm11
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm6,%xmm6
        xorl 96(%rsi),%edx
        xorl 100(%rsi),%ecx
        xorl 104(%rsi),%r8d
        xorl 108(%rsi),%r9d
        movl %edx,96(%rdi)
        movl %ecx,100(%rdi)
        movl %r8d,104(%rdi)
        movl %r9d,108(%rdi)
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r8
        movd %xmm6,%r9
        pshufd $0x39,%xmm15,%xmm15
        pshufd $0x39,%xmm11,%xmm11
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm6,%xmm6
        xorl 160(%rsi),%edx
        xorl 164(%rsi),%ecx
        xorl 168(%rsi),%r8d
        xorl 172(%rsi),%r9d
        movl %edx,160(%rdi)
        movl %ecx,164(%rdi)
        movl %r8d,168(%rdi)
        movl %r9d,172(%rdi)
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r8
        movd %xmm6,%r9
        xorl 224(%rsi),%edx
        xorl 228(%rsi),%ecx
        xorl 232(%rsi),%r8d
        xorl 236(%rsi),%r9d
        movl %edx,224(%rdi)
        movl %ecx,228(%rdi)
        movl %r8d,232(%rdi)
        movl %r9d,236(%rdi)
        /* x12..x15 -> bytes 48..63 of each block */
        paddd 224(%rsp),%xmm13
        paddd 272(%rsp),%xmm9
        paddd 320(%rsp),%xmm3
        paddd 160(%rsp),%xmm2
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r8
        movd %xmm2,%r9
        pshufd $0x39,%xmm13,%xmm13
        pshufd $0x39,%xmm9,%xmm9
        pshufd $0x39,%xmm3,%xmm3
        pshufd $0x39,%xmm2,%xmm2
        xorl 48(%rsi),%edx
        xorl 52(%rsi),%ecx
        xorl 56(%rsi),%r8d
        xorl 60(%rsi),%r9d
        movl %edx,48(%rdi)
        movl %ecx,52(%rdi)
        movl %r8d,56(%rdi)
        movl %r9d,60(%rdi)
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r8
        movd %xmm2,%r9
        pshufd $0x39,%xmm13,%xmm13
        pshufd $0x39,%xmm9,%xmm9
        pshufd $0x39,%xmm3,%xmm3
        pshufd $0x39,%xmm2,%xmm2
        xorl 112(%rsi),%edx
        xorl 116(%rsi),%ecx
        xorl 120(%rsi),%r8d
        xorl 124(%rsi),%r9d
        movl %edx,112(%rdi)
        movl %ecx,116(%rdi)
        movl %r8d,120(%rdi)
        movl %r9d,124(%rdi)
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r8
        movd %xmm2,%r9
        pshufd $0x39,%xmm13,%xmm13
        pshufd $0x39,%xmm9,%xmm9
        pshufd $0x39,%xmm3,%xmm3
        pshufd $0x39,%xmm2,%xmm2
        xorl 176(%rsi),%edx
        xorl 180(%rsi),%ecx
        xorl 184(%rsi),%r8d
        xorl 188(%rsi),%r9d
        movl %edx,176(%rdi)
        movl %ecx,180(%rdi)
        movl %r8d,184(%rdi)
        movl %r9d,188(%rdi)
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r8
        movd %xmm2,%r9
        xorl 240(%rsi),%edx
        xorl 244(%rsi),%ecx
        xorl 248(%rsi),%r8d
        xorl 252(%rsi),%r9d
        movl %edx,240(%rdi)
        movl %ecx,244(%rdi)
        movl %r8d,248(%rdi)
        movl %r9d,252(%rdi)
        /* 256 bytes done: restore the byte count and advance the cursors. */
        movq 480(%rsp),%r9
        sub $256,%r9
        add $256,%rsi
        add $256,%rdi
        cmp $256,%r9
        jae ._bytesatleast256
        cmp $0,%r9
        jbe ._done
._bytesbetween1and255:
        /*
         * One 64-byte block per pass.  When fewer than 64 bytes remain, the
         * input is first copied into the bounce buffer at 0(%rsp) and the
         * block is processed there; rdx remembers the real destination.
         */
        cmp $64,%r9
        jae ._nocopy
        mov %rdi,%rdx
        leaq 0(%rsp),%rdi
        mov %r9,%rcx
        rep movsb
        leaq 0(%rsp),%rdi
        leaq 0(%rsp),%rsi
._nocopy:
        movq %r9,480(%rsp)
        /* xmm0..xmm3 = the four state rows (layout in the file header). */
        movdqa 112(%rsp),%xmm0
        movdqa 64(%rsp),%xmm1
        movdqa 80(%rsp),%xmm2
        movdqa 96(%rsp),%xmm3
        movdqa %xmm1,%xmm4
        mov $20,%rcx                    /* rounds left */
.p2align 4
._mainloop2:
        /*
         * Four rounds per iteration.  The pshufd $0x93 / $0x4e / $0x39 lane
         * rotations realign the rows between rounds.
         */
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm3
        pxor %xmm6,%xmm3
        paddd %xmm3,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm3,%xmm3
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm1
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pxor %xmm6,%xmm0
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm1
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm1,%xmm1
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm3
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm3
        paddd %xmm3,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm3,%xmm3
        pxor %xmm6,%xmm0
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm3
        pxor %xmm6,%xmm3
        paddd %xmm3,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm3,%xmm3
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm1
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pxor %xmm6,%xmm0
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm1
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm1,%xmm1
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm3
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm3
        /* This sub sets the flags for the ja below; the SSE instructions in
           between leave EFLAGS untouched. */
        sub $4,%rcx
        paddd %xmm3,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        pxor %xmm7,%xmm7                /* clears xmm7; not read again on this path */
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm3,%xmm3
        pxor %xmm6,%xmm0
        ja ._mainloop2
        /* Feed-forward: add the initial state rows back in. */
        paddd 112(%rsp),%xmm0
        paddd 64(%rsp),%xmm1
        paddd 80(%rsp),%xmm2
        paddd 96(%rsp),%xmm3
        /*
         * Scatter the diagonally stored words to their linear offsets while
         * XORing with the input: xmm0 -> 0,20,40,60   xmm1 -> 48,4,24,44
         *                        xmm2 -> 32,52,8,28   xmm3 -> 16,36,56,12
         */
        movd %xmm0,%rcx
        movd %xmm1,%r8
        movd %xmm2,%r9
        movd %xmm3,%rax
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm2,%xmm2
        pshufd $0x39,%xmm3,%xmm3
        xorl 0(%rsi),%ecx
        xorl 48(%rsi),%r8d
        xorl 32(%rsi),%r9d
        xorl 16(%rsi),%eax
        movl %ecx,0(%rdi)
        movl %r8d,48(%rdi)
        movl %r9d,32(%rdi)
        movl %eax,16(%rdi)
        movd %xmm0,%rcx
        movd %xmm1,%r8
        movd %xmm2,%r9
        movd %xmm3,%rax
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm2,%xmm2
        pshufd $0x39,%xmm3,%xmm3
        xorl 20(%rsi),%ecx
        xorl 4(%rsi),%r8d
        xorl 52(%rsi),%r9d
        xorl 36(%rsi),%eax
        movl %ecx,20(%rdi)
        movl %r8d,4(%rdi)
        movl %r9d,52(%rdi)
        movl %eax,36(%rdi)
        movd %xmm0,%rcx
        movd %xmm1,%r8
        movd %xmm2,%r9
        movd %xmm3,%rax
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm2,%xmm2
        pshufd $0x39,%xmm3,%xmm3
        xorl 40(%rsi),%ecx
        xorl 24(%rsi),%r8d
        xorl 8(%rsi),%r9d
        xorl 56(%rsi),%eax
        movl %ecx,40(%rdi)
        movl %r8d,24(%rdi)
        movl %r9d,8(%rdi)
        movl %eax,56(%rdi)
        movd %xmm0,%rcx
        movd %xmm1,%r8
        movd %xmm2,%r9
        movd %xmm3,%rax
        xorl 60(%rsi),%ecx
        xorl 44(%rsi),%r8d
        xorl 28(%rsi),%r9d
        xorl 12(%rsi),%eax
        movl %ecx,60(%rdi)
        movl %r8d,44(%rdi)
        movl %r9d,28(%rdi)
        movl %eax,12(%rdi)
        /* Restore the byte count and bump the 64-bit block counter by one,
           both in its own slot and in the state rows (x8 / x9). */
        movq 480(%rsp),%r9
        movq 472(%rsp),%rcx
        add $1,%rcx
        mov %rcx,%r8
        shr $32,%r8
        movl %ecx,80(%rsp)
        movl %r8d,4+96(%rsp)
        movq %rcx,472(%rsp)
        cmp $64,%r9
        ja ._bytesatleast65
        jae ._bytesatleast64
        /* Short final block: copy the r9 valid bytes from the bounce buffer
           to the real destination saved in rdx. */
        mov %rdi,%rsi
        mov %rdx,%rdi
        mov %r9,%rcx
        rep movsb
._bytesatleast64:
._done:
        /* Restore the saved registers and the caller's rsp; return 0. */
        movq 416(%rsp),%r11
        movq 424(%rsp),%r12
        movq 432(%rsp),%r13
        movq 440(%rsp),%r14
        movq 448(%rsp),%r15
        movq 456(%rsp),%rbx
        movq 464(%rsp),%rbp
        add %r11,%rsp
        xor %rax,%rax
        mov %rsi,%rdx                   /* NOTE(review): rdx is overwritten with rsi here; looks vestigial, confirm no caller reads it */
        ret
._bytesatleast65:
        /* More than one block left: step past the block just written. */
        sub $64,%r9
        add $64,%rdi
        add $64,%rsi
        jmp ._bytesbetween1and255
#endif
  /* On Linux/ELF, emit an empty .note.GNU-stack section so the linker does
     not mark the stack executable because of this object file. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif