twofish.m 62 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747
  1. /*
  2. * Fast, portable, and easy-to-use Twofish implementation,
  3. * Version 0.3.
  4. * Copyright (c) 2002 by Niels Ferguson.
  5. * (See further down for the almost-unrestricted licensing terms.)
  6. *
  7. * --------------------------------------------------------------------------
  8. * There are two files for this implementation:
  9. * - twofish.h, the header file.
  10. * - twofish.c, the code file.
  11. *
  12. * To incorporate this code into your program you should:
  13. * - Check the licensing terms further down in this comment.
  14. * - Fix the two type definitions in twofish.h to suit your platform.
  15. * - Fix a few definitions in twofish.c in the section marked
  16. * PLATFORM FIXES. There is one important ones that affects
  17. * functionality, and then a few definitions that you can optimise
  18. * for efficiency but those have no effect on the functionality.
  19. * Don't change anything else.
  20. * - Put the code in your project and compile it.
  21. *
  22. * To use this library you should:
  23. * - Call twofish_initialise() in your program before any other function in
  24. * this library.
  25. * - Use twofish_prepare_key(...) to convert a key to internal form.
  26. * - Use twofish_encrypt_block(...) and twofish_decrypt_block(...) to encrypt and decrypt
  27. * data.
  28. * See the comments in the header file for details on these functions.
  29. * --------------------------------------------------------------------------
  30. *
  31. * There are many Twofish implementation available for free on the web.
  32. * Most of them are hard to integrate into your own program.
  33. * As we like people to use our cipher, I thought I would make it easier.
  34. * Here is a free and easy-to-integrate Twofish implementation in C.
  35. * The latest version is always available from my personal home page at
  36. * http://niels.ferguson.net/
  37. *
  38. * Integrating library code into a project is difficult because the library
  39. * header files interfere with the project's header files and code.
  40. * And of course the project's header files interfere with the library code.
  41. * I've tried to resolve these problems here.
  42. * The header file of this implementation is very light-weight.
  43. * It contains two typedefs, a structure, and a few function declarations.
  44. * All names it defines start with "twofish_".
  45. * The header file is therefore unlikely to cause problems in your project.
  46. * The code file of this implementation doesn't need to include the header
  47. * files of the project. There is thus no danger of the project interfering
  48. * with all the definitions and macros of the Twofish code.
  49. * In most situations, all you need to do is fill in a few platform-specific
  50. * definitions in the header file and code file,
  51. * and you should be able to run the Twofish code in your project.
  52. * I estimate it should take you less than an hour to integrate this code
  53. * into your project, most of it spent reading the comments telling you what
  54. * to do.
  55. *
  56. * For people using C++: it is very easy to wrap this library into a
  57. * TwofishKey class. One of the big advantages is that you can automate the
  58. * wiping of the key material in the destructor. I have not provided a C++
  59. * class because the interface depends too much on the abstract base class
  60. * you use for block ciphers in your program, which I don't know about.
  61. *
  62. * This implementation is designed for use on PC-class machines. It uses the
  63. * Twofish 'full' keying option which uses large tables. Total table size is
  64. * around 5-6 kB for static tables plus 4.5 kB for each pre-processed key.
  65. * If you need an implementation that uses less memory,
  66. * take a look at Brian Gladman's code on his web site:
  67. * http://fp.gladman.plus.com/cryptography_technology/aes/
  68. * He has code for all AES candidates.
  69. * His Twofish code has lots of options trading off table size vs. speed.
  70. * You can also take a look at the optimised code by Doug Whiting on the
  71. * Twofish web site
  72. * http://www.counterpane.com/twofish.html
  73. * which has loads of options.
  74. * I believe these existing implementations are harder to re-use because they
  75. * are not clean libraries and they impose requirements on the environment.
  76. * This implementation is very careful to minimise those,
  77. * and should be easier to integrate into any larger program.
  78. *
  79. * The default mode of this implementation is fully portable as it uses no
  80. * behaviour not defined in the C standard. (This is harder than you think.)
  81. * If you have any problems porting the default mode, please let me know
  82. * so that I can fix the problem. (But only if this code is at fault, I
  83. * don't fix compilers.)
  84. * Most of the platform fixes are related to non-portable but faster ways
  85. * of implementing certain functions.
  86. *
  87. * In general I've tried to make the code as fast as possible, at the expense
  88. * of memory and code size. However, C does impose limits, and this
  89. * implementation will be slower than an optimised assembler implementation.
  90. * But beware of assembler implementations: a good Pentium implementation
  91. * uses completely different code than a good Pentium II implementation.
  92. * You basically have to re-write the assembly code for every generation of
  93. * processor. Unless you are severely pressed for speed, stick with C.
  94. *
  95. * The initialisation routine of this implementation contains a self-test.
  96. * If initialisation succeeds without calling the fatal routine, then
  97. * the implementation works. I don't think you can break the implementation
  98. * in such a way that it still passes the tests, unless you are malicious.
  99. * In other words: if the initialisation routine returns,
  100. * you have successfully ported the implementation.
  101. * (Or not implemented the fatal routine properly, but that is your problem.)
  102. *
  103. * I'm indebted to many people who helped me in one way or another to write
  104. * this code. During the design of Twofish and the AES process I had very
  105. * extensive discussions of all implementation issues with various people.
  106. * Doug Whiting in particular provided a wealth of information. The Twofish
  107. * team spent untold hours discussion various cipher features, and their
  108. * implementation. Brian Gladman implemented all AES candidates in C,
  109. * and we had some fruitful discussions on how to implement Twofish in C.
  110. * Jan Nieuwenhuizen tested this code on Linux using GCC.
  111. *
  112. * Now for the license:
  113. * The author hereby grants a perpetual license to everybody to
  114. * use this code for any purpose as long as the copyright message is included
  115. * in the source code of this or any derived work.
  116. *
  117. * Yes, this means that you, your company, your club, and anyone else
  118. * can use this code anywhere you want. You can change it and distribute it
  119. * under the GPL, include it in your commercial product without releasing
  120. * the source code, put it on the web, etc.
  121. * The only thing you cannot do is remove my copyright message,
  122. * or distribute any source code based on this implementation that does not
  123. * include my copyright message.
  124. *
  125. * I appreciate a mention in the documentation or credits,
  126. * but I understand if that is difficult to do.
  127. * I also appreciate it if you tell me where and why you used my code.
  128. *
  129. * Please send any questions or comments to niels@ferguson.net
  130. *
  131. * Have Fun!
  132. *
  133. * Niels
  134. */
  135. /*
  136. * DISCLAIMER: As I'm giving away my work for free, I'm of course not going
  137. * to accept any liability of any form. This code, or the Twofish cipher,
  138. * might very well be flawed; you have been warned.
  139. * This software is provided as-is, without any kind of warrenty or
  140. * guarantee. And that is really all you can expect when you download
  141. * code for free from the Internet.
  142. *
  143. * I think it is really sad that disclaimers like this seem to be necessary.
  144. * If people only had a little bit more common sense, and didn't come
  145. * whining like little children every time something happens....
  146. */
  147. /*
  148. * Version history:
  149. * Version 0.0, 2002-08-30
  150. * First written.
  151. * Version 0.1, 2002-09-03
  152. * Added disclaimer. Improved self-tests.
  153. * Version 0.2, 2002-09-09
  154. * Removed last non-portabilities. Default now works completely within
  155. * the C standard. DWORD can be larger than 32 bits without problems.
  156. * Version 0.3, 2002-09-28
  157. * Bugfix: use <string.h> instead of <memory.h> to adhere to ANSI/ISO.
  158. * Rename BIG_ENDIAN macro to CPU_IS_BIG_ENDIAN. The gcc library
  159. * header <string.h> already defines BIG_ENDIAN, even though it is not
  160. * supposed to.
  161. */
  162. /*
  163. * Minimum set of include files.
  164. * You should not need any application-specific include files for this code.
  165. * In fact, adding you own header files could break one of the many macros or
  166. * functions in this file. Be very careful.
  167. * Standard include files will probably be ok.
  168. */
  169. #include <Foundation/Foundation.h>
  170. #include <string.h> /* for memset(), memcpy(), and memcmp() */
  171. #include "twofish.h"
  172. /*
  173. * PLATFORM FIXES
  174. * ==============
  175. *
  176. * Fix the type definitions in twofish.h first!
  177. *
  178. * The following definitions have to be fixed for each particular platform
  179. * you work on. If you have a multi-platform program, you no doubt have
  180. * portable definitions that you can substitute here without changing the
  181. * rest of the code.
  182. */
  183. /*
  184. * Function called if something is fatally wrong with the implementation.
  185. * This fatal function is called when a coding error is detected in the
  186. * Twofish implementation, or when somebody passes an obviously erroneous
  187. * parameter to this implementation. There is not much you can do when
  188. * the code contains bugs, so we just stop.
  189. *
  190. * The argument is a string. Ideally the fatal function prints this string
  191. * as an error message. Whatever else this function does, it should never
  192. * return. A typical implementation would stop the program completely after
  193. * printing the error message.
  194. *
  195. * This default implementation is not very useful,
  196. * but does not assume anything about your environment.
  197. * It will at least let you know something is wrong....
  198. * I didn't want to include any libraries to print and error or so,
  199. * as this makes the code much harder to integrate in a project.
  200. *
  201. * Note that the twofish_fatal function may not return to the caller.
  202. * Unfortunately this is not something the self-test can test for,
  203. * so you have to make sure of this yourself.
  204. *
  205. * If you want to call an external function, be careful about including
  206. * your own header files here. This code uses a lot of macros, and your
  207. * header file could easily break it. Maybe the best solution is to use
  208. * a separate extern statement for your fatal function.
  209. */
  210. #define twofish_fatal( msg ) [NSException raise:@"Towfish" format:@"%s", msg]
  211. /*
  212. * The rest of the settings are not important for the functionality
  213. * of this Twofish implementation. That is, their default settings
  214. * work on all platforms. You can change them to improve the
  215. * speed of the implementation on your platform. Erroneous settings
  216. * will result in erroneous implementations, but the self-test should
  217. * catch those.
  218. */
  219. /*
  220. * Macros to rotate a DWORD value left or right by the
  221. * specified number of bits. This should be a 32-bit rotation,
  222. * and not rotation of, say, 64-bit values.
  223. *
  224. * Every encryption or decryption operation uses 32 of these rotations,
  225. * so it is a good idea to make these macros efficient.
  226. *
  227. * This fully portable definition has one piece of tricky stuff.
  228. * The DWORD might be larger than 32 bits, so we have to mask
  229. * any higher bits off. The simplest way to do this is to 'and' the
  230. * value first with 0xffffffff and then shift it right. An optimising
  231. * compiler that has a 32-bit type can optimise this 'and' away.
  232. *
  233. * Unfortunately there is no portable way of writing the constant
  234. * 0xffffffff. You don't know which suffix to use (U, or UL?)
  235. * The UINT32_MASK definition uses a bit of trickery. Shift-left
  236. * is only defined if the shift amount is strictly less than the size
  237. * of the DWORD, so we can't use (1<<32). The answer it to take the value
  238. * 2, cast it to a DWORD, shift it left 31 positions, and subtract one.
  239. * Another example of how to make something very simple extremely difficult.
  240. * I hate C.
  241. *
  242. * The rotation macros are straightforward.
  243. * They are only applied to DWORD values, which are _unsigned_
  244. * so the >> operator must do a logical shift that brings in zeroes.
  245. * On most platforms you will only need to optimise the ROL32 macro; the
  246. * ROR32 macro is not inefficient on an optimising compiler as all rotation
  247. * amounts in this code are known at compile time.
  248. *
  249. * On many platforms there is a faster solution.
  250. * For example, MS compilers have the __rotl and __rotr functions
  251. * that generate x86 rotation instructions.
  252. */
  253. #define UINT32_MASK ( (((DWORD)2)<<31) - 1 )
  254. #define ROL32( x, n ) ( (x)<<(n) | ((x) & UINT32_MASK) >> (32-(n)) )
  255. #define ROR32( x, n ) ROL32( (x), 32-(n) )
  256. /*
  257. * Select data type for q-table entries.
  258. *
  259. * Larger entry types cost more memory (1.5 kB), and might be faster
  260. * or slower depending on the CPU and compiler details.
  261. *
  262. * This choice only affects the static data size and the key setup speed.
  263. * Functionality, expanded key size, or encryption speed are not affected.
  264. * Define to 1 to get large q-table entries.
  265. */
  266. #define LARGE_Q_TABLE 0 /* default = 0 */
  267. /*
  268. * Method to select a single byte from a DWORD.
  269. * WARNING: non-portable code if set; might not work on all platforms.
  270. *
  271. * Inside the inner loop of Twofish it is necessary to access the 4
  272. * individual bytes of a DWORD. This can be done using either shifts
  273. * and masks, or memory accesses.
  274. *
  275. * Set to 0 to use shift and mask operations for the byte selection.
  276. * This is more ALU intensive. It is also fully portable.
  277. *
  278. * Set to 1 to use memory accesses. The DWORD is stored in memory and
  279. * the individual bytes are read from memory one at a time.
  280. * This solution is more memory-intensive, and not fully portable.
  281. * It might be faster on your platform, or not. If you use this option,
  282. * make sure you set the CPU_IS_BIG_ENDIAN flag appropriately.
  283. *
  284. * This macro does not affect the conversion of the inputs and outputs
  285. * of the cipher. See the CONVERT_USING_CASTS macro for that.
  286. */
  287. #define SELECT_BYTE_FROM_UINT32_IN_MEMORY 0 /* default = 0 */
  288. /*
  289. * Method used to read the input and write the output.
  290. * WARNING: non-portable code if set; might not work on all platforms.
  291. *
  292. * Twofish operates on 32-bit words. The input to the cipher is
  293. * a byte array, as is the output. The portable method of doing the
  294. * conversion is a bunch of rotate and mask operations, but on many
  295. * platforms it can be done faster using a cast.
  296. * This only works if your CPU allows DWORD accesses to arbitrary BYTE
  297. * addresses.
  298. *
  299. * Set to 0 to use the shift and mask operations. This is fully
  300. * portable. .
  301. *
  302. * Set to 1 to use a cast. The BYTE * is cast to a DWORD *, and a
  303. * DWORD is read. If necessary (as indicated by the CPU_IS_BIG_ENDIAN
  304. * macro) the byte order in the DWORD is swapped. The reverse is done
  305. * to write the output of the encryption/decryption. Make sure you set
  306. * the CPU_IS_BIG_ENDIAN flag appropriately.
  307. * This option does not work unless a DWORD is exactly 32 bits.
  308. *
  309. * This macro only changes the reading/writing of the plaintext/ciphertext.
  310. * See the SELECT_BYTE_FROM_UINT32_IN_MEMORY to affect the way in which
  311. * a DWORD is split into 4 bytes for the S-box selection.
  312. */
  313. #define CONVERT_USING_CASTS 0 /* default = 0 */
  314. /*
  315. * Endianness switch.
  316. * Only relevant if SELECT_BYTE_FROM_UINT32_IN_MEMORY or
  317. * CONVERT_USING_CASTS is set.
  318. *
  319. * Set to 1 on a big-endian machine, and to 0 on a little-endian machine.
  320. * Twofish uses the little-endian convention (least significant byte first)
  321. * and big-endian machines (using most significant byte first)
  322. * have to do a few conversions.
  323. *
  324. * CAUTION: This code has never been tested on a big-endian machine,
  325. * because I don't have access to one. Feedback appreciated.
  326. */
  327. #define CPU_IS_BIG_ENDIAN 0
  328. /*
  329. * Macro to reverse the order of the bytes in a DWORD.
  330. * Used to convert to little-endian on big-endian machines.
  331. * This macro is always tested, but only used in the encryption and
  332. * decryption if CONVERT_USING_CASTS, and CPU_IS_BIG_ENDIAN
  333. * are both set. In other words: this macro is only speed-critical if
  334. * both these flags have been set.
  335. *
  336. * This default definition of SWAP works, but on many platforms there is a
  337. * more efficient implementation.
  338. */
  339. #define BSWAP(x) ((ROL32((x),8) & 0x00ff00ff) | (ROR32((x),8) & 0xff00ff00))
  340. /*
  341. * END OF PLATFORM FIXES
  342. * =====================
  343. *
  344. * You should not have to touch the rest of this file.
  345. */
  346. /*
  347. * Define a macro ENDIAN_CONVERT.
  348. *
  349. * We define a macro ENDIAN_CONVERT that performs a BSWAP on big-endian
  350. * machines, and is the identity function on little-endian machines.
  351. * The code then uses this macro without considering the endianness.
  352. */
  353. #if CPU_IS_BIG_ENDIAN
  354. #define ENDIAN_CONVERT(x) BSWAP(x)
  355. #else
  356. #define ENDIAN_CONVERT(x) (x)
  357. #endif
  358. /*
  359. * Compute byte offset within a DWORD stored in memory.
  360. *
  361. * This is only used when SELECT_BYTE_FROM_UINT32_IN_MEMORY is set.
  362. *
  363. * The input is the byte number 0..3, 0 for least significant.
  364. * Note the use of sizeof() to support DWORD types that are larger
  365. * than 4 bytes.
  366. */
  367. #if CPU_IS_BIG_ENDIAN
  368. #define BYTE_OFFSET( n ) (sizeof(DWORD) - 1 - (n) )
  369. #else
  370. #define BYTE_OFFSET( n ) (n)
  371. #endif
  372. /*
  373. * Macro to get BYTE no. b from DWORD value X.
  374. * We use two different definition, depending on the settings.
  375. */
  376. #if SELECT_BYTE_FROM_UINT32_IN_MEMORY
  377. /* Pick the byte from the memory in which X is stored. */
  378. #define SELECT_BYTE( X, b ) (((BYTE *)(&(X)))[BYTE_OFFSET(b)])
  379. #else
  380. /* Portable solution: Pick the byte directly from the X value. */
  381. #define SELECT_BYTE( X, b ) (((X) >> 8*(b)) & 0xff)
  382. #endif
  383. /* Some shorthands because we use byte selection in large formulae. */
  384. #define b0(X) SELECT_BYTE((X),0)
  385. #define b1(X) SELECT_BYTE((X),1)
  386. #define b2(X) SELECT_BYTE((X),2)
  387. #define b3(X) SELECT_BYTE((X),3)
  388. /*
  389. * We need macros to load and store DWORD from/to byte arrays
  390. * using the least-significant-byte-first convention.
  391. *
  392. * GET32( p ) gets a DWORD in lsb-first form from four bytes pointed to
  393. * by p.
  394. * PUT32( v, p ) writes the DWORD value v at address p in lsb-first form.
  395. */
  396. #if CONVERT_USING_CASTS
  397. /* Get DWORD from four bytes pointed to by p. */
  398. #define GET32( p ) ENDIAN_CONVERT( *((DWORD *)(p)) )
  399. /* Put DWORD into four bytes pointed to by p */
  400. #define PUT32( v, p ) *((DWORD *)(p)) = ENDIAN_CONVERT(v)
  401. #else
  402. /* Get DWORD from four bytes pointed to by p. */
  403. #define GET32( p ) \
  404. ( \
  405. (DWORD)((p)[0]) \
  406. | (DWORD)((p)[1])<< 8\
  407. | (DWORD)((p)[2])<<16\
  408. | (DWORD)((p)[3])<<24\
  409. )
  410. /* Put DWORD into four bytes pointed to by p */
  411. #define PUT32( v, p ) \
  412. (p)[0] = (BYTE)(((v) ) & 0xff);\
  413. (p)[1] = (BYTE)(((v) >> 8) & 0xff);\
  414. (p)[2] = (BYTE)(((v) >> 16) & 0xff);\
  415. (p)[3] = (BYTE)(((v) >> 24) & 0xff)
  416. #endif
  417. /*
  418. * Test the platform-specific macros.
  419. * This function tests the macros defined so far to make sure the
  420. * definitions are appropriate for this platform.
  421. * If you make any mistake in the platform configuration, this should detect
  422. * that and inform you what went wrong.
  423. * Somewhere, someday, this is going to save somebody a lot of time,
  424. * because misbehaving macros are hard to debug.
  425. */
  426. static void test_platform(void)
  427. {
  428. /* Buffer with test values. */
  429. BYTE buf[] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0};
  430. DWORD C;
  431. DWORD x,y;
  432. int i;
  433. /*
  434. * Some sanity checks on the types that can't be done in compile time.
  435. * A smart compiler will just optimise these tests away.
  436. * The pre-processor doesn't understand different types, so we cannot
  437. * do these checks in compile-time.
  438. *
  439. * I hate C.
  440. *
  441. * The first check in each case is to make sure the size is correct.
  442. * The second check is to ensure that it is an unsigned type.
  443. */
  444. if( ((DWORD) ((DWORD)1 << 31) == 0) || ((DWORD)-1 < 0) )
  445. {
  446. twofish_fatal( "Twofish code: DWORD type not suitable" );
  447. }
  448. if( (sizeof( BYTE ) != 1) || ((BYTE)-1 < 0) )
  449. {
  450. twofish_fatal( "Twofish code: BYTE type not suitable" );
  451. }
  452. /*
  453. * Sanity-check the endianness conversions.
  454. * This is just an aid to find problems. If you do the endianness
  455. * conversion macros wrong you will fail the full cipher test,
  456. * but that does not help you find the error.
  457. * Always make it easy to find the bugs!
  458. *
  459. * Detail: There is no fully portable way of writing DWORD constants,
  460. * as you don't know whether to use the U or UL suffix. Using only U you
  461. * might only be allowed 16-bit constants. Using UL you might get 64-bit
  462. * constants which cannot be stored in a DWORD without warnings, and
  463. * which generally behave subtly different from a true DWORD.
  464. * As long as we're just comparing with the constant,
  465. * we can always use the UL suffix and at worst lose some efficiency.
  466. * I use a separate '32-bit constant' macro in most of my other code.
  467. *
  468. * I hate C.
  469. *
  470. * Start with testing GET32. We test it on all positions modulo 4
  471. * to make sure we can handly any position of inputs. (Some CPUs
  472. * do not allow non-aligned accesses which we would do if you used
  473. * the CONVERT_USING_CASTS option.
  474. */
  475. if( GET32( buf ) != 0x78563412UL || GET32(buf+1) != 0x9a785634UL
  476. || GET32( buf+2 ) != 0xbc9a7856UL || GET32(buf+3) != 0xdebc9a78UL )
  477. {
  478. twofish_fatal( "Twofish code: GET32 not implemented properly" );
  479. }
  480. /*
  481. * We can now use GET32 to test PUT32.
  482. * We don't test the shifted versions. If GET32 can do that then
  483. * so should PUT32.
  484. */
  485. C = GET32( buf );
  486. PUT32( 3*C, buf );
  487. if( GET32( buf ) != 0x69029c36UL )
  488. {
  489. twofish_fatal( "Twofish code: PUT32 not implemented properly" );
  490. }
  491. /* Test ROL and ROR */
  492. for( i=1; i<32; i++ )
  493. {
  494. /* Just a simple test. */
  495. x = ROR32( C, i );
  496. y = ROL32( C, i );
  497. x ^= (C>>i) ^ (C<<(32-i));
  498. y ^= (C<<i) ^ (C>>(32-i));
  499. x |= y;
  500. /*
  501. * Now all we check is that x is zero in the least significant
  502. * 32 bits. Using the UL suffix is safe here, as it doesn't matter
  503. * if we get a larger type.
  504. */
  505. if( (x & 0xffffffffUL) != 0 )
  506. {
  507. twofish_fatal( "Twofish ROL or ROR not properly defined." );
  508. }
  509. }
  510. /* Test the BSWAP macro */
  511. if( (BSWAP(C)) != 0x12345678UL )
  512. {
  513. /*
  514. * The BSWAP macro should always work, even if you are not using it.
  515. * A smart optimising compiler will just remove this entire test.
  516. */
  517. twofish_fatal( "BSWAP not properly defined." );
  518. }
  519. /* And we can test the b<i> macros which use SELECT_BYTE. */
  520. if( (b0(C)!=0x12) || (b1(C) != 0x34) || (b2(C) != 0x56) || (b3(C) != 0x78) )
  521. {
  522. /*
  523. * There are many reasons why this could fail.
  524. * Most likely is that CPU_IS_BIG_ENDIAN has the wrong value.
  525. */
  526. twofish_fatal( "Twofish code: SELECT_BYTE not implemented properly" );
  527. }
  528. }
  529. /*
  530. * Finally, we can start on the Twofish-related code.
  531. * You really need the Twofish specifications to understand this code. The
  532. * best source is the Twofish book:
  533. * "The Twofish Encryption Algorithm", by Bruce Schneier, John Kelsey,
  534. * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson.
  535. * you can also use the AES submission document of Twofish, which is
  536. * available from my list of publications on my personal web site at
  537. * http://niels.ferguson.net/.
  538. *
  539. * The first thing we do is write the testing routines. This is what the
  540. * implementation has to satisfy in the end. We only test the external
  541. * behaviour of the implementation of course.
  542. */
  543. /*
  544. * Perform a single self test on a (plaintext,ciphertext,key) triple.
  545. * Arguments:
  546. * key array of key bytes
  547. * key_len length of key in bytes
  548. * p plaintext
  549. * c ciphertext
  550. */
  551. static void test_vector( BYTE key[], int key_len, BYTE p[16], BYTE c[16] )
  552. {
  553. BYTE tmp[16]; /* scratch pad. */
  554. twofish_key xkey; /* The expanded key */
  555. int i;
  556. /* Prepare the key */
  557. twofish_prepare_key( key, key_len, &xkey );
  558. /*
  559. * We run the test twice to ensure that the xkey structure
  560. * is not damaged by the first encryption.
  561. * Those are hideous bugs to find if you get them in an application.
  562. */
  563. for( i=0; i<2; i++ )
  564. {
  565. /* Encrypt and test */
  566. twofish_encrypt_block( &xkey, p, tmp );
  567. if( memcmp( c, tmp, 16 ) != 0 )
  568. {
  569. twofish_fatal( "Twofish encryption failure" );
  570. }
  571. /* Decrypt and test */
  572. twofish_decrypt_block( &xkey, c, tmp );
  573. if( memcmp( p, tmp, 16 ) != 0 )
  574. {
  575. twofish_fatal( "Twofish decryption failure" );
  576. }
  577. }
  578. /* The test keys are not secret, so we don't need to wipe xkey. */
  579. }
  580. /*
  581. * Check implementation using three (key,plaintext,ciphertext)
  582. * test vectors, one for each major key length.
  583. *
  584. * This is an absolutely minimal self-test.
  585. * This routine does not test odd-sized keys.
  586. */
  587. static void test_vectors()
  588. {
  589. /*
  590. * We run three tests, one for each major key length.
  591. * These test vectors come from the Twofish specification.
  592. * One encryption and one decryption using randomish data and key
  593. * will detect almost any error, especially since we generate the
  594. * tables ourselves, so we don't have the problem of a single
  595. * damaged table entry in the source.
  596. */
  597. /* 128-bit test is the I=3 case of section B.2 of the Twofish book. */
  598. static BYTE k128[] = {
  599. 0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32,
  600. 0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A,
  601. };
  602. static BYTE p128[] = {
  603. 0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E,
  604. 0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19
  605. };
  606. static BYTE c128[] = {
  607. 0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85,
  608. 0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3
  609. };
  610. /* 192-bit test is the I=4 case of section B.2 of the Twofish book. */
  611. static BYTE k192[] = {
  612. 0x88, 0xB2, 0xB2, 0x70, 0x6B, 0x10, 0x5E, 0x36,
  613. 0xB4, 0x46, 0xBB, 0x6D, 0x73, 0x1A, 0x1E, 0x88,
  614. 0xEF, 0xA7, 0x1F, 0x78, 0x89, 0x65, 0xBD, 0x44
  615. };
  616. static BYTE p192[] = {
  617. 0x39, 0xDA, 0x69, 0xD6, 0xBA, 0x49, 0x97, 0xD5,
  618. 0x85, 0xB6, 0xDC, 0x07, 0x3C, 0xA3, 0x41, 0xB2
  619. };
  620. static BYTE c192[] = {
  621. 0x18, 0x2B, 0x02, 0xD8, 0x14, 0x97, 0xEA, 0x45,
  622. 0xF9, 0xDA, 0xAC, 0xDC, 0x29, 0x19, 0x3A, 0x65
  623. };
  624. /* 256-bit test is the I=4 case of section B.2 of the Twofish book. */
  625. static BYTE k256[] = {
  626. 0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46,
  627. 0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D,
  628. 0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B,
  629. 0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F
  630. };
  631. static BYTE p256[] = {
  632. 0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F,
  633. 0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6
  634. };
  635. static BYTE c256[] = {
  636. 0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97,
  637. 0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
  638. };
  639. /* Run the actual tests. */
  640. test_vector( k128, 16, p128, c128 );
  641. test_vector( k192, 24, p192, c192 );
  642. test_vector( k256, 32, p256, c256 );
  643. }
  644. /*
  645. * Perform extensive test for a single key size.
  646. *
  647. * Test a single key size against the test vectors from section
  648. * B.2 in the Twofish book. This is a sequence of 49 encryptions
  649. * and decryptions. Each plaintext is equal to the ciphertext of
  650. * the previous encryption. The key is made up from the ciphertext
  651. * two and three encryptions ago. Both plaintext and key start
  652. * at the zero value.
  653. * We should have designed a cleaner recurrence relation for
  654. * these tests, but it is too late for that now. At least we learned
  655. * how to do it better next time.
  656. * For details see appendix B of the book.
  657. *
  658. * Arguments:
  659. * key_len Number of bytes of key
  660. * final_value Final plaintext value after 49 iterations
  661. */
  662. static void test_sequence( int key_len, BYTE final_value[] )
  663. {
  664. BYTE buf[ (50+3)*16 ]; /* Buffer to hold our computation values. */
  665. BYTE tmp[16]; /* Temp for testing the decryption. */
  666. twofish_key xkey; /* The expanded key */
  667. int i;
  668. BYTE * p;
  669. /* Wipe the buffer */
  670. memset( buf, 0, sizeof( buf ) );
  671. /*
  672. * Because the recurrence relation is done in an inconvenient manner
  673. * we end up looping backwards over the buffer.
  674. */
  675. /* Pointer in buffer points to current plaintext. */
  676. p = &buf[50*16];
  677. for( i=1; i<50; i++ )
  678. {
  679. /*
  680. * Prepare a key.
  681. * This automatically checks that key_len is valid.
  682. */
  683. twofish_prepare_key( p+16, key_len, &xkey );
  684. /* Compute the next 16 bytes in the buffer */
  685. twofish_encrypt_block( &xkey, p, p-16 );
  686. /* Check that the decryption is correct. */
  687. twofish_decrypt_block( &xkey, p-16, tmp );
  688. if( memcmp( tmp, p, 16 ) != 0 )
  689. {
  690. twofish_fatal( "Twofish decryption failure in sequence" );
  691. }
  692. /* Move on to next 16 bytes in the buffer. */
  693. p -= 16;
  694. }
  695. /* And check the final value. */
  696. if( memcmp( p, final_value, 16 ) != 0 )
  697. {
  698. twofish_fatal( "Twofish encryption failure in sequence" );
  699. }
  700. /* None of the data was secret, so there is no need to wipe anything. */
  701. }
  702. /*
  703. * Run all three sequence tests from the Twofish test vectors.
  704. *
  705. * This checks the most extensive test vectors currently available
  706. * for Twofish. The data is from the Twofish book, appendix B.2.
  707. */
  708. static void test_sequences()
  709. {
  710. static BYTE r128[] = {
  711. 0x5D, 0x9D, 0x4E, 0xEF, 0xFA, 0x91, 0x51, 0x57,
  712. 0x55, 0x24, 0xF1, 0x15, 0x81, 0x5A, 0x12, 0xE0
  713. };
  714. static BYTE r192[] = {
  715. 0xE7, 0x54, 0x49, 0x21, 0x2B, 0xEE, 0xF9, 0xF4,
  716. 0xA3, 0x90, 0xBD, 0x86, 0x0A, 0x64, 0x09, 0x41
  717. };
  718. static BYTE r256[] = {
  719. 0x37, 0xFE, 0x26, 0xFF, 0x1C, 0xF6, 0x61, 0x75,
  720. 0xF5, 0xDD, 0xF4, 0xC3, 0x3B, 0x97, 0xA2, 0x05
  721. };
  722. /* Run the three sequence test vectors */
  723. test_sequence( 16, r128 );
  724. test_sequence( 24, r192 );
  725. test_sequence( 32, r256 );
  726. }
  727. /*
  728. * Test the odd-sized keys.
  729. *
  730. * Every odd-sized key is equivalent to a one of 128, 192, or 256 bits.
  731. * The equivalent key is found by padding at the end with zero bytes
  732. * until a regular key size is reached.
  733. *
  734. * We just test that the key expansion routine behaves properly.
  735. * If the expanded keys are identical, then the encryptions and decryptions
  736. * will behave the same.
  737. */
  738. static void test_odd_sized_keys()
  739. {
  740. BYTE buf[32];
  741. twofish_key xkey;
  742. twofish_key xkey_two;
  743. int i;
  744. /*
  745. * We first create an all-zero key to use as PRNG key.
  746. * Normally we would not have to fill the buffer with zeroes, as we could
  747. * just pass a zero key length to the twofish_prepare_key function.
  748. * However, this relies on using odd-sized keys, and those are just the
  749. * ones we are testing here. We can't use an untested function to test
  750. * itself.
  751. */
  752. memset( buf, 0, sizeof( buf ) );
  753. twofish_prepare_key( buf, 16, &xkey );
  754. /* Fill buffer with pseudo-random data derived from two encryptions */
  755. twofish_encrypt_block( &xkey, buf, buf );
  756. twofish_encrypt_block( &xkey, buf, buf+16 );
  757. /* Create all possible shorter keys that are prefixes of the buffer. */
  758. for( i=31; i>=0; i-- )
  759. {
  760. /* Set a byte to zero. This is the new padding byte */
  761. buf[i] = 0;
  762. /* Expand the key with only i bytes of length */
  763. twofish_prepare_key( buf, i, &xkey );
  764. /* Expand the corresponding padded key of regular length */
  765. twofish_prepare_key( buf, i<=16 ? 16 : i<= 24 ? 24 : 32, &xkey_two );
  766. /* Compare the two */
  767. if( memcmp( &xkey, &xkey_two, sizeof( xkey ) ) != 0 )
  768. {
  769. twofish_fatal( "Odd sized keys do not expand properly" );
  770. }
  771. }
  772. /* None of the key values are secret, so we don't need to wipe them. */
  773. }
  774. /*
  775. * Test the Twofish implementation.
  776. *
  777. * This routine runs all the self tests, in order of importance.
  778. * It is called by the twofish_initialise routine.
  779. *
  780. * In almost all applications the cost of running the self tests during
  781. * initialisation is insignificant, especially
  782. * compared to the time it takes to load the application from disk.
  783. * If you are very pressed for initialisation performance,
  784. * you could remove some of the tests. Make sure you did run them
  785. * once in the software and hardware configuration you are using.
  786. */
  787. static void self_test()
  788. {
  789. /* The three test vectors form an absolute minimal test set. */
  790. test_vectors();
  791. /*
  792. * If at all possible you should run these tests too. They take
  793. * more time, but provide a more thorough coverage.
  794. */
  795. test_sequences();
  796. /* Test the odd-sized keys. */
  797. test_odd_sized_keys();
  798. }
  799. /*
  800. * And now, the actual Twofish implementation.
  801. *
  802. * This implementation generates all the tables during initialisation.
  803. * I don't like large tables in the code, especially since they are easily
  804. * damaged in the source without anyone noticing it. You need code to
  805. * generate them anyway, and this way all the code is close together.
  806. * Generating them in the application leads to a smaller executable
  807. * (the code is smaller than the tables it generates) and a
  808. * larger static memory footprint.
  809. *
  810. * Twofish can be implemented in many ways. I have chosen to
  811. * use large tables with a relatively long key setup time.
  812. * If you encrypt more than a few blocks of data it pays to pre-compute
  813. * as much as possible. This implementation is relatively inefficient for
  814. * applications that need to re-key every block or so.
  815. */
  816. /*
  817. * We start with the t-tables, directly from the Twofish definition.
  818. * These are nibble-tables, but merging them and putting them two nibbles
  819. * in one byte is more work than it is worth.
  820. */
  821. static BYTE t_table[2][4][16] = {
  822. {
  823. {0x8,0x1,0x7,0xD,0x6,0xF,0x3,0x2,0x0,0xB,0x5,0x9,0xE,0xC,0xA,0x4},
  824. {0xE,0xC,0xB,0x8,0x1,0x2,0x3,0x5,0xF,0x4,0xA,0x6,0x7,0x0,0x9,0xD},
  825. {0xB,0xA,0x5,0xE,0x6,0xD,0x9,0x0,0xC,0x8,0xF,0x3,0x2,0x4,0x7,0x1},
  826. {0xD,0x7,0xF,0x4,0x1,0x2,0x6,0xE,0x9,0xB,0x3,0x0,0x8,0x5,0xC,0xA}
  827. },
  828. {
  829. {0x2,0x8,0xB,0xD,0xF,0x7,0x6,0xE,0x3,0x1,0x9,0x4,0x0,0xA,0xC,0x5},
  830. {0x1,0xE,0x2,0xB,0x4,0xC,0x3,0x7,0x6,0xD,0xA,0x5,0xF,0x9,0x0,0x8},
  831. {0x4,0xC,0x7,0x5,0x1,0x6,0x9,0xA,0x0,0xE,0xD,0x8,0x2,0xB,0x3,0xF},
  832. {0xB,0x9,0x5,0x1,0xC,0x3,0xD,0xE,0x6,0x4,0x7,0xF,0x2,0x0,0x8,0xA}
  833. }
  834. };
  835. /* A 1-bit rotation of 4-bit values. Input must be in range 0..15 */
  836. #define ROR4BY1( x ) (((x)>>1) | (((x)<<3) & 0x8) )
  837. /*
  838. * The q-boxes are only used during the key schedule computations.
  839. * These are 8->8 bit lookup tables. Some CPUs prefer to have 8->32 bit
  840. * lookup tables as it is faster to load a 32-bit value than to load an
  841. * 8-bit value and zero the rest of the register.
  842. * The LARGE_Q_TABLE switch allows you to choose 32-bit entries in
  843. * the q-tables. Here we just define the Qtype which is used to store
  844. * the entries of the q-tables.
  845. */
  846. #if LARGE_Q_TABLE
  847. typedef DWORD Qtype;
  848. #else
  849. typedef BYTE Qtype;
  850. #endif
  851. /*
  852. * The actual q-box tables.
  853. * There are two q-boxes, each having 256 entries.
  854. */
  855. static Qtype q_table[2][256];
  856. /*
  857. * Now the function that converts a single t-table into a q-table.
  858. *
  859. * Arguments:
  860. * t[4][16] : four 4->4bit lookup tables that define the q-box
  861. * q[256] : output parameter: the resulting q-box as a lookup table.
  862. */
  863. static void make_q_table( BYTE t[4][16], Qtype q[256] )
  864. {
  865. int ae,be,ao,bo; /* Some temporaries. */
  866. int i;
  867. /* Loop over all input values and compute the q-box result. */
  868. for( i=0; i<256; i++ ) {
  869. /*
  870. * This is straight from the Twofish specifications.
  871. *
  872. * The ae variable is used for the a_i values from the specs
  873. * with even i, and ao for the odd i's. Similarly for the b's.
  874. */
  875. ae = i>>4; be = i&0xf;
  876. ao = ae ^ be; bo = ae ^ ROR4BY1(be) ^ ((ae<<3)&8);
  877. ae = t[0][ao]; be = t[1][bo];
  878. ao = ae ^ be; bo = ae ^ ROR4BY1(be) ^ ((ae<<3)&8);
  879. ae = t[2][ao]; be = t[3][bo];
  880. /* Store the result in the q-box table, the cast avoids a warning. */
  881. q[i] = (Qtype) ((be<<4) | ae);
  882. }
  883. }
  884. /*
  885. * Initialise both q-box tables.
  886. */
  887. static void initialise_q_boxes() {
  888. /* Initialise each of the q-boxes using the t-tables */
  889. make_q_table( t_table[0], q_table[0] );
  890. make_q_table( t_table[1], q_table[1] );
  891. }
  892. /*
  893. * Next up is the MDS matrix multiplication.
  894. * The MDS matrix multiplication operates in the field
  895. * GF(2)[x]/p(x) with p(x)=x^8+x^6+x^5+x^3+1.
  896. * If you don't understand this, read a book on finite fields. You cannot
  897. * follow the finite-field computations without some background.
  898. *
  899. * In this field, multiplication by x is easy: shift left one bit
  900. * and if bit 8 is set then xor the result with 0x169.
  901. *
  902. * The MDS coefficients use a multiplication by 1/x,
  903. * or rather a division by x. This is easy too: first make the
  904. * value 'even' (i.e. bit 0 is zero) by xorring with 0x169 if necessary,
  905. * and then shift right one position.
  906. * Even easier: shift right and xor with 0xb4 if the lsbit was set.
  907. *
  908. * The MDS coefficients are 1, EF, and 5B, and we use the fact that
  909. * EF = 1 + 1/x + 1/x^2
  910. * 5B = 1 + 1/x^2
  911. * in this field. This makes multiplication by EF and 5B relatively easy.
  912. *
  913. * This property is no accident, the MDS matrix was designed to allow
  914. * this implementation technique to be used.
  915. *
  916. * We have four MDS tables, each mapping 8 bits to 32 bits.
  917. * Each table performs one column of the matrix multiplication.
  918. * As the MDS is always preceded by q-boxes, each of these tables
  919. * also implements the q-box just previous to that column.
  920. */
  921. /* The actual MDS tables. */
  922. static DWORD MDS_table[4][256];
  923. /* A small table to get easy conditional access to the 0xb4 constant. */
  924. static DWORD mds_poly_divx_const[] = {0,0xb4};
  925. /* Function to initialise the MDS tables. */
  926. static void initialise_mds_tables()
  927. {
  928. int i;
  929. DWORD q,qef,q5b; /* Temporary variables. */
  930. /* Loop over all 8-bit input values */
  931. for( i=0; i<256; i++ )
  932. {
  933. /*
  934. * To save some work during the key expansion we include the last
  935. * of the q-box layers from the h() function in these MDS tables.
  936. */
  937. /* We first do the inputs that are mapped through the q0 table. */
  938. q = q_table[0][i];
  939. /*
  940. * Here we divide by x, note the table to get 0xb4 only if the
  941. * lsbit is set.
  942. * This sets qef = (1/x)*q in the finite field
  943. */
  944. qef = (q >> 1) ^ mds_poly_divx_const[ q & 1 ];
  945. /*
  946. * Divide by x again, and add q to get (1+1/x^2)*q.
  947. * Note that (1+1/x^2) = 5B in the field, and addition in the field
  948. * is exclusive or on the bits.
  949. */
  950. q5b = (qef >> 1) ^ mds_poly_divx_const[ qef & 1 ] ^ q;
  951. /*
  952. * Add q5b to qef to set qef = (1+1/x+1/x^2)*q.
  953. * Again, (1+1/x+1/x^2) = EF in the field.
  954. */
  955. qef ^= q5b;
  956. /*
  957. * Now that we have q5b = 5B * q and qef = EF * q
  958. * we can fill two of the entries in the MDS matrix table.
  959. * See the Twofish specifications for the order of the constants.
  960. */
  961. MDS_table[1][i] = q <<24 | q5b<<16 | qef<<8 | qef;
  962. MDS_table[3][i] = q5b<<24 | qef<<16 | q <<8 | q5b;
  963. /* Now we do it all again for the two columns that have a q1 box. */
  964. q = q_table[1][i];
  965. qef = (q >> 1) ^ mds_poly_divx_const[ q & 1 ];
  966. q5b = (qef >> 1) ^ mds_poly_divx_const[ qef & 1 ] ^ q;
  967. qef ^= q5b;
  968. /* The other two columns use the coefficient in a different order. */
  969. MDS_table[0][i] = qef<<24 | qef<<16 | q5b<<8 | q ;
  970. MDS_table[2][i] = qef<<24 | q <<16 | qef<<8 | q5b;
  971. }
  972. }
  973. /*
  974. * The h() function is the heart of the Twofish cipher.
  975. * It is a complicated sequence of q-box lookups, key material xors,
  976. * and finally the MDS matrix.
  977. * We use lots of macros to make this reasonably fast.
  978. */
  979. /* First a shorthand for the two q-tables */
  980. #define q0 q_table[0]
  981. #define q1 q_table[1]
  982. /*
  983. * Each macro computes one column of the h for either 2, 3, or 4 stages.
  984. * As there are 4 columns, we have 12 macros in all.
  985. *
  986. * The key bytes are stored in the BYTE array L at offset
  987. * 0,1,2,3, 8,9,10,11, [16,17,18,19, [24,25,26,27]] as this is the
  988. * order we get the bytes from the user. If you look at the Twofish
  989. * specs, you'll see that h() is applied to the even key words or the
  990. * odd key words. The bytes of the even words appear in this spacing,
  991. * and those of the odd key words too.
  992. *
  993. * These macros are the only place where the q-boxes and the MDS table
  994. * are used.
  995. */
  996. #define H02( y, L ) MDS_table[0][q0[q0[y]^L[ 8]]^L[0]]
  997. #define H12( y, L ) MDS_table[1][q0[q1[y]^L[ 9]]^L[1]]
  998. #define H22( y, L ) MDS_table[2][q1[q0[y]^L[10]]^L[2]]
  999. #define H32( y, L ) MDS_table[3][q1[q1[y]^L[11]]^L[3]]
  1000. #define H03( y, L ) H02( q1[y]^L[16], L )
  1001. #define H13( y, L ) H12( q1[y]^L[17], L )
  1002. #define H23( y, L ) H22( q0[y]^L[18], L )
  1003. #define H33( y, L ) H32( q0[y]^L[19], L )
  1004. #define H04( y, L ) H03( q1[y]^L[24], L )
  1005. #define H14( y, L ) H13( q0[y]^L[25], L )
  1006. #define H24( y, L ) H23( q0[y]^L[26], L )
  1007. #define H34( y, L ) H33( q1[y]^L[27], L )
  1008. /*
  1009. * Now we can define the h() function given an array of key bytes.
  1010. * This function is only used in the key schedule, and not to pre-compute
  1011. * the keyed S-boxes.
  1012. *
  1013. * In the key schedule, the input is always of the form k*(1+2^8+2^16+2^24)
  1014. * so we only provide k as an argument.
  1015. *
  1016. * Arguments:
  1017. * k input to the h() function.
  1018. * L pointer to array of key bytes at
  1019. * offsets 0,1,2,3, ... 8,9,10,11, [16,17,18,19, [24,25,26,27]]
  1020. * kCycles # key cycles, 2, 3, or 4.
  1021. */
  1022. static DWORD h( int k, BYTE L[], int kCycles )
  1023. {
  1024. switch( kCycles ) {
  1025. /* We code all 3 cases separately for speed reasons. */
  1026. case 2:
  1027. return H02(k,L) ^ H12(k,L) ^ H22(k,L) ^ H32(k,L);
  1028. case 3:
  1029. return H03(k,L) ^ H13(k,L) ^ H23(k,L) ^ H33(k,L);
  1030. case 4:
  1031. return H04(k,L) ^ H14(k,L) ^ H24(k,L) ^ H34(k,L);
  1032. default:
  1033. /* This is always a coding error, which is fatal. */
  1034. twofish_fatal( "Twofish h(): Illegal argument" );
  1035. return 0;
  1036. }
  1037. }
  1038. /*
  1039. * Pre-compute the keyed S-boxes.
  1040. * Fill the pre-computed S-box array in the expanded key structure.
  1041. * Each pre-computed S-box maps 8 bits to 32 bits.
  1042. *
  1043. * The S argument contains half the number of bytes of the full key, but is
  1044. * derived from the full key. (See Twofish specifications for details.)
  1045. * S has the weird byte input order used by the Hxx macros.
  1046. *
  1047. * This function takes most of the time of a key expansion.
  1048. *
  1049. * Arguments:
  1050. * S pointer to array of 8*kCycles BYTEs containing the S vector.
  1051. * kCycles number of key words, must be in the set {2,3,4}
  1052. * xkey pointer to twofish_key structure that will contain the S-boxes.
  1053. */
  1054. static void fill_keyed_sboxes( BYTE S[], int kCycles, twofish_key * xkey )
  1055. {
  1056. int i;
  1057. switch( kCycles ) {
  1058. /* We code all 3 cases separately for speed reasons. */
  1059. case 2:
  1060. for( i=0; i<256; i++ )
  1061. {
  1062. xkey->s[0][i]= H02( i, S );
  1063. xkey->s[1][i]= H12( i, S );
  1064. xkey->s[2][i]= H22( i, S );
  1065. xkey->s[3][i]= H32( i, S );
  1066. }
  1067. break;
  1068. case 3:
  1069. for( i=0; i<256; i++ )
  1070. {
  1071. xkey->s[0][i]= H03( i, S );
  1072. xkey->s[1][i]= H13( i, S );
  1073. xkey->s[2][i]= H23( i, S );
  1074. xkey->s[3][i]= H33( i, S );
  1075. }
  1076. break;
  1077. case 4:
  1078. for( i=0; i<256; i++ )
  1079. {
  1080. xkey->s[0][i]= H04( i, S );
  1081. xkey->s[1][i]= H14( i, S );
  1082. xkey->s[2][i]= H24( i, S );
  1083. xkey->s[3][i]= H34( i, S );
  1084. }
  1085. break;
  1086. default:
  1087. /* This is always a coding error, which is fatal. */
  1088. twofish_fatal( "Twofish fill_keyed_sboxes(): Illegal argument" );
  1089. }
  1090. }
  1091. /* A flag to keep track of whether we have been initialised or not. */
  1092. static int twofish_initialised = 0;
  1093. /*
  1094. * Initialise the Twofish implementation.
  1095. * This function must be called before any other function in the
  1096. * Twofish implementation is called.
  1097. * This routine also does some sanity checks, to make sure that
  1098. * all the macros behave, and it tests the whole cipher.
  1099. */
  1100. void twofish_initialise(void)
  1101. {
  1102. /* First test the various platform-specific definitions. */
  1103. test_platform();
  1104. /* We can now generate our tables, in the right order of course. */
  1105. initialise_q_boxes();
  1106. initialise_mds_tables();
  1107. /* We're finished with the initialisation itself. */
  1108. twofish_initialised = 1;
  1109. /*
  1110. * And run some tests on the whole cipher.
  1111. * Yes, you need to do this every time you start your program.
  1112. * It is called assurance; you have to be certain that your program
  1113. * still works properly.
  1114. */
  1115. self_test();
  1116. }
  1117. /*
  1118. * The Twofish key schedule uses an Reed-Solomon code matrix multiply.
  1119. * Just like the MDS matrix, the RS-matrix is designed to be easy
  1120. * to implement. Details are below in the code.
  1121. *
  1122. * These constants make it easy to compute in the finite field used
  1123. * for the RS code.
  1124. *
  1125. * We use BYTEs for the RS computation, but these are automatically
  1126. * widened to unsigned integers in the expressions. Having unsigned
  1127. * ints in these tables therefore provides the fastest access.
  1128. */
  1129. static unsigned int rs_poly_const[] = {0, 0x14d};
  1130. static unsigned int rs_poly_div_const[] = {0, 0xa6 };
  1131. void twofish_setup(twofish_context *context, const BYTE key[TWOFISH_KEYSIZE], const BYTE iv[TWOFISH_IVSIZE], twofish_options options) {
  1132. twofish_initialise();
  1133. context->options = options;
  1134. twofish_prepare_key(key, TWOFISH_KEYSIZE, &(context->key));
  1135. memcpy(context->iv, iv, TWOFISH_IVSIZE);
  1136. }
  1137. /*
  1138. * Prepare a key for use in encryption and decryption.
  1139. * Like most block ciphers, Twofish allows the key schedule
  1140. * to be pre-computed given only the key.
  1141. * Twofish has a fairly 'heavy' key schedule that takes a lot of time
  1142. * to compute. The main work is pre-computing the S-boxes used in the
  1143. * encryption and decryption. We feel that this makes the cipher much
  1144. * harder to attack. The attacker doesn't even know what the S-boxes
  1145. * contain without including the entire key schedule in the analysis.
  1146. *
  1147. * Unlike most Twofish implementations, this one allows any key size from
  1148. * 0 to 32 bytes. Odd key sizes are defined for Twofish (see the
  1149. * specifications); the key is simply padded with zeroes to the next real
  1150. * key size of 16, 24, or 32 bytes.
  1151. * Each odd-sized key is thus equivalent to a single normal-sized key.
  1152. *
  1153. * Arguments:
  1154. * key array of key bytes
  1155. * key_len number of bytes in the key, must be in the range 0,...,32.
  1156. * xkey Pointer to an twofish_key structure that will be filled
  1157. * with the internal form of the cipher key.
  1158. */
  1159. void twofish_prepare_key(const BYTE *key, int key_len, twofish_key *xkey)
  1160. {
  1161. /* We use a single array to store all key material in,
  1162. * to simplify the wiping of the key material at the end.
  1163. * The first 32 bytes contain the actual (padded) cipher key.
  1164. * The next 32 bytes contain the S-vector in its weird format,
  1165. * and we have 4 bytes of overrun necessary for the RS-reduction.
  1166. */
  1167. BYTE K[32+32+4];
  1168. int kCycles; /* # key cycles, 2,3, or 4. */
  1169. int i;
  1170. DWORD A, B; /* Used to compute the round keys. */
  1171. BYTE * kptr; /* Three pointers for the RS computation. */
  1172. BYTE * sptr;
  1173. BYTE * t;
  1174. BYTE b,bx,bxx; /* Some more temporaries for the RS computation. */
  1175. /* Check that the Twofish implementation was initialised. */
  1176. if( twofish_initialised == 0 )
  1177. {
  1178. /*
  1179. * You didn't call twofish_initialise before calling this routine.
  1180. * This is a programming error, and therefore we call the fatal
  1181. * routine.
  1182. *
  1183. * I could of course call the initialisation routine here,
  1184. * but there are a few reasons why I don't. First of all, the
  1185. * self-tests have to be done at startup. It is no good to inform
  1186. * the user that the cipher implementation fails when he wants to
  1187. * write his data to disk in encrypted form. You have to warn him
  1188. * before he spends time typing his data. Second, the initialisation
  1189. * and self test are much slower than a single key expansion.
  1190. * Calling the initialisation here makes the performance of the
  1191. * cipher unpredictable. This can lead to really weird problems
  1192. * if you use the cipher for a real-time task. Suddenly it fails
  1193. * once in a while the first time you try to use it. Things like
  1194. * that are almost impossible to debug.
  1195. */
  1196. twofish_fatal( "Twofish implementation was not initialised." );
  1197. /*
  1198. * There is always a danger that the twofish_fatal routine returns,
  1199. * in spite of the specifications that it should not.
  1200. * (A good programming rule: don't trust the rest of the code.)
  1201. * This would be disasterous. If the q-tables and MDS-tables have
  1202. * not been initialised, they are probably still filled with zeroes.
  1203. * Suppose the MDS-tables are all zero. The key expansion would then
  1204. * generate all-zero round keys, and all-zero s-boxes. The danger
  1205. * is that nobody would notice as the encryption function still
  1206. * mangles the input, and the decryption still 'decrypts' it,
  1207. * but now in a completely key-independent manner.
  1208. * To stop such security disasters, we use blunt force.
  1209. * If your program hangs here: fix the fatal routine!
  1210. */
  1211. //for(;;); /* Infinite loop, which beats being insecure. */
  1212. return;
  1213. }
  1214. /* Check for valid key length. */
  1215. if( key_len < 0 || key_len > TWOFISH_KEYSIZE )
  1216. {
  1217. /*
  1218. * This can only happen if a programmer didn't read the limitations
  1219. * on the key size.
  1220. */
  1221. twofish_fatal( "twofish_prepare_key: illegal key length" );
  1222. /*
  1223. * A return statement just in case the fatal macro returns.
  1224. * The rest of the code assumes that key_len is in range, and would
  1225. * buffer-overflow if it wasn't.
  1226. *
  1227. * Why do we still use a programming language that has problems like
  1228. * buffer overflows, when these problems were solved in 1960 with
  1229. * the development of Algol? Have we not leared anything?
  1230. */
  1231. return;
  1232. }
  1233. /* Pad the key with zeroes to the next suitable key length. */
  1234. memcpy( K, key, key_len );
  1235. memset( K+key_len, 0, sizeof(K)-key_len );
  1236. /*
  1237. * Compute kCycles: the number of key cycles used in the cipher.
  1238. * 2 for 128-bit keys, 3 for 192-bit keys, and 4 for 256-bit keys.
  1239. */
  1240. kCycles = (key_len + 7) >> 3;
  1241. /* Handle the special case of very short keys: minimum 2 cycles. */
  1242. if( kCycles < 2 )
  1243. {
  1244. kCycles = 2;
  1245. }
  1246. /*
  1247. * From now on we just pretend to have 8*kCycles bytes of
  1248. * key material in K. This handles all the key size cases.
  1249. */
  1250. /*
  1251. * We first compute the 40 expanded key words,
  1252. * formulas straight from the Twofish specifications.
  1253. */
  1254. for( i=0; i<40; i+=2 )
  1255. {
  1256. /*
  1257. * Due to the byte spacing expected by the h() function
  1258. * we can pick the bytes directly from the key K.
  1259. * As we use bytes, we never have the little/big endian
  1260. * problem.
  1261. *
  1262. * Note that we apply the rotation function only to simple
  1263. * variables, as the rotation macro might evaluate its argument
  1264. * more than once.
  1265. */
  1266. A = h( i , K , kCycles );
  1267. B = h( i+1, K+4, kCycles );
  1268. B = ROL32( B, 8 );
  1269. /* Compute and store the round keys. */
  1270. A += B;
  1271. B += A;
  1272. xkey->K[i] = A;
  1273. xkey->K[i+1] = ROL32( B, 9 );
  1274. }
  1275. /* Wipe variables that contained key material. */
  1276. A=B=0;
  1277. /*
  1278. * And now the dreaded RS multiplication that few seem to understand.
  1279. * The RS matrix is not random, and is specially designed to compute the
  1280. * RS matrix multiplication in a simple way.
  1281. *
  1282. * We work in the field GF(2)[x]/x^8+x^6+x^3+x^2+1. Note that this is a
  1283. * different field than used for the MDS matrix.
  1284. * (At least, it is a different representation because all GF(2^8)
  1285. * representations are equivalent in some form.)
  1286. *
  1287. * We take 8 consecutive bytes of the key and interpret them as
  1288. * a polynomial k_0 + k_1 y + k_2 y^2 + ... + k_7 y^7 where
  1289. * the k_i bytes are the key bytes and are elements of the finite field.
  1290. * We multiply this polynomial by y^4 and reduce it modulo
  1291. * y^4 + (x + 1/x)y^3 + (x)y^2 + (x + 1/x)y + 1.
  1292. * using straightforward polynomial modulo reduction.
  1293. * The coefficients of the result are the result of the RS
  1294. * matrix multiplication. When we wrote the Twofish specification,
  1295. * the original RS definition used the polynomials,
  1296. * but that requires much more mathematical knowledge.
  1297. * We were already using matrix multiplication in a finite field for
  1298. * the MDS matrix, so I re-wrote the RS operation as a matrix
  1299. * multiplication to reduce the difficulty of understanding it.
  1300. * Some implementors have not picked up on this simpler method of
  1301. * computing the RS operation, even though it is mentioned in the
  1302. * specifications.
  1303. *
  1304. * It is possible to perform these computations faster by using 32-bit
  1305. * word operations, but that is not portable and this is not a speed-
  1306. * critical area.
  1307. *
  1308. * We explained the 1/x computation when we did the MDS matrix.
  1309. *
  1310. * The S vector is stored in K[32..64].
  1311. * The S vector has to be reversed, so we loop cross-wise.
  1312. *
  1313. * Note the weird byte spacing of the S-vector, to match the even
  1314. * or odd key words arrays. See the discussion at the Hxx macros for
  1315. * details.
  1316. */
  1317. kptr = K + 8*kCycles; /* Start at end of key */
  1318. sptr = K + 32; /* Start at start of S */
  1319. /* Loop over all key material */
  1320. while( kptr > K )
  1321. {
  1322. kptr -= 8;
  1323. /*
  1324. * Initialise the polynimial in sptr[0..12]
  1325. * The first four coefficients are 0 as we have to multiply by y^4.
  1326. * The next 8 coefficients are from the key material.
  1327. */
  1328. memset( sptr, 0, 4 );
  1329. memcpy( sptr+4, kptr, 8 );
  1330. /*
  1331. * The 12 bytes starting at sptr are now the coefficients of
  1332. * the polynomial we need to reduce.
  1333. */
  1334. /* Loop over the polynomial coefficients from high to low */
  1335. t = sptr+11;
  1336. /* Keep looping until polynomial is degree 3; */
  1337. while( t > sptr+3 )
  1338. {
  1339. /* Pick up the highest coefficient of the poly. */
  1340. b = *t;
  1341. /*
  1342. * Compute x and (x+1/x) times this coefficient.
  1343. * See the MDS matrix implementation for a discussion of
  1344. * multiplication by x and 1/x. We just use different
  1345. * constants here as we are in a
  1346. * different finite field representation.
  1347. *
  1348. * These two statements set
  1349. * bx = (x) * b
  1350. * bxx= (x + 1/x) * b
  1351. */
  1352. bx = (BYTE)((b<<1) ^ rs_poly_const[ b>>7 ]);
  1353. bxx= (BYTE)((b>>1) ^ rs_poly_div_const[ b&1 ] ^ bx);
  1354. /*
  1355. * Subtract suitable multiple of
  1356. * y^4 + (x + 1/x)y^3 + (x)y^2 + (x + 1/x)y + 1
  1357. * from the polynomial, except that we don't bother
  1358. * updating t[0] as it will become zero anyway.
  1359. */
  1360. t[-1] ^= bxx;
  1361. t[-2] ^= bx;
  1362. t[-3] ^= bxx;
  1363. t[-4] ^= b;
  1364. /* Go to the next coefficient. */
  1365. t--;
  1366. }
  1367. /* Go to next S-vector word, obeying the weird spacing rules. */
  1368. sptr += 8;
  1369. }
  1370. /* Wipe variables that contained key material. */
  1371. b = bx = bxx = 0;
  1372. /* And finally, we can compute the key-dependent S-boxes. */
  1373. fill_keyed_sboxes( &K[TWOFISH_KEYSIZE], kCycles, xkey );
  1374. /* Wipe array that contained key material. */
  1375. memset( K, 0, sizeof( K ) );
  1376. }
  1377. /*
  1378. * We can now start on the actual encryption and decryption code.
  1379. * As these are often speed-critical we will use a lot of macros.
  1380. */
  1381. /*
  1382. * The g() function is the heart of the round function.
  1383. * We have two versions of the g() function, one without an input
  1384. * rotation and one with.
  1385. * The pre-computed S-boxes make this pretty simple.
  1386. */
  1387. #define g0(X,xkey) \
  1388. (xkey->s[0][b0(X)]^xkey->s[1][b1(X)]^xkey->s[2][b2(X)]^xkey->s[3][b3(X)])
  1389. #define g1(X,xkey) \
  1390. (xkey->s[0][b3(X)]^xkey->s[1][b0(X)]^xkey->s[2][b1(X)]^xkey->s[3][b2(X)])
  1391. /*
  1392. * A single round of Twofish. The A,B,C,D are the four state variables,
  1393. * T0 and T1 are temporaries, xkey is the expanded key, and r the
  1394. * round number.
  1395. *
  1396. * Note that this macro does not implement the swap at the end of the round.
  1397. */
  1398. #define ENCRYPT_RND( A,B,C,D, T0, T1, xkey, r ) \
  1399. T0 = g0(A,xkey); T1 = g1(B,xkey);\
  1400. C ^= T0+T1+xkey->K[8+2*(r)]; C = ROR32(C,1);\
  1401. D = ROL32(D,1); D ^= T0+2*T1+xkey->K[8+2*(r)+1]
  1402. /*
  1403. * Encrypt a single cycle, consisting of two rounds.
  1404. * This avoids the swapping of the two halves.
  1405. * Parameter r is now the cycle number.
  1406. */
  1407. #define ENCRYPT_CYCLE( A, B, C, D, T0, T1, xkey, r ) \
  1408. ENCRYPT_RND( A,B,C,D,T0,T1,xkey,2*(r) );\
  1409. ENCRYPT_RND( C,D,A,B,T0,T1,xkey,2*(r)+1 )
  1410. /* Full 16-round encryption */
  1411. #define ENCRYPT( A,B,C,D,T0,T1,xkey ) \
  1412. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 0 );\
  1413. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 1 );\
  1414. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 2 );\
  1415. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 3 );\
  1416. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 4 );\
  1417. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 5 );\
  1418. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 6 );\
  1419. ENCRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 7 )
  1420. /*
  1421. * A single round of Twofish for decryption. It differs from
  1422. * ENCRYTP_RND only because of the 1-bit rotations.
  1423. */
  1424. #define DECRYPT_RND( A,B,C,D, T0, T1, xkey, r ) \
  1425. T0 = g0(A,xkey); T1 = g1(B,xkey);\
  1426. C = ROL32(C,1); C ^= T0+T1+xkey->K[8+2*(r)];\
  1427. D ^= T0+2*T1+xkey->K[8+2*(r)+1]; D = ROR32(D,1)
  1428. /*
  1429. * Decrypt a single cycle, consisting of two rounds.
  1430. * This avoids the swapping of the two halves.
  1431. * Parameter r is now the cycle number.
  1432. */
  1433. #define DECRYPT_CYCLE( A, B, C, D, T0, T1, xkey, r ) \
  1434. DECRYPT_RND( A,B,C,D,T0,T1,xkey,2*(r)+1 );\
  1435. DECRYPT_RND( C,D,A,B,T0,T1,xkey,2*(r) )
  1436. /* Full 16-round decryption. */
  1437. #define DECRYPT( A,B,C,D,T0,T1, xkey ) \
  1438. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 7 );\
  1439. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 6 );\
  1440. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 5 );\
  1441. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 4 );\
  1442. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 3 );\
  1443. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 2 );\
  1444. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 1 );\
  1445. DECRYPT_CYCLE( A,B,C,D,T0,T1,xkey, 0 )
  1446. /*
  1447. * A macro to read the state from the plaintext and do the initial key xors.
  1448. * The koff argument allows us to use the same macro
  1449. * for the decryption which uses different key words at the start.
  1450. */
  1451. #define GET_INPUT( src, A,B,C,D, xkey, koff ) \
  1452. A = GET32(src )^xkey->K[ koff]; B = GET32(src+ 4)^xkey->K[1+koff]; \
  1453. C = GET32(src+ 8)^xkey->K[2+koff]; D = GET32(src+12)^xkey->K[3+koff]
  1454. /*
  1455. * Similar macro to put the ciphertext in the output buffer.
  1456. * We xor the keys into the state variables before we use the PUT32
  1457. * macro as the macro might use its argument multiple times.
  1458. */
  1459. #define PUT_OUTPUT( A,B,C,D, dst, xkey, koff ) \
  1460. A ^= xkey->K[ koff]; B ^= xkey->K[1+koff]; \
  1461. C ^= xkey->K[2+koff]; D ^= xkey->K[3+koff]; \
  1462. PUT32( A, dst ); PUT32( B, dst+ 4 ); \
  1463. PUT32( C, dst+8 ); PUT32( D, dst+12 )
  1464. /*
  1465. * Twofish block encryption
  1466. *
  1467. * Arguments:
  1468. * xkey expanded key array
  1469. * p 16 bytes of plaintext
  1470. * c 16 bytes in which to store the ciphertext
  1471. */
  1472. void twofish_encrypt_block(twofish_key * xkey, BYTE p[TWOFISH_BLOCKSIZE], BYTE c[TWOFISH_BLOCKSIZE]) {
  1473. DWORD A,B,C,D,T0,T1; /* Working variables */
  1474. /* Get the four plaintext words xorred with the key */
  1475. GET_INPUT( p, A,B,C,D, xkey, 0 );
  1476. /* Do 8 cycles (= 16 rounds) */
  1477. ENCRYPT( A,B,C,D,T0,T1,xkey );
  1478. /* Store them with the final swap and the output whitening. */
  1479. PUT_OUTPUT( C,D,A,B, c, xkey, 4 );
  1480. }
  1481. /*
  1482. * Twofish block decryption.
  1483. *
  1484. * Arguments:
  1485. * xkey expanded key array
  1486. * p 16 bytes of plaintext
  1487. * c 16 bytes in which to store the ciphertext
  1488. */
  1489. void twofish_decrypt_block(const twofish_key * xkey, const BYTE c[TWOFISH_BLOCKSIZE], BYTE p[TWOFISH_BLOCKSIZE]) {
  1490. DWORD A,B,C,D,T0,T1; /* Working variables */
  1491. /* Get the four plaintext words xorred with the key */
  1492. GET_INPUT( c, A,B,C,D, xkey, 4 );
  1493. /* Do 8 cycles (= 16 rounds) */
  1494. DECRYPT( A,B,C,D,T0,T1,xkey );
  1495. /* Store them with the final swap and the output whitening. */
  1496. PUT_OUTPUT( C,D,A,B, p, xkey, 0 );
  1497. }
  1498. /*
  1499. * Using the macros it is easy to make special routines for
  1500. * CBC mode, CTR mode etc. The only thing you might want to
  1501. * add is a XOR_PUT_OUTPUT which xors the outputs into the
  1502. * destinationa instead of overwriting the data. This requires
  1503. * a XOR_PUT32 macro as well, but that should all be trivial.
  1504. *
  1505. * I thought about including routines for the separate cipher
  1506. * modes here, but it is unclear which modes should be included,
  1507. * and each encryption or decryption routine takes up a lot of code space.
  1508. * Also, I don't have any test vectors for any cipher modes
  1509. * with Twofish.
  1510. */
  1511. SIZE twofish_get_block_count(const twofish_context *context, SIZE input_lenght) {
  1512. if(context->options & twofish_option_PaddingPKCS7) {
  1513. return 1 + (input_lenght / TWOFISH_BLOCKSIZE);
  1514. }
  1515. return (input_lenght / TWOFISH_BLOCKSIZE);
  1516. }
  1517. SIZE twofish_get_output_length(const twofish_context *context, SIZE input_lenght) {
  1518. return TWOFISH_BLOCKSIZE * twofish_get_block_count(context, input_lenght);
  1519. }
  1520. #define TWOFISH_MIN(x,y) (x) < (y) ? (x) : (y)
  1521. void twofish_encrypt(twofish_context *context, const BYTE *input, SIZE input_length, BYTE *output, SIZE output_length) {
  1522. SIZE blockCount = twofish_get_block_count(context, input_length);
  1523. if(output_length < twofish_get_output_length(context, input_length)) {
  1524. return;
  1525. }
  1526. for(DWORD blockIndex = 0; blockIndex < blockCount; blockIndex++) {
  1527. BYTE inputBlock[TWOFISH_BLOCKSIZE];
  1528. BYTE copy_length = TWOFISH_MIN(input_length - blockIndex * TWOFISH_BLOCKSIZE, TWOFISH_BLOCKSIZE);
  1529. BYTE paddingCount = (TWOFISH_BLOCKSIZE - copy_length);
  1530. if(copy_length > 0) {
  1531. memcpy(inputBlock, (input+(blockIndex*TWOFISH_BLOCKSIZE)), TWOFISH_BLOCKSIZE);
  1532. }
  1533. for(int index=0; index < paddingCount; index++) {
  1534. inputBlock[TWOFISH_BLOCKSIZE - 1 - index] = paddingCount;
  1535. }
  1536. for(BYTE index=0; index < TWOFISH_BLOCKSIZE; index++) {
  1537. inputBlock[index] ^= context->iv[index];
  1538. }
  1539. BYTE outputBlock[TWOFISH_BLOCKSIZE];
  1540. twofish_encrypt_block(&(context->key), inputBlock, outputBlock);
  1541. /* update iv with block */
  1542. memcpy(context->iv, outputBlock, TWOFISH_BLOCKSIZE);
  1543. memcpy((output + (blockIndex * TWOFISH_BLOCKSIZE)), outputBlock, TWOFISH_BLOCKSIZE);
  1544. /* whipe input plain text */
  1545. //memset(inputBlock, 0, sizeof(inputBlock));
  1546. }
  1547. }
  1548. void twofish_decrypt(twofish_context *context, const BYTE *input, SIZE input_length, BYTE *output, SIZE *output_length) {
  1549. if(NULL == output) {
  1550. return;
  1551. }
  1552. if(*output_length < input_length) {
  1553. return;
  1554. }
  1555. SIZE blockCount = input_length / TWOFISH_BLOCKSIZE;
  1556. for(DWORD blockIndex = 0; blockIndex < blockCount; blockIndex++) {
  1557. BYTE inputBlock[TWOFISH_BLOCKSIZE];
  1558. memcpy(inputBlock, (input+(blockIndex*TWOFISH_BLOCKSIZE)), TWOFISH_BLOCKSIZE);
  1559. BYTE outputBlock[TWOFISH_BLOCKSIZE];
  1560. twofish_decrypt_block(&(context->key), inputBlock, outputBlock);
  1561. for(BYTE index=0; index < TWOFISH_BLOCKSIZE; index++) {
  1562. outputBlock[index] ^= context->iv[index];
  1563. }
  1564. /* update iv with block */
  1565. memcpy(context->iv, inputBlock, TWOFISH_BLOCKSIZE);
  1566. memcpy((output + (blockIndex * TWOFISH_BLOCKSIZE)), outputBlock, TWOFISH_BLOCKSIZE);
  1567. }
  1568. *output_length = (input_length - (BYTE)output[input_length-1]);
  1569. }