Smart-Home am Beispiel der Präsenzerkennung im Raum Projektarbeit Lennart Heimbs, Johannes Krug, Sebastian Dohle und Kevin Holzschuh bei Prof. Oliver Hofmann SS2019
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

MyASM.S 22KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074
  1. /*
  2. * The MySensors Arduino library handles the wireless radio link and protocol
  3. * between your home built sensors/actuators and HA controller of choice.
  4. * The sensors forms a self healing radio network with optional repeaters. Each
  5. * repeater and gateway builds a routing tables in EEPROM which keeps track of the
  6. * network topology allowing messages to be routed to nodes.
  7. *
  8. * Created by Henrik Ekblad <henrik.ekblad@mysensors.org>
  9. * Copyright (C) 2013-2018 Sensnology AB
  10. * Full contributor list: https://github.com/mysensors/MySensors/graphs/contributors
  11. *
  12. * Documentation: http://www.mysensors.org
  13. * Support Forum: http://forum.mysensors.org
  14. *
  15. * This program is free software; you can redistribute it and/or
  16. * modify it under the terms of the GNU General Public License
  17. * version 2 as published by the Free Software Foundation.
  18. *
  19. */
  20. #if defined(ARDUINO_ARCH_SAMD)
  21. /* workaround to prevent compiler error: on SAMD builds this file only provides a one-instruction stub (presumably so the file does not assemble to nothing - TODO confirm) */
  22. .thumb_func /* mark the following label as a Thumb function entry */
  23. doNothing:
  24. nop /* NOTE(review): no return instruction follows, so this stub is presumably never called */
  25. .size doNothing, .-doNothing /* symbol size = bytes between the label and this point */
  26. #elif defined(ARDUINO_ARCH_NRF5)
  27. /* workaround to prevent compiler error: on NRF5 builds this file only provides a one-instruction stub (presumably so the file does not assemble to nothing - TODO confirm) */
  28. .thumb_func /* mark the following label as a Thumb function entry */
  29. doNothing:
  30. nop /* NOTE(review): no return instruction follows, so this stub is presumably never called */
  31. .size doNothing, .-doNothing /* symbol size = bytes between the label and this point */
  32. #elif defined(ARDUINO_ARCH_AVR)
  33. /*
  34. * This file is part of the AVR-Crypto-Lib.
  35. * Copyright (C) 2006-2015 Daniel Otte (bg@nerilex.org)
  36. *
  37. * This program is free software: you can redistribute it and/or modify
  38. * it under the terms of the GNU General Public License as published by
  39. * the Free Software Foundation, either version 3 of the License, or
  40. * (at your option) any later version.
  41. *
  42. * This program is distributed in the hope that it will be useful,
  43. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  44. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  45. * GNU General Public License for more details.
  46. *
  47. * You should have received a copy of the GNU General Public License
  48. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  49. *
  50. * Author: Daniel Otte
  51. *
  52. * License: GPLv3 or later
  53. */
  54. ; SHA-256 implementation in AVR assembler
  55. SHA256_BLOCK_BITS = 512 ; size of one message block in bits (64 bytes)
  56. SHA256_HASH_BITS = 256 ; size of the digest in bits (32 bytes); not referenced in the visible part of this file
  57. .macro precall
  58. /* save r0, r1, r18 - r27 and r30 - r31 on the stack, then zero r1; must be paired with postcall, which pops in reverse order */
  59. push r0
  60. push r1
  61. push r18
  62. push r19
  63. push r20
  64. push r21
  65. push r22
  66. push r23
  67. push r24
  68. push r25
  69. push r26
  70. push r27
  71. push r30
  72. push r31
  73. clr r1 ; r1 = 0 for the code that runs next (presumably the zero-register convention of the called C code - TODO confirm)
  74. .endm
  75. .macro postcall
  76. pop r31 ; restore everything precall pushed, in exactly the reverse order
  77. pop r30
  78. pop r27
  79. pop r26
  80. pop r25
  81. pop r24
  82. pop r23
  83. pop r22
  84. pop r21
  85. pop r20
  86. pop r19
  87. pop r18
  88. pop r1 ; r1 gets back the value it had before precall cleared it
  89. pop r0
  90. .endm
  91. .macro hexdump length
  92. push r27 ; keep X (pointer to the data to dump) across the uart_putc calls
  93. push r26
  94. ldi r25, '\r' ; carriage return
  95. mov r24, r25 ; the character is handed over in r24
  96. call uart_putc ; external routine, not defined in this file
  97. ldi r25, '\n' ; line feed
  98. mov r24, r25
  99. call uart_putc
  100. pop r26 ; restore X
  101. pop r27
  102. movw r24, r26 ; r25:r24 = X, handed to uart_hexdump (presumably its pointer argument - TODO confirm)
  103. .if \length > 16
  104. ldi r22, lo8(16) ; r23:r22 = 16, this chunk is limited to 16 bytes
  105. ldi r23, hi8(16)
  106. push r27 ; keep X across the call
  107. push r26
  108. call uart_hexdump ; external routine, not defined in this file
  109. pop r26
  110. pop r27
  111. adiw r26, 16 ; advance X past the chunk; the macro then expands itself again for the remaining bytes
  112. hexdump \length-16
  113. .else
  114. ldi r22, lo8(\length) ; r23:r22 = remaining byte count (16 or fewer)
  115. ldi r23, hi8(\length)
  116. call uart_hexdump
  117. .endif
  118. .endm
  119. /* debug helper: X points to Block; expands to precall, a hexdump of length bytes starting at X, and postcall, so the registers saved by precall (including X) are restored afterwards */
  120. .macro dbg_hexdump length
  121. precall
  122. hexdump \length
  123. postcall
  124. .endm
  125. .section .text
  126. SPL = 0x3D ; I/O address of the stack pointer low byte, used with in/out below
  127. SPH = 0x3E ; I/O address of the stack pointer high byte
  128. SREG = 0x3F ; I/O address of the status register; saved and restored around cli while SP is rewritten
  129. ;
  130. ;sha256_ctx_t is:
  131. ;
  132. ; [h0][h1][h2][h3][h4][h5][h6][h7][length]
  133. ; hn is 32 bit large, length is 64 bit large
  134. ;###########################################################
  135. .global sha256_ctx2hash
  136. ; === sha256_ctx2hash ===
  137. ; this function converts a state into a normal hash (bytestring): the eight 32-bit words h0..h7 are copied with the bytes of each word in reversed order
  138. ; param1: the 16-bit destination pointer (32 bytes are written)
  139. ; given in r25,r24 (r25 is most significant)
  140. ; param2: the 16-bit pointer to sha256_ctx structure
  141. ; given in r23,r22; clobbers r0, r20, r21, X (r27:r26) and Z (r31:r30)
  142. sha256_ctx2hash:
  143. movw r26, r22 ; X = ctx
  144. movw r30, r24 ; Z = destination
  145. ldi r21, 8 ; r21 = words left to copy
  146. sbiw r26, 4 ; bias X so that the first adiw below lands on ctx+4
  147. 1:
  148. ldi r20, 4 ; r20 = bytes left in the current word
  149. adiw r26, 8 ; X = one past the last byte of the current word
  150. 2:
  151. ld r0, -X ; read the word from its highest address downwards
  152. st Z+, r0 ; write with ascending address, which reverses the byte order
  153. dec r20
  154. brne 2b
  155. dec r21
  156. brne 1b
  157. ret
  158. ;###########################################################
  159. .global sha256
  160. ; === sha256 ===
  161. ; this function calculates SHA-256 hashes from messages in RAM; it builds a 40 byte sha256_ctx on the stack, feeds the message through sha256_nextBlock / sha256_lastBlock and writes the digest with sha256_ctx2hash
  162. ; param1: the 16-bit hash destination pointer
  163. ; given in r25,r24 (r25 is most significant)
  164. ; param2: the 16-bit pointer to message
  165. ; given in r23,r22
  166. ; param3: 32-bit length value (length of message in bits)
  167. ; given in r21,r20,r19,r18
  168. sha256:
  169. sha256_prolog:
  170. push r8 ; r8-r13, r16, r17 hold the backups below and are restored in the epilog
  171. push r9
  172. push r10
  173. push r11
  174. push r12
  175. push r13
  176. push r16
  177. push r17
  178. in r30, SPL ; Z = SP
  179. in r31, SPH
  180. sbiw r30, 8*4+8 ; reserve 40 bytes: 8 state words plus the 64-bit length
  181. in r0, SREG
  182. cli ; no interrupt may see a half-updated stack pointer
  183. out SPL, r30
  184. out SREG, r0 ; restore the previous interrupt state
  185. out SPH, r31
  186. push r25 ; keep the destination pointer until the digest is written
  187. push r24
  188. adiw r30, 1 ; Z = first byte of the reserved ctx (SP itself points at the next free byte)
  189. movw r16, r30 ; r17:r16 = ctx pointer
  190. movw r8, r18 /* backup of length*/
  191. movw r10, r20 ; r11:r10:r9:r8 = remaining length in bits
  192. movw r12, r22 /* backup pf msg-ptr */
  193. movw r24, r16 ; param1 = ctx
  194. rcall sha256_init
  195. /* if length > 0xffff */
  196. 1:
  197. tst r11 ; any bit set in the upper 16 bits of the length?
  198. brne 2f
  199. tst r10
  200. breq 4f ; no: the rest fits the 16-bit length of sha256_lastBlock
  201. 2:
  202. movw r24, r16 ; param1 = ctx
  203. movw r22, r12 ; param2 = current message position
  204. rcall sha256_nextBlock
  205. ldi r19, 64
  206. add r12, r19 ; advance the message pointer by one block
  207. adc r13, r1 ; r1 is zero here, sha256_nextBlock clears it before returning
  208. /* length -= 512 */
  209. ldi r19, 0x02
  210. sub r9, r19 ; 512 = 0x0200, so the lowest byte (r8) is left alone
  211. sbc r10, r1
  212. sbc r11, r1
  213. rjmp 1b
  214. 4:
  215. movw r24, r16 ; param1 = ctx
  216. movw r22, r12 ; param2 = rest of the message
  217. movw r20, r8 ; param3 = remaining length in bits (low 16 bits)
  218. rcall sha256_lastBlock
  219. pop r24 ; destination pointer saved in the prolog
  220. pop r25
  221. movw r22, r16 ; param2 = ctx
  222. rcall sha256_ctx2hash
  223. sha256_epilog:
  224. in r30, SPL
  225. in r31, SPH
  226. adiw r30, 8*4+8 ; release the 40 byte ctx
  227. in r0, SREG
  228. cli
  229. out SPL, r30
  230. out SREG, r0
  231. out SPH, r31
  232. pop r17
  233. pop r16
  234. pop r13
  235. pop r12
  236. pop r11
  237. pop r10
  238. pop r9
  239. pop r8
  240. ret
  241. ;###########################################################
  242. ; the block that gets padded holds less than 64 bytes; lengths of 512 bits or more are consumed block-wise by the loop at the entry
  243. .global sha256_lastBlock
  244. ; === sha256_lastBlock ===
  245. ; this function does padding & Co. for calculating SHA-256 hashes: it copies the remaining message into a 64 byte stack buffer, appends the stuffing bit, zeros and the 64-bit total length and hashes the result (two blocks if the length field does not fit)
  246. ; param1: the 16-bit pointer to sha256_ctx structure
  247. ; given in r25,r24 (r25 is most significant)
  248. ; param2: an 16-bit pointer to 64 byte block to hash
  249. ; given in r23,r22
  250. ; param3: an 16-bit integer specifing length of block in bits
  251. ; given in r21,r20; bits of a partial last byte beyond this length are ignored
  252. sha256_lastBlock_localSpace = (SHA256_BLOCK_BITS/8+1) ; NOTE(review): not referenced below, the prolog reserves exactly 64 bytes
  253. sha256_lastBlock:
  254. cpi r21, 0x02 ; length >= 512 bits?
  255. brlo sha256_lastBlock_prolog
  256. push r25 ; yes: hash one full block first, keeping all three parameters
  257. push r24
  258. push r23
  259. push r22
  260. push r21
  261. push r20
  262. rcall sha256_nextBlock
  263. pop r20
  264. pop r21
  265. pop r22
  266. pop r23
  267. pop r24
  268. pop r25
  269. subi r21, 0x02 ; length -= 512
  270. ldi r19, 64
  271. add r22, r19 ; message pointer += 64
  272. adc r23, r1 ; r1 is zero here, sha256_nextBlock clears it before returning
  273. rjmp sha256_lastBlock
  274. sha256_lastBlock_prolog:
  275. /* allocate space on stack */
  276. in r30, SPL
  277. in r31, SPH
  278. in r0, SREG
  279. subi r30, lo8(64) ; reserve a 64 byte buffer for the padded block
  280. sbci r31, hi8(64)
  281. cli
  282. out SPL, r30
  283. out SREG,r0
  284. out SPH, r31
  285. adiw r30, 1 /* SP points to next free byte on stack, so Z now points to the first byte of the buffer */
  286. mov r18, r20 /* r20 = LSB(length) */
  287. lsr r18
  288. lsr r18
  289. lsr r18
  290. bst r21, 0 /* bit 8 of the length (worth 32 bytes) is moved in as bit 5 of the byte count */
  291. bld r18, 5 /* now: r18 == length/8 (aka. length in bytes) */
  292. movw r26, r22 /* X points to begin of msg */
  293. tst r18
  294. breq sha256_lastBlock_post_copy
  295. mov r1, r18 ; r1 serves as copy counter and is zero again when the loop ends
  296. sha256_lastBlock_copy_loop:
  297. ld r0, X+
  298. st Z+, r0
  299. dec r1
  300. brne sha256_lastBlock_copy_loop
  301. sha256_lastBlock_post_copy:
  302. sha256_lastBlock_insert_stuffing_bit:
  303. ldi r19, 0x80
  304. mov r0,r19 ; r0 = stuffing bit for a byte-aligned message
  305. ldi r19, 0x07
  306. and r19, r20 /* if we are in bitmode */
  307. breq 2f /* no bitmode */
  308. 1:
  309. lsr r0 ; after the loop r0 = 0x80 >> (length & 7)
  310. dec r19
  311. brne 1b
  312. ld r19, X ; r19 = partial last byte of the message
  313. /* keep only the valid leading bits of the partial byte, so stray bits behind the message end cannot corrupt the padding */
       mov r22, r0 ; r22 is free here: the message pointer lives in X and r23:r22 is reloaded before each call below
       lsl r22
       neg r22 ; r22 = -(2 * stuffing bit) = mask of the (length & 7) leading bits
       and r19, r22
  314. or r0, r19 ; message bits followed directly by the stuffing bit
  315. 2:
  316. st Z+, r0
  317. inc r18 ; r18 = bytes used in the buffer so far
  318. /* checking stuff here */
  319. cpi r18, 64-8+1 ; more than 56 bytes used means the 8 byte length no longer fits
  320. brsh 0f
  321. rjmp sha256_lastBlock_insert_zeros
  322. 0:
  323. /* the length field does not fit, an additional block is needed */
  324. /* first we have to fill it up with zeros */
  325. ldi r19, 64
  326. sub r19, r18
  327. breq 2f
  328. 1:
  329. st Z+, r1 ; r1 is zero (left at zero by the copy loop)
  330. dec r19
  331. brne 1b
  332. 2:
  333. sbiw r30, 63
  334. sbiw r30, 1 ; Z back to the start of the buffer (64 is out of range for a single sbiw)
  335. movw r22, r30 ; param2 = buffer
  336. push r31
  337. push r30
  338. push r25
  339. push r24
  340. push r21
  341. push r20
  342. rcall sha256_nextBlock
  343. pop r20
  344. pop r21
  345. pop r24
  346. pop r25
  347. pop r30
  348. pop r31
  349. /* now we should subtract 512 from length: sha256_nextBlock added 512, the real bit count is added in insert_length */
  350. movw r26, r24
  351. adiw r26, 4*8+1 /* we can skip the lowest byte */
  352. ld r19, X
  353. subi r19, hi8(512)
  354. st X+, r19
  355. ldi r18, 6 ; propagate the borrow through the six upper length bytes
  356. 1:
  357. ld r19, X
  358. sbci r19, 0
  359. st X+, r19
  360. dec r18 ; dec leaves the carry flag untouched
  361. brne 1b
  362. ; clr r18 /* not neccessary, the loop above leaves r18 at zero */
  363. /* Z was restored to the begin of the block by the pops above */
  364. sha256_lastBlock_insert_zeros:
  365. ldi r19, 64-8
  366. sub r19, r18 ; r19 = zero bytes needed in front of the length field
  367. breq sha256_lastBlock_insert_length
  368. clr r1
  369. 1:
  370. st Z+, r1 /* r1 is still zero */
  371. dec r19
  372. brne 1b
  373. ; rjmp sha256_lastBlock_epilog
  374. sha256_lastBlock_insert_length:
  375. movw r26, r24 /* X points to state */
  376. adiw r26, 8*4 /* X points to (state.length) */
  377. adiw r30, 8 /* Z points one after the last byte of block */
  378. ld r0, X+
  379. add r0, r20 ; total length = state.length + bits of this last block
  380. st -Z, r0 ; stored backwards, so the block receives the length most significant byte first
  381. ld r0, X+
  382. adc r0, r21
  383. st -Z, r0
  384. ldi r19, 6
  385. 1:
  386. ld r0, X+
  387. adc r0, r1 ; r1 is zero, this only propagates the carry
  388. st -Z, r0
  389. dec r19 ; dec leaves the carry flag untouched
  390. brne 1b
  391. sbiw r30, 64-8 ; Z back to the start of the buffer
  392. movw r22, r30 ; param2 = buffer
  393. rcall sha256_nextBlock
  394. sha256_lastBlock_epilog:
  395. in r30, SPL
  396. in r31, SPH
  397. in r0, SREG
  398. adiw r30, 63 ; lo8(64)
  399. adiw r30, 1 ; hi8(64)
  400. cli
  401. out SPL, r30
  402. out SREG,r0
  403. out SPH, r31
  404. clr r1
  405. ret
  406. /**/
  407. ;###########################################################
  408. .global sha256_nextBlock
  409. ; === sha256_nextBlock ===
  410. ; this is the core function for calculating SHA-256 hashes: it expands one block into the 64 word w array, runs the 64 rounds, adds the result to the state words and adds 512 to the bit length stored in the ctx
  411. ; param1: the 16-bit pointer to sha256_ctx structure
  412. ; given in r25,r24 (r25 is most significant)
  413. ; param2: an 16-bit pointer to 64 byte block to hash
  414. ; given in r23,r22; r4-r17 and r28/r29 are saved and restored, r1 is zero on return
  415. sha256_nextBlock_localSpace = (64+8)*4 ; 64 32-bit values for w array and 8 32-bit values for a array (total 288 byte)
  416. Bck1 = 12 ; Bck1..Bck4 (r12..r15): copy of the word currently being rotated or shifted
  417. Bck2 = 13
  418. Bck3 = 14
  419. Bck4 = 15
  420. Func1 = 22 ; Func1..Func4 (r22..r25): operand and result of bitrotl / bitrotr
  421. Func2 = 23
  422. Func3 = 24
  423. Func4 = 25
  424. Accu1 = 16 ; Accu1..Accu4 (r16..r19): running sum (new w[i], later t2)
  425. Accu2 = 17
  426. Accu3 = 18
  427. Accu4 = 19
  428. XAccu1 = 8 ; XAccu1..XAccu4 (r8..r11): xor accumulator for the sigma and maj terms
  429. XAccu2 = 9
  430. XAccu3 = 10
  431. XAccu4 = 11
  432. T1 = 4 ; T1..T4 (r4..r7): t1 of the current round
  433. T2 = 5
  434. T3 = 6
  435. T4 = 7
  436. LoopC = 1 ; r1 is used as loop counter and cleared again in the epilog
  437. /* byteorder: high number <--> high significance */
  438. sha256_nextBlock:
  439. ; initial, let's make some space ready for local vars
  440. push r4 /* replace push & pop by mem ops? */
  441. push r5
  442. push r6
  443. push r7
  444. push r8
  445. push r9
  446. push r10
  447. push r11
  448. push r12
  449. push r13
  450. push r14
  451. push r15
  452. push r16
  453. push r17
  454. push r28
  455. push r29
  456. in r20, SPL
  457. in r21, SPH
  458. movw r18, r20 ;backup SP
  459. ; movw r26, r20 ; X points to free space on stack
  460. movw r30, r22 ; Z points to message
  461. subi r20, lo8(sha256_nextBlock_localSpace) ;sbiw can do only up to 63
  462. sbci r21, hi8(sha256_nextBlock_localSpace)
  463. movw r26, r20 ; X points to free space on stack
  464. in r0, SREG
  465. cli ; we want to be uninterrupted while updating SP
  466. out SPL, r20
  467. out SREG, r0
  468. out SPH, r21
  469. push r18 ; old SP, popped again in the epilog to release the local space
  470. push r19
  471. push r24
  472. push r25 /* param1 will be needed later */
  473. ; now we fill the w array with message (think about endianess)
  474. adiw r26, 1 ; X++ (X = w[0]; SP itself pointed at the next free byte)
  475. ldi r20, 16 ; 16 words of message
  476. sha256_nextBlock_wcpyloop:
  477. ld r23, Z+ ; the four bytes of a message word are read in order ...
  478. ld r22, Z+
  479. ld r19, Z+
  480. ld r18, Z+
  481. st X+, r18 ; ... and stored in reversed order, so w[] holds the words least significant byte first
  482. st X+, r19
  483. st X+, r22
  484. st X+, r23
  485. dec r20
  486. brne sha256_nextBlock_wcpyloop
  487. /* for (i=16; i<64; ++i){
  488. w[i] = SIGMA_b(w[i-2]) + w[i-7] + SIGMA_a(w[i-15]) + w[i-16];
  489. } */
  490. /* r25,r24,r23,r22 (r21,r20) are function values
  491. r19,r18,r17,r16 are the accumulator
  492. r15,r14,r13,r12 are backup1
  493. r11,r10,r9 ,r8 are xor accu
  494. r1 is round counter */
  495. ldi r20, 64-16
  496. mov LoopC, r20 ; 48 words still to be calculated
  497. sha256_nextBlock_wcalcloop:
  498. movw r30, r26 ; cp X to Z
  499. sbiw r30, 63
  500. sbiw r30, 1 ; substract 64 = 16*4
  501. ld Accu1, Z+
  502. ld Accu2, Z+
  503. ld Accu3, Z+
  504. ld Accu4, Z+ /* w[i] = w[i-16] */
  505. ld Bck1, Z+
  506. ld Bck2, Z+
  507. ld Bck3, Z+
  508. ld Bck4, Z+ /* backup = w[i-15]; Z now points to w[i-14] */
  509. /* now sigma 0 */
  510. mov Func1, Bck2
  511. mov Func2, Bck3
  512. mov Func3, Bck4
  513. mov Func4, Bck1 /* prerotated by 8 */
  514. ldi r20, 1
  515. rcall bitrotl ; one bit back to the left: net rotation right by 7
  516. movw XAccu1, Func1
  517. movw XAccu3, Func3 /* store ROTR(w[i-15],7) in xor accu */
  518. movw Func1, Bck3
  519. movw Func3, Bck1 /* prerotated by 16 */
  520. ldi r20, 2
  521. rcall bitrotr
  522. eor XAccu1, Func1 /* xor ROTR(w[i-15], 18)*/
  523. eor XAccu2, Func2
  524. eor XAccu3, Func3
  525. eor XAccu4, Func4
  526. ldi Func2, 3 /* now shr3 */ /*we can destroy backup now*/
  527. sigma0_shr:
  528. lsr Bck4
  529. ror Bck3
  530. ror Bck2
  531. ror Bck1
  532. dec Func2
  533. brne sigma0_shr
  534. eor XAccu1, Bck1
  535. eor XAccu2, Bck2
  536. eor XAccu3, Bck3
  537. eor XAccu4, Bck4 /* xor SHR(w[i-15], 3)*/ /* xor accu == sigma0(w[i-15]) */
  538. add Accu1, XAccu1
  539. adc Accu2, XAccu2
  540. adc Accu3, XAccu3
  541. adc Accu4, XAccu4 /* finished with sigma0 */
  542. ldd Func1, Z+7*4 /* now accu += w[i-7] (Z points to w[i-14]) */
  543. ldd Func2, Z+7*4+1
  544. ldd Func3, Z+7*4+2
  545. ldd Func4, Z+7*4+3
  546. add Accu1, Func1
  547. adc Accu2, Func2
  548. adc Accu3, Func3
  549. adc Accu4, Func4
  550. ldd Bck1, Z+12*4 /* now backup = w[i-2]*/
  551. ldd Bck2, Z+12*4+1
  552. ldd Bck3, Z+12*4+2
  553. ldd Bck4, Z+12*4+3
  554. /* now sigma 1 */
  555. movw Func1, Bck3
  556. movw Func3, Bck1 /* prerotated by 16 */
  557. ldi r20, 1
  558. rcall bitrotr
  559. movw XAccu3, Func3
  560. movw XAccu1, Func1 /* store in ROTR(w[i-2], 17) xor accu */
  561. ; movw Func1, Bck3
  562. ; movw Func3, Bck1 /* prerotated by 16 */
  563. ldi r20, 2
  564. rcall bitrotr ; Func still holds the value rotated by 17, two more bits give 19
  565. eor XAccu1, Func1 /* xor ROTR(w[i-2], 19)*/
  566. eor XAccu2, Func2
  567. eor XAccu3, Func3
  568. eor XAccu4, Func4
  569. ldi Func2, 2 /* now shr10 (shift by 2 bits, the remaining 8 bits are done by skipping a byte) */ /*we can destroy backup now*/
  570. sigma1_shr:
  571. lsr Bck4
  572. ror Bck3
  573. ror Bck2
  574. dec Func2
  575. brne sigma1_shr
  576. eor XAccu1, Bck2
  577. eor XAccu2, Bck3
  578. eor XAccu3, Bck4 /* xor SHR(w[i-2], 10)*/ /* xor accu == sigma1(w[i-2]) */
  579. add Accu1, XAccu1
  580. adc Accu2, XAccu2
  581. adc Accu3, XAccu3
  582. adc Accu4, XAccu4 /* finished with sigma1 */
  583. /* now store the new w[i] */
  584. st X+, Accu1
  585. st X+, Accu2
  586. st X+, Accu3
  587. st X+, Accu4
  588. dec LoopC
  589. breq 3f ; skip if zero
  590. rjmp sha256_nextBlock_wcalcloop
  591. 3:
  592. /* we are finished with w array X points one byte post w */
  593. /* init a array */
  594. pop r31 ; fetch the ctx pointer pushed in the prolog ...
  595. pop r30
  596. push r30 ; ... and put it back for update_state
  597. push r31
  598. ldi r25, 8*4 /* 8 32-bit values to copy from ctx to a array */
  599. init_a_array:
  600. ld r1, Z+ ; r1 (LoopC) is only a scratch register here
  601. st X+, r1
  602. dec r25
  603. brne init_a_array
  604. /* now the real fun begins */
  605. /* for (i=0; i<64; ++i){
  606. t1 = a[7] + SIGMA1(a[4]) + CH(a[4],a[5],a[6]) + k[i] + w[i];
  607. t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]);
  608. memmove(&(a[1]), &(a[0]), 7*4); // a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0];
  609. a[4] += t1;
  610. a[0] = t1 + t2;
  611. } */
  612. /* Y points to a[0], Z ('cause lpm wants it) points to k[i], X points to w[i] */
  613. sbiw r26, 8*4 /* X pointed at a[7]+1, now it points to a[0] */
  614. movw r28, r26
  615. ldi r30, lo8(sha256_kv)
  616. ldi r31, hi8(sha256_kv)
  617. dec r27 /* X - (64*4 == 256), so X points to w[0] */
  618. ldi r25, 64
  619. mov LoopC, r25 ; 64 rounds
  620. sha256_main_loop:
  621. /* now calculate t1 */
  622. /*CH(x,y,z) = (x&y)^((~x)&z)*/
  623. ldd T1, Y+5*4
  624. ldd T2, Y+5*4+1
  625. ldd T3, Y+5*4+2
  626. ldd T4, Y+5*4+3 /* y in T */
  627. ldd Func1, Y+4*4
  628. ldd Func2, Y+4*4+1
  629. ldd Func3, Y+4*4+2
  630. ldd Func4, Y+4*4+3 /* x in Func */
  631. ldd Bck1, Y+6*4
  632. ldd Bck2, Y+6*4+1
  633. ldd Bck3, Y+6*4+2
  634. ldd Bck4, Y+6*4+3 /* z in Bck */
  635. and T1, Func1
  636. and T2, Func2
  637. and T3, Func3
  638. and T4, Func4
  639. com Func1
  640. com Func2
  641. com Func3
  642. com Func4
  643. and Bck1, Func1
  644. and Bck2, Func2
  645. and Bck3, Func3
  646. and Bck4, Func4
  647. eor T1, Bck1
  648. eor T2, Bck2
  649. eor T3, Bck3
  650. eor T4, Bck4 /* done, CH(x,y,z) is in T */
  651. /* now SIGMA1(a[4]) */
  652. ldd Bck4, Y+4*4 /* think about using it from Func reg above*/
  653. ldd Bck1, Y+4*4+1
  654. ldd Bck2, Y+4*4+2
  655. ldd Bck3, Y+4*4+3 /* load prerotate by 8-bit */
  656. movw Func1, Bck1
  657. movw Func3, Bck3
  658. ldi r20, 2
  659. rcall bitrotl /* rotr(x,6) */
  660. movw XAccu1, Func1
  661. movw XAccu3, Func3
  662. movw Func1, Bck1
  663. movw Func3, Bck3
  664. ldi r20, 3
  665. rcall bitrotr /* rotr(x,11) */
  666. eor XAccu1, Func1
  667. eor XAccu2, Func2
  668. eor XAccu3, Func3
  669. eor XAccu4, Func4
  670. movw Func1, Bck3 /* this prerotates further 16 bits*/
  671. movw Func3, Bck1 /* so we have now prerotated by 24 bits*/
  672. ldi r20, 1
  673. rcall bitrotr /* rotr(x,25) */
  674. eor XAccu1, Func1
  675. eor XAccu2, Func2
  676. eor XAccu3, Func3
  677. eor XAccu4, Func4 /* finished with SIGMA1, add it to T */
  678. add T1, XAccu1
  679. adc T2, XAccu2
  680. adc T3, XAccu3
  681. adc T4, XAccu4
  682. /* now we've to add a[7], w[i] and k[i] */
  683. ldd XAccu1, Y+4*7
  684. ldd XAccu2, Y+4*7+1
  685. ldd XAccu3, Y+4*7+2
  686. ldd XAccu4, Y+4*7+3
  687. add T1, XAccu1
  688. adc T2, XAccu2
  689. adc T3, XAccu3
  690. adc T4, XAccu4 /* add a[7] */
  691. ld XAccu1, X+
  692. ld XAccu2, X+
  693. ld XAccu3, X+
  694. ld XAccu4, X+
  695. add T1, XAccu1
  696. adc T2, XAccu2
  697. adc T3, XAccu3
  698. adc T4, XAccu4 /* add w[i] */
  699. lpm XAccu1, Z+ ; k[i] is read from program memory
  700. lpm XAccu2, Z+
  701. lpm XAccu3, Z+
  702. lpm XAccu4, Z+
  703. add T1, XAccu1
  704. adc T2, XAccu2
  705. adc T3, XAccu3
  706. adc T4, XAccu4 /* add k[i] */ /* finished with t1 */
  707. /*now t2 = SIGMA0(a[0]) + MAJ(a[0],a[1],a[2]) */ /*i did to much x86 asm, i always see 4 32bit regs*/
  708. /* starting with MAJ(x,y,z) */
  709. ldd Func1, Y+4*0+0
  710. ldd Func2, Y+4*0+1
  711. ldd Func3, Y+4*0+2
  712. ldd Func4, Y+4*0+3 /* load x=a[0] */
  713. ldd XAccu1, Y+4*1+0
  714. ldd XAccu2, Y+4*1+1
  715. ldd XAccu3, Y+4*1+2
  716. ldd XAccu4, Y+4*1+3 /* load y=a[1] */
  717. and XAccu1, Func1
  718. and XAccu2, Func2
  719. and XAccu3, Func3
  720. and XAccu4, Func4 /* XAccu == (x & y) */
  721. ldd Bck1, Y+4*2+0
  722. ldd Bck2, Y+4*2+1
  723. ldd Bck3, Y+4*2+2
  724. ldd Bck4, Y+4*2+3 /* load z=a[2] */
  725. and Func1, Bck1
  726. and Func2, Bck2
  727. and Func3, Bck3
  728. and Func4, Bck4
  729. eor XAccu1, Func1
  730. eor XAccu2, Func2
  731. eor XAccu3, Func3
  732. eor XAccu4, Func4 /* XAccu == (x & y) ^ (x & z) */
  733. ldd Func1, Y+4*1+0
  734. ldd Func2, Y+4*1+1
  735. ldd Func3, Y+4*1+2
  736. ldd Func4, Y+4*1+3 /* load y=a[1] */
  737. and Func1, Bck1
  738. and Func2, Bck2
  739. and Func3, Bck3
  740. and Func4, Bck4
  741. eor XAccu1, Func1
  742. eor XAccu2, Func2
  743. eor XAccu3, Func3
  744. eor XAccu4, Func4 /* XAccu == Maj(x,y,z) == (x & y) ^ (x & z) ^ (y & z) */
  745. /* SIGMA0(a[0]) */
  746. ldd Bck1, Y+4*0+0 /* we should combine this with above */
  747. ldd Bck2, Y+4*0+1
  748. ldd Bck3, Y+4*0+2
  749. ldd Bck4, Y+4*0+3
  750. movw Func1, Bck1
  751. movw Func3, Bck3
  752. ldi r20, 2
  753. rcall bitrotr
  754. movw Accu1, Func1
  755. movw Accu3, Func3 /* Accu = rotr(a[0], 2) */
  756. movw Func1, Bck3
  757. movw Func3, Bck1 /* prerotate by 16 bits */
  758. ldi r20, 3
  759. rcall bitrotl ; three bits back to the left: net rotation right by 13
  760. eor Accu1, Func1
  761. eor Accu2, Func2
  762. eor Accu3, Func3
  763. eor Accu4, Func4 /* Accu ^= rotr(a[0], 13) */
  764. mov Func1, Bck4
  765. mov Func2, Bck1
  766. mov Func3, Bck2
  767. mov Func4, Bck3 /* prerotate by 24 bits */
  768. ldi r20, 2
  769. rcall bitrotl ; two bits back to the left: net rotation right by 22
  770. eor Accu1, Func1
  771. eor Accu2, Func2
  772. eor Accu3, Func3
  773. eor Accu4, Func4 /* Accu ^= rotr(a[0], 22) */
  774. add Accu1, XAccu1 /* add previous result (MAJ)*/
  775. adc Accu2, XAccu2
  776. adc Accu3, XAccu3
  777. adc Accu4, XAccu4
  778. /* now we are finished with the computing stuff (t1 in T, t2 in Accu)*/
  779. /* a[7]=a[6]; a[6]=a[5]; a[5]=a[4]; a[4]=a[3]; a[3]=a[2]; a[2]=a[1]; a[1]=a[0]; */
  780. ldi r21, 7*4 ; 28 bytes are moved up by one word
  781. adiw r28, 7*4 ; Y = a[7], the copy runs from the top downwards
  782. a_shift_loop:
  783. ld r25, -Y /* warning: this is PREdecrement */
  784. std Y+4, r25
  785. dec r21
  786. brne a_shift_loop ; on exit Y points to a[0] again
  787. ldd Bck1, Y+4*4+0 ; a[4] (the former a[3]) += t1
  788. ldd Bck2, Y+4*4+1
  789. ldd Bck3, Y+4*4+2
  790. ldd Bck4, Y+4*4+3
  791. add Bck1, T1
  792. adc Bck2, T2
  793. adc Bck3, T3
  794. adc Bck4, T4
  795. std Y+4*4+0, Bck1
  796. std Y+4*4+1, Bck2
  797. std Y+4*4+2, Bck3
  798. std Y+4*4+3, Bck4
  799. add Accu1, T1 ; a[0] = t1 + t2
  800. adc Accu2, T2
  801. adc Accu3, T3
  802. adc Accu4, T4
  803. std Y+4*0+0, Accu1
  804. std Y+4*0+1, Accu2
  805. std Y+4*0+2, Accu3
  806. std Y+4*0+3, Accu4 /* a array updated */
  807. dec LoopC
  808. breq update_state
  809. rjmp sha256_main_loop ;brne sha256_main_loop
  810. update_state:
  811. /* update state */
  812. /* the pointer to the state is still on the stack (pushed in the prolog) */
  813. pop r31
  814. pop r30
  815. ldi r21, 8 ; 8 state words
  816. update_state_loop:
  817. ldd Accu1, Z+0
  818. ldd Accu2, Z+1
  819. ldd Accu3, Z+2
  820. ldd Accu4, Z+3
  821. ld Func1, Y+
  822. ld Func2, Y+
  823. ld Func3, Y+
  824. ld Func4, Y+
  825. add Accu1, Func1 ; h[n] += a[n]
  826. adc Accu2, Func2
  827. adc Accu3, Func3
  828. adc Accu4, Func4
  829. st Z+, Accu1
  830. st Z+, Accu2
  831. st Z+, Accu3
  832. st Z+, Accu4
  833. dec r21
  834. brne update_state_loop
  835. /* now we just have to update the length */
  836. adiw r30, 1 /* since we add 512, we can simply skip the LSB */
  837. ldi r21, 2
  838. ldi r22, 6
  839. ld r20, Z
  840. add r20, r21 ; second length byte += 2, i.e. length += 512
  841. st Z+, r20
  842. clr r21
  843. sha256_nextBlock_fix_length:
  844. brcc sha256_nextBlock_epilog ; stop as soon as no carry is left to propagate
  845. ld r20, Z
  846. adc r20, r21
  847. st Z+, r20
  848. dec r22 ; dec leaves the carry flag untouched
  849. brne sha256_nextBlock_fix_length
  850. ; EPILOG
  851. sha256_nextBlock_epilog:
  852. /* now we should clean up the stack */
  853. pop r21 ; SP value saved in the prolog (high byte was pushed last)
  854. pop r20
  855. in r0, SREG
  856. cli ; we want to be uninterrupted while updating SP
  857. out SPL, r20
  858. out SREG, r0
  859. out SPH, r21
  860. clr r1 ; r1 served as LoopC and scratch register, leave it at zero
  861. pop r29
  862. pop r28
  863. pop r17
  864. pop r16
  865. pop r15
  866. pop r14
  867. pop r13
  868. pop r12
  869. pop r11
  870. pop r10
  871. pop r9
  872. pop r8
  873. pop r7
  874. pop r6
  875. pop r5
  876. pop r4
  877. ret
  878. sha256_kv: ; round-key-vector stored in ProgMem: the 64 round constants k[0..63], each written as two 16-bit words with the low half first; read with lpm in sha256_main_loop
  879. .word 0x2f98, 0x428a, 0x4491, 0x7137, 0xfbcf, 0xb5c0, 0xdba5, 0xe9b5, 0xc25b, 0x3956, 0x11f1, 0x59f1, 0x82a4, 0x923f, 0x5ed5, 0xab1c
  880. .word 0xaa98, 0xd807, 0x5b01, 0x1283, 0x85be, 0x2431, 0x7dc3, 0x550c, 0x5d74, 0x72be, 0xb1fe, 0x80de, 0x06a7, 0x9bdc, 0xf174, 0xc19b
  881. .word 0x69c1, 0xe49b, 0x4786, 0xefbe, 0x9dc6, 0x0fc1, 0xa1cc, 0x240c, 0x2c6f, 0x2de9, 0x84aa, 0x4a74, 0xa9dc, 0x5cb0, 0x88da, 0x76f9
  882. .word 0x5152, 0x983e, 0xc66d, 0xa831, 0x27c8, 0xb003, 0x7fc7, 0xbf59, 0x0bf3, 0xc6e0, 0x9147, 0xd5a7, 0x6351, 0x06ca, 0x2967, 0x1429
  883. .word 0x0a85, 0x27b7, 0x2138, 0x2e1b, 0x6dfc, 0x4d2c, 0x0d13, 0x5338, 0x7354, 0x650a, 0x0abb, 0x766a, 0xc92e, 0x81c2, 0x2c85, 0x9272
  884. .word 0xe8a1, 0xa2bf, 0x664b, 0xa81a, 0x8b70, 0xc24b, 0x51a3, 0xc76c, 0xe819, 0xd192, 0x0624, 0xd699, 0x3585, 0xf40e, 0xa070, 0x106a
  885. .word 0xc116, 0x19a4, 0x6c08, 0x1e37, 0x774c, 0x2748, 0xbcb5, 0x34b0, 0x0cb3, 0x391c, 0xaa4a, 0x4ed8, 0xca4f, 0x5b9c, 0x6ff3, 0x682e
  886. .word 0x82ee, 0x748f, 0x636f, 0x78a5, 0x7814, 0x84c8, 0x0208, 0x8cc7, 0xfffa, 0x90be, 0x6ceb, 0xa450, 0xa3f7, 0xbef9, 0x78f2, 0xc671
  887. ;###########################################################
  888. .global sha256_init
  889. ;uint32_t sha256_init_vector[]={
  890. ; 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
  891. ; 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 };
  892. ;
  893. ;void sha256_init(sha256_ctx_t *state){
  894. ; state->length=0;
  895. ; memcpy(state->h, sha256_init_vector, 8*4);
  896. ;}
  897. ; param1: (r25,r24) 16-bit pointer to sha256_ctx_t struct in ram
  898. ; modifys: X(r26,r27), Z(r30,r31), r22, r23
  899. sha256_init:
  900. movw r26, r24 ; (24,25) --> (26,27) load X with param1
  901. ldi r30, lo8((sha256_init_vector)) ; Z = address of the initial values in program memory
  902. ldi r31, hi8((sha256_init_vector))
  903. ldi r22, 32+8 ; 32 bytes of state words plus the 8 zero bytes that reset the length
  904. sha256_init_vloop:
  905. lpm r23, Z+ ; read one byte from program memory
  906. st X+, r23 ; and store it into the ctx
  907. dec r22
  908. brne sha256_init_vloop
  909. ret
  910. sha256_init_vector: ; 40 bytes copied verbatim into the ctx by sha256_init; each 32-bit value is written low half first
  911. .word 0xE667, 0x6A09 ; h0 = 0x6A09E667
  912. .word 0xAE85, 0xBB67 ; h1 = 0xBB67AE85
  913. .word 0xF372, 0x3C6E ; h2 = 0x3C6EF372
  914. .word 0xF53A, 0xA54F ; h3 = 0xA54FF53A
  915. .word 0x527F, 0x510E ; h4 = 0x510E527F
  916. .word 0x688C, 0x9B05 ; h5 = 0x9B05688C
  917. .word 0xD9AB, 0x1F83 ; h6 = 0x1F83D9AB
  918. .word 0xCD19, 0x5BE0 ; h7 = 0x5BE0CD19
  919. .word 0x0000, 0x0000 ; length, lower 32 bits = 0
  920. .word 0x0000, 0x0000 ; length, upper 32 bits = 0
  921. ;###########################################################
  922. .global rotl32
  923. ; === ROTL32 ===
  924. ; function that rotates a 32 bit word to the left; the result is left in r25,r24,r23,r22
  925. ; param1: the 32-bit word to rotate
  926. ; given in r25,r24,r23,r22 (r25 is most significant)
  927. ; param2: an 8-bit value telling how often to rotate
  928. ; given in r20
  929. ; modifys: r20, r21 and the word in r25..r22; bitrotl is entered directly by sha256_nextBlock with counts below 8
  930. rotl32:
  931. cpi r20, 8
  932. brlo bitrotl ; fewer than 8 bits left: finish bit by bit
  933. mov r21, r25 ; otherwise rotate by a whole byte with register moves
  934. mov r25, r24
  935. mov r24, r23
  936. mov r23, r22
  937. mov r22, r21
  938. subi r20, 8
  939. rjmp rotl32
  940. bitrotl:
  941. clr r21 ; r21 collects the bits shifted out at the top
  942. clc
  943. bitrotl_loop:
  944. tst r20
  945. breq fixrotl
  946. 2:
  947. rol r22
  948. rol r23
  949. rol r24
  950. rol r25
  951. rol r21 ; shifts a zero into carry, so the next rol r22 pulls in a zero
  952. dec r20 ; dec leaves the carry flag untouched
  953. brne 2b
  954. fixrotl:
  955. or r22, r21 ; put the collected bits back in at the bottom
  956. ret
  957. ;###########################################################
  958. .global rotr32
  959. ; === ROTR32 ===
  960. ; function that rotates a 32 bit word to the right; the result is left in r25,r24,r23,r22
  961. ; param1: the 32-bit word to rotate
  962. ; given in r25,r24,r23,r22 (r25 is most significant)
  963. ; param2: an 8-bit value telling how often to rotate
  964. ; given in r20
  965. ; modifys: r20, r21 and the word in r25..r22; bitrotr is entered directly by sha256_nextBlock with counts below 8
  966. rotr32:
  967. cpi r20, 8
  968. brlo bitrotr ; fewer than 8 bits left: finish bit by bit
  969. mov r21, r22 ; otherwise rotate by a whole byte with register moves
  970. mov r22, r23
  971. mov r23, r24
  972. mov r24, r25
  973. mov r25, r21
  974. subi r20, 8
  975. rjmp rotr32
  976. bitrotr:
  977. clr r21 ; r21 collects the bits shifted out at the bottom
  978. clc
  979. bitrotr_loop:
  980. tst r20
  981. breq fixrotr
  982. 2:
  983. ror r25
  984. ror r24
  985. ror r23
  986. ror r22
  987. ror r21 ; shifts a zero into carry, so the next ror r25 pulls in a zero
  988. dec r20 ; dec leaves the carry flag untouched
  989. brne 2b
  990. fixrotr:
  991. or r25, r21 ; put the collected bits back in at the top
  992. ret
  993. ;###########################################################
  994. .global change_endian32
  995. ; === change_endian32 ===
  996. ; function that changes the endianess of a 32-bit word; the byte-swapped word is left in r25,r24,r23,r22
  997. ; param1: the 32-bit word
  998. ; given in r25,r24,r23,r22 (r25 is most significant)
  999. ; modifys: r20, r21
  1000. change_endian32:
  1001. movw r20, r22 ; (r22,r23) --> (r20,r21)
  1002. mov r22, r25 ; new lowest byte = old highest byte
  1003. mov r23, r24
  1004. mov r24, r21 ; old r23
  1005. mov r25, r20 ; new highest byte = old lowest byte (old r22)
  1006. ret
  1007. #endif