@@ -143,13 +143,11 @@ void chacha20_xor(void* buffer, size_t n, const uint8_t key[32],
143143void poly1305 (const uint8_t * msg , size_t n , const uint8_t key [32 ],
144144 uint8_t tag [16 ])
145145{
146- uint32_t hibit ;
147146 uint64_t d0 , d1 , d2 , d3 , d4 ;
148147 uint32_t h0 , h1 , h2 , h3 , h4 ;
149148 uint32_t r0 , r1 , r2 , r3 , r4 ;
150149 uint32_t s1 , s2 , s3 , s4 ;
151150
152- hibit = (uint32_t ) 1 << 24 ;
153151 h0 = h1 = h2 = h3 = h4 = 0 ;
154152 r0 = (LOAD32_LE (key + 0 ) >> 0 ) & 0x03FFFFFF ;
155153 r1 = (LOAD32_LE (key + 3 ) >> 2 ) & 0x03FFFF03 ; s1 = r1 * 5 ;
@@ -158,12 +156,13 @@ void poly1305(const uint8_t* msg, size_t n, const uint8_t key[32],
158156 r4 = (LOAD32_LE (key + 12 ) >> 8 ) & 0x000FFFFF ; s4 = r4 * 5 ;
159157 while (n >= 16 )
160158 {
159+ h4 += 0x01000000 ;
161160process_block :
162161 h0 += (LOAD32_LE (msg + 0 ) >> 0 ) & 0x03FFFFFF ;
163162 h1 += (LOAD32_LE (msg + 3 ) >> 2 ) & 0x03FFFFFF ;
164163 h2 += (LOAD32_LE (msg + 6 ) >> 4 ) & 0x03FFFFFF ;
165164 h3 += (LOAD32_LE (msg + 9 ) >> 6 ) & 0x03FFFFFF ;
166- h4 += (LOAD32_LE (msg + 12 ) >> 8 ) | hibit ;
165+ h4 += (LOAD32_LE (msg + 12 ) >> 8 );
167166
168167 #define MUL (a ,b ) ((uint64_t)(a) * (b))
169168 d0 = MUL (h0 ,r0 ) + MUL (h1 ,s4 ) + MUL (h2 ,s3 ) + MUL (h3 ,s2 ) + MUL (h4 ,s1 );
@@ -188,32 +187,26 @@ void poly1305(const uint8_t* msg, size_t n, const uint8_t key[32],
188187 for (i = 0 ; i < n ; tag [i ] = msg [i ], i ++ );
189188 for (tag [i ++ ] = 1 ; i < 16 ; tag [i ++ ] = 0 );
190189 msg = tag ;
191- hibit = 0 ;
192190 n = 16 ;
193191 goto process_block ;
194192 }
195193
196- r0 = h0 + 5 ;
197- r1 = h1 + (r0 >> 26 ); * (volatile uint32_t * )& r0 = 0 ;
198- r2 = h2 + (r1 >> 26 ); * (volatile uint32_t * )& r1 = 0 ;
199- r3 = h3 + (r2 >> 26 ); * (volatile uint32_t * )& r2 = 0 ;
200- r4 = h4 + (r3 >> 26 ); * (volatile uint32_t * )& r3 = 0 ;
201- h0 = h0 + (r4 >> 26 ) * 5 ; * (volatile uint32_t * )& r4 = 0 ;
202-
203- d0 = (uint64_t )LOAD32_LE (key + 16 ) + (h0 >> 0 ) + (h1 << 26 );
204- d1 = (uint64_t )LOAD32_LE (key + 20 ) + (h1 >> 6 ) + (h2 << 20 ) + (d0 >> 32 );
205- d2 = (uint64_t )LOAD32_LE (key + 24 ) + (h2 >> 12 ) + (h3 << 14 ) + (d1 >> 32 );
206- d3 = (uint64_t )LOAD32_LE (key + 28 ) + (h3 >> 18 ) + (h4 << 8 ) + (d2 >> 32 );
207-
208- STORE32_LE (tag + 0 , d0 ); * (volatile uint32_t * )& s1 = 0 ;
209- STORE32_LE (tag + 4 , d1 ); * (volatile uint32_t * )& s2 = 0 ;
210- STORE32_LE (tag + 8 , d2 ); * (volatile uint32_t * )& s3 = 0 ;
211- STORE32_LE (tag + 12 , d3 ); * (volatile uint32_t * )& s4 = 0 ;
212- * (volatile uint64_t * )& d0 = 0 ; * (volatile uint32_t * )& h0 = 0 ;
213- * (volatile uint64_t * )& d1 = 0 ; * (volatile uint32_t * )& h1 = 0 ;
214- * (volatile uint64_t * )& d2 = 0 ; * (volatile uint32_t * )& h2 = 0 ;
215- * (volatile uint64_t * )& d3 = 0 ; * (volatile uint32_t * )& h3 = 0 ;
216- * (volatile uint64_t * )& d4 = 0 ; * (volatile uint32_t * )& h4 = 0 ;
194+ r0 = (h0 + 5 ) >> 26 ;
195+ r1 = (h1 + r0 ) >> 26 ;
196+ r2 = (h2 + r1 ) >> 26 ;
197+ r3 = (h3 + r2 ) >> 26 ;
198+ r4 = (h4 + r3 ) >> 26 ;
199+ h0 += r4 * 5 ;
200+
201+ d1 = (uint64_t )LOAD32_LE (key + 16 ) + (h0 >> 0 ) + (h1 << 26 );
202+ d2 = (uint64_t )LOAD32_LE (key + 20 ) + (h1 >> 6 ) + (h2 << 20 ) + (d1 >> 32 );
203+ d3 = (uint64_t )LOAD32_LE (key + 24 ) + (h2 >> 12 ) + (h3 << 14 ) + (d2 >> 32 );
204+ d4 = (uint64_t )LOAD32_LE (key + 28 ) + (h3 >> 18 ) + (h4 << 8 ) + (d3 >> 32 );
205+
206+ s1 = d1 ; STORE32_LE (tag + 0 , s1 );
207+ s2 = d2 ; STORE32_LE (tag + 4 , s2 );
208+ s3 = d3 ; STORE32_LE (tag + 8 , s3 );
209+ s4 = d4 ; STORE32_LE (tag + 12 , s4 );
217210}
218211
219212int poly1305_tagcmp (const uint8_t tag1 [16 ], const uint8_t tag2 [16 ])
@@ -295,7 +288,7 @@ static size_t entropy(void* buf, size_t n)
295288
296289#endif
297290
298- #elif defined(__linux__ ) || defined(__unix__ ) || defined(__APPLE__ )
291+ #elif defined(__linux__ ) || defined(__unix__ ) || defined(__APPLE__ ) || defined( __QNX__ )
299292
300293#ifndef _GNU_SOURCE
301294#define _GNU_SOURCE
0 commit comments