test/monniaux/BearSSL/src/rsa/rsa_i31_privexp.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318

/*
 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining 
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be 
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "inner.h"

/* see bearssl_rsa.h */
size_t
br_rsa_i31_compute_privexp(void *d,
	const br_rsa_private_key *sk, uint32_t e)
{
	/*
	 * We want to invert e modulo phi = (p-1)(q-1). This first
	 * requires computing phi, which is easy since we have the factors
	 * p and q in the private key structure.
	 *
	 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
	 * We could invert e modulo phi/4 then patch the result to
	 * modulo phi, but this would involve assembling three modulus-wide
	 * values (phi/4, 1 and e) and calling moddiv, that requires
	 * three more temporaries, for a total of six big integers, or
	 * slightly more than 3 kB of stack space for RSA-4096. This
	 * exceeds our stack requirements.
	 *
	 * Instead, we first use one step of the extended GCD:
	 *
	 *   - We compute phi = k*e + r  (Euclidean division of phi by e).
	 *     If public exponent e is correct, then r != 0 (e must be
	 *     invertible modulo phi). We also have k != 0 since we
	 *     enforce non-ridiculously-small factors.
	 *
	 *   - We find small u, v such that u*e - v*r = 1  (using a
	 *     binary GCD; we can arrange for u < r and v < e, i.e. all
	 *     values fit on 32 bits).
	 *
	 *   - Solution is: d = u + v*k
	 *     This last computation is exact: since u < r and v < e,
	 *     the above implies d < r + e*((phi-r)/e) = phi
	 */

	uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12];
	uint32_t *p, *q, *k, *m, *z, *phi;
	const unsigned char *pbuf, *qbuf;
	size_t plen, qlen, u, len, dlen;
	uint32_t r, a, b, u0, v0, u1, v1, he, hr;
	int i;

	/*
	 * Check that e is correct.
	 */
	if (e < 3 || (e & 1) == 0) {
		return 0;
	}

	/*
	 * Check lengths of p and q, and that they are both odd.
	 */
	pbuf = sk->p;
	plen = sk->plen;
	while (plen > 0 && *pbuf == 0) {
		pbuf ++;
		plen --;
	}
	if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
		|| (pbuf[plen - 1] & 1) != 1)
	{
		return 0;
	}
	qbuf = sk->q;
	qlen = sk->qlen;
	while (qlen > 0 && *qbuf == 0) {
		qbuf ++;
		qlen --;
	}
	if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
		|| (qbuf[qlen - 1] & 1) != 1)
	{
		return 0;
	}

	/*
	 * Output length is that of the modulus.
	 */
	dlen = (sk->n_bitlen + 7) >> 3;
	if (d == NULL) {
		return dlen;
	}

	p = tmp;
	br_i31_decode(p, pbuf, plen);
	plen = (p[0] + 31) >> 5;
	q = p + 1 + plen;
	br_i31_decode(q, qbuf, qlen);
	qlen = (q[0] + 31) >> 5;

	/*
	 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
	 * we do not need anymore). The mulacc function sets the announced
	 * bit length of t to be the sum of the announced bit lengths of
	 * p-1 and q-1, which is usually exact but may overshoot by one 1
	 * bit in some cases; we readjust it to its true length.
	 */
	p[1] --;
	q[1] --;
	phi = q + 1 + qlen;
	br_i31_zero(phi, p[0]);
	br_i31_mulacc(phi, p, q);
	len = (phi[0] + 31) >> 5;
	memmove(tmp, phi, (1 + len) * sizeof *phi);
	phi = tmp;
	phi[0] = br_i31_bit_length(phi + 1, len);
	len = (phi[0] + 31) >> 5;

	/*
	 * Divide phi by public exponent e. The final remainder r must be
	 * non-zero (otherwise, the key is invalid). The quotient is k,
	 * which we write over phi, since we don't need phi after that.
	 */
	r = 0;
	for (u = len; u >= 1; u --) {
		/*
		 * Upon entry, r < e, and phi[u] < 2^31; hence,
		 * hi:lo < e*2^31. Thus, the produced word k[u]
		 * must be lower than 2^31, and the new remainder r
		 * is lower than e.
		 */
		uint32_t hi, lo;

		hi = r >> 1;
		lo = (r << 31) + phi[u];
		phi[u] = br_divrem(hi, lo, e, &r);
	}
	if (r == 0) {
		return 0;
	}
	k = phi;

	/*
	 * Compute u and v such that u*e - v*r = GCD(e,r). We use
	 * a binary GCD algorithm, with 6 extra integers a, b,
	 * u0, u1, v0 and v1. Initial values are:
	 *   a = e    u0 = 1   v0 = 0
	 *   b = r    u1 = r   v1 = e-1
	 * The following invariants are maintained:
	 *   a = u0*e - v0*r
	 *   b = u1*e - v1*r
	 *   0 < a <= e
	 *   0 < b <= r
	 *   0 <= u0 <= r
	 *   0 <= v0 <= e
	 *   0 <= u1 <= r
	 *   0 <= v1 <= e
	 *
	 * At each iteration, we reduce either a or b by one bit, and
	 * adjust u0, u1, v0 and v1 to maintain the invariants:
	 *  - if a is even, then a <- a/2
	 *  - otherwise, if b is even, then b <- b/2
	 *  - otherwise, if a > b, then a <- (a-b)/2
	 *  - otherwise, if b > a, then b <- (b-a)/2
	 * Algorithm stops when a = b. At that point, the common value
	 * is the GCD of e and r; it must be 1 (otherwise, the private
	 * key or public exponent is not valid). The (u0,v0) or (u1,v1)
	 * pairs are the solution we are looking for.
	 *
	 * Since either a or b is reduced by at least 1 bit at each
	 * iteration, 62 iterations are enough to reach the end
	 * condition.
	 *
	 * To maintain the invariants, we must compute the same operations
	 * on the u* and v* values that we do on a and b:
	 *  - When a is divided by 2, u0 and v0 must be divided by 2.
	 *  - When b is divided by 2, u1 and v1 must be divided by 2.
	 *  - When b is subtracted from a, u1 and v1 are subtracted from
	 *    u0 and v0, respectively.
	 *  - When a is subtracted from b, u0 and v0 are subtracted from
	 *    u1 and v1, respectively.
	 *
	 * However, we want to keep the u* and v* values in their proper
	 * ranges. The following remarks apply:
	 *
	 *  - When a is divided by 2, then a is even. Therefore:
	 *
	 *     * If r is odd, then u0 and v0 must have the same parity;
	 *       if they are both odd, then adding r to u0 and e to v0
	 *       makes them both even, and the division by 2 brings them
	 *       back to the proper range.
	 *
	 *     * If r is even, then u0 must be even; if v0 is odd, then
	 *       adding r to u0 and e to v0 makes them both even, and the
	 *       division by 2 brings them back to the proper range.
	 *
	 *    Thus, all we need to do is to look at the parity of v0,
	 *    and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
	 *    a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
	 *    division (r+1 does not overflow since r < e; and (e/2)+1
	 *    is equal to (e+1)/2 since e is odd).
	 *
	 *  - When we subtract b from a, three cases may occur:
	 *
	 *     * u1 <= u0 and v1 <= v0: just do the subtractions
	 *
	 *     * u1 > u0 and v1 > v0: compute:
	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
	 *
	 *     * u1 <= u0 and v1 > v0: compute:
	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
	 *
	 *    The fourth case (u1 > u0 and v1 <= v0) is not possible
	 *    because it would contradict "b < a" (which is the reason
	 *    why we subtract b from a).
	 *
	 *    The tricky case is the third one: from the equations, it
	 *    seems that u0 may go out of range. However, the invariants
	 *    and ranges of other values imply that, in that case, the
	 *    new u0 does not actually exceed the range.
	 *
	 *    We can thus handle the subtraction by adding (r,e) based
	 *    solely on the comparison between v0 and v1.
	 */
	a = e;
	b = r;
	u0 = 1;
	v0 = 0;
	u1 = r;
	v1 = e - 1;
	hr = (r + 1) >> 1;
	he = (e >> 1) + 1;
	for (i = 0; i < 62; i ++) {
		uint32_t oa, ob, agtb, bgta;
		uint32_t sab, sba, da, db;
		uint32_t ctl;

		oa = a & 1;                  /* 1 if a is odd */
		ob = b & 1;                  /* 1 if b is odd */
		agtb = GT(a, b);             /* 1 if a > b */
		bgta = GT(b, a);             /* 1 if b > a */

		sab = oa & ob & agtb;        /* 1 if a <- a-b */
		sba = oa & ob & bgta;        /* 1 if b <- b-a */

		/* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
		ctl = GT(v1, v0);
		a -= b & -sab;
		u0 -= (u1 - (r & -ctl)) & -sab;
		v0 -= (v1 - (e & -ctl)) & -sab;

		/* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
		ctl = GT(v0, v1);
		b -= a & -sba;
		u1 -= (u0 - (r & -ctl)) & -sba;
		v1 -= (v0 - (e & -ctl)) & -sba;

		da = NOT(oa) | sab;          /* 1 if a <- a/2 */
		db = (oa & NOT(ob)) | sba;   /* 1 if b <- b/2 */

		/* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
		ctl = v0 & 1;
		a ^= (a ^ (a >> 1)) & -da;
		u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
		v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;

		/* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
		ctl = v1 & 1;
		b ^= (b ^ (b >> 1)) & -db;
		u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
		v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
	}

	/*
	 * Check that the GCD is indeed 1. If not, then the key is invalid
	 * (and there's no harm in leaking that piece of information).
	 */
	if (a != 1) {
		return 0;
	}

	/*
	 * Now we have u0*e - v0*r = 1. Let's compute the result as:
	 *   d = u0 + v0*k
	 * We still have k in the tmp[] array, and its announced bit
	 * length is that of phi.
	 */
	m = k + 1 + len;
	m[0] = (1 << 5) + 1;  /* bit length is 32 bits, encoded */
	m[1] = v0 & 0x7FFFFFFF;
	m[2] = v0 >> 31;
	z = m + 3;
	br_i31_zero(z, k[0]);
	z[1] = u0 & 0x7FFFFFFF;
	z[2] = u0 >> 31;
	br_i31_mulacc(z, k, m);

	/*
	 * Encode the result.
	 */
	br_i31_encode(d, dlen, z);
	return dlen;
}