[PATCH] crypto: incorporate C implementation of ARC4
by Ard Biesheuvel
Incorporate the LGPL v2.1 licensed implementation of ARC4, taken from
the Nettle project (https://git.lysator.liu.se/nettle/nettle.git,
commit 3e7a480a1e351884), and tweak it a bit so we don't have to
operate on a skip buffer to fast forward the stream cipher, but can
simply invoke it with NULL dst or src arguments to achieve the same.
This removes the dependency [via libell] on the OS's implementation of
ecb(arc4), which may be going away, and which is not usually accelerated
in the first place.
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
src/crypto.c | 82 ++++++++++++++++++++++++++++++-----------------
src/main.c | 8 -----
unit/test-eapol.c | 3 +-
3 files changed, 53 insertions(+), 40 deletions(-)
diff --git a/src/crypto.c b/src/crypto.c
index 696b59901284..f5f8e24df1ea 100644
--- a/src/crypto.c
+++ b/src/crypto.c
@@ -18,6 +18,8 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
+ * (contains ARC4 implementation copyright (c) 2001 Niels Möller)
+ *
*/
#ifdef HAVE_CONFIG_H
@@ -34,6 +36,16 @@
#include "src/missing.h"
#include "src/crypto.h"
+#define ARC4_MIN_KEY_SIZE 1
+#define ARC4_MAX_KEY_SIZE 256
+#define ARC4_KEY_SIZE 16
+
+struct arc4_ctx {
+ uint8_t S[256];
+ uint8_t i;
+ uint8_t j;
+};
+
/* RFC 3526, Section 2 */
const unsigned char crypto_dh5_prime[] = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc9, 0x0f, 0xda, 0xa2,
@@ -415,44 +427,54 @@ free_ctr:
return false;
}
-bool arc4_skip(const uint8_t *key, size_t key_len, size_t skip,
- const uint8_t *in, size_t len, uint8_t *out)
-{
- char skip_buf[1024];
- struct l_cipher *cipher;
- struct iovec in_vec[2];
- struct iovec out_vec[2];
- bool r;
-
- cipher = l_cipher_new(L_CIPHER_ARC4, key, key_len);
- if (!cipher)
- return false;
+#define SWAP(a,b) do { int _t = a; a = b; b = _t; } while (0)
- /* This is not strictly necessary, but keeps valgrind happy */
- memset(skip_buf, 0, sizeof(skip_buf));
+static void arc4_set_key(struct arc4_ctx *ctx, unsigned length,
+ const uint8_t *key)
+{
+ unsigned int i, j, k;
- while (skip > sizeof(skip_buf)) {
- size_t to_skip =
- skip > sizeof(skip_buf) ? sizeof(skip_buf) : skip;
+ /* Initialize context */
+ for (i = 0; i < 256; i++)
+ ctx->S[i] = i;
- l_cipher_decrypt(cipher, skip_buf, skip_buf, to_skip);
- skip -= to_skip;
+ for (i = j = k = 0; i < 256; i++) {
+ j += ctx->S[i] + key[k]; j &= 0xff;
+ SWAP(ctx->S[i], ctx->S[j]);
+ /* Repeat key as needed */
+ k = (k + 1) % length;
}
+ ctx->i = ctx->j = 0;
+}
- in_vec[0].iov_base = skip_buf;
- in_vec[0].iov_len = skip;
- in_vec[1].iov_base = (void *) in;
- in_vec[1].iov_len = len;
+static void arc4_crypt(struct arc4_ctx *ctx, unsigned length, uint8_t *dst,
+ const uint8_t *src)
+{
+ uint8_t i, j;
+
+ i = ctx->i; j = ctx->j;
+ while (length--) {
+ i++; i &= 0xff;
+ j += ctx->S[i]; j &= 0xff;
+ SWAP(ctx->S[i], ctx->S[j]);
+ if (!dst || !src)
+ continue;
+ *dst++ = *src++ ^ ctx->S[ (ctx->S[i] + ctx->S[j]) & 0xff ];
+ }
+ ctx->i = i; ctx->j = j;
+}
- out_vec[0].iov_base = skip_buf;
- out_vec[0].iov_len = skip;
- out_vec[1].iov_base = out;
- out_vec[1].iov_len = len;
+bool arc4_skip(const uint8_t *key, size_t key_len, size_t skip,
+ const uint8_t *in, size_t len, uint8_t *out)
+{
+ struct arc4_ctx cipher;
- r = l_cipher_decryptv(cipher, in_vec, 2, out_vec, 2);
- l_cipher_free(cipher);
+ arc4_set_key(&cipher, key_len, key);
+ arc4_crypt(&cipher, skip, NULL, NULL);
+ arc4_crypt(&cipher, len, out, in);
+ explicit_bzero(&cipher, sizeof(cipher));
- return r;
+ return true;
}
/* 802.11, Section 11.6.2, Table 11-4 */
diff --git a/src/main.c b/src/main.c
index 7c08746397d3..3216f50834d4 100644
--- a/src/main.c
+++ b/src/main.c
@@ -277,14 +277,6 @@ static int check_crypto()
ADD_OPTIONAL("CONFIG_CRYPTO_SHA512_SSSE3");
}
- if (!l_cipher_is_supported(L_CIPHER_ARC4)) {
- r = -ENOTSUP;
- l_error("RC4 support not found");
- ADD_MISSING("CONFIG_CRYPTO_USER_API_SKCIPHER");
- ADD_MISSING("CONFIG_CRYPTO_ARC4");
- ADD_MISSING("CONFIG_CRYPTO_ECB");
- }
-
if (!l_cipher_is_supported(L_CIPHER_DES) ||
!l_cipher_is_supported(L_CIPHER_DES3_EDE_CBC)) {
r = -ENOTSUP;
diff --git a/unit/test-eapol.c b/unit/test-eapol.c
index ac94522b9fab..f6af6f065199 100644
--- a/unit/test-eapol.c
+++ b/unit/test-eapol.c
@@ -3600,8 +3600,7 @@ int main(int argc, char *argv[])
l_test_add("/EAPoL Key/Calculate MIC Test 1",
eapol_calculate_mic_test, &eapol_calculate_mic_test_1);
- if (!l_cipher_is_supported(L_CIPHER_AES) ||
- !l_cipher_is_supported(L_CIPHER_ARC4))
+ if (!l_cipher_is_supported(L_CIPHER_AES))
goto done;
l_test_add("EAPoL/WPA2 4-Way Handshake",
--
2.20.1
1 year, 10 months
[PATCHv2] Fix a side channel leak on the password in SAE
by Daniel DE ALMEIDA BRAGA
Use a constant control flow in the derivation loop, avoiding leakage
in the iteration succesfuly converting the password.
Increase number of iterations (20 to 30) to avoid issues with
passwords needing more iterations.
Updated to take account of ELL's patch modifications.
---
src/sae.c | 135 +++++++++++++++++++++++++++++++++++++----------------
src/util.h | 31 ++++++++++++
2 files changed, 125 insertions(+), 41 deletions(-)
diff --git a/src/sae.c b/src/sae.c
index 97e3348e..c3bb6515 100644
--- a/src/sae.c
+++ b/src/sae.c
@@ -97,23 +97,47 @@ static bool sae_pwd_seed(const uint8_t *addr1, const uint8_t *addr2,
&counter, (size_t) 1);
}
+/*
+ * Computes KDF-256(pwd_seed, "SAE Hunting and Pecking", p). If the output is
+ * greater than p, the output is set to qnr, a quadratic non-residue.
+ * Since this happens with very low probability, using the same qnr is fine.
+ */
static struct l_ecc_scalar *sae_pwd_value(const struct l_ecc_curve *curve,
- uint8_t *pwd_seed)
+ uint8_t *pwd_seed, uint8_t *qnr)
{
uint8_t pwd_value[L_ECC_SCALAR_MAX_BYTES];
uint8_t prime[L_ECC_SCALAR_MAX_BYTES];
ssize_t len;
+ int is_in_range;
struct l_ecc_scalar *p = l_ecc_curve_get_prime(curve);
len = l_ecc_scalar_get_data(p, prime, sizeof(prime));
-
l_ecc_scalar_free(p);
if (!kdf_sha256(pwd_seed, 32, "SAE Hunting and Pecking",
- strlen("SAE Hunting and Pecking"), prime, len,
- pwd_value, len))
+ strlen("SAE Hunting and Pecking"), prime, len,
+ pwd_value, len))
return NULL;
+ /*
+ * If pwd_value >= prime, this iteration should fail. We need a smooth
+ * control flow, so we need to continue anyway.
+ */
+ is_in_range = l_secure_memcmp(pwd_value, prime, len);
+ /*
+ * We only consider is_in_range == -1 as valid, meaning the value of the
+ * MSB defines the mask.
+ */
+ is_in_range = util_secure_fill_with_msb(is_in_range);
+
+ /*
+ * libell has public Legendre symbol only for l_ecc_scalar, but they cannot
+ * be created if the coordinate is greater than the p. Hence, to avoid
+ * control flow dependencies, we replace pwd_value by a dummy quadratic
+ * non residue if we generate a value >= prime.
+ */
+ util_secure_select((uint8_t) is_in_range, pwd_value, qnr, pwd_value, sizeof(pwd_value));
+
return l_ecc_scalar_new(curve, pwd_value, sizeof(pwd_value));
}
@@ -190,7 +214,7 @@ static struct l_ecc_scalar *sae_new_residue(const struct l_ecc_curve *curve,
return s;
}
-static bool sae_is_quadradic_residue(const struct l_ecc_curve *curve,
+static uint8_t sae_is_quadradic_residue(const struct l_ecc_curve *curve,
struct l_ecc_scalar *value,
struct l_ecc_scalar *qr,
struct l_ecc_scalar *qnr)
@@ -213,7 +237,7 @@ static bool sae_is_quadradic_residue(const struct l_ecc_curve *curve,
if (bytes <= 0) {
l_ecc_scalar_free(num);
- return false;
+ return 0;
}
if (rbuf[bytes / 8 - 1] & 1) {
@@ -221,20 +245,20 @@ static bool sae_is_quadradic_residue(const struct l_ecc_curve *curve,
if (l_ecc_scalar_legendre(num) == -1) {
l_ecc_scalar_free(num);
- return true;
+ return 1;
}
} else {
l_ecc_scalar_multiply(num, num, qnr);
if (l_ecc_scalar_legendre(num) == 1) {
l_ecc_scalar_free(num);
- return true;
+ return 1;
}
}
l_ecc_scalar_free(num);
- return false;
+ return 0;
}
/*
@@ -244,66 +268,95 @@ static bool sae_is_quadradic_residue(const struct l_ecc_curve *curve,
static bool sae_compute_pwe(struct sae_sm *sm, char *password,
const uint8_t *addr1, const uint8_t *addr2)
{
- bool found = false;
+ uint8_t found = 0;
+ uint8_t is_residue;
+ uint8_t is_odd = 0;
uint8_t counter;
uint8_t pwd_seed[32];
+ uint8_t x[L_ECC_SCALAR_MAX_BYTES];
+ uint8_t x_cand[L_ECC_SCALAR_MAX_BYTES];
struct l_ecc_scalar *pwd_value;
- uint8_t random[32];
- uint8_t *base = (uint8_t *) password;
- size_t base_len = strlen(password);
- uint8_t save[32] = { 0 };
+ uint8_t *dummy;
+ uint8_t *base;
+ size_t base_len;
struct l_ecc_scalar *qr;
struct l_ecc_scalar *qnr;
- uint8_t x[L_ECC_SCALAR_MAX_BYTES];
+ uint8_t qnr_bin[L_ECC_SCALAR_MAX_BYTES] = {0};
/* create qr/qnr prior to beginning hunting-and-pecking loop */
qr = sae_new_residue(sm->curve, true);
qnr = sae_new_residue(sm->curve, false);
+ l_ecc_scalar_get_data(qnr, qnr_bin, sizeof(qnr_bin));
+
+ /*
+ * Allocate memory for the base, and set a random dummy to be used in
+ * additional iterations, once a valid value is found
+ */
+ base_len = strlen(password);
+ base = l_malloc(base_len * sizeof(*base));
+ dummy = l_malloc(base_len * sizeof(*dummy));
+ l_getrandom(dummy, base_len);
- for (counter = 1; counter <= 20; counter++) {
- /* pwd-seed = H(max(addr1, addr2) || min(addr1, addr2),
- * base || counter)
+ /*
+ * Loop with constant time and memory access
+ * We do 30 iterations instead of the 40 recommended to achieve a
+ * resonnable security/complexity trade-off.
+ */
+ for (counter = 1; counter <= 30; counter++) {
+ /*
+ * Set base to either dummy or password, depending on found's value
+ * A non-secure version would be:
+ * base = (found ? dummy : password);
+ */
+ util_secure_select(found, dummy, (uint8_t *)password, base, base_len);
+
+ /*
+ * pwd-seed = H(max(addr1, addr2) || min(addr1, addr2),
+ * base || counter)
* pwd-value = KDF-256(pwd-seed, "SAE Hunting and Pecking", p)
*/
sae_pwd_seed(addr1, addr2, base, base_len, counter, pwd_seed);
+ /*
+ * The case pwd_value > prime is handled inside, so that execution
+ * can continue whatever the result is, without changing the outcome.
+ */
+ pwd_value = sae_pwd_value(sm->curve, pwd_seed, qnr_bin);
- pwd_value = sae_pwd_value(sm->curve, pwd_seed);
- if (!pwd_value)
- continue;
-
- if (sae_is_quadradic_residue(sm->curve, pwd_value, qr, qnr)) {
- if (found == false) {
- l_ecc_scalar_get_data(pwd_value, x, sizeof(x));
-
- memcpy(save, pwd_seed, 32);
+ /*
+ * Check if the candidate is a valid x-coordinate on our curve, and
+ * convert it from scalar to binary.
+ */
+ is_residue = sae_is_quadradic_residue(sm->curve, pwd_value, qr, qnr);
+ l_ecc_scalar_get_data(pwd_value, x_cand, sizeof(x_cand));
- l_getrandom(random, 32);
- base = random;
- base_len = 32;
+ /*
+ * If we already found the point, we overwrite x with itself.
+ * Otherwise, we copy the new candidate into x.
+ */
+ util_secure_select(found, x, x_cand, x, sizeof(x));
+ is_odd = util_secure_select_byte(found, is_odd, pwd_seed[31] & 0x01);
- found = true;
- }
- }
+ /*
+ * found is 0 or 0xff here and is_residue is 0 or 1. Bitwise OR of them
+ * (with is_residue converted to 0/0xff) handles this in constant time.
+ */
+ found |= is_residue * 0xff;
+ memset(pwd_seed, 0, sizeof(pwd_seed));
l_ecc_scalar_free(pwd_value);
}
l_ecc_scalar_free(qr);
l_ecc_scalar_free(qnr);
+ l_free(dummy);
+ l_free(base);
if (!found) {
l_error("max PWE iterations reached!");
return false;
}
+ sm->pwe = l_ecc_point_from_data(sm->curve, !is_odd + 2, x, sizeof(x));
- if (!(save[31] & 1))
- sm->pwe = l_ecc_point_from_data(sm->curve,
- L_ECC_POINT_TYPE_COMPRESSED_BIT1,
- x, sizeof(x));
- else
- sm->pwe = l_ecc_point_from_data(sm->curve,
- L_ECC_POINT_TYPE_COMPRESSED_BIT0,
- x, sizeof(x));
if (!sm->pwe) {
l_error("computing y failed, was x quadratic residue?");
diff --git a/src/util.h b/src/util.h
index edc6e777..93c7d619 100644
--- a/src/util.h
+++ b/src/util.h
@@ -71,4 +71,35 @@ static inline void util_set_bit(uint8_t *field, unsigned int bit)
field[bit / 8] = 1 << (bit % 8);
}
+/*
+ * Returns either true_value or false_value (depending if mask is 0xFF or 0x00
+ * respectively).
+ * This constant time selection method allows to keep an identical memory
+ * access pattern.
+ */
+static inline uint8_t util_secure_select_byte(uint8_t mask, const uint8_t true_value,
+ const uint8_t false_value)
+{
+ return (mask & true_value) | (~mask & false_value);
+}
+
+/*
+ * Copies either true_value or false_value (depending if mask is 0xFF or 0x00
+ * respectively) into dest. All three buffers are assumed to be the same size.
+ * This constant time selection method allows to keep an identical memory
+ * access pattern.
+ */
+static inline void util_secure_select(uint8_t mask, const uint8_t *true_value,
+ const uint8_t *false_value, uint8_t *dest, size_t size)
+{
+ for(int i = 0; i < size; i++)
+ dest[i] = util_secure_select_byte(mask, true_value[i], false_value[i]);
+}
+
+/* Create a value filled with the MSB of the input. */
+static inline uint32_t util_secure_fill_with_msb(uint32_t val)
+{
+ return (uint32_t) (val >> (sizeof(val)*8 - 1)) * 0xFFFFFFFF;
+}
+
#endif /* __UTIL_H */
--
2.25.4
1 year, 10 months