diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..2d46afe --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,34 @@ +libuecc v7 (2016/03/27) + +* Change conversion between Ed25519 and legacy representation. This should + not affect any operations unless Ed25519 and legacy load/store + functions are mixed when accessing a work structure. Doing so is now + officially supported, for example to convert a legacy public key to + Ed25519 format. +* The changed representation allows using the same + ecc_25519_work_default_base for both Ed25519 and legacy. + ecc_25519_work_default_base and ecc_25519_scalarmult_base have been + undeprecated, ecc_25519_work_base_ed25519 and + ecc_25519_work_base_legacy are deprecated now. +* All points are now internally represented with Ed25519 coordinates, which + allows about 6% faster scalar multiplication than the legacy + representation. +* ecc_25519_scalarmult_base has been further optimized, making it another + 6% faster than normal ecc_25519_scalarmult. + + +libuecc v6 (2015/10/25) + +* Fixes a bug which might have caused a point's y coordinate to be negated + in certain circumstances when the point was stored in packed + representation and loaded again. It is extremely improbable that this + has ever actually happened, as only a small range of coordinates was + affected. 
+* Use stdint types to clarify ABI and add support for systems with + sizeof(int) < 4 (this is not an ABI break in practice as all systems on + which libuecc has been used in the past should have int == int32_t) +* Add point negation and subtraction functions +* Rename all point access functions to bear a _legacy suffix (the old names + are still available, but marked as deprecated) +* Add new point access functions and a new generator point that are + compatible with Ed25519 diff --git a/CMakeLists.txt b/CMakeLists.txt index cae20ed..779ac41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 2.6) project(LIBUECC C) -set(PROJECT_VERSION 5) +set(PROJECT_VERSION 7) set(CMAKE_MODULE_PATH ${LIBUECC_SOURCE_DIR}) diff --git a/README b/README new file mode 100644 index 0000000..6e0ec9b --- /dev/null +++ b/README @@ -0,0 +1,30 @@ +libuecc is a very small generic-purpose Elliptic Curve Cryptography library +compatible with Ed25519. + +Most documentation can be found as Doxygen comments in the ecc.h header +file. You can use `make doxygen` after running CMake to create HTML +documentation from it. + +There are two sets of functions converting between libuecc's internal point +representation and coordinates or compressed representation. The functions +ending with _ed25519 use the same representation as the original Ed25519 +implementation and should be used by new software. The functions with the +suffix _legacy are provided for compatibility with libuecc versions before +v6. + +Ed25519 and the legacy representation are isomorphic; they use a Twisted +Edwards Curve + + ax^2 + y^2 = 1 + dx^2y^2 + +over the prime field for p = 2^255 - 19. + +Ed25519 uses the parameters + + a = -1 and + d = -(121665/121666), + +while the legacy curve has + + a = 486664 + d = 486660. 
diff --git a/include/libuecc/ecc.h b/include/libuecc/ecc.h index 4f6b870..1fb6106 100644 --- a/include/libuecc/ecc.h +++ b/include/libuecc/ecc.h @@ -27,6 +27,14 @@ #ifndef _LIBUECC_ECC_H_ #define _LIBUECC_ECC_H_ +#ifndef DEPRECATED +#define DEPRECATED __attribute__((deprecated)) +#endif + + +#include + + /** * A 256 bit integer * @@ -34,7 +42,7 @@ */ typedef union _ecc_int256 { /** Data bytes */ - unsigned char p[32]; + uint8_t p[32]; } ecc_int256_t; /** @@ -44,10 +52,10 @@ typedef union _ecc_int256 { * it should always be packed. */ typedef struct _ecc_25519_work { - unsigned int X[32]; - unsigned int Y[32]; - unsigned int Z[32]; - unsigned int T[32]; + uint32_t X[32]; + uint32_t Y[32]; + uint32_t Z[32]; + uint32_t T[32]; } ecc_25519_work_t; /** @@ -55,22 +63,205 @@ typedef struct _ecc_25519_work { * @{ */ +/** The identity element */ extern const ecc_25519_work_t ecc_25519_work_identity; + + +/** + * The Ed25519 default generator point + * + * \deprecated Use the equivalent \ref ecc_25519_work_default_base instead. + * + **/ +DEPRECATED extern const ecc_25519_work_t ecc_25519_work_base_ed25519; + +/** + * The Ed25519 default generator point + * + * \deprecated Use the equivalent \ref ecc_25519_work_default_base instead. + */ +DEPRECATED extern const ecc_25519_work_t ecc_25519_work_base_legacy; + + +/** + * The Ed25519 default generator point + * + * The order of the base point is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. 
+ */ extern const ecc_25519_work_t ecc_25519_work_default_base; -int ecc_25519_load_xy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y); -void ecc_25519_store_xy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in); -int ecc_25519_load_packed(ecc_25519_work_t *out, const ecc_int256_t *in); -void ecc_25519_store_packed(ecc_int256_t *out, const ecc_25519_work_t *in); +/** Loads a point of the Ed25519 curve with given coordinates into its unpacked representation */ +int ecc_25519_load_xy_ed25519(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y); +/** + * Loads a point of the legacy curve with given coordinates into its unpacked representation + * + * New software should use \ref ecc_25519_load_xy_ed25519, which uses the same curve as the Ed25519 algorithm. + */ +int ecc_25519_load_xy_legacy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y); + +/** + * Loads a point of the legacy curve with given coordinates into its unpacked representation + * + * \deprecated Use \ref ecc_25519_load_xy_legacy + */ +DEPRECATED int ecc_25519_load_xy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y); + + +/** + * Stores the x and y coordinates of a point of the Ed25519 curve + * + * \param x Returns the x coordinate of the point. May be NULL. + * \param y Returns the y coordinate of the point. May be NULL. + * \param in The unpacked point to store. + */ +void ecc_25519_store_xy_ed25519(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in); + +/** + * Stores the x and y coordinates of a point of the legacy curve + * + * New software should use \ref ecc_25519_store_xy_ed25519, which uses the same curve as the Ed25519 algorithm. + * + * \param x Returns the x coordinate of the point. May be NULL. + * \param y Returns the y coordinate of the point. May be NULL. + * \param in The unpacked point to store. 
+ */ +void ecc_25519_store_xy_legacy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in); + +/** + * Stores a point's x and y coordinates + * + * \param x Returns the x coordinate of the point. May be NULL. + * \param y Returns the y coordinate of the point. May be NULL. + * \param in The unpacked point to store. + * + * \deprecated Use \ref ecc_25519_store_xy_legacy + */ +DEPRECATED void ecc_25519_store_xy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in); + + +/** + * Loads a packed point of the Ed25519 curve into its unpacked representation + * + * The packed format is different from the legacy one: the legacy format contains the X coordinate and the parity of the Y coordinate, + * Ed25519 uses the Y coordinate and the parity of the X coordinate. +*/ +int ecc_25519_load_packed_ed25519(ecc_25519_work_t *out, const ecc_int256_t *in); + +/** + * Loads a packed point of the legacy curve into its unpacked representation + * + * New software should use \ref ecc_25519_load_packed_ed25519, which uses the same curve and packed representation as the Ed25519 algorithm. + * + * The packed format is different from the Ed25519 one: the legacy format contains the X coordinate and the parity of the Y coordinate, + * Ed25519 uses the Y coordinate and the parity of the X coordinate. + */ +int ecc_25519_load_packed_legacy(ecc_25519_work_t *out, const ecc_int256_t *in); + +/** + * Loads a packed point of the legacy curve into its unpacked representation + * + * \deprecated Use \ref ecc_25519_load_packed_legacy + */ +DEPRECATED int ecc_25519_load_packed(ecc_25519_work_t *out, const ecc_int256_t *in); + + +/** + * Stores a point of the Ed25519 curve into its packed representation + * + * The packed format is different from the legacy one: the legacy format contains the X coordinate and the parity of the Y coordinate, + * Ed25519 uses the Y coordinate and the parity of the X coordinate. 
+ */ +void ecc_25519_store_packed_ed25519(ecc_int256_t *out, const ecc_25519_work_t *in); + +/** + * Stores a point of the legacy curve into its packed representation + * + * New software should use \ref ecc_25519_store_packed_ed25519, which uses the same curve and packed representation as the Ed25519 algorithm. + * + * The packed format is different from the Ed25519 one: the legacy format contains the X coordinate and the parity of the Y coordinate, + * Ed25519 uses the Y coordinate and the parity of the X coordinate. + */ +void ecc_25519_store_packed_legacy(ecc_int256_t *out, const ecc_25519_work_t *in); + +/** + * Stores a point of the legacy curve into its packed representation + * + * \deprecated Use \ref ecc_25519_store_packed_legacy + */ +DEPRECATED void ecc_25519_store_packed(ecc_int256_t *out, const ecc_25519_work_t *in); + + +/** Checks if a point is the identity element of the Elliptic Curve group */ int ecc_25519_is_identity(const ecc_25519_work_t *in); + +/** + * Negates a point of the Elliptic Curve + * + * The same pointer may be given for input and output. + */ +void ecc_25519_negate(ecc_25519_work_t *out, const ecc_25519_work_t *in); + +/** + * Doubles a point of the Elliptic Curve + * + * ecc_25519_double(out, in) is equivalent to ecc_25519_add(out, in, in), but faster. + * + * The same pointer may be given for input and output. + */ void ecc_25519_double(ecc_25519_work_t *out, const ecc_25519_work_t *in); + +/** + * Adds two points of the Elliptic Curve + * + * The same pointers may be given for input and output. + */ void ecc_25519_add(ecc_25519_work_t *out, const ecc_25519_work_t *in1, const ecc_25519_work_t *in2); +/** + * Subtracts two points of the Elliptic Curve + * + * The same pointers may be given for input and output. 
+ */ +void ecc_25519_sub(ecc_25519_work_t *out, const ecc_25519_work_t *in1, const ecc_25519_work_t *in2); + +/** + * Does a scalar multiplication of a point of the Elliptic Curve with an integer of a given bit length + * + * To speed up scalar multiplication when it is known that not the whole 256 bits of the scalar + * are used. The bit length should always be a constant and not computed at runtime to ensure + * that no timing attacks are possible. + * + * The same pointer may be given for input and output. + **/ void ecc_25519_scalarmult_bits(ecc_25519_work_t *out, const ecc_int256_t *n, const ecc_25519_work_t *base, unsigned bits); + +/** + * Does a scalar multiplication of a point of the Elliptic Curve with an integer + * + * The same pointer may be given for input and output. + **/ void ecc_25519_scalarmult(ecc_25519_work_t *out, const ecc_int256_t *n, const ecc_25519_work_t *base); + +/** + * Does a scalar multiplication of the default base point (generator element) of the Elliptic Curve with an integer of a given bit length + * + * The order of the base point is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. + * + * ecc_25519_scalarmult_base_bits(out, n, bits) is faster than ecc_25519_scalarmult_bits(out, n, &ecc_25519_work_default_base, bits). + * + * See the notes about \ref ecc_25519_scalarmult_bits before using this function. + */ void ecc_25519_scalarmult_base_bits(ecc_25519_work_t *out, const ecc_int256_t *n, unsigned bits); + +/** + * Does a scalar multiplication of the default base point (generator element) of the Elliptic Curve with an integer + * + * The order of the base point is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. + * + * ecc_25519_scalarmult_base(out, n) is faster than ecc_25519_scalarmult(out, n, &ecc_25519_work_default_base). 
+ */ void ecc_25519_scalarmult_base(ecc_25519_work_t *out, const ecc_int256_t *n); /**@}*/ @@ -80,14 +271,61 @@ void ecc_25519_scalarmult_base(ecc_25519_work_t *out, const ecc_int256_t *n); * @{ */ +/** + * The order of the prime field + * + * The order is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. + */ extern const ecc_int256_t ecc_25519_gf_order; + +/** Checks if an integer is equal to zero (after reduction) */ int ecc_25519_gf_is_zero(const ecc_int256_t *in); + +/** + * Adds two integers as Galois field elements + * + * The same pointers may be given for input and output. + */ void ecc_25519_gf_add(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int256_t *in2); + +/** + * Subtracts two integers as Galois field elements + * + * The same pointers may be given for input and output. + */ void ecc_25519_gf_sub(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int256_t *in2); + +/** + * Reduces an integer to a unique representation in the range \f$ [0,q-1] \f$ + * + * The same pointer may be given for input and output. + */ void ecc_25519_gf_reduce(ecc_int256_t *out, const ecc_int256_t *in); + +/** + * Multiplies two integers as Galois field elements + * + * The same pointers may be given for input and output. + */ void ecc_25519_gf_mult(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int256_t *in2); + +/** + * Computes the reciprocal of a Galois field element + * + * The same pointer may be given for input and output. + */ void ecc_25519_gf_recip(ecc_int256_t *out, const ecc_int256_t *in); + +/** + * Ensures some properties of a Galois field element to make it fit for use as a secret key + * + * This sets the 255th bit and clears the 256th and the bottom three bits (so the key + * will be a multiple of 8). See Daniel J. Bernstein's paper "Curve25519: new Diffie-Hellman speed records." + * for the rationale of this. + * + * The same pointer may be given for input and output. 
+ */ void ecc_25519_gf_sanitize_secret(ecc_int256_t *out, const ecc_int256_t *in); /**@}*/ diff --git a/src/ec25519.c b/src/ec25519.c index 736b798..0ed3741 100644 --- a/src/ec25519.c +++ b/src/ec25519.c @@ -25,156 +25,324 @@ */ /** \file - * EC group operations for Twisted Edwards Curve \f$ ax^2 + y^2 = 1 + dx^2y^2 \f$ with - * \f$ a = 486664 \f$ and - * \f$ d = 486660 \f$ + * EC group operations for Twisted Edwards Curve \f$ ax^2 + y^2 = 1 + dx^2y^2 \f$ * on prime field \f$ p = 2^{255} - 19 \f$. * - * The curve is equivalent to the Montgomery Curve used in D. J. Bernstein's + * Two different (isomorphic) sets of curve parameters are supported: + * + * \f$ a = 486664 \f$ and + * \f$ d = 486660 \f$ + * are the parameters used by the original libuecc implementation (till v5). + * To use points on this curve, use the functions with the suffix \em legacy. + * + * The other supported curve uses the parameters + * \f$ a = -1 \f$ and + * \f$ d = -(121665/121666) \f$, + * which is the curve used by the Ed25519 algorithm. The functions for this curve + * have the suffix \em ed25519. + * + * Internally, libuecc always uses the latter representation for its \em work structure. + * + * The curves are equivalent to the Montgomery Curve used in D. J. Bernstein's * Curve25519 Diffie-Hellman algorithm. * * See http://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html for add and * double operations. + * + * Doxygen comments for public APIs can be found in the public header file. + * + * Invariant that must be held by all public API: the components of an + * \ref ecc_25519_work_t are always in the range \f$ [0, 2p) \f$. + * Integers in this range will be called \em squeezed in the following. 
*/ #include -/** The identity element */ const ecc_25519_work_t ecc_25519_work_identity = {{0}, {1}, {1}, {0}}; - -/** The ec25519 default base */ -const ecc_25519_work_t ecc_25519_work_default_base = { - {0xd4, 0x6b, 0xfe, 0x7f, 0x39, 0xfa, 0x8c, 0x22, - 0xe1, 0x96, 0x23, 0xeb, 0x26, 0xb7, 0x8e, 0x6a, - 0x34, 0x74, 0x8b, 0x66, 0xd6, 0xa3, 0x26, 0xdd, - 0x19, 0x5e, 0x9f, 0x21, 0x50, 0x43, 0x7c, 0x54}, +const ecc_25519_work_t ecc_25519_work_base_legacy = { + {0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, + 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69, + 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, + 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21}, {0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66}, {1}, - {0x47, 0x56, 0x98, 0x99, 0xc7, 0x61, 0x0a, 0x82, - 0x1a, 0xdf, 0x82, 0x22, 0x1f, 0x2c, 0x72, 0x88, - 0xc3, 0x29, 0x09, 0x52, 0x78, 0xe9, 0x1e, 0xe4, - 0x47, 0x4b, 0x4c, 0x81, 0xa6, 0x02, 0xfd, 0x29} + {0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d, + 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20, + 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66, + 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67}, +}; + +const ecc_25519_work_t ecc_25519_work_default_base = { + {0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, + 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69, + 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, + 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21}, + {0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66}, + {1}, + {0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d, + 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20, + 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66, + 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67}, }; -static const unsigned int zero[32] = {0}; -static const 
unsigned int one[32] = {1}; +const ecc_25519_work_t ecc_25519_work_base_ed25519 = { + {0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, + 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69, + 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, + 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21}, + {0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66}, + {1}, + {0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d, + 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20, + 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66, + 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67}, +}; + + +static const uint32_t zero[32] = {0}; +static const uint32_t one[32] = {1}; + +static const uint32_t minus1[32] = { + 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, +}; + +/** Ed25519 parameter -(121665/121666) */ +static const uint32_t d[32] = { + 0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75, + 0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a, 0x70, 0x00, + 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c, + 0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52, +}; + + +/** Factor to multiply the X coordinate with to convert from the legacy to the Ed25519 curve */ +static const uint32_t legacy_to_ed25519[32] = { + 0xe7, 0x81, 0xba, 0x00, 0x55, 0xfb, 0x91, 0x33, + 0x7d, 0xe5, 0x82, 0xb4, 0x2e, 0x2c, 0x5e, 0x3a, + 0x81, 0xb0, 0x03, 0xfc, 0x23, 0xf7, 0x84, 0x2d, + 0x44, 0xf9, 0x5f, 0x9f, 0x0b, 0x12, 0xd9, 0x70, +}; + +/** Factor to multiply the X coordinate with to convert from the Ed25519 to the legacy curve */ +static const uint32_t ed25519_to_legacy[32] = { + 0xe9, 0x68, 0x42, 0xdb, 0xaf, 0x04, 0xb4, 0x40, + 0xa1, 0xd5, 0x43, 0xf2, 0xf9, 0x38, 0x31, 0x28, + 0x01, 0x17, 0x05, 0x67, 0x9b, 0x81, 0x61, 0xf8, + 0xa9, 0x5b, 0x3e, 0x6a, 0x20, 0x67, 
0x4b, 0x24, +}; /** Adds two unpacked integers (modulo p) */ -static void add(unsigned int out[32], const unsigned int a[32], const unsigned int b[32]) { +static void add(uint32_t out[32], const uint32_t a[32], const uint32_t b[32]) { unsigned int j; - unsigned int u; - u = 0; - for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } - u += a[31] + b[31]; out[31] = u; -} + uint32_t u; -/** Subtracts two unpacked integers (modulo p) */ -static void sub(unsigned int out[32], const unsigned int a[32], const unsigned int b[32]) { - unsigned int j; - unsigned int u; - u = 218; - for (j = 0;j < 31;++j) { - u += a[j] + 65280 - b[j]; + u = 0; + + for (j = 0; j < 31; j++) { + u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } + + u += a[31] + b[31]; + out[31] = u; +} + +/** + * Subtracts two unpacked integers (modulo p) + * + * b must be \em squeezed. + */ +static void sub(uint32_t out[32], const uint32_t a[32], const uint32_t b[32]) { + unsigned int j; + uint32_t u; + + u = 218; + + for (j = 0;j < 31;++j) { + u += a[j] + UINT32_C(65280) - b[j]; + out[j] = u & 255; + u >>= 8; + } + u += a[31] - b[31]; out[31] = u; } -/** Performs carry and reduce on an unpacked integer */ -static void squeeze(unsigned int a[32]) { +/** + * Performs carry and reduce on an unpacked integer + * + * The result is not always fully reduced, but it will be significantly smaller than \f$ 2p \f$. 
+ */ +static void squeeze(uint32_t a[32]) { unsigned int j; - unsigned int u; + uint32_t u; + u = 0; - for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } - u += a[31]; a[31] = u & 127; + + for (j = 0;j < 31;++j) { + u += a[j]; + a[j] = u & 255; + u >>= 8; + } + + u += a[31]; + a[31] = u & 127; u = 19 * (u >> 7); - for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } - u += a[31]; a[31] = u; + + for (j = 0;j < 31;++j) { + u += a[j]; + a[j] = u & 255; + u >>= 8; + } + + u += a[31]; + a[31] = u; } + +static const uint32_t minusp[32] = { + 19, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 128 +}; + /** * Ensures that the output of a previous \ref squeeze is fully reduced * - * After a \ref freeze, only the lower byte of each integer part holds a meaningful value + * After a \ref freeze, only the lower byte of each integer part holds a meaningful value. */ -static void freeze(unsigned int a[32]) { - static const unsigned int minusp[32] = { - 19, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 128 - }; - - unsigned int aorig[32]; +static void freeze(uint32_t a[32]) { + uint32_t aorig[32]; unsigned int j; - unsigned int negative; + uint32_t negative; - for (j = 0; j < 32; j++) aorig[j] = a[j]; + for (j = 0; j < 32; j++) + aorig[j] = a[j]; add(a, a, minusp); negative = -((a[31] >> 7) & 1); - for (j = 0; j < 32; j++) a[j] ^= negative & (aorig[j] ^ a[j]); + + for (j = 0; j < 32; j++) + a[j] ^= negative & (aorig[j] ^ a[j]); } -/** Multiplies two unpacked integers (modulo p) */ -static void mult(unsigned int out[32], const unsigned int a[32], const unsigned int b[32]) { - unsigned int i; - unsigned int j; - unsigned int u; +/** + * Returns the parity (lowest bit of the fully reduced value) of a + * + * The input must be \em squeezed. 
+ */ +static int parity(const uint32_t a[32]) { + uint32_t b[32]; + + add(b, a, minusp); + return (a[0] ^ (b[31] >> 7) ^ 1) & 1; +} + +/** + * Multiplies two unpacked integers (modulo p) + * + * The result will be \em squeezed. + */ +static void mult(uint32_t out[32], const uint32_t a[32], const uint32_t b[32]) { + unsigned int i, j; + uint32_t u; for (i = 0; i < 32; ++i) { u = 0; - for (j = 0;j <= i;++j) u += a[j] * b[i - j]; - for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; + + for (j = 0; j <= i; j++) + u += a[j] * b[i - j]; + + for (j = i + 1; j < 32; j++) + u += 38 * a[j] * b[i + 32 - j]; + out[i] = u; } + squeeze(out); } -/** Multiplies an unpacked integer with a small integer (modulo p) */ -static void mult_int(unsigned int out[32], unsigned int n, const unsigned int a[32]) { +/** + * Multiplies an unpacked integer with a small integer (modulo p) + * + * The result will be \em squeezed. + */ +static void mult_int(uint32_t out[32], uint32_t n, const uint32_t a[32]) { unsigned int j; - unsigned int u; + uint32_t u; u = 0; - for (j = 0;j < 31;++j) { u += n * a[j]; out[j] = u & 255; u >>= 8; } + + for (j = 0; j < 31; j++) { + u += n * a[j]; + out[j] = u & 255; + u >>= 8; + } + u += n * a[31]; out[31] = u & 127; u = 19 * (u >> 7); - for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } - u += out[j]; out[j] = u; + + for (j = 0; j < 31; j++) { + u += out[j]; + out[j] = u & 255; + u >>= 8; + } + + u += out[j]; + out[j] = u; } -/** Squares an unpacked integer */ -static void square(unsigned int out[32], const unsigned int a[32]) { - unsigned int i; - unsigned int j; - unsigned int u; +/** + * Squares an unpacked integer + * + * The result will be \em squeezed. 
+ */ +static void square(uint32_t out[32], const uint32_t a[32]) { + unsigned int i, j; + uint32_t u; - for (i = 0; i < 32; ++i) { + for (i = 0; i < 32; i++) { u = 0; - for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; - for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; + + for (j = 0; j < i - j; j++) + u += a[j] * a[i - j]; + + for (j = i + 1; j < i + 32 - j; j++) + u += 38 * a[j] * a[i + 32 - j]; + u *= 2; + if ((i & 1) == 0) { u += a[i / 2] * a[i / 2]; u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; } + out[i] = u; } + squeeze(out); } /** Checks for the equality of two unpacked integers */ -static int check_equal(const unsigned int x[32], const unsigned int y[32]) { - unsigned int differentbits = 0; +static int check_equal(const uint32_t x[32], const uint32_t y[32]) { + uint32_t differentbits = 0; int i; for (i = 0; i < 32; i++) { @@ -186,12 +354,12 @@ static int check_equal(const unsigned int x[32], const unsigned int y[32]) { } /** - * Checks if an unpacked integer equals zero + * Checks if an unpacked integer equals zero (modulo p) * - * The intergers must be must be \ref squeeze "squeezed" before. + * The integer must be squeezed before. 
*/ -static int check_zero(const unsigned int x[32]) { - static const unsigned int p[32] = { +static int check_zero(const uint32_t x[32]) { + static const uint32_t p[32] = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -202,10 +370,10 @@ static int check_zero(const unsigned int x[32]) { } /** Copies r to out when b == 0, s when b == 1 */ -static void selectw(ecc_25519_work_t *out, const ecc_25519_work_t *r, const ecc_25519_work_t *s, unsigned int b) { +static void selectw(ecc_25519_work_t *out, const ecc_25519_work_t *r, const ecc_25519_work_t *s, uint32_t b) { unsigned int j; - unsigned int t; - unsigned int bminus1; + uint32_t t; + uint32_t bminus1; bminus1 = b - 1; for (j = 0; j < 32; ++j) { @@ -224,10 +392,10 @@ static void selectw(ecc_25519_work_t *out, const ecc_25519_work_t *r, const ecc_ } /** Copies r to out when b == 0, s when b == 1 */ -static void select(unsigned int out[32], const unsigned int r[32], const unsigned int s[32], unsigned int b) { +static void select(uint32_t out[32], const uint32_t r[32], const uint32_t s[32], uint32_t b) { unsigned int j; - unsigned int t; - unsigned int bminus1; + uint32_t t; + uint32_t bminus1; bminus1 = b - 1; for (j = 0;j < 32;++j) { @@ -241,15 +409,8 @@ static void select(unsigned int out[32], const unsigned int r[32], const unsigne * * If the given integer has no square root, 0 is returned, 1 otherwise. 
*/ -static int square_root(unsigned int out[32], const unsigned int z[32]) { - static const unsigned int minus1[32] = { - 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f - }; - - static const unsigned int rho_s[32] = { +static int square_root(uint32_t out[32], const uint32_t z[32]) { + static const uint32_t rho_s[32] = { 0xb0, 0xa0, 0x0e, 0x4a, 0x27, 0x1b, 0xee, 0xc4, 0x78, 0xe4, 0x2f, 0xad, 0x06, 0x18, 0x43, 0x2f, 0xa7, 0xd7, 0xfb, 0x3d, 0x99, 0x00, 0x4d, 0x2b, @@ -258,18 +419,18 @@ static int square_root(unsigned int out[32], const unsigned int z[32]) { /* raise z to power (2^252-2), check if power (2^253-5) equals -1 */ - unsigned int z2[32]; - unsigned int z9[32]; - unsigned int z11[32]; - unsigned int z2_5_0[32]; - unsigned int z2_10_0[32]; - unsigned int z2_20_0[32]; - unsigned int z2_50_0[32]; - unsigned int z2_100_0[32]; - unsigned int t0[32]; - unsigned int t1[32]; - unsigned int z2_252_1[32]; - unsigned int z2_252_1_rho_s[32]; + uint32_t z2[32]; + uint32_t z9[32]; + uint32_t z11[32]; + uint32_t z2_5_0[32]; + uint32_t z2_10_0[32]; + uint32_t z2_20_0[32]; + uint32_t z2_50_0[32]; + uint32_t z2_100_0[32]; + uint32_t t0[32]; + uint32_t t1[32]; + uint32_t z2_252_1[32]; + uint32_t z2_252_1_rho_s[32]; int i; /* 2 */ square(z2, z); @@ -335,17 +496,17 @@ static int square_root(unsigned int out[32], const unsigned int z[32]) { } /** Computes the reciprocal of an unpacked integer (in the prime field modulo p) */ -static void recip(unsigned int out[32], const unsigned int z[32]) { - unsigned int z2[32]; - unsigned int z9[32]; - unsigned int z11[32]; - unsigned int z2_5_0[32]; - unsigned int z2_10_0[32]; - unsigned int z2_20_0[32]; - unsigned int z2_50_0[32]; - unsigned int z2_100_0[32]; - unsigned int t0[32]; - unsigned int t1[32]; +static void recip(uint32_t out[32], const uint32_t z[32]) { + uint32_t z2[32]; + uint32_t 
z9[32]; + uint32_t z11[32]; + uint32_t z2_5_0[32]; + uint32_t z2_10_0[32]; + uint32_t z2_20_0[32]; + uint32_t z2_50_0[32]; + uint32_t z2_100_0[32]; + uint32_t t0[32]; + uint32_t t1[32]; int i; /* 2 */ square(z2, z); @@ -401,10 +562,37 @@ static void recip(unsigned int out[32], const unsigned int z[32]) { /* 2^255 - 21 */ mult(out, t1, z11); } -/** Loads a point with given coordinates into its unpacked representation */ -int ecc_25519_load_xy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y) { +/** + * Checks if the X and Y coordinates of a work structure represent a valid point of the curve + * + * Also fills in the T coordinate. + */ +static int check_load_xy(ecc_25519_work_t *val) { + uint32_t X2[32], Y2[32], dX2[32], dX2Y2[32], Y2_X2[32], Y2_X2_1[32], r[32]; + + /* Check validity */ + square(X2, val->X); + square(Y2, val->Y); + + mult(dX2, d, X2); + mult(dX2Y2, dX2, Y2); + + sub(Y2_X2, Y2, X2); + sub(Y2_X2_1, Y2_X2, one); + + sub(r, Y2_X2_1, dX2Y2); + squeeze(r); + + if (!check_zero(r)) + return 0; + + mult(val->T, val->X, val->Y); + + return 1; +} + +int ecc_25519_load_xy_ed25519(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y) { int i; - unsigned int X2[32], Y2[32], aX2[32], dX2[32], dX2Y2[32], aX2_Y2[32], _1_dX2Y2[32], r[32]; for (i = 0; i < 32; i++) { out->X[i] = x->p[i]; @@ -412,34 +600,31 @@ int ecc_25519_load_xy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_in out->Z[i] = (i == 0); } - /* Check validity */ - square(X2, out->X); - square(Y2, out->Y); - mult_int(aX2, 486664, X2); - mult_int(dX2, 486660, X2); - mult(dX2Y2, dX2, Y2); - add(aX2_Y2, aX2, Y2); - add(_1_dX2Y2, one, dX2Y2); - sub(r, aX2_Y2, _1_dX2Y2); - squeeze(r); - - if (!check_zero(r)) - return 0; - - mult(out->T, out->X, out->Y); - - return 1; + return check_load_xy(out); } -/** - * Stores a point's x and y coordinates - * - * \param x Returns the x coordinate of the point. May be NULL. - * \param y Returns the y coordinate of the point. 
May be NULL. - * \param in The unpacked point to store. - */ -void ecc_25519_store_xy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in) { - unsigned int X[32], Y[32], Z[32]; +int ecc_25519_load_xy_legacy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y) { + int i; + uint32_t tmp[32]; + + for (i = 0; i < 32; i++) { + tmp[i] = x->p[i]; + out->Y[i] = y->p[i]; + out->Z[i] = (i == 0); + } + + mult(out->X, tmp, legacy_to_ed25519); + + return check_load_xy(out); +} + +int ecc_25519_load_xy(ecc_25519_work_t *out, const ecc_int256_t *x, const ecc_int256_t *y) { + return ecc_25519_load_xy_legacy(out, x, y); +} + + +void ecc_25519_store_xy_ed25519(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in) { + uint32_t X[32], Y[32], Z[32]; int i; recip(Z, in->Z); @@ -459,22 +644,80 @@ void ecc_25519_store_xy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t } } -/** Loads a packed point into its unpacked representation */ -int ecc_25519_load_packed(ecc_25519_work_t *out, const ecc_int256_t *in) { +void ecc_25519_store_xy_legacy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in) { + uint32_t X[32], tmp[32], Y[32], Z[32]; int i; - unsigned int X2[32] /* X^2 */, aX2[32] /* aX^2 */, dX2[32] /* dX^2 */, _1_aX2[32] /* 1-aX^2 */, _1_dX2[32] /* 1-aX^2 */; - unsigned int _1_1_dX2[32] /* 1/(1-aX^2) */, Y2[32] /* Y^2 */, Y[32], Yt[32]; + + recip(Z, in->Z); + + if (x) { + mult(tmp, Z, in->X); + mult(X, tmp, ed25519_to_legacy); + freeze(X); + for (i = 0; i < 32; i++) + x->p[i] = X[i]; + } + + if (y) { + mult(Y, Z, in->Y); + freeze(Y); + for (i = 0; i < 32; i++) + y->p[i] = Y[i]; + } +} + +void ecc_25519_store_xy(ecc_int256_t *x, ecc_int256_t *y, const ecc_25519_work_t *in) { + ecc_25519_store_xy_legacy(x, y, in); +} + + +int ecc_25519_load_packed_ed25519(ecc_25519_work_t *out, const ecc_int256_t *in) { + int i; + uint32_t Y2[32] /* Y^2 */, dY2[32] /* dY^2 */, Y2_1[32] /* Y^2-1 */, dY2_1[32] /* dY^2+1 */, _1_dY2_1[32] /* 1/(dY^2+1) */; + 
uint32_t X2[32] /* X^2 */, X[32], Xt[32]; for (i = 0; i < 32; i++) { - out->X[i] = in->p[i]; + out->Y[i] = in->p[i]; out->Z[i] = (i == 0); } - out->X[31] &= 0x7f; + out->Y[31] &= 0x7f; - square(X2, out->X); - mult_int(aX2, 486664, X2); - mult_int(dX2, 486660, X2); + square(Y2, out->Y); + mult(dY2, d, Y2); + sub(Y2_1, Y2, one); + add(dY2_1, dY2, one); + recip(_1_dY2_1, dY2_1); + mult(X2, Y2_1, _1_dY2_1); + + if (!square_root(X, X2)) + return 0; + + /* No squeeze is necessary after subtractions from zero if the subtrahend is squeezed */ + sub(Xt, zero, X); + + select(out->X, X, Xt, (in->p[31] >> 7) ^ parity(X)); + + mult(out->T, out->X, out->Y); + + return 1; +} + +int ecc_25519_load_packed_legacy(ecc_25519_work_t *out, const ecc_int256_t *in) { + int i; + uint32_t X2[32] /* X^2 */, aX2[32] /* aX^2 */, dX2[32] /* dX^2 */, _1_aX2[32] /* 1-aX^2 */, _1_dX2[32] /* 1-dX^2 */; + uint32_t _1_1_dX2[32] /* 1/(1-dX^2) */, Y2[32] /* Y^2 */, Y[32], Yt[32], X_legacy[32]; + + for (i = 0; i < 32; i++) { + X_legacy[i] = in->p[i]; + out->Z[i] = (i == 0); + } + + X_legacy[31] &= 0x7f; + + square(X2, X_legacy); + mult_int(aX2, UINT32_C(486664), X2); + mult_int(dX2, UINT32_C(486660), X2); sub(_1_aX2, one, aX2); sub(_1_dX2, one, dX2); recip(_1_1_dX2, _1_dX2); @@ -483,26 +726,43 @@ int ecc_25519_load_packed(ecc_25519_work_t *out, const ecc_int256_t *in) { if (!square_root(Y, Y2)) return 0; + /* No squeeze is necessary after subtractions from zero if the subtrahend is squeezed */ sub(Yt, zero, Y); - select(out->Y, Y, Yt, (in->p[31] >> 7) ^ (Y[0] & 1)); + select(out->Y, Y, Yt, (in->p[31] >> 7) ^ parity(Y)); + mult(out->X, X_legacy, legacy_to_ed25519); mult(out->T, out->X, out->Y); return 1; } -/** Stores a point into its packed representation */ -void ecc_25519_store_packed(ecc_int256_t *out, const ecc_25519_work_t *in) { +int ecc_25519_load_packed(ecc_25519_work_t *out, const ecc_int256_t *in) { + return ecc_25519_load_packed_legacy(out, in); +} + + +void
ecc_25519_store_packed_ed25519(ecc_int256_t *out, const ecc_25519_work_t *in) { + ecc_int256_t x; + + ecc_25519_store_xy_ed25519(&x, out, in); + out->p[31] |= (x.p[0] << 7); +} + +void ecc_25519_store_packed_legacy(ecc_int256_t *out, const ecc_25519_work_t *in) { ecc_int256_t y; - ecc_25519_store_xy(out, &y, in); + ecc_25519_store_xy_legacy(out, &y, in); out->p[31] |= (y.p[0] << 7); } -/** Checks if a point is the identity element of the Elliptic Curve group */ +void ecc_25519_store_packed(ecc_int256_t *out, const ecc_25519_work_t *in) { + ecc_25519_store_packed_legacy(out, in); +} + + int ecc_25519_is_identity(const ecc_25519_work_t *in) { - unsigned int Y_Z[32]; + uint32_t Y_Z[32]; sub(Y_Z, in->Y, in->Z); squeeze(Y_Z); @@ -510,71 +770,117 @@ int ecc_25519_is_identity(const ecc_25519_work_t *in) { return (check_zero(in->X)&check_zero(Y_Z)); } -/** - * Doubles a point of the Elliptic Curve - * - * ecc_25519_double(out, in) is equivalent to ecc_25519_add(out, in, in), but faster. - * - * The same pointers may be used for input and output. 
- */ +void ecc_25519_negate(ecc_25519_work_t *out, const ecc_25519_work_t *in) { + int i; + + for (i = 0; i < 32; i++) { + out->Y[i] = in->Y[i]; + out->Z[i] = in->Z[i]; + } + + /* No squeeze is necessary after subtractions from zero if the subtrahend is squeezed */ + sub(out->X, zero, in->X); + sub(out->T, zero, in->T); +} + void ecc_25519_double(ecc_25519_work_t *out, const ecc_25519_work_t *in) { - unsigned int A[32], B[32], C[32], D[32], E[32], F[32], G[32], H[32], t0[32], t1[32], t2[32], t3[32]; + uint32_t A[32], B[32], C[32], D[32], E[32], F[32], G[32], H[32], t0[32], t1[32]; square(A, in->X); + square(B, in->Y); + square(t0, in->Z); mult_int(C, 2, t0); - mult_int(D, 486664, A); - add(t1, in->X, in->Y); - square(t2, t1); - sub(t3, t2, A); squeeze(t3); - sub(E, t3, B); - add(G, D, B); squeeze(G); + + sub(D, zero, A); + + add(t0, in->X, in->Y); + square(t1, t0); + sub(t0, t1, A); + sub(E, t0, B); + + add(G, D, B); sub(F, G, C); sub(H, D, B); + mult(out->X, E, F); mult(out->Y, G, H); mult(out->T, E, H); mult(out->Z, F, G); } -/** - * Adds two points of the Elliptic Curve - * - * The same pointers may be used for input and output. 
- */ void ecc_25519_add(ecc_25519_work_t *out, const ecc_25519_work_t *in1, const ecc_25519_work_t *in2) { - unsigned int A[32], B[32], C[32], D[32], E[32], F[32], G[32], H[32], t0[32], t1[32], t2[32], t3[32], t4[32], t5[32]; + const uint32_t j = UINT32_C(60833); + const uint32_t k = UINT32_C(121665); + uint32_t A[32], B[32], C[32], D[32], E[32], F[32], G[32], H[32], t0[32], t1[32]; - mult(A, in1->X, in2->X); - mult(B, in1->Y, in2->Y); - mult_int(t0, 486660, in2->T); + sub(t0, in1->Y, in1->X); + mult_int(t1, j, t0); + sub(t0, in2->Y, in2->X); + mult(A, t0, t1); + + add(t0, in1->Y, in1->X); + mult_int(t1, j, t0); + add(t0, in2->Y, in2->X); + mult(B, t0, t1); + + mult_int(t0, k, in2->T); mult(C, in1->T, t0); - mult(D, in1->Z, in2->Z); - add(t1, in1->X, in1->Y); - add(t2, in2->X, in2->Y); - mult(t3, t1, t2); - sub(t4, t3, A); squeeze(t4); - sub(E, t4, B); - sub(F, D, C); - add(G, D, C); - mult_int(t5, 486664, A); - sub(H, B, t5); + + mult_int(t0, 2*j, in2->Z); + mult(D, in1->Z, t0); + + sub(E, B, A); + add(F, D, C); + sub(G, D, C); + add(H, B, A); + mult(out->X, E, F); mult(out->Y, G, H); mult(out->T, E, H); mult(out->Z, F, G); } -/** - * Does a scalar multiplication of a point of the Elliptic Curve with an integer of a given bit length - * - * To speed up scalar multiplication when it is known that not the whole 256 bits of the scalar - * are used. The bit length should always be a constant and not computed at runtime to ensure - * that no timing attacks are possible. - * - * The same pointers may be used for input and output. 
- **/ +/** Adds two points of the Elliptic Curve, assuming that in2->Z == 1 */ +static void ecc_25519_add1(ecc_25519_work_t *out, const ecc_25519_work_t *in1, const ecc_25519_work_t *in2) { + const uint32_t j = UINT32_C(60833); + const uint32_t k = UINT32_C(121665); + uint32_t A[32], B[32], C[32], D[32], E[32], F[32], G[32], H[32], t0[32], t1[32]; + + sub(t0, in1->Y, in1->X); + mult_int(t1, j, t0); + sub(t0, in2->Y, in2->X); + mult(A, t0, t1); + + add(t0, in1->Y, in1->X); + mult_int(t1, j, t0); + add(t0, in2->Y, in2->X); + mult(B, t0, t1); + + mult_int(t0, k, in2->T); + mult(C, in1->T, t0); + + mult_int(D, 2*j, in1->Z); + + sub(E, B, A); + add(F, D, C); + sub(G, D, C); + add(H, B, A); + + mult(out->X, E, F); + mult(out->Y, G, H); + mult(out->T, E, H); + mult(out->Z, F, G); +} + +void ecc_25519_sub(ecc_25519_work_t *out, const ecc_25519_work_t *in1, const ecc_25519_work_t *in2) { + ecc_25519_work_t in2_neg; + + ecc_25519_negate(&in2_neg, in2); + ecc_25519_add(out, in1, &in2_neg); +} + void ecc_25519_scalarmult_bits(ecc_25519_work_t *out, const ecc_int256_t *n, const ecc_25519_work_t *base, unsigned bits) { ecc_25519_work_t Q2, Q2p; ecc_25519_work_t cur = ecc_25519_work_identity; @@ -595,31 +901,30 @@ void ecc_25519_scalarmult_bits(ecc_25519_work_t *out, const ecc_int256_t *n, con *out = cur; } -/** - * Does a scalar multiplication of a point of the Elliptic Curve with an integer - * - * The same pointers may be used for input and output. - **/ void ecc_25519_scalarmult(ecc_25519_work_t *out, const ecc_int256_t *n, const ecc_25519_work_t *base) { ecc_25519_scalarmult_bits(out, n, base, 256); } -/** - * Does a scalar multiplication of the default base point (generator element) of the Elliptic Curve with an integer of a given bit length - * - * The order of the base point is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. - * - * See the notes about \ref ecc_25519_scalarmult_bits before using this function. 
- */ void ecc_25519_scalarmult_base_bits(ecc_25519_work_t *out, const ecc_int256_t *n, unsigned bits) { - ecc_25519_scalarmult_bits(out, n, &ecc_25519_work_default_base, bits); + ecc_25519_work_t Q2, Q2p; + ecc_25519_work_t cur = ecc_25519_work_identity; + int b, pos; + + if (bits > 256) + bits = 256; + + for (pos = bits - 1; pos >= 0; --pos) { + b = n->p[pos / 8] >> (pos & 7); + b &= 1; + + ecc_25519_double(&Q2, &cur); + ecc_25519_add1(&Q2p, &Q2, &ecc_25519_work_default_base); + selectw(&cur, &Q2, &Q2p, b); + } + + *out = cur; } -/** - * Does a scalar multiplication of the default base point (generator element) of the Elliptic Curve with an integer - * - * The order of the base point is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. - */ void ecc_25519_scalarmult_base(ecc_25519_work_t *out, const ecc_int256_t *n) { - ecc_25519_scalarmult(out, n, &ecc_25519_work_default_base); + ecc_25519_scalarmult_base_bits(out, n, 256); } diff --git a/src/ec25519_gf.c b/src/ec25519_gf.c index 4914fa7..11d2989 100644 --- a/src/ec25519_gf.c +++ b/src/ec25519_gf.c @@ -25,26 +25,23 @@ */ /** \file - Simple finite field operations on the prime field \f$ F_q \f$ for - \f$ q = 2^{252} + 27742317777372353535851937790883648493 \f$, which - is the order of the base point used for ec25519 -*/ + * Simple finite field operations on the prime field \f$ F_q \f$ for + * \f$ q = 2^{252} + 27742317777372353535851937790883648493 \f$, which + * is the order of the base point used for ec25519 + * + * Doxygen comments for public APIs can be found in the public header file. 
+ */ #include -/** Checks if the highest bit of an unsigned integer is set */ +/** Checks if the highest bit of an unsigned integer is set */ #define IS_NEGATIVE(n) ((int)((((unsigned)n) >> (8*sizeof(n)-1))&1)) /** Performs an arithmetic right shift */ #define ASR(n,s) (((n) >> s)|(IS_NEGATIVE(n)*((unsigned)-1) << (8*sizeof(n)-s))) -/** - * The order of the prime field - * - * The order is \f$ 2^{252} + 27742317777372353535851937790883648493 \f$. - */ const ecc_int256_t ecc_25519_gf_order = {{ 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14, @@ -53,15 +50,15 @@ const ecc_int256_t ecc_25519_gf_order = {{ }}; /** An internal alias for \ref ecc_25519_gf_order */ -static const unsigned char *q = ecc_25519_gf_order.p; +static const uint8_t *q = ecc_25519_gf_order.p; /** * Copies the content of r into out if b == 0, the contents of s if b == 1 */ -static void select(unsigned char out[32], const unsigned char r[32], const unsigned char s[32], unsigned int b) { +static void select(uint8_t out[32], const uint8_t r[32], const uint8_t s[32], uint32_t b) { unsigned int j; - unsigned int t; - unsigned int bminus1; + uint8_t t; + uint8_t bminus1; bminus1 = b - 1; for (j = 0;j < 32;++j) { @@ -70,11 +67,10 @@ static void select(unsigned char out[32], const unsigned char r[32], const unsig } } -/** Checks if an integer is equal to zero (after reduction) */ int ecc_25519_gf_is_zero(const ecc_int256_t *in) { int i; ecc_int256_t r; - unsigned int bits = 0; + uint32_t bits = 0; ecc_25519_gf_reduce(&r, in); @@ -84,14 +80,9 @@ int ecc_25519_gf_is_zero(const ecc_int256_t *in) { return (((bits-1)>>8) & 1); } -/** - * Adds two integers as Galois field elements - * - * The same pointers may be used for input and output.
- */ void ecc_25519_gf_add(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int256_t *in2) { unsigned int j; - unsigned int u; + uint32_t u; int nq = 1 - (in1->p[31]>>4) - (in2->p[31]>>4); u = 0; @@ -103,14 +94,9 @@ void ecc_25519_gf_add(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int2 } } -/** - * Subtracts two integers as Galois field elements - * - * The same pointers may be used for input and output. - */ void ecc_25519_gf_sub(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int256_t *in2) { unsigned int j; - unsigned int u; + uint32_t u; int nq = 8 - (in1->p[31]>>4) + (in2->p[31]>>4); u = 0; @@ -123,11 +109,11 @@ void ecc_25519_gf_sub(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int2 } /** Reduces an integer to a unique representation in the range \f$ [0,q-1] \f$ */ -static void reduce(unsigned char a[32]) { +static void reduce(uint8_t a[32]) { unsigned int j; - unsigned int nq = a[31] >> 4; - unsigned int u1, u2; - unsigned char out1[32], out2[32]; + uint32_t nq = a[31] >> 4; + uint32_t u1, u2; + uint8_t out1[32], out2[32]; u1 = u2 = 0; for (j = 0; j < 31; ++j) { @@ -145,11 +131,6 @@ static void reduce(unsigned char a[32]) { select(a, out1, out2, IS_NEGATIVE(u1)); } -/** - * Reduces an integer to a unique representation in the range \f$ [0,q-1] \f$ - * - * The same pointers may be used for input and output. 
- */ void ecc_25519_gf_reduce(ecc_int256_t *out, const ecc_int256_t *in) { int i; @@ -160,10 +141,10 @@ void ecc_25519_gf_reduce(ecc_int256_t *out, const ecc_int256_t *in) { } /** Montgomery modular multiplication algorithm */ -static void montgomery(unsigned char out[32], const unsigned char a[32], const unsigned char b[32]) { +static void montgomery(uint8_t out[32], const uint8_t a[32], const uint8_t b[32]) { unsigned int i, j; - unsigned int nq; - unsigned int u; + uint32_t nq; + uint32_t u; for (i = 0; i < 32; i++) out[i] = 0; @@ -183,22 +164,17 @@ static void montgomery(unsigned char out[32], const unsigned char a[32], const u } } -/** - * Multiplies two integers as Galois field elements - * - * The same pointers may be used for input and output. - */ void ecc_25519_gf_mult(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int256_t *in2) { /* 2^512 mod q */ - static const unsigned char C[32] = { + static const uint8_t C[32] = { 0x01, 0x0f, 0x9c, 0x44, 0xe3, 0x11, 0x06, 0xa4, 0x47, 0x93, 0x85, 0x68, 0xa7, 0x1b, 0x0e, 0xd0, 0x65, 0xbe, 0xf5, 0x17, 0xd2, 0x73, 0xec, 0xce, 0x3d, 0x9a, 0x30, 0x7c, 0x1b, 0x41, 0x99, 0x03 }; - unsigned char B[32]; - unsigned char R[32]; + uint8_t B[32]; + uint8_t R[32]; unsigned int i; for (i = 0; i < 32; i++) @@ -210,18 +186,13 @@ void ecc_25519_gf_mult(ecc_int256_t *out, const ecc_int256_t *in1, const ecc_int montgomery(out->p, R, C); } -/** - * Computes the reciprocal of a Galois field element - * - * The same pointers may be used for input and output. 
- */ void ecc_25519_gf_recip(ecc_int256_t *out, const ecc_int256_t *in) { - static const unsigned char C[32] = { + static const uint8_t C[32] = { 0x01 }; - unsigned char A[32], B[32]; - unsigned char R1[32], R2[32]; + uint8_t A[32], B[32]; + uint8_t R1[32], R2[32]; int use_r2 = 0; unsigned int i, j; @@ -233,7 +204,7 @@ void ecc_25519_gf_recip(ecc_int256_t *out, const ecc_int256_t *in) { reduce(A); for (i = 0; i < 32; i++) { - unsigned char c; + uint8_t c; if (i == 0) c = 0xeb; /* q[0] - 2 */ @@ -268,15 +239,6 @@ void ecc_25519_gf_recip(ecc_int256_t *out, const ecc_int256_t *in) { montgomery(out->p, R2, C); } -/** - * Ensures some properties of a Galois field element to make it fit for use as a secret key - * - * This sets the 255th bit and clears the 256th and the bottom three bits (so the key - * will be a multiple of 8). See Daniel J. Bernsteins paper "Curve25519: new Diffie-Hellman speed records." - * for the rationale of this. - * - * The same pointers may be used for input and output. - */ void ecc_25519_gf_sanitize_secret(ecc_int256_t *out, const ecc_int256_t *in) { int i;