summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Matthias Schiffer <mschiffer@universe-factory.net> 2013-11-28 19:58:58 +0100
committer Matthias Schiffer <mschiffer@universe-factory.net> 2013-11-28 19:58:58 +0100
commit 54c3e6adf04b0a5ef4d13c2306ce5a380fc75205 (patch)
tree c56a5a1786084df86efa4a48ed55cde1117247fe /src
parent 793c8db37ce333afdb9557ad12d1cbeb5d85a670 (diff)
download fastd-54c3e6adf04b0a5ef4d13c2306ce5a380fc75205.tar
fastd-54c3e6adf04b0a5ef4d13c2306ce5a380fc75205.zip
ghash-pclmulqdq: make byteswap a macro
For some reason, this allows clang to optimize the code much better.
Diffstat (limited to 'src')
-rw-r--r-- src/crypto/mac/ghash/pclmulqdq/ghash_pclmulqdq_impl.c 17
1 file changed, 9 insertions, 8 deletions
diff --git a/src/crypto/mac/ghash/pclmulqdq/ghash_pclmulqdq_impl.c b/src/crypto/mac/ghash/pclmulqdq/ghash_pclmulqdq_impl.c
index 7b162ba..441e2d7 100644
--- a/src/crypto/mac/ghash/pclmulqdq/ghash_pclmulqdq_impl.c
+++ b/src/crypto/mac/ghash/pclmulqdq/ghash_pclmulqdq_impl.c
@@ -43,17 +43,15 @@ static inline v2di shr(v2di v, int a) {
return (v2di){{tmph.e[0]|tmpl.e[1], tmph.e[1]}};
}
-static inline v2di byteswap(v2di v) {
- const v2di shuffle = { .v16 = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};
- v.v16 = __builtin_ia32_pshufb128(v.v16, shuffle.v16);
- return v;
-}
+static const v2di BYTESWAP_SHUFFLE = { .v16 = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};
+
+#define BYTESWAP(v) ({ (v).v16 = __builtin_ia32_pshufb128((v).v16, BYTESWAP_SHUFFLE.v16); })
fastd_mac_state_t* fastd_ghash_pclmulqdq_init_state(fastd_context_t *ctx UNUSED, const fastd_mac_context_t *mctx UNUSED, const uint8_t *key) {
fastd_mac_state_t *state = malloc(sizeof(fastd_mac_state_t));
memcpy(&state->H, key, sizeof(v2di));
- state->H = byteswap(state->H);
+ BYTESWAP(state->H);
return state;
}
@@ -98,11 +96,14 @@ bool fastd_ghash_pclmulqdq_hash(fastd_context_t *ctx UNUSED, const fastd_mac_sta
size_t i;
for (i = 0; i < n_blocks; i++) {
- v.v ^= byteswap(inv[i]).v;
+ v2di b = inv[i];
+ BYTESWAP(b);
+ v.v ^= b.v;
v = gmul(v, state->H);
}
- *out = byteswap(v).block;
+ BYTESWAP(v);
+ *out = v.block;
return true;
}