2 files changed, 57 insertions, 10 deletions
diff --git a/TODO b/TODO
index 9271316..cfad917 100644
--- a/TODO
+++ b/TODO
@@ -32,8 +32,6 @@ Core
 
 - OSPF: refuse running on non-multicast devices
 
-- lib: use better checksum function
-
 Cleanup
 ~~~~~~~
 - right usage of DBG vs. debug
diff --git a/lib/checksum.c b/lib/checksum.c
index 4dfa252..94cf71e 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -11,6 +11,62 @@
 #include "nest/bird.h"
 #include "checksum.h"
 
+static u16				/* One's-complement addition of two u16 values (add with end-around carry) */
+add16(u16 sum, u16 x)
+{
+  u16 z = sum + x;			/* stored back into a u16, so any carry out of the top bit is dropped here */
+  return z + (z < sum);			/* z < sum means the addition wrapped; add the dropped carry back in */
+}
+
+static u32				/* One's-complement addition of two u32 values (add with end-around carry) */
+add32(u32 sum, u32 x)
+{
+  u32 z = sum + x;			/* stored back into a u32, so any carry out of the top bit is dropped here */
+  return z + (z < sum);			/* z < sum means the addition wrapped; add the dropped carry back in */
+}
+
+static u16
+ipsum_calc_block(u16 *x, unsigned len, u16 sum)	/* Fold len bytes at x into the running sum */
+{
+  int rest;				/* nonzero if one u16 word is left over after the u32 loop */
+  u32 tmp, *xx;				/* u32-wide accumulator and u32 read pointer */
+
+  /*
+   *  Adds the `len' bytes starting at `x' to the running one's-complement
+   *  sum `sum' and returns the updated sum.  `len' must be even.
+   *
+   *  A few simple facts about the IP checksum (see RFC 1071 for a detailed
+   *  discussion): it's associative and commutative, it's byte order
+   *  independent and it's word size independent.
+   *
+   *  This gives us a neat 32-bits-at-a-time algorithm which respects
+   *  usual alignment requirements and is reasonably fast.
+   */
+
+  ASSERT(!(len % 2));			/* the byte count must be even, i.e. a whole number of u16 words */
+  if (!len)
+    return sum;				/* empty block: the sum is unchanged */
+  len >>= 1;				/* from here on len counts u16 words, not bytes */
+  if ((unsigned long) x & 2)		/* Align to 32-bit boundary; tests address bit 1 only -- NOTE(review): presumably x is always 2-byte aligned, confirm */
+    {
+      sum = add16(sum, *x++);		/* take the leading u16 word on its own */
+      len--;
+    }
+  rest = len & 1;			/* odd number of u16 words: one remains after the u32 loop */
+  len >>= 1;				/* len now counts u32 words */
+  tmp = 0;
+  xx = (u32 *) x;			/* NOTE(review): u16 data is read through a u32 pointer -- confirm this is acceptable for the build's aliasing rules */
+  while (len)
+    {
+      tmp = add32(tmp, *xx++);		/* accumulate one u32 word (two u16 words) per iteration */
+      len--;
+    }
+  sum = add16(sum, add16(tmp & 0xffff, tmp >> 16U));	/* fold the two 16-bit halves of tmp together, then into sum */
+  if (rest)
+    sum = add16(sum, *(u16 *) xx);	/* add the single trailing u16 word */
+  return sum;
+}
+
 static u16
 ipsum_calc(void *frag, unsigned len, va_list args)
 {
@@ -18,14 +74,7 @@ ipsum_calc(void *frag, unsigned len, va_list args)
 
   for(;;)
     {
-      u16 *x = frag;
-      ASSERT(!(len % 2));
-      while (len)
-	{
-	  u16 z = sum + *x++;
-	  sum = z + (z < sum);
-	  len -= 2;
-	}
+      sum = ipsum_calc_block(frag, len, sum);
       frag = va_arg(args, void *);
       if (!frag)
 	break;