From 43fd1e7b5a01623bc59fcf68254ce5be8e0b8d42 Mon Sep 17 00:00:00 2001
From: =?utf8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Mon, 11 Nov 2013 14:19:09 +0000
Subject: [PATCH] base64: provide a fast path for encoding well sized buffers

Avoid conditionals in the base64 encoding loop,
which was seen to give a 60% throughput improvement
with the base64 utility from coreutils:

$ truncate -s100MiB file.in
$ time base64-old -w0 < file.in >/dev/null
real  0m0.302s
$ time base64-new -w0 < file.in >/dev/null
real  0m0.182s

* lib/base64.c (base64_encode_fast): A new function to be called
when we don't want to NUL terminate, and we have enough space
in the output to encode the given input.
(base64_encode): Call the _fast() version when appropriate.
Also remove a redundant mask with 0x3F on the first encoded byte.
---
 ChangeLog    | 11 +++++++++++
 lib/base64.c | 40 ++++++++++++++++++++++++++++++++++------
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3ac439aa0..59de3e573 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2013-11-11  Pádraig Brady <P@draigBrady.com>
+
+	base64: provide a fast path for encoding well sized buffers
+	Avoid conditionals in the base64 encoding loop,
+	which was seen to give 60% better throughput.
+	* lib/base64.c (base64_encode_fast): A new function to be called
+	when we don't want to NUL terminate, and we have enough space
+	in the output to encode the given input.
+	(base64_encode): Call the _fast() version when appropriate.
+	Also remove a redundant mask with 0x3F on the first encoded byte.
+
 2013-11-08  Paul Eggert  <eggert@cs.ucla.edu>
 
 	extern-inline: port better to OS X 10.9
diff --git a/lib/base64.c b/lib/base64.c
index 105f419b5..99c2d2e56 100644
--- a/lib/base64.c
+++ b/lib/base64.c
@@ -59,6 +59,27 @@ to_uchar (char ch)
   return ch;
 }
 
+static const char b64c[64] =  /* Exactly 64 chars, so no NUL is stored.  */
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Base64 encode IN array of size INLEN into OUT array, without NUL
+   terminating it.  OUT needs to be of length >= BASE64_LENGTH (INLEN),
+   and INLEN needs to be a multiple of 3, as no bounds are checked.  */
+static void
+base64_encode_fast (const char *restrict in, size_t inlen, char *restrict out)
+{
+  while (inlen)  /* Each pass turns 3 input bytes into 4 output chars.  */
+    {
+      *out++ = b64c[to_uchar (in[0]) >> 2];  /* Top 6 bits of byte 0.  */
+      *out++ = b64c[((to_uchar (in[0]) << 4) + (to_uchar (in[1]) >> 4)) & 0x3f];
+      *out++ = b64c[((to_uchar (in[1]) << 2) + (to_uchar (in[2]) >> 6)) & 0x3f];
+      *out++ = b64c[to_uchar (in[2]) & 0x3f];  /* Low 6 bits of byte 2.  */
+
+      inlen -= 3;  /* Unsigned: wraps past 0 unless INLEN % 3 == 0.  */
+      in += 3;
+    }
+}
+
 /* Base64 encode IN array of size INLEN into OUT array of size OUTLEN.
    If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as
    possible.  If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero
@@ -67,28 +88,35 @@ void
 base64_encode (const char *restrict in, size_t inlen,
                char *restrict out, size_t outlen)
 {
-  static const char b64str[64] =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+  /* Note this outlen constraint can be enforced at compile time.
+     I.E. that the output buffer is exactly large enough to hold
+     the encoded inlen bytes.  The inlen constraints (of corresponding
+     to outlen, and being a multiple of 3) can change at runtime
+     at the end of input.  However the common case when reading
+     large inputs is to have both constraints satisfied, so we depend
+     on both in base64_encode_fast().  */
+  if (outlen % 4 == 0 && inlen == outlen / 4 * 3)
+    return base64_encode_fast (in, inlen, out);
 
   while (inlen && outlen)
     {
-      *out++ = b64str[(to_uchar (in[0]) >> 2) & 0x3f];
+      *out++ = b64c[to_uchar (in[0]) >> 2];
       if (!--outlen)
         break;
-      *out++ = b64str[((to_uchar (in[0]) << 4)
+      *out++ = b64c[((to_uchar (in[0]) << 4)
                        + (--inlen ? to_uchar (in[1]) >> 4 : 0))
                       & 0x3f];
       if (!--outlen)
         break;
       *out++ =
         (inlen
-         ? b64str[((to_uchar (in[1]) << 2)
+         ? b64c[((to_uchar (in[1]) << 2)
                    + (--inlen ? to_uchar (in[2]) >> 6 : 0))
                   & 0x3f]
          : '=');
       if (!--outlen)
         break;
-      *out++ = inlen ? b64str[to_uchar (in[2]) & 0x3f] : '=';
+      *out++ = inlen ? b64c[to_uchar (in[2]) & 0x3f] : '=';
       if (!--outlen)
         break;
       if (inlen)
-- 
2.11.0