Improve pglz_decompress() so that it cannot clobber memory beyond the
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 8 Mar 2008 01:09:36 +0000 (01:09 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 8 Mar 2008 01:09:36 +0000 (01:09 +0000)
available output buffer when presented with corrupt input.  Some testing
suggests that this slows the decompression loop about 1%, which seems an
acceptable price to pay for more robustness.  (Curiously, the penalty
seems to be *less* on not-very-compressible data, which I didn't expect
since the overhead per output byte ought to be more in the literal-bytes
path.)

Patch from Zdenek Kotala.  I fixed a corner case and did some renaming
of variables to make the routine more readable.

src/backend/utils/adt/pg_lzcompress.c

index 0716b252985ed54abeabd81cc06ba84e86632a07..2fa3bb1a90c5b1a60c3c072c7dbb2df0003b8011 100644 (file)
  *
  * Copyright (c) 1999-2008, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.30 2008/03/07 23:20:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.31 2008/03/08 01:09:36 tgl Exp $
  * ----------
  */
 #include "postgres.h"
@@ -641,26 +641,26 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 void
 pglz_decompress(const PGLZ_Header *source, char *dest)
 {
-       const unsigned char *dp;
-       const unsigned char *dend;
-       unsigned char *bp;
-       unsigned char ctrl;
-       int32           ctrlc;
-       int32           len;
-       int32           off;
-       int32           destsize;
-
-       dp = ((const unsigned char *) source) + sizeof(PGLZ_Header);
-       dend = ((const unsigned char *) source) + VARSIZE(source);
-       bp = (unsigned char *) dest;
+       const unsigned char *sp;
+       const unsigned char *srcend;
+       unsigned char *dp;
+       unsigned char *destend;
 
-       while (dp < dend)
+       sp = ((const unsigned char *) source) + sizeof(PGLZ_Header);
+       srcend = ((const unsigned char *) source) + VARSIZE(source);
+       dp = (unsigned char *) dest;
+       destend = dp + source->rawsize;
+
+       while (sp < srcend && dp < destend)
        {
                /*
-                * Read one control byte and process the next 8 items.
+                * Read one control byte and process the next 8 items (or as many
+                * as remain in the compressed input).
                 */
-               ctrl = *dp++;
-               for (ctrlc = 0; ctrlc < 8 && dp < dend; ctrlc++)
+               unsigned char ctrl = *sp++;
+               int             ctrlc;
+
+               for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)
                {
                        if (ctrl & 1)
                        {
@@ -671,11 +671,27 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
                                 * coded as 18, another extension tag byte tells how much
                                 * longer the match really was (0-255).
                                 */
-                               len = (dp[0] & 0x0f) + 3;
-                               off = ((dp[0] & 0xf0) << 4) | dp[1];
-                               dp += 2;
+                               int32           len;
+                               int32           off;
+
+                               len = (sp[0] & 0x0f) + 3;
+                               off = ((sp[0] & 0xf0) << 4) | sp[1];
+                               sp += 2;
                                if (len == 18)
-                                       len += *dp++;
+                                       len += *sp++;
+
+                               /*
+                                * Check for output buffer overrun, to ensure we don't
+                                * clobber memory in case of corrupt input.  Note: we must
+                                * advance dp here to ensure the error is detected below
+                                * the loop.  We don't simply put the elog inside the loop
+                                * since that will probably interfere with optimization.
+                                */
+                               if (dp + len > destend)
+                               {
+                                       dp += len;
+                                       break;
+                               }
 
                                /*
                                 * Now we copy the bytes specified by the tag from OUTPUT to
@@ -685,8 +701,8 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
                                 */
                                while (len--)
                                {
-                                       *bp = bp[-off];
-                                       bp++;
+                                       *dp = dp[-off];
+                                       dp++;
                                }
                        }
                        else
@@ -695,7 +711,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
                                 * An unset control bit means LITERAL BYTE. So we just copy
                                 * one from INPUT to OUTPUT.
                                 */
-                               *bp++ = *dp++;
+                               if (dp >= destend)      /* check for buffer overrun */
+                                       break;                  /* do not clobber memory */
+
+                               *dp++ = *sp++;
                        }
 
                        /*
@@ -706,14 +725,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
        }
 
        /*
-        * Check we decompressed the right amount, else die.  This is a FATAL
-        * condition if we tromped on more memory than expected (we assume we have
-        * not tromped on shared memory, though, so need not PANIC).
+        * Check we decompressed the right amount.
         */
-       destsize = (char *) bp - dest;
-       if (destsize != source->rawsize)
-               elog(destsize > source->rawsize ? FATAL : ERROR,
-                        "compressed data is corrupt");
+       if (dp != destend || sp != srcend)
+               elog(ERROR, "compressed data is corrupt");
 
        /*
         * That's it.