summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/heap/tuptoaster.c384
-rw-r--r--src/backend/utils/adt/pg_lzcompress.c81
-rw-r--r--src/backend/utils/misc/guc.c11
-rw-r--r--src/include/access/tuptoaster.h1
-rw-r--r--src/include/postgres.h36
-rw-r--r--src/include/utils/pg_lzcompress.h38
6 files changed, 393 insertions, 158 deletions
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index fc37ceb4a3..5d36772f0b 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -34,6 +34,8 @@
#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
+#include "common/snappy/snappy.h"
+#include "common/lz4/lz4.h"
#include "catalog/catalog.h"
#include "utils/fmgroids.h"
#include "utils/pg_lzcompress.h"
@@ -72,6 +74,20 @@ do { \
memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
} while (0)
+/*
+ * Available compression algorithms
+ *
+ * The numeric value of each member is what gets encoded into a compressed
+ * datum: ids 0..2 are stored directly in the two high bits of the raw-size
+ * word, while larger ids store the marker value 3 in those bits plus one
+ * extra byte holding "id - 2" (see toast_get_compress_size_algo and
+ * toast_compress_datum).  TOAST_COMPRESS_PGLZ has to stay 0, because
+ * datums written by the legacy pglz-only format have both bits unset.
+ */
+typedef enum ToastCompressionAlgo
+{
+ TOAST_COMPRESS_PGLZ = 0, /* legacy pglz, both high bits unset */
+ TOAST_COMPRESS_SNAPPY, /* id 1, encoded in the high bits */
+ TOAST_COMPRESS_LZ4, /* id 2, encoded in the high bits */
+ TOAST_COMPRESS_PGLZ_LONG /* for testing of +1 byte storage */
+} ToastCompressionAlgo;
+#define LAST_COMPRESSION_ALGO TOAST_COMPRESS_PGLZ_LONG
+
+/* guc for the default compression algorithm, just for testing */
+int toast_compression_algo = 0;
static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value,
@@ -81,7 +97,12 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length);
+static Datum toast_uncompress_datum(Datum attr);
+static Size toast_uncompressed_length(struct varlena *attr);
+static void toast_get_compress_size_algo(Datum value, ToastCompressionAlgo *algo,
+ Size *compressed_length, Size *uncompressed_length,
+ void **compressed_data);
/* ----------
* heap_tuple_fetch_attr -
@@ -137,11 +158,11 @@ heap_tuple_untoast_attr(struct varlena * attr)
/* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr))
{
- PGLZ_Header *tmp = (PGLZ_Header *) attr;
+ struct varlena *tmp = attr;
+
+ attr = (struct varlena *) DatumGetPointer(
+ toast_uncompress_datum(PointerGetDatum(attr)));
- attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- pglz_decompress(tmp, VARDATA(attr));
pfree(tmp);
}
}
@@ -150,11 +171,8 @@ heap_tuple_untoast_attr(struct varlena * attr)
/*
* This is a compressed value inside of the main tuple
*/
- PGLZ_Header *tmp = (PGLZ_Header *) attr;
-
- attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
- pglz_decompress(tmp, VARDATA(attr));
+ attr = (struct varlena *) DatumGetPointer(
+ toast_uncompress_datum(PointerGetDatum(attr)));
}
else if (VARATT_IS_SHORT(attr))
{
@@ -209,15 +227,8 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
if (VARATT_IS_COMPRESSED(preslice))
{
- PGLZ_Header *tmp = (PGLZ_Header *) preslice;
- Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;
-
- preslice = (struct varlena *) palloc(size);
- SET_VARSIZE(preslice, size);
- pglz_decompress(tmp, VARDATA(preslice));
-
- if (tmp != (PGLZ_Header *) attr)
- pfree(tmp);
+ preslice = (struct varlena *) DatumGetPointer(
+ toast_uncompress_datum(PointerGetDatum(preslice)));
}
if (VARATT_IS_SHORT(preslice))
@@ -277,8 +288,7 @@ toast_raw_datum_size(Datum value)
}
else if (VARATT_IS_COMPRESSED(attr))
{
- /* here, va_rawsize is just the payload size */
- result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
+ result = toast_uncompressed_length(attr);
}
else if (VARATT_IS_SHORT(attr))
{
@@ -1162,6 +1172,75 @@ toast_flatten_tuple_attribute(Datum value,
return PointerGetDatum(new_data);
}
+/* ----
+ * toast_get_compress_size_algo -
+ *
+ *	get metadata about compressed Datum
+ *
+ *	value:               the compressed (4B_C) varlena to inspect
+ *	algo:                out, compression algorithm the datum was written with
+ *	compressed_length:   out, number of bytes of compressed payload
+ *	uncompressed_length: out, raw payload size, excluding any varlena header
+ *	compressed_data:     out, pointer to the first byte of compressed payload
+ *
+ *	Raises an ERROR if the encoded algorithm id is larger than
+ *	LAST_COMPRESSION_ALGO.
+ *
+ *
+ * How to disambiguate between compression strategies:
+ *
+ * For historical reasons the first 4 bytes of a compressed Datum are used to
+ * store the raw size of the datum as an unsigned integer. Since the length
+ * cannot be more than 1GB due to general toast limitations we have the 2 high
+ * bits to disambiguate whether the Datum has been compressed with the legacy
+ * pglz or something else. We cannot change the meaning of Datums with the
+ * first 2 bits unset since we need to support the old ondisk format.
+ *
+ * Since earlier our only compression format was pglz, which stored two 0 bits
+ * we can use those two bits to discern different formats. If the compression
+ * format we use has a higher numeric value than 2 we store b11/3 in the high
+ * bits and store the numeric id - 2 in an extra byte.
+ *
+ * So storage looks like:
+ * 1) [4 byte varlena header]
+ * 2) [4 byte uncompressed length, 2 high bits for algorithm]
+ * 3) [1 optional byte of algorithm id - 2]
+ * 4) [compressed data ...]
+ *
+ * On little endian the storage for 2) looks like:
+ * [1st length byte][2nd length byte][3rd length byte][6 bit length][2 bit algorithm]
+ *
+ * Due to the 2 high bits only being in the 4th byte we cannot store the
+ * algorithm in a convenient format if we need more than two bits to represent
+ * it.
+ * ----
+ */
+static void
+toast_get_compress_size_algo(Datum value, ToastCompressionAlgo *algo,
+							 Size *compressed_length,
+							 Size *uncompressed_length,
+							 void **compressed_data)
+{
+	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+	uint32		rawsize_word;
+	int32		compression_type;
+
+	/* first payload word: raw size plus algorithm bits */
+	rawsize_word = *(uint32 *) VARDATA(attr);
+
+	/*
+	 * read the two highest bits of the size, the compression is (possibly
+	 * partially) stored there.
+	 */
+	compression_type = rawsize_word >> 30;
+	*compressed_data = ((char *) VARDATA(attr)) + sizeof(uint32);
+
+	/*
+	 * mask the two highest bits away to get the real size; all of the
+	 * remaining 30 bits belong to the length.
+	 */
+	*uncompressed_length = rawsize_word & 0x3FFFFFFF;
+	*compressed_length = VARSIZE_ANY_EXHDR(attr) - sizeof(uint32);
+
+	/* algorithm is also stored in extra byte */
+	if (compression_type == 3)
+	{
+		/* add extra byte of compression algorithm metadata */
+		compression_type = 2 + *((uint8 *) *compressed_data);
+		/* and deal with the extra byte */
+		*compressed_data = ((char *) *compressed_data) + sizeof(uint8);
+		*compressed_length -= sizeof(uint8);
+	}
+
+	if (compression_type > LAST_COMPRESSION_ALGO)
+		elog(ERROR, "unknown compression algorithm %d", compression_type);
+
+	*algo = (ToastCompressionAlgo) compression_type;
+}
/* ----------
* toast_compress_datum -
@@ -1174,13 +1253,23 @@ toast_flatten_tuple_attribute(Datum value,
*
* We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
* copying them. But we can't handle external or compressed datums.
+ *
+ * NB: check toast_get_compress_size_algo for details of the encoding of
+ * compression algorithms.
* ----------
*/
Datum
toast_compress_datum(Datum value)
{
- struct varlena *tmp;
- int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
+ struct varlena *compressed_datum = NULL;
+ void *compressed;
+ int32 uncompressed_size = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
+ void *uncompressed_data = VARDATA_ANY(DatumGetPointer(value));
+ int32 raw_compressed_size; /* size returned by compressor */
+ int32 compressed_size; /* including varlena & uncompressed size */
+ Size compression_overhead;
+ Size buffer_size;
+ int ret;
Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
@@ -1188,38 +1277,248 @@ toast_compress_datum(Datum value)
/*
* No point in wasting a palloc cycle if value size is out of the allowed
* range for compression
+ *
+ * XXX: define magic numbers somewhere else
*/
- if (valsize < PGLZ_strategy_default->min_input_size ||
- valsize > PGLZ_strategy_default->max_input_size)
+ if (uncompressed_size < 32)
return PointerGetDatum(NULL);
- tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize));
+ compression_overhead = sizeof(uint32);
+ /* use extra byte to store additional algorithms */
+ if (toast_compression_algo > 2)
+ compression_overhead += sizeof(uint8);
/*
- * We recheck the actual size even if pglz_compress() reports success,
- * because it might be satisfied with having saved as little as one byte
- * in the compressed data --- which could turn into a net loss once you
- * consider header and alignment padding. Worst case, the compressed
- * format might require three padding bytes (plus header, which is
- * included in VARSIZE(tmp)), whereas the uncompressed format would take
- * only one header byte and no padding if the value is short enough. So
- * we insist on a savings of more than 2 bytes to ensure we have a gain.
+ * compute compression algorithm agnostic part of the buffer size needed
+ * for compression.
*/
- if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize,
- (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
- VARSIZE(tmp) < valsize - 2)
+ buffer_size = VARHDRSZ + compression_overhead;
+
+ /* and the compression specific part */
+ switch ((ToastCompressionAlgo) toast_compression_algo)
{
- /* successful compression */
- return PointerGetDatum(tmp);
+ case TOAST_COMPRESS_PGLZ:
+ case TOAST_COMPRESS_PGLZ_LONG:
+ buffer_size += PGLZ_MAX_OUTPUT(uncompressed_size);
+ break;
+ case TOAST_COMPRESS_SNAPPY:
+ buffer_size += snappy_max_compressed_length(uncompressed_size);
+ break;
+ case TOAST_COMPRESS_LZ4:
+ buffer_size += LZ4_compressBound(uncompressed_size);
+ break;
+ }
+
+ /* allocate to-be-compressed Datum */
+ compressed_datum = (struct varlena *) palloc(buffer_size);
+ compressed = ((char *) VARDATA(compressed_datum)) + compression_overhead;
+
+ /* compress data */
+ switch ((ToastCompressionAlgo) toast_compression_algo)
+ {
+ case TOAST_COMPRESS_PGLZ:
+ case TOAST_COMPRESS_PGLZ_LONG:
+ {
+ ret = pglz_compress(uncompressed_data, uncompressed_size,
+ compressed, PGLZ_strategy_default);
+ if (!ret)
+ goto incompressible;
+ raw_compressed_size = ret;
+ break;
+ }
+ case TOAST_COMPRESS_SNAPPY:
+ {
+ static struct snappy_env *snappy_env = NULL;
+ if (snappy_env == NULL)
+ {
+ snappy_env = malloc(sizeof(struct snappy_env));
+ snappy_init_env(snappy_env);
+ }
+
+ ret = snappy_compress(snappy_env,
+ uncompressed_data,
+ (size_t)uncompressed_size,
+ compressed,
+ &buffer_size);
+ /* EIO is returned for incompressible data */
+ if (ret == EIO)
+ goto incompressible;
+ else if (ret != 0)
+ elog(ERROR, "snappy: compression failed: %d", ret);
+
+ raw_compressed_size = buffer_size;
+ break;
+ }
+ case TOAST_COMPRESS_LZ4:
+ {
+ ret = LZ4_compress(uncompressed_data,
+ compressed,
+ uncompressed_size);
+ if (ret == 0)
+ elog(ERROR, "lz4: compression failed");
+
+ raw_compressed_size = ret;
+ break;
+ }
+ default:
+ elog(ERROR, "invalid compression algorithm");
+ }
+
+ /* encode compression algorithm in size if it fits there */
+ if (toast_compression_algo <= 2)
+ {
+ *((uint32 *) VARDATA(compressed_datum)) =
+ ((uint32) toast_compression_algo) << 30 | uncompressed_size;
}
else
{
- /* incompressible data */
- pfree(tmp);
- return PointerGetDatum(NULL);
+ /* set marker for separate algorithm byte */
+ *((uint32 *) VARDATA(compressed_datum)) =
+ ((uint32) 3) << 30 | uncompressed_size;
+
+ /*
+ * Store algorithm in extra byte. Algorithms 0, 1 and 2 are handled in
+ * the above case.
+ */
+ *((char *) VARDATA(compressed_datum) + sizeof(uint32)) =
+ toast_compression_algo - 2;
+ }
+
+ /* set size and mark varlena as being of type 4B_C/compressed */
+ SET_VARSIZE_COMPRESSED(compressed_datum,
+ raw_compressed_size + compression_overhead + VARHDRSZ);
+
+ compressed_size = VARSIZE(compressed_datum);
+
+ /*
+ * Check whether the compression was sufficiently effective. Some of the
+ * compression methods check for blowing up to a larger amount of data than
+ * the source, some don't. Even if they do, like pglz_compress(), they
+ * might report success, having saved as little as one byte in the
+ * compressed data --- which could turn into a net loss once you consider
+ * header and alignment padding. Worst case, the compressed format might
+ * require three padding bytes (plus header, which is included in
+ * VARSIZE(compressed_datum)), whereas the uncompressed format would take
+ * only one header byte and no padding if the value is short enough. So we
+ * insist on a savings of more than 2 bytes to ensure we have a gain.
+ */
+ if (compressed_size < uncompressed_size - 2)
+ {
+ /* successful compression */
+ return PointerGetDatum(compressed_datum);
+ }
+
+ /* incompressible data */
+incompressible:
+ if (compressed_datum != NULL)
+ pfree(compressed_datum);
+ return PointerGetDatum(NULL);
+}
+
+/*
+ * toast_uncompress_datum -
+ *
+ *	Uncompress compressed datum using the appropriate compression
+ *	algorithm. Will return a 4B Datum.
+ *
+ *	value must be an inline-compressed varlena (asserted).  The result is
+ *	a freshly palloc'd varlena with a plain 4-byte header; the input is
+ *	not freed here, that is left to the caller.
+ *
+ *	Raises an ERROR if the snappy or lz4 decoder reports a failure or a
+ *	size that disagrees with the stored uncompressed length.
+ */
+static Datum
+toast_uncompress_datum(Datum value)
+{
+	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+	ToastCompressionAlgo compression_type;
+	Size		uncompressed_length;
+	void	   *uncompressed_data;
+	Size		compressed_length;
+	void	   *compressed_data;
+
+	Assert(VARATT_IS_COMPRESSED(attr));
+
+	/* get meta information about the compressed datum */
+	toast_get_compress_size_algo(value,
+								 &compression_type,
+								 &compressed_length,
+								 &uncompressed_length,
+								 &compressed_data);
+
+	/* from here on attr refers to the output datum, not the input */
+	attr = (struct varlena *) palloc(uncompressed_length + VARHDRSZ);
+	SET_VARSIZE(attr, uncompressed_length + VARHDRSZ);
+	uncompressed_data = VARDATA(attr);
+
+	switch (compression_type)
+	{
+		case TOAST_COMPRESS_PGLZ:
+		case TOAST_COMPRESS_PGLZ_LONG:
+			{
+				pglz_decompress(compressed_data, compressed_length,
+								uncompressed_data, uncompressed_length);
+				break;
+			}
+		case TOAST_COMPRESS_SNAPPY:
+			{
+				int			ret;
+				Size		s_uncompressed_length;
+
+				/*
+				 * Cross-check the length snappy recorded in its own stream
+				 * against ours before writing into the output buffer.
+				 * NOTE(review): a zero return is treated as failure here,
+				 * confirm against the bundled snappy port.
+				 */
+				ret = snappy_uncompressed_length(compressed_data,
+												 compressed_length,
+												 &s_uncompressed_length);
+				if (!ret)
+					elog(ERROR, "snappy: failed to determine compression length");
+				if (uncompressed_length != s_uncompressed_length)
+					elog(ERROR, "snappy: compression size mismatch %zu != %zu",
+						 uncompressed_length, s_uncompressed_length);
+
+				ret = snappy_uncompress(compressed_data,
+										compressed_length,
+										uncompressed_data);
+				if (ret != 0)
+					elog(ERROR, "snappy: decompression failed: %d", ret);
+				break;
+			}
+		case TOAST_COMPRESS_LZ4:
+			{
+				int			ret;
+
+				/*
+				 * Use the bounds-checked decoder: it is told how many input
+				 * bytes exist and how large the output buffer is, so a
+				 * corrupted datum cannot make it run past either one.  It
+				 * returns the number of bytes written, negative on error.
+				 */
+				ret = LZ4_decompress_safe(compressed_data, uncompressed_data,
+										  (int) compressed_length,
+										  (int) uncompressed_length);
+				if (ret < 0 || (Size) ret != uncompressed_length)
+					elog(ERROR, "lz4: decompression size mismatch: %d vs %zu",
+						 ret, uncompressed_length);
+				break;
+			}
+		default:
+			/* toast_get_compress_size_algo should already have rejected this */
+			elog(ERROR, "unknown compression algorithm %d",
+				 (int) compression_type);
+	}
+
+	/* make sure we didn't overwrite this anywhere */
+	Assert(VARSIZE(attr) == uncompressed_length + VARHDRSZ);
+
+	return PointerGetDatum(attr);
}
+/*
+ * toast_uncompressed_length -
+ *
+ *	Size the given compressed Datum will occupy once it has been
+ *	decompressed, counting the VARHDRSZ varlena header.
+ */
+static Size
+toast_uncompressed_length(struct varlena *attr)
+{
+	ToastCompressionAlgo algo_ignored;
+	Size		packed_len_ignored;
+	void	   *payload_ignored;
+	Size		raw_len;
+
+	Assert(VARATT_IS_COMPRESSED(attr));
+
+	/* only the raw length is of interest, the other outputs are discarded */
+	toast_get_compress_size_algo(PointerGetDatum(attr),
+								 &algo_ignored,
+								 &packed_len_ignored,
+								 &raw_len,
+								 &payload_ignored);
+
+	/* account for the varlena overhead of the decompressed datum */
+	return raw_len + VARHDRSZ;
+}
/* ----------
* toast_save_datum -
@@ -1284,10 +1583,11 @@ toast_save_datum(Relation rel, Datum value,
}
else if (VARATT_IS_COMPRESSED(dval))
{
+ struct varlena *dval_a = (struct varlena *) dval;
data_p = VARDATA(dval);
data_todo = VARSIZE(dval) - VARHDRSZ;
/* rawsize in a compressed datum is just the size of the payload */
- toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
+ toast_pointer.va_rawsize = toast_uncompressed_length(dval_a);
toast_pointer.va_extsize = data_todo;
/* Assert that the numbers look like it's compressed */
Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/backend/utils/adt/pg_lzcompress.c
index 66c64c198f..3a9b6cf760 100644
--- a/src/backend/utils/adt/pg_lzcompress.c
+++ b/src/backend/utils/adt/pg_lzcompress.c
@@ -9,8 +9,8 @@
* Entry routines:
*
* bool
- * pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
- * const PGLZ_Strategy *strategy);
+ * pglz_compress(const void *source, int32 slen, void *dest,
+ * const PGLZ_Strategy *strategy);
*
* source is the input data to be compressed.
*
@@ -23,12 +23,12 @@
* the compression algorithm. If NULL, the compiled
* in default strategy is used.
*
- * The return value is TRUE if compression succeeded,
- * FALSE if not; in the latter case the contents of dest
- * are undefined.
+ * The return value is the size of the compressed output if
+ * compression succeeded, 0 if not; in the latter case the
+ * contents of dest are undefined.
*
* void
- * pglz_decompress(const PGLZ_Header *source, char *dest)
+ * pglz_decompress(const void *source, int32 slen, void *dest, int32 dlen)
*
* source is the compressed input.
*
@@ -42,25 +42,8 @@
*
* The decompression algorithm and internal data format:
*
- * PGLZ_Header is defined as
- *
- * typedef struct PGLZ_Header {
- * int32 vl_len_;
- * int32 rawsize;
- * }
- *
- * The header is followed by the compressed data itself.
- *
- * The data representation is easiest explained by describing
- * the process of decompression.
- *
- * If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data
- * is stored uncompressed as plain bytes. Thus, the decompressor
- * simply copies rawsize bytes from the location after the
- * header to the destination.
- *
- * Otherwise the first byte after the header tells what to do
- * the next 8 times. We call this the control byte.
+ * The first byte after the header - which this file never sees -
+ * tells what to do the next 8 times. We call this the control byte.
*
* An unset bit in the control byte means, that one uncompressed
* byte follows, which is copied from input to output.
@@ -225,18 +208,6 @@ static const PGLZ_Strategy strategy_default_data = {
const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;
-static const PGLZ_Strategy strategy_always_data = {
- 0, /* Chunks of any size are compressed */
- INT_MAX,
- 0, /* It's enough to save one single byte */
- INT_MAX, /* Never give up early */
- 128, /* Stop history lookup if a match of 128 bytes
- * is found */
- 6 /* Look harder for a good match */
-};
-const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
-
-
/* ----------
* Statically allocated work arrays for history
* ----------
@@ -478,16 +449,16 @@ pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
* Compresses source into dest using strategy.
* ----------
*/
-bool
-pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
+int32
+pglz_compress(const void *source, int32 slen, void *dest,
const PGLZ_Strategy *strategy)
{
- unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
+ unsigned char *bp = (unsigned char *) dest;
unsigned char *bstart = bp;
int hist_next = 0;
bool hist_recycle = false;
const char *dp = source;
- const char *dend = source + slen;
+ const char *dend = ((char *) source) + slen;
unsigned char ctrl_dummy = 0;
unsigned char *ctrlp = &ctrl_dummy;
unsigned char ctrlb = 0;
@@ -514,12 +485,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
if (strategy->match_size_good <= 0 ||
slen < strategy->min_input_size ||
slen > strategy->max_input_size)
- return false;
-
- /*
- * Save the original source size in the header.
- */
- dest->rawsize = slen;
+ return 0;
/*
* Limit the match parameters to the supported range.
@@ -574,7 +540,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
* allow 4 slop bytes.
*/
if (bp - bstart >= result_max)
- return false;
+ return 0;
/*
* If we've emitted more than first_success_by bytes without finding
@@ -583,7 +549,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
* pre-compressed data).
*/
if (!found_match && bp - bstart >= strategy->first_success_by)
- return false;
+ return 0;
/*
* Try to find a match in the history
@@ -627,14 +593,9 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
*ctrlp = ctrlb;
result_size = bp - bstart;
if (result_size >= result_max)
- return false;
-
- /*
- * Success - need only fill in the actual length of the compressed datum.
- */
- SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));
+ return 0;
- return true;
+ return result_size;
}
@@ -645,17 +606,17 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
* ----------
*/
void
-pglz_decompress(const PGLZ_Header *source, char *dest)
+pglz_decompress(const void *source, int32 slen, void *dest, int32 dlen)
{
const unsigned char *sp;
const unsigned char *srcend;
unsigned char *dp;
unsigned char *destend;
- sp = ((const unsigned char *) source) + sizeof(PGLZ_Header);
- srcend = ((const unsigned char *) source) + VARSIZE(source);
+ sp = (const unsigned char *) source;
+ srcend = ((const unsigned char *) source) + slen;
dp = (unsigned char *) dest;
- destend = dp + source->rawsize;
+ destend = dp + dlen;
while (sp < srcend && dp < destend)
{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 3a7653698d..788731e273 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -28,6 +28,7 @@
#include "access/gin.h"
#include "access/transam.h"
+#include "access/tuptoaster.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "catalog/namespace.h"
@@ -1890,6 +1891,16 @@ static struct config_int ConfigureNamesInt[] =
},
{
+ {"toast_compression_algo", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("chooses the compression algo: 0: pglz, 1: snappy, 2: lz4, 3: pglz_long"),
+ NULL
+ },
+ &toast_compression_algo,
+ 2, 0, 3,
+ NULL, NULL, NULL
+ },
+
+ {
{"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
gettext_noop("Minimum age at which VACUUM should freeze a table row."),
NULL
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index 6f4fc4545d..97334fd79b 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -94,6 +94,7 @@
sizeof(int32) - \
VARHDRSZ)
+extern int toast_compression_algo;
/* ----------
* toast_insert_or_update -
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 30e1dee187..6fd70c6295 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -81,21 +81,15 @@ struct varatt_external
* compiler might otherwise think it could generate code that assumes
* alignment while touching fields of a 1-byte-header varlena.
*/
-typedef union
+
+/* Normal and inline compressed varlena (4-byte length) */
+typedef struct
{
- struct /* Normal varlena (4-byte length) */
- {
- uint32 va_header;
- char va_data[1];
- } va_4byte;
- struct /* Compressed-in-line format */
- {
- uint32 va_header;
- uint32 va_rawsize; /* Original data size (excludes header) */
- char va_data[1]; /* Compressed data */
- } va_compressed;
+ uint32 va_header;
+ char va_data[1];
} varattrib_4b;
+/* short inline uncompressed varlena (1-byte length) */
typedef struct
{
uint8 va_header;
@@ -158,16 +152,16 @@ typedef struct
/* VARSIZE_4B() should only be used on known-aligned data */
#define VARSIZE_4B(PTR) \
- (((varattrib_4b *) (PTR))->va_4byte.va_header & 0x3FFFFFFF)
+ (((varattrib_4b *) (PTR))->va_header & 0x3FFFFFFF)
#define VARSIZE_1B(PTR) \
(((varattrib_1b *) (PTR))->va_header & 0x7F)
#define VARSIZE_1B_E(PTR) \
(((varattrib_1b_e *) (PTR))->va_len_1be)
#define SET_VARSIZE_4B(PTR,len) \
- (((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF)
+ (((varattrib_4b *) (PTR))->va_header = (len) & 0x3FFFFFFF)
#define SET_VARSIZE_4B_C(PTR,len) \
- (((varattrib_4b *) (PTR))->va_4byte.va_header = ((len) & 0x3FFFFFFF) | 0x40000000)
+ (((varattrib_4b *) (PTR))->va_header = ((len) & 0x3FFFFFFF) | 0x40000000)
#define SET_VARSIZE_1B(PTR,len) \
(((varattrib_1b *) (PTR))->va_header = (len) | 0x80)
#define SET_VARSIZE_1B_E(PTR,len) \
@@ -190,16 +184,16 @@ typedef struct
/* VARSIZE_4B() should only be used on known-aligned data */
#define VARSIZE_4B(PTR) \
- ((((varattrib_4b *) (PTR))->va_4byte.va_header >> 2) & 0x3FFFFFFF)
+ ((((varattrib_4b *) (PTR))->va_header >> 2) & 0x3FFFFFFF)
#define VARSIZE_1B(PTR) \
((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F)
#define VARSIZE_1B_E(PTR) \
(((varattrib_1b_e *) (PTR))->va_len_1be)
#define SET_VARSIZE_4B(PTR,len) \
- (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2))
+ (((varattrib_4b *) (PTR))->va_header = (((uint32) (len)) << 2))
#define SET_VARSIZE_4B_C(PTR,len) \
- (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2) | 0x02)
+ (((varattrib_4b *) (PTR))->va_header = (((uint32) (len)) << 2) | 0x02)
#define SET_VARSIZE_1B(PTR,len) \
(((varattrib_1b *) (PTR))->va_header = (((uint8) (len)) << 1) | 0x01)
#define SET_VARSIZE_1B_E(PTR,len) \
@@ -217,14 +211,10 @@ typedef struct
#define VARHDRSZ_EXTERNAL 2
-#define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data)
-#define VARDATA_4B_C(PTR) (((varattrib_4b *) (PTR))->va_compressed.va_data)
+#define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_data)
#define VARDATA_1B(PTR) (((varattrib_1b *) (PTR))->va_data)
#define VARDATA_1B_E(PTR) (((varattrib_1b_e *) (PTR))->va_data)
-#define VARRAWSIZE_4B_C(PTR) \
- (((varattrib_4b *) (PTR))->va_compressed.va_rawsize)
-
/* Externally visible macros */
/*
diff --git a/src/include/utils/pg_lzcompress.h b/src/include/utils/pg_lzcompress.h
index 4af24a32a4..eaa36e6a97 100644
--- a/src/include/utils/pg_lzcompress.h
+++ b/src/include/utils/pg_lzcompress.h
@@ -10,20 +10,6 @@
#ifndef _PG_LZCOMPRESS_H_
#define _PG_LZCOMPRESS_H_
-
-/* ----------
- * PGLZ_Header -
- *
- * The information at the start of the compressed data.
- * ----------
- */
-typedef struct PGLZ_Header
-{
- int32 vl_len_; /* varlena header (do not touch directly!) */
- int32 rawsize;
-} PGLZ_Header;
-
-
/* ----------
* PGLZ_MAX_OUTPUT -
*
@@ -31,17 +17,7 @@ typedef struct PGLZ_Header
* We allow 4 bytes for overrun before detecting compression failure.
* ----------
*/
-#define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4 + sizeof(PGLZ_Header))
-
-/* ----------
- * PGLZ_RAW_SIZE -
- *
- * Macro to determine the uncompressed data size contained
- * in the entry.
- * ----------
- */
-#define PGLZ_RAW_SIZE(_lzdata) ((_lzdata)->rawsize)
-
+#define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4)
/* ----------
* PGLZ_Strategy -
@@ -88,25 +64,21 @@ typedef struct PGLZ_Strategy
/* ----------
- * The standard strategies
+ * The standard strategy
*
* PGLZ_strategy_default Recommended default strategy for TOAST.
- *
- * PGLZ_strategy_always Try to compress inputs of any length.
- * Fallback to uncompressed storage only if
- * output would be larger than input.
* ----------
*/
extern const PGLZ_Strategy *const PGLZ_strategy_default;
-extern const PGLZ_Strategy *const PGLZ_strategy_always;
/* ----------
* Global function declarations
* ----------
*/
-extern bool pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
+extern int32 pglz_compress(const void *source, int32 slen, void *dest,
const PGLZ_Strategy *strategy);
-extern void pglz_decompress(const PGLZ_Header *source, char *dest);
+extern void pglz_decompress(const void *source, int32 slen, void *dest,
+ int32 dlen);
#endif /* _PG_LZCOMPRESS_H_ */