Skip to content

Commit 2cc0b91

Browse files
Nikhil Kumar VeldandaCommitfest Bot
Nikhil Kumar Veldanda
authored and
Commitfest Bot
committed
varattrib_4b design proposal to make it extended to support multiple compression algorithms.
1 parent 368c3fb commit 2cc0b91

File tree

11 files changed

+171
-45
lines changed

11 files changed

+171
-45
lines changed

contrib/amcheck/verify_heapam.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -1786,7 +1786,8 @@ check_tuple_attribute(HeapCheckContext *ctx)
17861786
bool valid = false;
17871787

17881788
/* Compressed attributes should have a valid compression method */
1789-
cmid = TOAST_COMPRESS_METHOD(&toast_pointer);
1789+
cmid = toast_get_compression_id(attr);
1790+
17901791
switch (cmid)
17911792
{
17921793
/* List of all valid compression method IDs */

src/backend/access/brin/brin_tuple.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
223223
{
224224
Datum cvalue;
225225
char compression;
226+
CompressionInfo cmp;
226227
Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc,
227228
keyno);
228229

@@ -237,7 +238,8 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
237238
else
238239
compression = InvalidCompressionMethod;
239240

240-
cvalue = toast_compress_datum(value, compression);
241+
cmp = setup_compression_info(compression, att);
242+
cvalue = toast_compress_datum(value, cmp);
241243

242244
if (DatumGetPointer(cvalue) != NULL)
243245
{

src/backend/access/common/detoast.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ toast_decompress_datum(struct varlena *attr)
478478
* Fetch the compression method id stored in the compression header and
479479
* decompress the data using the appropriate decompression routine.
480480
*/
481-
cmid = TOAST_COMPRESS_METHOD(attr);
481+
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
482482
switch (cmid)
483483
{
484484
case TOAST_PGLZ_COMPRESSION_ID:
@@ -514,14 +514,14 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
514514
* have been seen to give wrong results if passed an output size that is
515515
* more than the data's true decompressed size.
516516
*/
517-
if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr))
517+
if ((uint32) slicelength >= VARDATA_COMPRESSED_GET_EXTSIZE(attr))
518518
return toast_decompress_datum(attr);
519519

520520
/*
521521
* Fetch the compression method id stored in the compression header and
522522
* decompress the data slice using the appropriate decompression routine.
523523
*/
524-
cmid = TOAST_COMPRESS_METHOD(attr);
524+
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
525525
switch (cmid)
526526
{
527527
case TOAST_PGLZ_COMPRESSION_ID:

src/backend/access/common/indextuple.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,10 @@ index_form_tuple_context(TupleDesc tupleDescriptor,
123123
att->attstorage == TYPSTORAGE_MAIN))
124124
{
125125
Datum cvalue;
126+
CompressionInfo cmp;
126127

127-
cvalue = toast_compress_datum(untoasted_values[i],
128-
att->attcompression);
128+
cmp = setup_compression_info(att->attcompression, att);
129+
cvalue = toast_compress_datum(untoasted_values[i], cmp);
129130

130131
if (DatumGetPointer(cvalue) != NULL)
131132
{

src/backend/access/common/toast_compression.c

+25-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,9 @@ toast_get_compression_id(struct varlena *attr)
266266

267267
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
268268

269-
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
269+
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) > TOAST_LAST_COMPRESSION_ID_BEFORE_EXT)
270+
cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(detoast_external_attr(attr));
271+
else
270272
cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
271273
}
272274
else if (VARATT_IS_COMPRESSED(attr))
@@ -314,3 +316,25 @@ GetCompressionMethodName(char method)
314316
return NULL; /* keep compiler quiet */
315317
}
316318
}
319+
320+
CompressionInfo
321+
setup_compression_info(char cmethod, Form_pg_attribute att)
322+
{
323+
CompressionInfo info;
324+
325+
/* initialize from the attribute’s default settings */
326+
info.cmethod = cmethod;
327+
info.cmp_ext = NULL;
328+
329+
if (!CompressionMethodIsValid(cmethod))
330+
info.cmethod = default_toast_compression;
331+
332+
return info;
333+
}
334+
335+
void
336+
free_compression_info(CompressionInfo *info)
337+
{
338+
if (info->cmp_ext != NULL)
339+
pfree(info->cmp_ext);
340+
}

src/backend/access/common/toast_internals.c

+10-8
Original file line numberDiff line numberDiff line change
@@ -43,25 +43,22 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
4343
* ----------
4444
*/
4545
Datum
46-
toast_compress_datum(Datum value, char cmethod)
46+
toast_compress_datum(Datum value, CompressionInfo cmp)
4747
{
4848
struct varlena *tmp = NULL;
4949
int32 valsize;
5050
ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
51+
varatt_cmp_extended *cmp_ext = cmp.cmp_ext;
5152

5253
Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
5354
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
5455

5556
valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
5657

57-
/* If the compression method is not valid, use the current default */
58-
if (!CompressionMethodIsValid(cmethod))
59-
cmethod = default_toast_compression;
60-
6158
/*
6259
* Call appropriate compression routine for the compression method.
6360
*/
64-
switch (cmethod)
61+
switch (cmp.cmethod)
6562
{
6663
case TOAST_PGLZ_COMPRESSION:
6764
tmp = pglz_compress_datum((const struct varlena *) value);
@@ -72,11 +69,14 @@ toast_compress_datum(Datum value, char cmethod)
7269
cmid = TOAST_LZ4_COMPRESSION_ID;
7370
break;
7471
default:
75-
elog(ERROR, "invalid compression method %c", cmethod);
72+
elog(ERROR, "invalid compression method %c", cmp.cmethod);
7673
}
7774

7875
if (tmp == NULL)
76+
{
77+
free_compression_info(&cmp);
7978
return PointerGetDatum(NULL);
79+
}
8080

8181
/*
8282
* We recheck the actual size even if compression reports success, because
@@ -92,13 +92,15 @@ toast_compress_datum(Datum value, char cmethod)
9292
{
9393
/* successful compression */
9494
Assert(cmid != TOAST_INVALID_COMPRESSION_ID);
95-
TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid);
95+
TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid, cmp_ext);
96+
free_compression_info(&cmp);
9697
return PointerGetDatum(tmp);
9798
}
9899
else
99100
{
100101
/* incompressible data */
101102
pfree(tmp);
103+
free_compression_info(&cmp);
102104
return PointerGetDatum(NULL);
103105
}
104106
}

src/backend/access/table/toast_helper.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,10 @@ toast_tuple_try_compression(ToastTupleContext *ttc, int attribute)
229229
Datum *value = &ttc->ttc_values[attribute];
230230
Datum new_value;
231231
ToastAttrInfo *attr = &ttc->ttc_attr[attribute];
232+
Form_pg_attribute att = TupleDescAttr(ttc->ttc_rel->rd_att, attribute);
233+
CompressionInfo cmp = setup_compression_info(attr->tai_compression, att);
232234

233-
new_value = toast_compress_datum(*value, attr->tai_compression);
235+
new_value = toast_compress_datum(*value, cmp);
234236

235237
if (DatumGetPointer(new_value) != NULL)
236238
{

src/include/access/toast_compression.h

+32-12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#ifndef TOAST_COMPRESSION_H
1414
#define TOAST_COMPRESSION_H
1515

16+
#include "varatt.h"
17+
#include "toast_compression.h"
18+
#include "catalog/pg_attribute.h"
19+
1620
/*
1721
* GUC support.
1822
*
@@ -23,24 +27,38 @@
2327
extern PGDLLIMPORT int default_toast_compression;
2428

2529
/*
26-
* Built-in compression method ID. The toast compression header will store
27-
* this in the first 2 bits of the raw length. These built-in compression
28-
* method IDs are directly mapped to the built-in compression methods.
30+
* TOAST compression methods enumeration.
31+
*
32+
* Each entry defines:
33+
* - NAME : identifier for the compression algorithm
34+
* - VALUE : numeric enum value
35+
* - METADATA type: struct type holding extra info (void when none)
2936
*
30-
* Don't use these values for anything other than understanding the meaning
31-
* of the raw bits from a varlena; in particular, if the goal is to identify
32-
* a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc.
33-
* below. We might someday support more than 4 compression methods, but
34-
* we can never have more than 4 values in this enum, because there are
35-
* only 2 bits available in the places where this is stored.
37+
* The INVALID entry is a sentinel and must remain last.
3638
*/
39+
/*
 * One X(NAME, VALUE, METADATA type) row per compression method:
 *
 *		PGLZ	- PostgreSQL LZ-based compression
 *		LZ4		- LZ4 algorithm
 *		INVALID	- sentinel, must be last
 */
#define TOAST_COMPRESSION_LIST \
	X(PGLZ, 0, void) \
	X(LZ4, 1, void) \
	X(INVALID, 2, void)
43+
44+
3745
typedef enum ToastCompressionId
3846
{
39-
TOAST_PGLZ_COMPRESSION_ID = 0,
40-
TOAST_LZ4_COMPRESSION_ID = 1,
41-
TOAST_INVALID_COMPRESSION_ID = 2,
47+
#define X(name,val,struct) TOAST_##name##_COMPRESSION_ID = (val),
48+
TOAST_COMPRESSION_LIST
49+
#undef X
4250
} ToastCompressionId;
4351

52+
#define TOAST_LAST_COMPRESSION_ID_BEFORE_EXT TOAST_LZ4_COMPRESSION_ID
53+
54+
typedef struct CompressionInfo
55+
{
56+
char cmethod;
57+
/* Extended compression meta info */
58+
varatt_cmp_extended *cmp_ext;
59+
} CompressionInfo;
60+
61+
4462
/*
4563
* Built-in compression methods. pg_attribute will store these in the
4664
* attcompression column. In attcompression, InvalidCompressionMethod
@@ -69,5 +87,7 @@ extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value,
6987
extern ToastCompressionId toast_get_compression_id(struct varlena *attr);
7088
extern char CompressionNameToMethod(const char *compression);
7189
extern const char *GetCompressionMethodName(char method);
90+
extern CompressionInfo setup_compression_info(char cmethod, Form_pg_attribute att);
91+
extern void free_compression_info(CompressionInfo *info);
7292

7393
#endif /* TOAST_COMPRESSION_H */

src/include/access/toast_internals.h

+18-13
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,26 @@ typedef struct toast_compress_header
3131
* Utilities for manipulation of header information for compressed
3232
* toast entries.
3333
*/
34-
#define TOAST_COMPRESS_EXTSIZE(ptr) \
35-
(((toast_compress_header *) (ptr))->tcinfo & VARLENA_EXTSIZE_MASK)
36-
#define TOAST_COMPRESS_METHOD(ptr) \
37-
(((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS)
38-
39-
#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \
40-
do { \
41-
Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \
42-
Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \
43-
(cm_method) == TOAST_LZ4_COMPRESSION_ID); \
44-
((toast_compress_header *) (ptr))->tcinfo = \
45-
(len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \
34+
/*
 * TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD
 *		Record the size and the compression method in the tcinfo word of a
 *		compressed datum's toast_compress_header.
 *
 * ptr		 - start of the compressed datum
 * len		 - size to store; must be > 0 and fit in VARLENA_EXTSIZE_MASK
 * cm_method - ToastCompressionId of the method used
 * cmp_ext	 - varatt_cmp_extended pointer holding the extended header and its
 *			   metadata; only dereferenced for method IDs above
 *			   TOAST_LAST_COMPRESSION_ID_BEFORE_EXT
 *
 * For IDs up to TOAST_LAST_COMPRESSION_ID_BEFORE_EXT the ID itself is stored
 * in the bits above VARLENA_EXTSIZE_BITS.  For later IDs those bits are set
 * to VARATT_4BCE_MASK and the extended header plus metadata is copied to
 * VARATT_4BCE_HDR_PTR(ptr).
 *
 * Every macro argument is parenthesized where it is used, so expressions
 * such as "&ext" can be passed for cmp_ext.
 *
 * NOTE(review): the second Assert admits only the pglz and lz4 IDs, so in
 * assert-enabled builds the extended branch cannot be reached yet; it will
 * need widening once an extended method is added.
 */
#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method, cmp_ext) \
	do { \
		Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \
		Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \
			   (cm_method) == TOAST_LZ4_COMPRESSION_ID); \
		if ((cm_method) <= TOAST_LAST_COMPRESSION_ID_BEFORE_EXT) \
		{ \
			((toast_compress_header *) (ptr))->tcinfo = \
				(len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \
		} \
		else \
		{ \
			/* 11 in the top bits of tcinfo: method is in the extended header */ \
			((toast_compress_header *) (ptr))->tcinfo = \
				(len) | ((uint32) (VARATT_4BCE_MASK) << VARLENA_EXTSIZE_BITS); \
			Assert((cmp_ext) != NULL); \
			memcpy(VARATT_4BCE_HDR_PTR(ptr), (cmp_ext), \
				   sizeof(varatt_cmp_extended) + \
				   VARATT_4BCE_META_SIZE((cmp_ext)->ext_hdr)); \
		} \
	} while (0)
4752

48-
extern Datum toast_compress_datum(Datum value, char cmethod);
53+
extern Datum toast_compress_datum(Datum value, CompressionInfo cmp);
4954
extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock);
5055

5156
extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative);

src/include/varatt.h

+70-3
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,8 @@ typedef struct
328328
#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \
329329
(((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK)
330330
#define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \
331-
(((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS)
331+
( (VARATT_IS_4BCE(PTR)) ? (VARATT_4BCE_CMP_METHOD(VARATT_4BCE_HDR_PTR(PTR)->ext_hdr)) \
332+
: (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS))
332333

333334
/* Same for external Datums; but note argument is a struct varatt_external */
334335
#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \
@@ -340,8 +341,17 @@ typedef struct
340341
do { \
341342
Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \
342343
(cm) == TOAST_LZ4_COMPRESSION_ID); \
343-
((toast_pointer).va_extinfo = \
344-
(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \
344+
if ((cm) <= TOAST_LAST_COMPRESSION_ID_BEFORE_EXT) \
345+
{ \
346+
/* Store the actual method in va_extinfo */ \
347+
((toast_pointer).va_extinfo = \
348+
(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \
349+
} \
350+
else \
351+
{ \
352+
/* Store 11 in the top bits, meaning "extended" method. */ \
353+
(toast_pointer).va_extinfo = (uint32)(len) | (VARATT_4BCE_MASK << VARLENA_EXTSIZE_BITS ); \
354+
} \
345355
} while (0)
346356

347357
/*
@@ -355,4 +365,61 @@ typedef struct
355365
(VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \
356366
(toast_pointer).va_rawsize - VARHDRSZ)
357367

368+
typedef struct varatt_cmp_extended
369+
{
370+
uint32 ext_hdr; /* [ size:24 | type:8 ] */
371+
char ext_data[FLEXIBLE_ARRAY_MEMBER]; /* algorithm-specific meta */
372+
} varatt_cmp_extended;
373+
374+
/*--------------------------------------------------------------------*/
/*
 * 1) Detect the extended compression.
 *
 * A datum uses the extended format when the top two mode bits of va_tcinfo
 * (the bits above VARLENA_EXTSIZE_BITS) are both set, i.e. 0b11.
 *
 * The mask is an unsigned constant on purpose: it gets shifted left by
 * VARLENA_EXTSIZE_BITS when headers are built, and shifting a plain int 3
 * that far would move a set bit into the sign bit, which is undefined
 * behavior in C.  (Assumes the mode bits are the top two bits of a 32-bit
 * word, as stated above -- confirm against VARLENA_EXTSIZE_BITS.)
 */
#define VARATT_4BCE_MASK 0x0003U

#define VARATT_IS_4BCE(ptr) \
	((((varattrib_4b *) (ptr))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) \
	 == VARATT_4BCE_MASK)
382+
383+
/*--------------------------------------------------------------------*/
/*
 * 2) Locating the extended header.
 *
 * The varatt_cmp_extended header starts VARHDRSZ_COMPRESSED bytes into the
 * datum, i.e. just after the 8-byte varlena head.  VARATT_4BCE_GET_HDR
 * fetches its 32-bit ext_hdr word.
 */
#define VARATT_4BCE_HDR_PTR(ptr) \
	((varatt_cmp_extended *) (((char *) (ptr)) + VARHDRSZ_COMPRESSED))

#define VARATT_4BCE_GET_HDR(ptr) \
	((uint32) (VARATT_4BCE_HDR_PTR(ptr)->ext_hdr))
387+
388+
/*--------------------------------------------------------------------*/
/*
 * 3) The 32-bit ext_hdr word.
 *
 *		high-order 24 bits	size in bytes of the algorithm-specific metadata
 *		low-order 8 bits	compression method (type)
 */
#define VARATT_4BCE_TYPE_MASK	0x000000FF
#define VARATT_4BCE_SIZE_MASK	0xFFFFFF00

/* Pull the individual fields back out of an ext_hdr value. */
#define VARATT_4BCE_CMP_METHOD(hdr) \
	((uint8) ((hdr) & VARATT_4BCE_TYPE_MASK))
#define VARATT_4BCE_META_SIZE(hdr) \
	(((hdr) & VARATT_4BCE_SIZE_MASK) >> 8)

/* Compose an ext_hdr value; type must fit in 8 bits, size24 in 24 bits. */
#define VARATT_4BCE_SET_HDR(hdr, type, size24) \
	do { \
		Assert((uint32) (type) <= VARATT_4BCE_TYPE_MASK); \
		Assert((uint32) (size24) <= (VARATT_4BCE_SIZE_MASK >> 8)); \
		(hdr) = ((uint32) (type)) | ((uint32) (size24) << 8); \
	} while (0)
403+
404+
/*--------------------------------------------------------------------*/
/*
 * 4) Derived helpers for moving around inside the extension block.
 *
 * META_PTR, DATA_PTR and DATA_SIZE take a pointer to the whole datum;
 * HDRSZ takes a pointer to a varatt_cmp_extended instead.
 */

/* Metadata starts right after the 4-byte ext header. */
#define VARATT_4BCE_META_PTR(ptr) \
	((void *) VARATT_4BCE_HDR_PTR(ptr)->ext_data)

/* Compressed bytes start where the metadata ends. */
#define VARATT_4BCE_DATA_PTR(ptr) \
	((void *) ((char *) VARATT_4BCE_META_PTR(ptr) + \
			   VARATT_4BCE_META_SIZE(VARATT_4BCE_HDR_PTR(ptr)->ext_hdr)))

/* Payload byte count: total size minus both headers and the metadata. */
#define VARATT_4BCE_DATA_SIZE(ptr) \
	(VARSIZE_4B(ptr) \
	 - VARHDRSZ_COMPRESSED \
	 - sizeof(varatt_cmp_extended) \
	 - VARATT_4BCE_META_SIZE(VARATT_4BCE_HDR_PTR(ptr)->ext_hdr))

/* Combined size of both headers plus metadata, given the extended header. */
#define VARATT_4BCE_HDRSZ(ptr) \
	(VARHDRSZ_COMPRESSED + sizeof(varatt_cmp_extended) + \
	 VARATT_4BCE_META_SIZE((ptr)->ext_hdr))
424+
358425
#endif

src/tools/pgindent/typedefs.list

+2
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ CompositeIOData
482482
CompositeTypeStmt
483483
CompoundAffixFlag
484484
CompressFileHandle
485+
CompressionInfo
485486
CompressionLocation
486487
CompressorState
487488
ComputeXidHorizonsResult
@@ -4153,6 +4154,7 @@ uuid_t
41534154
va_list
41544155
vacuumingOptions
41554156
validate_string_relopt
4157+
varatt_cmp_extended
41564158
varatt_expanded
41574159
varattrib_1b
41584160
varattrib_1b_e

0 commit comments

Comments
 (0)