From 5af0263afd7beaf947e22115b7e9ade000b0387d Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 18 Sep 2023 12:18:33 -0700 Subject: Make binaryheap available to frontend code. There are a couple of places in frontend code that could make use of this simple binary heap implementation. This commit makes binaryheap usable in frontend code, much like commit 26aaf97b68 did for StringInfo. Like StringInfo, the header file is left in lib/ to reduce the likelihood of unnecessary breakage. The frontend version of binaryheap exposes a void *-based API since frontend code does not have access to the Datum definitions. This seemed like a better approach than switching all existing uses to void * or making the Datum definitions available to frontend code. Reviewed-by: Tom Lane, Alvaro Herrera Discussion: https://postgr.es/m/3612876.1689443232%40sss.pgh.pa.us --- src/backend/lib/Makefile | 1 - src/backend/lib/binaryheap.c | 317 ------------------------------------ src/backend/lib/meson.build | 1 - src/common/Makefile | 1 + src/common/binaryheap.c | 336 +++++++++++++++++++++++++++++++++++++++ src/common/meson.build | 1 + src/include/lib/binaryheap.h | 26 ++- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 358 insertions(+), 326 deletions(-) delete mode 100644 src/backend/lib/binaryheap.c create mode 100644 src/common/binaryheap.c diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile index 9dad31398ae..b6cefd9cca0 100644 --- a/src/backend/lib/Makefile +++ b/src/backend/lib/Makefile @@ -13,7 +13,6 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = \ - binaryheap.o \ bipartite_match.o \ bloomfilter.o \ dshash.o \ diff --git a/src/backend/lib/binaryheap.c b/src/backend/lib/binaryheap.c deleted file mode 100644 index 17375467578..00000000000 --- a/src/backend/lib/binaryheap.c +++ /dev/null @@ -1,317 +0,0 @@ -/*------------------------------------------------------------------------- - * - * binaryheap.c - * A simple binary heap implementation - * - * Portions Copyright (c) 2012-2023, PostgreSQL Global Development Group - * - * IDENTIFICATION - * src/backend/lib/binaryheap.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include - -#include "lib/binaryheap.h" - -static void sift_down(binaryheap *heap, int node_off); -static void sift_up(binaryheap *heap, int node_off); - -/* - * binaryheap_allocate - * - * Returns a pointer to a newly-allocated heap that has the capacity to - * store the given number of nodes, with the heap property defined by - * the given comparator function, which will be invoked with the additional - * argument specified by 'arg'. - */ -binaryheap * -binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg) -{ - int sz; - binaryheap *heap; - - sz = offsetof(binaryheap, bh_nodes) + sizeof(Datum) * capacity; - heap = (binaryheap *) palloc(sz); - heap->bh_space = capacity; - heap->bh_compare = compare; - heap->bh_arg = arg; - - heap->bh_size = 0; - heap->bh_has_heap_property = true; - - return heap; -} - -/* - * binaryheap_reset - * - * Resets the heap to an empty state, losing its data content but not the - * parameters passed at allocation. - */ -void -binaryheap_reset(binaryheap *heap) -{ - heap->bh_size = 0; - heap->bh_has_heap_property = true; -} - -/* - * binaryheap_free - * - * Releases memory used by the given binaryheap. - */ -void -binaryheap_free(binaryheap *heap) -{ - pfree(heap); -} - -/* - * These utility functions return the offset of the left child, right - * child, and parent of the node at the given index, respectively. - * - * The heap is represented as an array of nodes, with the root node - * stored at index 0. The left child of node i is at index 2*i+1, and - * the right child at 2*i+2. The parent of node i is at index (i-1)/2. - */ - -static inline int -left_offset(int i) -{ - return 2 * i + 1; -} - -static inline int -right_offset(int i) -{ - return 2 * i + 2; -} - -static inline int -parent_offset(int i) -{ - return (i - 1) / 2; -} - -/* - * binaryheap_add_unordered - * - * Adds the given datum to the end of the heap's list of nodes in O(1) without - * preserving the heap property. This is a convenience to add elements quickly - * to a new heap. To obtain a valid heap, one must call binaryheap_build() - * afterwards. - */ -void -binaryheap_add_unordered(binaryheap *heap, Datum d) -{ - if (heap->bh_size >= heap->bh_space) - elog(ERROR, "out of binary heap slots"); - heap->bh_has_heap_property = false; - heap->bh_nodes[heap->bh_size] = d; - heap->bh_size++; -} - -/* - * binaryheap_build - * - * Assembles a valid heap in O(n) from the nodes added by - * binaryheap_add_unordered(). Not needed otherwise. - */ -void -binaryheap_build(binaryheap *heap) -{ - int i; - - for (i = parent_offset(heap->bh_size - 1); i >= 0; i--) - sift_down(heap, i); - heap->bh_has_heap_property = true; -} - -/* - * binaryheap_add - * - * Adds the given datum to the heap in O(log n) time, while preserving - * the heap property. - */ -void -binaryheap_add(binaryheap *heap, Datum d) -{ - if (heap->bh_size >= heap->bh_space) - elog(ERROR, "out of binary heap slots"); - heap->bh_nodes[heap->bh_size] = d; - heap->bh_size++; - sift_up(heap, heap->bh_size - 1); -} - -/* - * binaryheap_first - * - * Returns a pointer to the first (root, topmost) node in the heap - * without modifying the heap. The caller must ensure that this - * routine is not used on an empty heap. Always O(1). - */ -Datum -binaryheap_first(binaryheap *heap) -{ - Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property); - return heap->bh_nodes[0]; -} - -/* - * binaryheap_remove_first - * - * Removes the first (root, topmost) node in the heap and returns a - * pointer to it after rebalancing the heap. The caller must ensure - * that this routine is not used on an empty heap. O(log n) worst - * case. - */ -Datum -binaryheap_remove_first(binaryheap *heap) -{ - Datum result; - - Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property); - - /* extract the root node, which will be the result */ - result = heap->bh_nodes[0]; - - /* easy if heap contains one element */ - if (heap->bh_size == 1) - { - heap->bh_size--; - return result; - } - - /* - * Remove the last node, placing it in the vacated root entry, and sift - * the new root node down to its correct position. - */ - heap->bh_nodes[0] = heap->bh_nodes[--heap->bh_size]; - sift_down(heap, 0); - - return result; -} - -/* - * binaryheap_replace_first - * - * Replace the topmost element of a non-empty heap, preserving the heap - * property. O(1) in the best case, or O(log n) if it must fall back to - * sifting the new node down. - */ -void -binaryheap_replace_first(binaryheap *heap, Datum d) -{ - Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property); - - heap->bh_nodes[0] = d; - - if (heap->bh_size > 1) - sift_down(heap, 0); -} - -/* - * Sift a node up to the highest position it can hold according to the - * comparator. - */ -static void -sift_up(binaryheap *heap, int node_off) -{ - Datum node_val = heap->bh_nodes[node_off]; - - /* - * Within the loop, the node_off'th array entry is a "hole" that - * notionally holds node_val, but we don't actually store node_val there - * till the end, saving some unnecessary data copying steps. - */ - while (node_off != 0) - { - int cmp; - int parent_off; - Datum parent_val; - - /* - * If this node is smaller than its parent, the heap condition is - * satisfied, and we're done. - */ - parent_off = parent_offset(node_off); - parent_val = heap->bh_nodes[parent_off]; - cmp = heap->bh_compare(node_val, - parent_val, - heap->bh_arg); - if (cmp <= 0) - break; - - /* - * Otherwise, swap the parent value with the hole, and go on to check - * the node's new parent. - */ - heap->bh_nodes[node_off] = parent_val; - node_off = parent_off; - } - /* Re-fill the hole */ - heap->bh_nodes[node_off] = node_val; -} - -/* - * Sift a node down from its current position to satisfy the heap - * property. - */ -static void -sift_down(binaryheap *heap, int node_off) -{ - Datum node_val = heap->bh_nodes[node_off]; - - /* - * Within the loop, the node_off'th array entry is a "hole" that - * notionally holds node_val, but we don't actually store node_val there - * till the end, saving some unnecessary data copying steps. - */ - while (true) - { - int left_off = left_offset(node_off); - int right_off = right_offset(node_off); - int swap_off = 0; - - /* Is the left child larger than the parent? */ - if (left_off < heap->bh_size && - heap->bh_compare(node_val, - heap->bh_nodes[left_off], - heap->bh_arg) < 0) - swap_off = left_off; - - /* Is the right child larger than the parent? */ - if (right_off < heap->bh_size && - heap->bh_compare(node_val, - heap->bh_nodes[right_off], - heap->bh_arg) < 0) - { - /* swap with the larger child */ - if (!swap_off || - heap->bh_compare(heap->bh_nodes[left_off], - heap->bh_nodes[right_off], - heap->bh_arg) < 0) - swap_off = right_off; - } - - /* - * If we didn't find anything to swap, the heap condition is - * satisfied, and we're done. - */ - if (!swap_off) - break; - - /* - * Otherwise, swap the hole with the child that violates the heap - * property; then go on to check its children. - */ - heap->bh_nodes[node_off] = heap->bh_nodes[swap_off]; - node_off = swap_off; - } - /* Re-fill the hole */ - heap->bh_nodes[node_off] = node_val; -} diff --git a/src/backend/lib/meson.build b/src/backend/lib/meson.build index 974cab87763..b4e88f54ae2 100644 --- a/src/backend/lib/meson.build +++ b/src/backend/lib/meson.build @@ -1,7 +1,6 @@ # Copyright (c) 2022-2023, PostgreSQL Global Development Group backend_sources += files( - 'binaryheap.c', 'bipartite_match.c', 'bloomfilter.c', 'dshash.c', diff --git a/src/common/Makefile b/src/common/Makefile index 113029bf7b9..cc5c54dcee4 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -48,6 +48,7 @@ LIBS += $(PTHREAD_LIBS) OBJS_COMMON = \ archive.o \ base64.o \ + binaryheap.o \ checksum_helper.o \ compression.o \ config_info.o \ diff --git a/src/common/binaryheap.c b/src/common/binaryheap.c new file mode 100644 index 00000000000..39a8243a6d5 --- /dev/null +++ b/src/common/binaryheap.c @@ -0,0 +1,336 @@ +/*------------------------------------------------------------------------- + * + * binaryheap.c + * A simple binary heap implementation + * + * Portions Copyright (c) 2012-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/binaryheap.c + * + *------------------------------------------------------------------------- + */ + +#ifdef FRONTEND +#include "postgres_fe.h" +#else +#include "postgres.h" +#endif + +#include + +#ifdef FRONTEND +#include "common/logging.h" +#endif +#include "lib/binaryheap.h" + +static void sift_down(binaryheap *heap, int node_off); +static void sift_up(binaryheap *heap, int node_off); + +/* + * binaryheap_allocate + * + * Returns a pointer to a newly-allocated heap that has the capacity to + * store the given number of nodes, with the heap property defined by + * the given comparator function, which will be invoked with the additional + * argument specified by 'arg'. + */ +binaryheap * +binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg) +{ + int sz; + binaryheap *heap; + + sz = offsetof(binaryheap, bh_nodes) + sizeof(bh_node_type) * capacity; + heap = (binaryheap *) palloc(sz); + heap->bh_space = capacity; + heap->bh_compare = compare; + heap->bh_arg = arg; + + heap->bh_size = 0; + heap->bh_has_heap_property = true; + + return heap; +} + +/* + * binaryheap_reset + * + * Resets the heap to an empty state, losing its data content but not the + * parameters passed at allocation. + */ +void +binaryheap_reset(binaryheap *heap) +{ + heap->bh_size = 0; + heap->bh_has_heap_property = true; +} + +/* + * binaryheap_free + * + * Releases memory used by the given binaryheap. + */ +void +binaryheap_free(binaryheap *heap) +{ + pfree(heap); +} + +/* + * These utility functions return the offset of the left child, right + * child, and parent of the node at the given index, respectively. + * + * The heap is represented as an array of nodes, with the root node + * stored at index 0. The left child of node i is at index 2*i+1, and + * the right child at 2*i+2. The parent of node i is at index (i-1)/2. + */ + +static inline int +left_offset(int i) +{ + return 2 * i + 1; +} + +static inline int +right_offset(int i) +{ + return 2 * i + 2; +} + +static inline int +parent_offset(int i) +{ + return (i - 1) / 2; +} + +/* + * binaryheap_add_unordered + * + * Adds the given datum to the end of the heap's list of nodes in O(1) without + * preserving the heap property. This is a convenience to add elements quickly + * to a new heap. To obtain a valid heap, one must call binaryheap_build() + * afterwards. + */ +void +binaryheap_add_unordered(binaryheap *heap, bh_node_type d) +{ + if (heap->bh_size >= heap->bh_space) + { +#ifdef FRONTEND + pg_fatal("out of binary heap slots"); +#else + elog(ERROR, "out of binary heap slots"); +#endif + } + heap->bh_has_heap_property = false; + heap->bh_nodes[heap->bh_size] = d; + heap->bh_size++; +} + +/* + * binaryheap_build + * + * Assembles a valid heap in O(n) from the nodes added by + * binaryheap_add_unordered(). Not needed otherwise. + */ +void +binaryheap_build(binaryheap *heap) +{ + int i; + + for (i = parent_offset(heap->bh_size - 1); i >= 0; i--) + sift_down(heap, i); + heap->bh_has_heap_property = true; +} + +/* + * binaryheap_add + * + * Adds the given datum to the heap in O(log n) time, while preserving + * the heap property. + */ +void +binaryheap_add(binaryheap *heap, bh_node_type d) +{ + if (heap->bh_size >= heap->bh_space) + { +#ifdef FRONTEND + pg_fatal("out of binary heap slots"); +#else + elog(ERROR, "out of binary heap slots"); +#endif + } + heap->bh_nodes[heap->bh_size] = d; + heap->bh_size++; + sift_up(heap, heap->bh_size - 1); +} + +/* + * binaryheap_first + * + * Returns a pointer to the first (root, topmost) node in the heap + * without modifying the heap. The caller must ensure that this + * routine is not used on an empty heap. Always O(1). + */ +bh_node_type +binaryheap_first(binaryheap *heap) +{ + Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property); + return heap->bh_nodes[0]; +} + +/* + * binaryheap_remove_first + * + * Removes the first (root, topmost) node in the heap and returns a + * pointer to it after rebalancing the heap. The caller must ensure + * that this routine is not used on an empty heap. O(log n) worst + * case. + */ +bh_node_type +binaryheap_remove_first(binaryheap *heap) +{ + bh_node_type result; + + Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property); + + /* extract the root node, which will be the result */ + result = heap->bh_nodes[0]; + + /* easy if heap contains one element */ + if (heap->bh_size == 1) + { + heap->bh_size--; + return result; + } + + /* + * Remove the last node, placing it in the vacated root entry, and sift + * the new root node down to its correct position. + */ + heap->bh_nodes[0] = heap->bh_nodes[--heap->bh_size]; + sift_down(heap, 0); + + return result; +} + +/* + * binaryheap_replace_first + * + * Replace the topmost element of a non-empty heap, preserving the heap + * property. O(1) in the best case, or O(log n) if it must fall back to + * sifting the new node down. + */ +void +binaryheap_replace_first(binaryheap *heap, bh_node_type d) +{ + Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property); + + heap->bh_nodes[0] = d; + + if (heap->bh_size > 1) + sift_down(heap, 0); +} + +/* + * Sift a node up to the highest position it can hold according to the + * comparator. + */ +static void +sift_up(binaryheap *heap, int node_off) +{ + bh_node_type node_val = heap->bh_nodes[node_off]; + + /* + * Within the loop, the node_off'th array entry is a "hole" that + * notionally holds node_val, but we don't actually store node_val there + * till the end, saving some unnecessary data copying steps. + */ + while (node_off != 0) + { + int cmp; + int parent_off; + bh_node_type parent_val; + + /* + * If this node is smaller than its parent, the heap condition is + * satisfied, and we're done. + */ + parent_off = parent_offset(node_off); + parent_val = heap->bh_nodes[parent_off]; + cmp = heap->bh_compare(node_val, + parent_val, + heap->bh_arg); + if (cmp <= 0) + break; + + /* + * Otherwise, swap the parent value with the hole, and go on to check + * the node's new parent. + */ + heap->bh_nodes[node_off] = parent_val; + node_off = parent_off; + } + /* Re-fill the hole */ + heap->bh_nodes[node_off] = node_val; +} + +/* + * Sift a node down from its current position to satisfy the heap + * property. + */ +static void +sift_down(binaryheap *heap, int node_off) +{ + bh_node_type node_val = heap->bh_nodes[node_off]; + + /* + * Within the loop, the node_off'th array entry is a "hole" that + * notionally holds node_val, but we don't actually store node_val there + * till the end, saving some unnecessary data copying steps. + */ + while (true) + { + int left_off = left_offset(node_off); + int right_off = right_offset(node_off); + int swap_off = 0; + + /* Is the left child larger than the parent? */ + if (left_off < heap->bh_size && + heap->bh_compare(node_val, + heap->bh_nodes[left_off], + heap->bh_arg) < 0) + swap_off = left_off; + + /* Is the right child larger than the parent? */ + if (right_off < heap->bh_size && + heap->bh_compare(node_val, + heap->bh_nodes[right_off], + heap->bh_arg) < 0) + { + /* swap with the larger child */ + if (!swap_off || + heap->bh_compare(heap->bh_nodes[left_off], + heap->bh_nodes[right_off], + heap->bh_arg) < 0) + swap_off = right_off; + } + + /* + * If we didn't find anything to swap, the heap condition is + * satisfied, and we're done. + */ + if (!swap_off) + break; + + /* + * Otherwise, swap the hole with the child that violates the heap + * property; then go on to check its children. + */ + heap->bh_nodes[node_off] = heap->bh_nodes[swap_off]; + node_off = swap_off; + } + /* Re-fill the hole */ + heap->bh_nodes[node_off] = node_val; +} diff --git a/src/common/meson.build b/src/common/meson.build index 53942a9a61c..3b97497d1a9 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -3,6 +3,7 @@ common_sources = files( 'archive.c', 'base64.c', + 'binaryheap.c', 'checksum_helper.c', 'compression.c', 'controldata_utils.c', diff --git a/src/include/lib/binaryheap.h b/src/include/lib/binaryheap.h index 52f7b06b253..3647aeae657 100644 --- a/src/include/lib/binaryheap.h +++ b/src/include/lib/binaryheap.h @@ -11,11 +11,23 @@ #ifndef BINARYHEAP_H #define BINARYHEAP_H +/* + * We provide a Datum-based API for backend code and a void *-based API for + * frontend code (since the Datum definitions are not available to frontend + * code). You should typically avoid using bh_node_type directly and instead + * use Datum or void * as appropriate. + */ +#ifdef FRONTEND +typedef void *bh_node_type; +#else +typedef Datum bh_node_type; +#endif + /* * For a max-heap, the comparator must return <0 iff a < b, 0 iff a == b, * and >0 iff a > b. For a min-heap, the conditions are reversed. */ -typedef int (*binaryheap_comparator) (Datum a, Datum b, void *arg); +typedef int (*binaryheap_comparator) (bh_node_type a, bh_node_type b, void *arg); /* * binaryheap @@ -34,7 +46,7 @@ typedef struct binaryheap bool bh_has_heap_property; /* debugging cross-check */ binaryheap_comparator bh_compare; void *bh_arg; - Datum bh_nodes[FLEXIBLE_ARRAY_MEMBER]; + bh_node_type bh_nodes[FLEXIBLE_ARRAY_MEMBER]; } binaryheap; extern binaryheap *binaryheap_allocate(int capacity, @@ -42,12 +54,12 @@ extern binaryheap *binaryheap_allocate(int capacity, void *arg); extern void binaryheap_reset(binaryheap *heap); extern void binaryheap_free(binaryheap *heap); -extern void binaryheap_add_unordered(binaryheap *heap, Datum d); +extern void binaryheap_add_unordered(binaryheap *heap, bh_node_type d); extern void binaryheap_build(binaryheap *heap); -extern void binaryheap_add(binaryheap *heap, Datum d); -extern Datum binaryheap_first(binaryheap *heap); -extern Datum binaryheap_remove_first(binaryheap *heap); -extern void binaryheap_replace_first(binaryheap *heap, Datum d); +extern void binaryheap_add(binaryheap *heap, bh_node_type d); +extern bh_node_type binaryheap_first(binaryheap *heap); +extern bh_node_type binaryheap_remove_first(binaryheap *heap); +extern void binaryheap_replace_first(binaryheap *heap, bh_node_type d); #define binaryheap_empty(h) ((h)->bh_size == 0) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index f3d8a2a855c..b5bbdd1608c 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3198,6 +3198,7 @@ bbstreamer_tar_archiver bbstreamer_tar_parser bbstreamer_zstd_frame bgworker_main_type +bh_node_type binaryheap binaryheap_comparator bitmapword -- cgit v1.2.3