From a65b791bbb78cbca3d8c0a09f2cd66f2563c3664 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Fri, 3 Nov 2017 02:20:06 +1300
Subject: [PATCH] Basic implementation of array lists (AList)

ALists do roughly the same job as List but are implemented using Arrays
rather than linked lists. Looping over these lists should be faster than
linked lists and they're particularly useful when the size of the list is
known before any items are added.  Fetching the Nth element in an AList is
an O(1) operation rather than O(n) as it is in the List type.

Array lists are not intended as a direct replacement for List. List will
still be useful in cases when good performance is required to delete items
from the middle of a list.

Much is still to do here. This is only the basic implementation. Likely a
more complete version will contain functions for alist_concat and
alist_copy. However, what we have here will be good enough to test the
performance of these vs List for many of today's usages.
---
 src/backend/nodes/Makefile            |   7 +-
 src/backend/nodes/arraylist.c         | 199 ++++++++++++++++++++++++++++++++++
 src/backend/optimizer/plan/planmain.c |   1 +
 src/include/nodes/arraylist.h         | 156 ++++++++++++++++++++++++++
 src/include/nodes/nodes.h             |   7 ++
 src/include/nodes/primnodes.h         |   1 +
 6 files changed, 368 insertions(+), 3 deletions(-)
 create mode 100644 src/backend/nodes/arraylist.c
 create mode 100644 src/include/nodes/arraylist.h

diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile
index 0b1e98c..c230653 100644
--- a/src/backend/nodes/Makefile
+++ b/src/backend/nodes/Makefile
@@ -12,8 +12,9 @@ subdir = src/backend/nodes
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = nodeFuncs.o nodes.o list.o bitmapset.o tidbitmap.o \
-       copyfuncs.o equalfuncs.o extensible.o makefuncs.o \
-       outfuncs.o readfuncs.o print.o read.o params.o value.o
+OBJS = nodeFuncs.o nodes.o arraylist.o list.o bitmapset.o \
+       tidbitmap.o copyfuncs.o equalfuncs.o extensible.o \
+       makefuncs.o outfuncs.o readfuncs.o print.o read.o params.o \
+       value.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/nodes/arraylist.c b/src/backend/nodes/arraylist.c
new file mode 100644
index 0000000..c9ff474
--- /dev/null
+++ b/src/backend/nodes/arraylist.c
@@ -0,0 +1,199 @@
+/*-------------------------------------------------------------------------
+ *
+ * arraylist.c
+ *	  implementation for PostgreSQL generic array list package
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/nodes/arraylist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "nodes/arraylist.h"
+#include "nodes/bitmapset.h"
+
+/*
+ * Initial number of elements to allocate for new lists where the required
+ * size is not specified
+ */
+#define ALIST_INIT_SIZE 16
+
+
+static inline AList *alist_make_size_internal(NodeTag type, int size);
+static inline AList *alist_add_precheck(AList *list, NodeTag type);
+
+/************************************************************
+ * Local functions
+ ************************************************************/
+/* Build an empty AList tagged 'type' whose array has room for 'size' items */
+static inline AList *
+alist_make_size_internal(NodeTag type, int size)
+{
+	AList	   *result = (AList *) palloc(sizeof(AList));
+
+	/* allocate the array up front; nothing is stored in it yet */
+	result->items = (AListItem *) palloc(size * sizeof(AListItem));
+	result->size = size;
+	result->count = 0;
+	result->type = type;
+	return result;
+}
+
+/*
+ * Ensure 'list' can take one more 'type' item; a NULL list is created.
+ */
+static inline AList *
+alist_add_precheck(AList *list, NodeTag type)
+{
+	if (list == NULL)
+		return alist_make_size_internal(type, ALIST_INIT_SIZE);
+
+	Assert(list->type == type);
+	if (list->count >= list->size)
+	{
+		/* XXX just double for now; a zero-size list must still grow */
+		list->size = (list->size > 0) ? list->size * 2 : ALIST_INIT_SIZE;
+		list->items = (AListItem *) repalloc(list->items,
+											 sizeof(AListItem) * list->size);
+	}
+	return list;
+}
+
+/************************************************************
+ * External functions
+ ************************************************************/
+
+/*
+ * alist_premake
+ *		Create an empty list with room for 'size' pointer items, so that
+ *		a caller who knows the final length avoids any later resizing.
+ */
+AList *
+alist_premake(int size)
+{
+	return alist_make_size_internal(T_ArrayList, size);
+}
+
+/* As alist_premake, but the list is tagged to hold ints */
+AList *
+alist_premake_int(int size)
+{
+	return alist_make_size_internal(T_IntArrayList, size);
+}
+
+/* As alist_premake, but the list is tagged to hold Oids */
+AList *
+alist_premake_oid(int size)
+{
+	return alist_make_size_internal(T_OidArrayList, size);
+}
+
+/*
+ * alist_add, alist_add_int, alist_add_oid
+ *		Append 'datum' to the end of 'list'.  Passing NULL starts a new list
+ *		of the default size, and a full list is enlarged first.  Callers
+ *		must carry on with the returned pointer.
+ */
+AList *
+alist_add(AList *list, void *datum)
+{
+	AList	   *target = alist_add_precheck(list, T_ArrayList);
+	target->items[target->count++].data.ptr_value = datum;
+	return target;
+}
+
+AList *
+alist_add_int(AList *list, int datum)
+{
+	AList	   *target = alist_add_precheck(list, T_IntArrayList);
+	target->items[target->count++].data.int_value = datum;
+	return target;
+}
+
+AList *
+alist_add_oid(AList *list, Oid datum)
+{
+	AList	   *target = alist_add_precheck(list, T_OidArrayList);
+	target->items[target->count++].data.oid_value = datum;
+	return target;
+}
+
+/*
+ * alist_delete
+ *		Bulk delete items by 0-based index; each member of del_items marks
+ *		one item to drop.  Survivors keep their order; NULL list is a no-op.
+ */
+static AList *
+alist_delete(AList *list, Bitmapset *del_items)
+{
+	int			src;
+	int			dst = 0;
+
+	/* Nothing to do for a NULL list or when no items are marked */
+	if (list == NULL || bms_is_empty(del_items))
+		return list;
+
+	/* Slide each surviving item down over the gaps left by deleted ones */
+	for (src = 0; src < list->count; src++)
+	{
+		if (!bms_is_member(src, del_items))
+			list->items[dst++] = list->items[src];
+	}
+
+	/* 'dst' is now the number of items kept; the array is not shrunk */
+	list->count = dst;
+	return list;
+}
+
+/* Demo of the AList API: build, iterate, bulk delete, iterate in lockstep */
+void alist_test(void)
+{
+	AList *al = NULL;
+	AListIterator i;
+	AListIterator i2;
+	AListIterator i3;
+
+	Bitmapset *del = NULL;
+	int x;
+
+	/* Add the integers 1..32; the first add creates the list */
+	for (x = 1; x <= 32; x++)
+		al = alist_add_int(al, x);
+
+	/*
+	 * Report every item and mark the even-numbered indexes for deletion.
+	 * The iterator index is a pointer difference, so narrow it to int.
+	 */
+	alist_foreach(i, al)
+	{
+		int idx = (int) alist_iterator_index(i);
+		if ((idx & 1) == 0)
+			del = bms_add_member(del, idx);
+		elog(NOTICE, "%d", alist_curr_int(i));
+	}
+
+	/* Perform the deletion */
+	al = alist_delete(al, del);
+
+	/* check the list is as we expect after having performed the delete */
+	elog(NOTICE, "list contains %d items", al->count);
+	alist_foreach(i, al)
+	{
+		int a = alist_curr_int(i);
+		elog(NOTICE, "%d", a);
+	}
+	elog(NOTICE, "---");
+	alist_forthree(i, al, i2, al, i3, al)
+	{
+		int a = alist_curr_int(i);
+		int b = alist_curr_int(i2);
+		int c = alist_curr_int(i3);
+
+		elog(NOTICE, "%d: %d %d %d", (int) alist_iterator_index(i), a, b, c);
+	}
+}
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index f4e0a6e..40cb403 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -60,6 +60,7 @@ query_planner(PlannerInfo *root, List *tlist,
 	Index		rti;
 	double		total_pages;
 
+alist_test();
 	/*
 	 * If the query has an empty join tree, then it's something easy like
 	 * "SELECT 2+2;" or "INSERT ... VALUES()".  Fall through quickly.
diff --git a/src/include/nodes/arraylist.h b/src/include/nodes/arraylist.h
new file mode 100644
index 0000000..4f5071e
--- /dev/null
+++ b/src/include/nodes/arraylist.h
@@ -0,0 +1,156 @@
+/*-------------------------------------------------------------------------
+ *
+ * arraylist.h
+ *	  interface for PostgreSQL generic array lists
+ *
+ * AList is a generic list type which allows O(1) lookups to a known element
+ * index.  Array lists are also more CPU cache friendly than a linked list,
+ * however, there are also drawbacks such as removing items from the middle
+ * of the list can be slow as it requires moving each subsequent element in
+ * the array 1 space towards the start of the array.
+ *
+ * As a general rule, array lists are better than linked lists when the
+ * number of items to be stored is known in advance and nothing needs to be
+ * removed from the list.  The reason for this is that we can add each
+ * element to the list without having to perform any pallocs.  Looping over
+ * an array list should also be faster than looping over a linked list due
+ * to better CPU cache locality.
+ *
+ * It's also important to never test for an empty AList by checking if it
+ * is NULL. An AList can be preallocated to a given size and still have no
+ * items stored. The correct way to test for an empty list is by checking
+ * that alist_count(list) == 0.
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ *
+ * src/include/nodes/arraylist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef ARRAYLIST_H
+#define ARRAYLIST_H
+
+#include "nodes/nodes.h"
+
+typedef struct AListItem AListItem;
+
+typedef struct AList
+{
+	NodeTag		type;	/* T_ArrayList, T_IntArrayList, or T_OidArrayList */
+	int			count;	/* number of items currently stored in the list */
+	int			size;	/* number of items the items array has room for */
+	AListItem   *items;	/* elements array; the first 'count' are in use */
+} AList;
+
+struct AListItem
+{
+	union
+	{
+		void	   *ptr_value;	/* read when the list is a T_ArrayList */
+		int			int_value;	/* read when the list is a T_IntArrayList */
+		Oid			oid_value;	/* read when the list is a T_OidArrayList */
+	}			data;
+};
+
+/*
+ * AListIterator
+ *	Cursor state for the alist_foreach family of iteration macros
+ */
+typedef struct AListIterator
+{
+	AListItem *curr;	/* item the loop is currently positioned on */
+	AListItem *first;	/* start of the items array; index = curr - first */
+	AListItem *last;	/* loop bound, set by the iteration macro */
+} AListIterator;
+
+/* alist_nth: return the pointer stored at 0-based index 'n' of 'list' */
+static inline void *
+alist_nth(const AList *list, int n)
+{
+	const AListItem *item;
+
+	Assert(n >= 0);
+	Assert(n < list->count);
+	Assert(list->type == T_ArrayList);
+	item = &list->items[n];
+
+	return item->data.ptr_value;
+}
+
+static inline int
+alist_nth_int(const AList *list, int n)
+{
+	Assert(n >= 0);
+	Assert(n < list->count);
+	Assert(list->type == T_IntArrayList);
+	/* int lists keep their values in the int_value arm of the union */
+	return (list->items + n)->data.int_value;
+}
+
+static inline Oid
+alist_nth_oid(const AList *list, int n)
+{
+	Assert(n >= 0);
+	Assert(n < list->count);
+	Assert(list->type == T_OidArrayList);
+	/* hand the stored Oid back as an Oid rather than converting it to int */
+	return list->items[n].data.oid_value;
+}
+
+/* alist_count: number of items stored in 'list'; a NULL list holds none */
+static inline int
+alist_count(const AList *list)
+{
+	if (list == NULL)
+		return 0;
+
+	return list->count;
+}
+
+/*
+ * Macros for iterating over an AList.  'al' must not be NULL, but may be
+ * empty: 'last' is then NULL, so no pointer before the array is formed.
+ */
+#define alist_iter_init(i, al) \
+	((i).first = (i).curr = (al)->items, \
+	 (i).last = ((al)->count > 0) ? (i).first + ((al)->count - 1) : NULL)
+#define alist_iter_more(i) \
+	((i).last != NULL && (i).curr <= (i).last)
+
+/* Visit each item of 'al' in order, with 'i' as the cursor */
+#define alist_foreach(i, al) \
+	for (alist_iter_init(i, al); alist_iter_more(i); (i).curr++)
+
+/* Walk two lists in step; stops as soon as either one runs out */
+#define alist_forboth(i1, al1, i2, al2) \
+	for (alist_iter_init(i1, al1), alist_iter_init(i2, al2); \
+		 alist_iter_more(i1) && alist_iter_more(i2); \
+		 (i1).curr++, (i2).curr++)
+
+/* Walk three lists in step; stops as soon as any one runs out */
+#define alist_forthree(i1, al1, i2, al2, i3, al3) \
+	for (alist_iter_init(i1, al1), alist_iter_init(i2, al2), \
+		 alist_iter_init(i3, al3); \
+		 alist_iter_more(i1) && alist_iter_more(i2) && alist_iter_more(i3); \
+		 (i1).curr++, (i2).curr++, (i3).curr++)
+
+/* Item value under iterator 'i'; pick the variant matching the list's type */
+#define alist_curr(i)		((i).curr->data.ptr_value)
+#define alist_curr_int(i)	((i).curr->data.int_value)
+#define alist_curr_oid(i)	((i).curr->data.oid_value)
+
+/* 0-based list index of iterator 'i', as an int to match AList.count */
+#define alist_iterator_index(i) ((int) ((i).curr - (i).first))
+
+extern AList *alist_premake(int size);
+extern AList *alist_premake_int(int size);
+extern AList *alist_premake_oid(int size);
+
+extern AList *alist_add(AList *list, void *datum);
+extern AList *alist_add_int(AList *list, int datum);
+extern AList *alist_add_oid(AList *list, Oid datum);
+
+extern void alist_test(void);	/* demo only; "()" would declare no prototype */
+
+#endif							/* ARRAYLIST_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index ffeeb49..8da661f 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -293,6 +293,13 @@ typedef enum NodeTag
 	T_OidList,
 
 	/*
+	 * TAGS FOR ARRAYLIST NODES (arraylist.h)
+	 */
+	T_ArrayList,
+	T_IntArrayList,
+	T_OidArrayList,
+
+	/*
 	 * TAGS FOR EXTENSIBLE NODES (extensible.h)
 	 */
 	T_ExtensibleNode,
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index c2929ac..af09f0e 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -20,6 +20,7 @@
 #include "access/attnum.h"
 #include "nodes/bitmapset.h"
 #include "nodes/pg_list.h"
+#include "nodes/arraylist.h"
 
 
 /* ----------------------------------------------------------------
-- 
1.9.5.msysgit.1

