author    Gurjeet Singh    2010-06-01 17:36:54 +0000
committer Gurjeet Singh    2010-06-01 17:36:54 +0000
commit    dd12ce0a8c2c80c05cba2934b27a3c7242aba97b (patch)
tree      2b3b3552a7ef5623ffaca9a5382d6c7281075371
Moved source code from http://pgfoundry.org/projects/pgadviser/
-rw-r--r--  Makefile                                        31
-rw-r--r--  README.pgadviser                                 2
-rw-r--r--  index_adviser/Makefile                          15
-rw-r--r--  index_adviser/README.index_adviser             340
-rw-r--r--  index_adviser/TODO.txt                         102
-rw-r--r--  index_adviser/index_adviser.c                 2357
-rw-r--r--  index_adviser/index_adviser.h                   41
-rw-r--r--  pg_advise/Makefile                              12
-rw-r--r--  pg_advise/advise_index.c                       452
-rw-r--r--  pg_advise/advise_index.h                        27
-rw-r--r--  pg_advise/sample_advise_index_session.txt       24
-rw-r--r--  pg_advise/util_funcs.c                         110
-rw-r--r--  resources/Makefile                              14
-rw-r--r--  resources/index_advisory.create.sql             10
-rw-r--r--  resources/sample_error_messages.sql             80
-rw-r--r--  resources/sample_error_messages.txt            192
-rw-r--r--  resources/sample_psql_session.sql               80
-rw-r--r--  resources/sample_psql_session.txt              271
-rw-r--r--  resources/select_index_advisory.create.sql      13
-rw-r--r--  resources/show_index_advisory.create.sql       100
20 files changed, 4273 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1c08881
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,31 @@
+#
+# PostgreSQL Adviser top level makefile
+#
+
+PGFILEDESC = "PostgreSQL Index Advisor"
+subdir = contrib/adviser
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+all:
+ $(MAKE) -C index_adviser all
+ $(MAKE) -C pg_advise all
+ $(MAKE) -C resources all
+ @echo "PostgreSQL Index Advisor successfully made. Ready to install."
+
+install:
+ $(MAKE) -C index_adviser $@
+ $(MAKE) -C pg_advise $@
+ $(MAKE) -C resources $@
+ @echo "PostgreSQL Index Advisor installed."
+
+uninstall:
+ $(MAKE) -C index_adviser $@
+ $(MAKE) -C pg_advise $@
+ $(MAKE) -C resources $@
+ @echo "PostgreSQL Index Advisor uninstalled."
+
+clean:
+ $(MAKE) -C index_adviser $@
+ $(MAKE) -C pg_advise $@
+ $(MAKE) -C resources $@
diff --git a/README.pgadviser b/README.pgadviser
new file mode 100644
index 0000000..8417d6f
--- /dev/null
+++ b/README.pgadviser
@@ -0,0 +1,2 @@
+As of now, this module contains only an index adviser. Please read index_adviser/README.index_adviser.
+
diff --git a/index_adviser/Makefile b/index_adviser/Makefile
new file mode 100644
index 0000000..8941341
--- /dev/null
+++ b/index_adviser/Makefile
@@ -0,0 +1,15 @@
+
+MODULE_big = index_adviser
+OBJS = index_adviser.o
+
+DOCS = README.index_adviser
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pgadviser/index_adviser
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/index_adviser/README.index_adviser b/index_adviser/README.index_adviser
new file mode 100644
index 0000000..90f55f7
--- /dev/null
+++ b/index_adviser/README.index_adviser
@@ -0,0 +1,340 @@
+
+README for the PostgreSQL Index Adviser
+
+1. Introduction
+ ============
+
+ The Index Adviser is an extension for PostgreSQL that allows the RDBMS to
+recommend a set of potentially useful indexes for a given query.
+
+ Following are the components of the Index Adviser.
+
+ i) An extension to the query planner: a new backend module that interacts
+with the planner to come up with suggestions. These suggestions are then
+stored in the advise_index table for later analysis.
+
+ ii) The pg_advise_index tool; it is a separate contrib module now. It takes a
+workload (a set of SQL queries) as its input, and outputs SQL statements that,
+if executed, will create the indexes suggested by the Index Adviser.
+
+ pg_advise_index also has an option to specify the maximum total size of
+indexes you can afford (a disk space restriction); it will then suggest only
+the most profitable indexes that fit within that size.
+
+ iii) A pl/pgsql function, advise_index_show(). This function interprets the
+contents of the advise_index table, and outputs SQL statements (similar to
+pg_advise_index) that, when executed, will create the indexes suggested by the
+Index Adviser.
+
+ iv) A few helper SQL queries, like advise_index_summary.sql, that help in
+interpreting the Index Adviser output stored in the advise_index table.
+
+
+2. Installation Instructions
+ =========================
+
+ i) Apply the patch to the sources, recompile, and install postgres.
+
+ ii) Execute 'make' for this contrib module and the pg_advise_index module.
+ You can execute 'make install' in the pg_advise_index module; doing that
+ will install the pg_advise_index binary in the bin/ directory. But
+ doing a 'make install' for this (pg_index_adviser) contrib module will
+ not help; you will have to manually install the plugin generated by this
+ module. Copy the libpg_index_adviser (.dll or .so) file to your
+ PostgreSQL's $libdir/plugins/ directory.
+
+ iii) Create the table advise_index, using the script advise_index.create.sql
+ provided in this contrib module. Make sure that the user under which
+ you wish to generate the advisory has proper permissions on it (INSERT
+ is the bare minimum, but having UPDATE and DELETE permissions too
+ wouldn't hurt); see the example grants after this list.
+
+ iv) Create the advise_index_show() plpgsql function using the script
+ advise_index_show.create.sql provided in this contrib module. This
+ function is not a prerequisite for the proper functioning of the Index
+ Adviser, but it will help you interpret the advisory generated and
+ inserted into the advise_index table by the Adviser.
+
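+ For example, assuming the table is named advise_index and the advisory is
+ to be generated by a (hypothetical) role named workload_user, the minimal
+ grant would be:
+
+     GRANT INSERT ON advise_index TO workload_user;
+     -- optional, but convenient for cleaning up old advisories
+     GRANT UPDATE, DELETE ON advise_index TO workload_user;
+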
+ Notes
+ -----
+ While working with the Adviser, if you get an error like:
+
+ ERROR: relation "advise_index" does not exist
+ CONTEXT: SQL statement "insert into advise_index values( 16395,
+ array[1], 1782.973755, 1752, 11796, now());"
+
+ then it means that the required advise_index table is either not created or
+ the current user does not have INSERT permissions on it. Please create this
+ table as mentioned in step (iii) above.
+
+ Also, you should make sure that you do not enable the Index Adviser in a
+ read-only transaction, as that will cause the internal INSERT statements to
+ throw ERRORs similar to the one above.
+
+
+3. User Interface
+ ==============
+
+ If a user wants the RDBMS to recommend indexes, then she must provide it
+with a workload, that is, a sample set of queries that are expected to be
+executed by the application.
+
+ There are three ways of getting index advisory from the backend:
+
+ i) pg_advise_index tool.
+ ---------------------
+
+ Create a file that contains all the queries (semicolon terminated; may
+be multi-line) that are expected to be executed by the application, and
+feed this file to the pg_advise_index tool with appropriate options.
+
+ pg_advise_index -d DB -h host -U user -s 10M -o advisory.sql workload.sql
+
+ pg_advise_index will open a connection to the PostgreSQL server, setting
+appropriate session-level options that force the backend to load the
+pg_index_adviser plugin. It will then prepend the keyword EXPLAIN to each of
+the queries found in the workload file, and execute them against the backend.
+For each query EXPLAINed, the backend will generate advice about the indexes
+that might have been beneficial in executing that query.
+
+ At the end, pg_advise_index will enumerate all the indexes suggested for
+the current session, and output a CREATE INDEX statement for each of them.
+Optionally, if the -s (size) option was specified, pg_advise_index will output
+suggestions only for those indexes that fit within that size.
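+
+ For illustration only, the generated advisory.sql might contain statements
+like the following (the table, column, and index names here are hypothetical):
+
+     create index idx_orders_customer on orders (customer_id);
+     create index idx_orders_cust_date on orders (customer_id, order_date);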
+
+ ii) Manually (through psql session)
+ --------
+
+ If you wish to execute the queries manually, through psql, then you must
+perform all the steps that pg_advise_index performs internally.
+
+ Connect to the database using psql. Make sure you set the environment
+variable PGOPTIONS so that the backend loads the plugin for you. You can use a
+command similar to:
+
+ $ env PGOPTIONS='-c local_preload_libraries=libpg_index_adviser' psql.exe db1
+
+ EXPLAIN every query that you think will be executed by the
+application. The Index Adviser will store the suggestions in the
+advise_index table.
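+
+ As a minimal illustration, assuming a hypothetical table t(a,b):
+
+     explain select * from t where a = 100;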
+
+ The EXPLAIN output will contain the normal plan; if any indexes were
+suggested, another plan that uses the suggested indexes is dumped in the
+server log file as LOG output. If you wish to see these messages on your psql
+screen as well, then you must set the client_min_messages GUC variable to at
+least LOG; like so:
+
+ set client_min_messages to log;
+
+This new plan starts with a line like:
+
+ Index Adviser: Plan using estimates for suggested indexes:
+
+ When you are done EXPLAIN'ing all the queries, execute the following
+pl/pgsql function to see the output. (Note: you must have created the
+advise_index_show() function using the script available in this contrib module)
+
+ select advise_index_show( pg_backend_pid() );
+
+ If you wish, you can omit the pg_backend_pid() function call from the
+parameter list and invoke it as follows; both these invocations have the same
+effect:
+
+ select advise_index_show( null );
+
+ The advise_index_show() function can also be helpful in enumerating the
+advisory generated by any previous runs of the Index Adviser; possibly by
+other sessions, or even by previous invocations of the pg_advise_index tool.
+
+ For example, the following query will enumerate all the advisories
+generated so far:
+
+ select E'backend_pid\n'
+ || E'===========\n'
+ || backend_pid,
+ advise_index_show( backend_pid )
+ from (select distinct backend_pid
+ from advise_index as adv
+ where adv.reloid in (select oid
+ from pg_class
+ where relkind = 'r')
+ ) as v;
+
+ iii) Automatically (at runtime, for every query executed by the application)
+ -----------------------------------------------------------------------
+
+ Just make sure that you set the environment variable PGOPTIONS similar to:
+
+ '-c local_preload_libraries=libpg_index_adviser'
+
+before you run your application. Either set this in the environment at the
+start of your application, or set local_preload_libraries directly in the
+postgresql.conf file (not recommended, but works).
+
+ This will result in the Index Adviser dumping its advisory into the
+advise_index table; you can then use the above-mentioned methods to
+interpret the advisory.
+
+
+4. Architecture
+ ============
+
+ The Index Adviser consists of a plugin (.dll or .so file), a command-line
+tool and a pl/pgsql function. The planner calls the plugin, which in turn
+exploits virtual, or hypothetical, indexes. These indexes are simulated
+during query planning only.
+
+ For an incoming EXPLAIN command, the planner generates the plan and, if the
+plugin is loaded, the query is sent to the Index Adviser for any
+suggestions it can make. The Adviser derives a set of potentially-useful indexes
+(index candidates) for this query by analyzing the query predicates. These
+indexes are inserted into the system catalog as virtual indexes; that is, they
+are not created on disk.
+
+ Then, the query is again sent to the planner, and this time the planner
+makes its decisions taking the just-created virtual indexes into account
+too. All index candidates used in the final plan represent the
+recommendation for the query and are inserted into the advise_index table
+by the Adviser.
+
+ The gain of this recommendation is estimated as the difference between the
+execution cost of this plan and that of the plan generated before the virtual
+indexes were created.
+
+ Based on the index recommendations collected in the advise_index table, the
+optimal index configuration for a given set of workloads under disk space
+restrictions can be determined by solving a knapsack problem. This is done by
+the pg_advise_index tool when the -s (size) option is given.
+
+
+5. The advise_index table
+ ======================
+
+ The Index Adviser inserts its advisory into a table named advise_index. This
+table should be in the executing user's search_path, and she should have INSERT
+permission on it. The user can create this table using the
+advise_index.create.sql script provided in this contrib module.
+
+ In this table, the index recommendations are collected for each query
+running under control of the Index Adviser. The table is structured as
+follows:
+
+column      | type      | meaning
+------------+-----------+----------------------------------------------------
+reloid      | oid       | the oid of the base table for this index
+attrs       | integer[] | an array containing the indexed column numbers
+profit      | real      | the estimated profit of this index for this query
+index_size  | integer   | the estimated size of the index (in disk-pages)
+backend_pid | integer   | pid of the backend, to uniquely identify the source
+timestamp   | timestamp | can be used in conjunction with backend_pid
+
+Note: The profit of an index is estimated as a fraction of the overall profit
+of all recommended index candidates for a given query
+
+(i.e. overall_profit = cost(query-without-virtual-indexes)
+                     - cost(query-with-virtual-indexes))
+
+where each index's share is proportional to its size relative to the overall
+size of all the indexes recommended for this query.
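+
+For example (with made-up numbers), if overall_profit = 900 and two indexes of
+100 and 200 disk-pages are recommended for the query, then:
+
+    profit(index1) = 900 * 100/(100+200) = 300
+    profit(index2) = 900 * 200/(100+200) = 600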
+
+ Here's a sample of the contents of advise_index table:
+
+ select * from advise_index where backend_pid = pg_backend_pid();
+
+ reloid | attrs | profit | index_size | backend_pid | timestamp
+--------+-------+---------+------------+-------------+------------------------
+ 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:54.25
+
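+ As an illustration only (this is not the exact query that pg_advise_index
+runs internally), the per-index totals that a size-constrained selection would
+work from can be summarized like this:
+
+     select reloid, attrs,
+            sum(profit) as total_profit,
+            max(index_size) as index_size
+     from advise_index
+     group by reloid, attrs
+     order by total_profit desc;
+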
+
+6. Backend source code modifications
+ =================================
+
+ This section gives a brief description of the modifications made in the
+PostgreSQL backend. The following modifications were necessary:
+
+.) Enable planner.c to call a post-planner plugin at the end of planner().
+ src/backend/optimizer/plan/planner.c
+ src/include/optimizer/planner.h
+
+.) Enable the optimizer to get the estimated number of pages of a virtual
+ index from the plugin.
+ src/backend/optimizer/util/plancat.c
+
+.) Made a few misc. changes to help the Index Adviser, like a new function to
+ get the string representation of a Plan, and exporting a few global
+ variables for the Windows platform.
+ src/backend/commands/explain.c
+ src/include/commands/explain.h
+ src/include/utils/guc.h
+ src/include/miscadmin.h
+
+
+7. Index Adviser Internals
+ =======================
+
+1. If the plugin is loaded, then, at the end of the planner() function, the
+backend calls the main (driver) function of the plugin: index_adviser(query,
+plan, ...); where 'plan' is the already computed plan for the query.
+
+2. Then, the query is analyzed (scan_query()) in order to get a list of
+potential index candidates, i.e. indexes on columns used in query predicates or
+for joining or grouping or sorting. This includes multi-column indexes used in
+predicates containing AND conditions.
+
+3. Next, all irrelevant candidates are removed
+(remove_irrelevant_candidates()), e.g. indexes that already exist, or that
+would index system tables or temporary tables.
+
+4. The remaining candidates are created as virtual indexes without populating
+them with the data (create_virtual_indexes()).
+
+5. Then, the planner is called and the costs are compared with those of the
+previously estimated Plan.
+
+6. If the cost difference exceeds a given limit, the plan is scanned
+(scan_plan()) in order to extract the virtual indexes that were used in this
+plan. The referenced virtual indexes represent the index recommendation.
+
+7. The virtual indexes are removed from the catalog (drop_virtual_indexes()).
+
+8. Finally, the profit per index is estimated and the recommendation is written
+to the advise_index table (save_advise()).
+
+Note: A major portion of the Index Adviser runs inside a SubTransaction, so
+that simply rolling the subtransaction back reverses all the catalog changes
+that were made as a side effect of creating virtual indexes.
+
+Also, as of now, the adviser creates only non-unique B-Tree virtual indexes.
+
+8. Credits
+ =======
+
+ Kai-Uwe Sattler (k.sattler@computer.org):
+ Design.
+ Multicolumn indexes
+ pg_advise (now called pg_advise_index tool)
+ Mario Stiffel (mario.stiffel@ms-ray.de):
+ Initial implementation. The algorithm worked as follows:
+ In ExplainOneQuery(), call indexadvisor() before normal call to planner().
+ indexadvisor() creates a copy of the Query* and runs planner() on it.
+ Save the costs returned by planner().
+ Invoke scan_query() on the Query* to create index candidates.
+ Prune irrelevant candidates using remove_irrelevant_index_candidates().
+ Create indexes using create_virtual_indexes(); these were real indexes.
+ Create another copy of the original Query* and call planner() on it.
+ Calculate the costs saved by using (supposedly) virtual indexes.
+ Call scan_plan() to mark the virtual indexes used in the new plan.
+ Call drop_virtual_indexes() to destroy the indexes previously created.
+ Remove the candidates from the list that are not marked as 'used'.
+ Call save_advise_to_catalog() to save the advisory (details about the
+ 'used' candidates) into a catalog table named pg_indexadvisor.
+ Martin Luehring (mar.lue@web.de):
+ Improvements for multicolumn indexes.
+ Bug fixes for memory leaks.
+ Gurjeet Singh (gurjeet.singh@enterprisedb.com):
+ Ported code from 7.4.8 to 8.2.
+ Major code refactoring, to bring it in line with coding practices at PG.
+ Removed an extra call to planner() and an extra copy of Query*.
+ Added code to identify BitmapHeapScan, BitmapIndexScan, BitmapAnd, BitmapOr.
+ Now we also show the new plan with the suggested indexes in EXPLAIN output.
+ Now the index is not created on disk; no time spent in creating virt-index.
+ All the catalog changes are done and rolled-back within a sub-transaction.
+ Eliminated the need for a catalog table. advise_index can be a user table.
+ Added a keyword to EXPLAIN command: EXPLAIN ADVISE <query>. {reverted}
+ Removed all GUC variables; replaced with elog messages at DEBUG levels.
+ Added the ability to advise in execution (non-EXPLAIN, GUC var controlled).
diff --git a/index_adviser/TODO.txt b/index_adviser/TODO.txt
new file mode 100644
index 0000000..401ba5e
--- /dev/null
+++ b/index_adviser/TODO.txt
@@ -0,0 +1,102 @@
+
+TODO:
+=====
+.) Remove the newly added parameter to DefineIndex(); Done.
+.) Remove the newly added member to IndexInfo{}; Done.
+.) Remove the newly added member to IndexStmt{}; Done.
+.) Order the #include lines alphabetically, in new files. Done.
+.) Analyze if we need timestamp column in pg_indexadvisor table.
+.) Analyze if we need nattrs column in pg_indexadvisor table. Done; removed.
+.) See why we shouldn't consider the GROUP-BY clause in the presence of a WHERE
+ clause (scan_query_for_relevant_attributes())?
+.) Eliminate warnings from newly added sources.
+.) Write a pl/pgsql function that interprets the results in pg_indexadvisor.
+ Done.
+.) Sanitize the linked-list logic in various functions.
+ Done for remove_irrelevant_candidates().
+ Pending: merge_candidates(), build_composite_candidates().
+.) Somehow recommend to drop indexes that are not much used (Heikki's idea).
+.) Investigate the difference in costs (in sample_psql*) across patch versions
+ 15 and 17; both are based on REL8_2_STABLE!
+.) Do not try to insert advisory into advise_index if XactReadOnly (in xact.c)
+ is true; probably, raise a warning instead!
+.) Do not create a virtual index if the base relation has less than a specific
+ number of pages (eg 2) and/or less than a specific number of tuples (eg 10).
+.) Reduce the number of heap_open()s by attaching the Relation to the candidate
+ structure and heap_close() only when the candidate is being pfree()d.
+.) Create new memory context and do everything within that.
+.) Propose that _compile_assert() macro be added to the core.
+.) Mention that get_relation_info_hook and explain_get_index_name_hook are
+ registered only when required and are unregistered immediately after that.
+ This avoids overhead in normal routes.
+
+.) Also, the 'static global' index_candidates is reset based on the above
+ assumption; else, we would have to reset it at the top of planner_callback()
+ and ExplainOneQuery_callback(), otherwise the is_virtual_index() calls will
+ behave abnormally if an ERROR was thrown for the previous command!
+
+BUGS:
+=====
+.) The SELECTs in pg_advise are returning wrong results when the same index
+ is suggested twice, because of the SUM() aggregates. Not a bug; intended
+ behaviour.
+.) I doubt that on a table t(a,b), for a suggestion of idx(b,a), pg_advise will
+ suggest idx(a,b). True; bug reproduced, fixed and verified.
+.) explain
+ select *
+ from t
+ where b = (select b
+ from t
+ where a = (select a
+ from t
+ where b = 100 ));
+.) explain
+ select *
+ from t
+ where b = (select b
+ from t
+ where a = (select a
+ from t
+ where b = (select b
+ from t
+ where a = (select a
+ from t
+ where b = 100))));
+
+Wish-list:
+==========
+.) Make pg_indexadvisor a user table. Done; used SPI interface.
+ Reason: a normal user cannot do "delete from pg_indexadvisor".
+ Difficulty: Need to know how to do
+ "insert into pg_indexadvisor values( 1, ...)"
+ from within the backend; that is, need to study/invent RSI
+ (Recursive SQL Interface).
+ Trial code can be seen by searching for:
+ exec_simple_query( "insert into index_advisor values( 10 )",
+ "advisor" /*portal name*/ );
+
+.) Make it plugin-based.
+ Reason: so that someone else with a better idea can replace
+ this advisor, without having to recompile the server.
+ Difficulty: This code calls many internal functions:
+ index_create(), index_drop(), planner(), etc.
+ That makes it impossible to compile it standalone.
+
+.) Remove the dependency on the global variable "index_candidates"; used for
+ communication between indexadvisor.c and plancat.c.
+ Reason: Bad coding practice.
+ Difficulty: Even though I was successful in updating pg_class.relpages for
+ the virtual indexes, the planner is still calling smgr.c code to
+ get the number of pages occupied by the index! Hence, I had to
+ use the global the way I did.
+
+Files that shouldn't be in final patch: (done. this list is obsolete now)
+=======================================
+bootparse.y
+defrem.h
+tablecmds.c
+src/backend/commands/indexcmds.c
+src/backend/parser/gram.y
+utility.c
+src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
+src/include/nodes/execnodes.h
+src/include/nodes/parsenodes.h
diff --git a/index_adviser/index_adviser.c b/index_adviser/index_adviser.c
new file mode 100644
index 0000000..aa0c062
--- /dev/null
+++ b/index_adviser/index_adviser.c
@@ -0,0 +1,2357 @@
+/*-------------------------------------------------------------------------
+ *
+ * index_adviser.c
+ * Plugin to recommend potentially useful indexes in a query.
+ *
+ * too much time? you do not know what to do next? then search for
+ * "TODO:" and "FIXME:" comments!
+ *
+ * created by Mario Stiffel
+ * modified and partly reimplemented by Martin Luehring
+ * Almost completely rewritten by gurjeet.singh@enterprisedb.com
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* ------------------------------------------------------------------------
+ * includes (ordered alphabetically)
+ * ------------------------------------------------------------------------
+ */
+#include <sys/time.h>
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/nbtree.h"
+#include "access/xact.h"
+#include "index_adviser.h"
+#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "executor/execdesc.h"
+#include "executor/spi.h"
+#include "fmgr.h" /* for PG_MODULE_MAGIC */
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "nodes/print.h"
+#include "optimizer/planner.h"
+#include "optimizer/plancat.h"
+#include "parser/parse_coerce.h"
+#include "parser/parsetree.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "tcop/dest.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/elog.h"
+#include "utils/lsyscache.h"
+#include "utils/relcache.h"
+#include "utils/syscache.h"
+
+/* mark this dynamic library to be compatible with PG */
+PG_MODULE_MAGIC;
+
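+/* When non-zero, candidate indexes are actually created in the catalogs as
+ * virtual indexes inside a subtransaction (and rolled back afterwards); the
+ * corresponding #else code paths below are incomplete.
+ */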
+#define CREATE_V_INDEXES 1
+
+/* *****************************************************************************
+ * DEBUG Level : Information dumped
+ * ------------ ------------------
+ * DEBUG1 : code level logging. What came in and what went out of a
+ * function. candidates generated, cost estimates, etc.
+ * DEBUG2 : DEBUG1 plus : Profiling info. Time consumed in each of the
+ * major functions.
+ * DEBUG3 : Above plus : function enter/leave info.
+ * ****************************************************************************/
+
+#define DEBUG_LEVEL_COST (log_min_messages >= DEBUG2)
+#define DEBUG_LEVEL_PROFILE (log_min_messages >= DEBUG2)
+#define DEBUG_LEVEL_CANDS (log_min_messages >= DEBUG2)
+
+/* Index Adviser output table */
+#define IND_ADV_TABL "index_advisory"
+
+/* IND_ADV_TABL does Not Exist */
+#define IND_ADV_ERROR_NE "relation \""IND_ADV_TABL"\" does not exist."
+
+/* IND_ADV_TABL is Not a Table or a View */
+#define IND_ADV_ERROR_NTV "\""IND_ADV_TABL"\" is not a table or view."
+
+#define IND_ADV_ERROR_DET \
+ "Index Adviser uses \""IND_ADV_TABL"\" table to store its advisory. You" \
+ " should have INSERT permissions on a table or an (INSERT-able) view named"\
+ " \""IND_ADV_TABL"\". Also, make sure that you are NOT running the Index" \
+ " Adviser under a read-only transaction."
+
+#define IND_ADV_ERROR_HINT \
+ "Please create the \""IND_ADV_TABL"\" table using the script provided in" \
+ " pg_advise_index contrib module."
+
+/* *****************************************************************************
+ * forward declarations of local-only functions
+ * ****************************************************************************/
+
+/* scan_* functions go looking for relevant attributes in the query */
+static List* scan_query( const Query* const query,
+ List* const opnos,
+ List* rangeTableStack );
+
+static List* scan_generic_node( const Node* const root,
+ List* const opnos,
+ List* const rangeTableStack );
+
+static List* scan_group_clause( List* const groupList,
+ List* const targtList,
+ List* const opnos,
+ List* const rangeTblStack );
+
+static List* build_composite_candidates( List* l1, List* l2 );
+
+static List* remove_irrelevant_candidates( List* candidates );
+
+static void mark_used_candidates( const Node* const plan,
+ List* const candidates );
+
+static int compare_candidates( const IndexCandidate* c1,
+ const IndexCandidate* c2 );
+
+static List* merge_candidates( List* l1, List* l2 );
+
+static List* create_virtual_indexes( List* candidates );
+
+static void drop_virtual_indexes( List* candidates );
+
+static void save_advise( List* candidates );
+
+static void log_candidates( const char* text, List* candidates );
+
+/* function used for estimating the size of virtual indexes */
+static int4 estimate_index_pages(Oid rel_oid, Oid ind_oid );
+
+static PlannedStmt* planner_callback( Query* query,
+ int cursorOptions,
+ ParamListInfo boundParams);
+
+static void ExplainOneQuery_callback( Query *query,
+ ExplainStmt *stmt,
+ const char *queryString,
+ ParamListInfo params,
+ TupOutputState *tstate);
+
+static void get_relation_info_callback( PlannerInfo* root,
+ Oid relationObjectId,
+ bool inhparent,
+ RelOptInfo* rel);
+
+static const char *
+explain_get_index_name_callback( Oid indexId );
+
+static PlannedStmt* index_adviser( Query *query,
+ int cursorOptions,
+ ParamListInfo boundParams,
+ PlannedStmt *actual_plan,
+ bool doingExplain);
+
+static void resetSecondaryHooks();
+static bool is_virtual_index( Oid oid, IndexCandidate **cand_out );
+
+/* ------------------------------------------------------------------------
+ * implementations: types and functions for profiling
+ * ------------------------------------------------------------------------
+ */
+
+typedef struct {
+ bool running;
+ struct timeval start;
+ struct timeval stop;
+ unsigned long usec;
+} Timer;
+
+/* Need this to remember the virtual indexes generated. */
+static List *index_candidates;
+
+/* Timer for log_candidates; global, since it is called from different places */
+static Timer tLogCandidates;
+
+/* Global variable to hold a value across calls to mark_used_candidates() */
+static PlannedStmt *plannedStmtGlobal;
+
+static void
+startTimer( Timer* const timer )
+{
+ gettimeofday( &(timer->start), NULL );
+
+ timer->usec = 0;
+ timer->running = true;
+}
+
+static void
+continueTimer( Timer* const timer )
+{
+ if( timer->running == false )
+ {
+ gettimeofday( &(timer->start), NULL );
+ timer->running = true;
+ }
+}
+
+static void
+stopTimer( Timer* const timer )
+{
+ if( timer->running == true )
+ {
+ gettimeofday( &(timer->stop), NULL );
+
+ timer->usec += ( (unsigned long)timer->stop.tv_sec
+ - (unsigned long)timer->start.tv_sec )
+ * (unsigned long)1000000
+ + (unsigned long)timer->stop.tv_usec
+ - (unsigned long)timer->start.tv_usec;
+
+ timer->running = false;
+ }
+}
+
+static void
+t_reset( Timer *const timer )
+{
+ timer->usec = 0;
+ timer->running = false;
+}
+
+/*
+ * Since gettimeofday() is expensive, we don't collect profiling data unless
+ * elog is going to log this.
+ */
+#define t_start(x) do{ \
+ if( DEBUG_LEVEL_PROFILE ) \
+ startTimer( &(x) ); \
+ }while(0)
+
+#define t_continue(x) do{ \
+ if( DEBUG_LEVEL_PROFILE ) \
+ continueTimer( &(x) ); \
+ }while(0)
+
+#define t_stop(x) do{ \
+ if( DEBUG_LEVEL_PROFILE ) \
+ stopTimer( &(x) ); \
+ }while(0)
+
+/* ------------------------------------------------------------------------
+ * implementations: index adviser
+ * ------------------------------------------------------------------------
+ */
+
+/* PG calls this func when loading the plugin */
+void
+_PG_init(void)
+{
+ planner_hook = planner_callback;
+ ExplainOneQuery_hook = ExplainOneQuery_callback;
+
+ resetSecondaryHooks();
+
+ elog( NOTICE, "IND ADV: plugin loaded" );
+}
+
+/* PG calls this func when un-loading the plugin (if ever) */
+void
+_PG_fini(void)
+{
+ planner_hook = NULL;
+ ExplainOneQuery_hook = NULL;
+
+ resetSecondaryHooks();
+
+ elog( NOTICE, "IND ADV: plugin unloaded." );
+}
+
+/* Assert that Cost datatype can represent negative values */
+compile_assert( ((Cost)-1) < 0 );
+
+/**
+ * index_adviser
+ * Takes a query and the actual plan generated by the standard planner for
+ * that query. It then creates virtual indexes, for the columns used in the
+ * query, and asks the standard planner to generate a new plan for the query.
+ *
+ * If the new plan appears to be cheaper than the actual plan, then it
+ * saves the information about the virtual indexes, that were used by the
+ * planner, into an advisory table.
+ *
+ * If it is called by the Explain-hook, then it returns the newly generated
+ * plan (allocated in caller's memory context), so that ExplainOnePlan() can
+ * generate and send a string representation of the plan to the client.
+ */
+static PlannedStmt*
+index_adviser( Query* queryCopy,
+ int cursorOptions,
+ ParamListInfo boundParams,
+ PlannedStmt *actual_plan,
+ bool doingExplain)
+{
+ static int8 SuppressRecursion = 0; /* suppress recursive calls */
+ bool saveCandidates = false;
+ int i;
+ ListCell *prev, /* temps for list manipulation*/
+ *cell,
+ *next;
+ List* opnos = NIL; /* contains all valid operator-ids */
+ List* candidates = NIL; /* the resulting candidates */
+
+ Timer tAdviser;
+ Timer tRePlan;
+ Timer tBTreeOperators;
+ Timer tGenCands;
+ Timer tMarkUsedCands;
+ Timer tCreateVInds;
+ Timer tDropVInds;
+ Timer tSaveAdvise;
+
+ Cost actualStartupCost;
+ Cost actualTotalCost;
+ Cost newStartupCost;
+ Cost newTotalCost;
+ Cost startupCostSaved;
+ Cost totalCostSaved;
+ float4 startupGainPerc; /* in percentages */
+ float4 totalGainPerc;
+
+ ResourceOwner oldResourceOwner;
+ PlannedStmt *new_plan;
+ MemoryContext outerContext;
+
+ char *BTreeOps[] = { "=", "<", ">", "<=", ">=", };
+
+ elog( DEBUG3, "IND ADV: Entering" );
+
+ /* We work only in Normal Mode, and non-recursively */
+ if( IsBootstrapProcessingMode() || SuppressRecursion++ > 0 )
+ {
+ new_plan = NULL;
+ goto DoneCleanly;
+ }
+
+ /* remember the memory context; we might need it to supply the candidate
+ * list to the transaction that encloses the inner transaction.
+ */
+ outerContext = CurrentMemoryContext;
+
+ /* reset these globals; since an ERROR might have left them unclean */
+ t_reset( &tLogCandidates );
+ index_candidates = NIL;
+
+ /* save the start-time */
+ t_start( tAdviser );
+
+ /* get the costs without any virtual index */
+ actualStartupCost = actual_plan->planTree->startup_cost;
+ actualTotalCost = actual_plan->planTree->total_cost;
+
+ /* create list containing all operators supported by B-tree */
+ t_start( tBTreeOperators );
+ for( i=0; i < lengthof(BTreeOps); ++i )
+ {
+ FuncCandidateList opnosResult;
+
+ List* btreeop = list_make1( makeString( BTreeOps[i] ) );
+
+ /* get the operator-ids for the operator, and collect them
+ * into the opnos list.
+ */
+ /* TODO: find out if the memory of opnosResult is ever freed. */
+ for( opnosResult = OpernameGetCandidates( btreeop, '\0' );
+ opnosResult != NULL;
+ opnosResult = lnext(opnosResult) )
+ {
+ opnos = lappend_oid( opnos, opnosResult->oid );
+ }
+
+ /* free the Value* (T_String) and the list */
+ pfree( linitial( btreeop ) );
+ list_free( btreeop );
+ }
+ t_stop( tBTreeOperators );
+
+ /* Generate index candidates */
+ t_start( tGenCands );
+ candidates = scan_query( queryCopy, opnos, NULL );
+ t_stop( tGenCands );
+
+ /* the list of operator oids isn't needed anymore */
+ list_free( opnos );
+
+ if (list_length(candidates) == 0)
+ goto DoneCleanly;
+
+ log_candidates( "Generated candidates", candidates );
+
+ /* remove all irrelevant candidates */
+ candidates = remove_irrelevant_candidates( candidates );
+
+ if (list_length(candidates) == 0)
+ goto DoneCleanly;
+
+ log_candidates( "Relevant candidates", candidates );
+#if CREATE_V_INDEXES
+ /*
+ * We need to restore the resource-owner after RARCST(), only if we are
+ * called from the executor; but we do it all the time because,
+ * (1) It's difficult to determine if we are being called by the executor.
+ * (2) It is not harmful.
+ * (3) It is not much of an overhead!
+ */
+ oldResourceOwner = CurrentResourceOwner;
+
+ /*
+ * Setup an SPI frame around the BeginInternalSubTransaction() and
+ * RollbackAndReleaseCurrentSubTransaction(), since xact.c assumes that
+ * BIST()/RARCST() infrastructure is used only by PL/ interpreters (like
+ * pl/pgsql), and hence it calls AtEOSubXact_SPI(), and that in turn frees
+ * all the execution context memory of the SPI (which _may_ have invoked the
+ * adviser). By setting up our own SPI frame here, we make sure that
+ * AtEOSubXact_SPI() frees this frame's memory.
+ */
+ if( SPI_connect() != SPI_OK_CONNECT )
+ {
+ elog( WARNING, "IND ADV: SPI_connect() call failed" );
+ goto DoneCleanly;
+ }
+
+ /*
+ * DO NOT access any data-structure allocated between BEGIN/ROLLBACK
+ * transaction, after the ROLLBACK! All the memory allocated after BEGIN is
+ * freed in ROLLBACK.
+ */
+ BeginInternalSubTransaction( "index_adviser" );
+
+ /* now create the virtual indexes */
+ t_start( tCreateVInds );
+ candidates = create_virtual_indexes( candidates );
+ t_stop( tCreateVInds );
+#endif
+ /* update the global var */
+ index_candidates = candidates;
+
+ /*
+ * Setup the hook in the planner that injects information into base-tables
+ * as they are prepared
+ */
+ get_relation_info_hook = get_relation_info_callback;
+
+ /* do re-planning using virtual indexes */
+ /* TODO: is the plan ever freed? */
+ t_start( tRePlan );
+ new_plan = standard_planner(queryCopy, cursorOptions, boundParams);
+ t_stop( tRePlan );
+
+ /* reset the hook */
+ get_relation_info_hook = NULL;
+#if CREATE_V_INDEXES
+ /* remove the virtual-indexes */
+ t_start( tDropVInds );
+ drop_virtual_indexes( candidates );
+ t_stop( tDropVInds );
+#endif
+ newStartupCost = new_plan->planTree->startup_cost;
+ newTotalCost = new_plan->planTree->total_cost;
+
+ /* calculate the saved costs */
+ startupGainPerc =
+ actualStartupCost == 0 ? 0 :
+ (1 - newStartupCost/actualStartupCost) * 100;
+
+ totalGainPerc =
+ actualTotalCost == 0 ? 0 :
+ (1 - newTotalCost/actualTotalCost) * 100;
+
+ startupCostSaved = actualStartupCost - newStartupCost;
+
+ totalCostSaved = actualTotalCost - newTotalCost;
+
+ if( startupCostSaved >0 || totalCostSaved > 0 )
+ {
+ /* scan the plan for virtual indexes used */
+ t_start( tMarkUsedCands );
+ plannedStmtGlobal = new_plan;
+
+ mark_used_candidates( (Node*)new_plan->planTree, candidates );
+
+ plannedStmtGlobal = NULL;
+ t_stop( tMarkUsedCands );
+ }
+
+ /* Remove unused candidates from the list. */
+ for( prev = NULL, cell = list_head(candidates);
+ cell != NULL;
+ cell = next )
+ {
+ IndexCandidate *cand = (IndexCandidate*)lfirst( cell );
+
+ next = lnext( cell );
+
+ if( !cand->idxused )
+ {
+ pfree( cand );
+ candidates = list_delete_cell( candidates, cell, prev );
+ }
+ else
+ prev = cell;
+ }
+
+ /* update the global var */
+ index_candidates = candidates;
+
+ /* log the candidates used by the planner */
+ log_candidates( "Used candidates", candidates );
+
+ if( list_length( candidates ) > 0 )
+ saveCandidates = true;
+
+ /* calculate the share of cost saved by each index */
+ if( saveCandidates )
+ {
+ int4 totalSize = 0;
+ IndexCandidate *cand;
+
+ foreach( cell, candidates )
+ totalSize += ((IndexCandidate*)lfirst( cell ))->pages;
+
+ foreach( cell, candidates )
+ {
+ cand = (IndexCandidate*)lfirst( cell );
+
+ cand->profit = (float4)totalCostSaved
+ * ((float4)cand->pages/totalSize);
+ }
+ }
+
+ /* Print the new plan if debugging. */
+ if( saveCandidates && Debug_print_plan )
+ elog_node_display( DEBUG1, "plan (using Index Adviser suggestions)",
+ new_plan, Debug_pretty_print );
+
+ if( saveCandidates && doingExplain )
+ {
+ MemoryContext oldContext = MemoryContextSwitchTo( outerContext );
+
+ new_plan = copyObject( new_plan );
+
+ MemoryContextSwitchTo( oldContext );
+ }
+ else
+ {
+ /* TODO1: try to free the new plan node */
+ new_plan = NULL;
+ }
+#if CREATE_V_INDEXES
+ /*
+ * Undo the metadata changes; e.g., pg_depend entries will be removed
+ * (from our view).
+ *
+ * Again: DO NOT access any data-structure allocated between BEGIN/ROLLBACK
+ * transaction, after the ROLLBACK! All the memory allocated after BEGIN is
+ * freed in ROLLBACK.
+ */
+ RollbackAndReleaseCurrentSubTransaction();
+
+ /* restore the resource-owner */
+ CurrentResourceOwner = oldResourceOwner;
+
+ if( SPI_finish() != SPI_OK_FINISH )
+ elog( WARNING, "IND ADV: SPI did not FINISH properly." );
+#endif
+ /* save the advise into the table */
+ if( saveCandidates )
+ {
+ t_start( tSaveAdvise );
+
+ /* catch any ERROR */
+ PG_TRY();
+ {
+ save_advise(candidates);
+ }
+ PG_CATCH();
+ {
+ /* reset our 'running' state... */
+ --SuppressRecursion;
+
+ /*
+ * Add a detailed explanation to the ERROR. Note that these function
+ * calls will overwrite the DETAIL and HINT that are already
+ * associated (if any) with this ERROR. XXX consider errcontext().
+ */
+ errdetail( IND_ADV_ERROR_DET );
+ errhint( IND_ADV_ERROR_HINT );
+
+ /* ... and re-throw the ERROR */
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ t_stop( tSaveAdvise );
+ }
+
+ /* remove the candidates-list */
+ elog( DEBUG1, "IND ADV: Deleting candidate list." );
+ if( !saveCandidates || !doingExplain )
+ {
+ foreach( cell, index_candidates )
+ pfree( (IndexCandidate*)lfirst( cell ) );
+
+ list_free( index_candidates );
+
+ index_candidates = NIL;
+ }
+
+ t_stop( tAdviser );
+
+ /* emit debug info */
+ elog( DEBUG1, "IND ADV: old cost %.2f..%.2f", actualStartupCost,
+ actualTotalCost );
+ elog( DEBUG1, "IND ADV: new cost %.2f..%.2f", newStartupCost, newTotalCost);
+ elog( DEBUG1, "IND ADV: cost saved %.2f..%.2f, these are %lu..%lu percent",
+ startupCostSaved,
+ totalCostSaved,
+ (unsigned long)startupGainPerc,
+ (unsigned long)totalGainPerc );
+
+ /* print profiler information */
+ elog( DEBUG2, "IND ADV: [Prof] * Query String : %s",
+ debug_query_string );
+ elog( DEBUG2, "IND ADV: [Prof] * indexAdviser : %10lu usec",
+ tAdviser.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- replanning : %10lu usec",
+ tRePlan.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- getBTreeOperators : %10lu usec",
+ tBTreeOperators.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- scanQuery : %10lu usec",
+ tGenCands.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- scanPlan : %10lu usec",
+ tMarkUsedCands.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- createVirtualIndexes : %10lu usec",
+ tCreateVInds.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- dropVirtualIndexes : %10lu usec",
+ tDropVInds.usec );
+ elog( DEBUG2, "IND ADV: [Prof] |-- saveAdviseToCatalog : %10lu usec",
+ ( saveCandidates == true ) ? tSaveAdvise.usec : 0 );
+ elog( DEBUG2, "IND ADV: [Prof] |-- log_candidates : %10lu usec",
+ tLogCandidates.usec );
+
+DoneCleanly:
+ /* allow new calls to the index-adviser */
+ --SuppressRecursion;
+
+ elog( DEBUG3, "IND ADV: EXIT" );
+
+ return doingExplain && saveCandidates ? new_plan : NULL;
+}
+
+/*
+ * This callback is registered immediately upon loading this plugin. It is
+ * responsible for taking over control from the planner.
+ *
+ * It calls the standard planner and sends the resultant plan to
+ * index_adviser() for comparison with a plan generated after creating
+ * hypothetical indexes.
+ */
+static PlannedStmt*
+planner_callback( Query* query,
+ int cursorOptions,
+ ParamListInfo boundParams)
+{
+ Query *queryCopy;
+ PlannedStmt *actual_plan;
+ PlannedStmt *new_plan;
+
+ resetSecondaryHooks();
+
+ /* TODO1 : try to avoid making a copy if the index_adviser() is not going
+ * to use it; Index Adviser may not use the query copy at all if we are
+ * running in BootProcessing mode, or if the Index Adviser is being called
+ * recursively.
+ */
+
+ /* planner() scribbles on its input, so make a copy of the query-tree */
+ queryCopy = copyObject( query );
+
+ /* Generate a plan using the standard planner */
+ actual_plan = standard_planner( query, cursorOptions, boundParams );
+
+ /* send the actual plan for comparison with a hypothetical plan */
+ new_plan = index_adviser( queryCopy, cursorOptions, boundParams,
+ actual_plan, false );
+
+ /* TODO1: try to free the redundant new_plan */
+
+ return actual_plan;
+}
+
+/*
+ * This callback is registered immediately upon loading this plugin. It is
+ * responsible for taking over control from the ExplainOneQuery() function.
+ *
+ * It calls the standard planner and sends the resultant plan to
+ * index_adviser() for comparison with a plan generated after creating
+ * hypothetical indexes.
+ */
+static void
+ExplainOneQuery_callback( Query *query,
+ ExplainStmt *stmt,
+ const char *queryString,
+ ParamListInfo params,
+ TupOutputState *tstate)
+{
+ Query *queryCopy;
+ PlannedStmt *actual_plan;
+ PlannedStmt *new_plan;
+ ListCell *cell;
+
+ resetSecondaryHooks();
+
+ /* planner() scribbles on its input, so make a copy of the query-tree */
+ queryCopy = copyObject( query );
+
+ /* plan the query */
+ actual_plan = standard_planner( query, 0, params );
+
+ /* run it (if needed) and produce output */
+ ExplainOnePlan( actual_plan, params, stmt, tstate );
+
+ /* re-plan the query */
+ new_plan = index_adviser( queryCopy, 0, params, actual_plan, true );
+
+ if ( new_plan )
+ {
+ bool analyze = stmt->analyze;
+
+ stmt->analyze = false;
+
+ explain_get_index_name_hook = explain_get_index_name_callback;
+
+ do_text_output_oneline(tstate, ""); /* separator line */
+ do_text_output_oneline(tstate, "** Plan with hypothetical indexes **");
+ ExplainOnePlan( new_plan, params, stmt, tstate );
+
+ explain_get_index_name_hook = NULL;
+
+ stmt->analyze = analyze;
+ }
+
+ /* The candidates were not destroyed by the Index Adviser; do it now */
+ foreach( cell, index_candidates )
+ pfree( (IndexCandidate*)lfirst( cell ) );
+
+ list_free( index_candidates );
+
+ index_candidates = NIL;
+
+ /* TODO1: try to free the now-redundant new_plan */
+}
+
+/*
+ * get_relation_info() calls this callback after it has prepared a RelOptInfo
+ * for a relation.
+ *
+ * The Job of this callback is to fill in the information about the virtual
+ * index, that get_rel_info() could not load from the catalogs. As of now, the
+ * number of disk-pages that might be occupied by the virtual index (if created
+ * on-disk) is the only information that needs to be updated.
+ */
+static void
+get_relation_info_callback( PlannerInfo *root,
+ Oid relationObjectId,
+ bool inhparent,
+ RelOptInfo *rel)
+{
+#if CREATE_V_INDEXES
+ ListCell *cell1;
+ IndexCandidate *cand;
+
+ foreach( cell1, rel->indexlist )
+ {
+ IndexOptInfo *info = (IndexOptInfo*)lfirst( cell1 );
+
+ /* We call estimate_index_pages() here, instead of immediately after
+ * index_create() API call, since rel has been run through
+ * estimate_rel_size() by the caller!
+ */
+
+ if( is_virtual_index( info->indexoid, &cand ) )
+ {
+ /* estimate the size */
+ cand->pages = estimate_index_pages(cand->reloid, cand->idxoid);
+
+ info->pages = cand->pages;
+ }
+ }
+#else
+ /* This needs implementation */
+ compile_assert( false );
+ ListCell *cell1;
+
+ foreach( cell1, index_candidates )
+ {
+ IndexCandidate *cand = (IndexCandidate*)lfirst( cell1 );
+
+ Form_pg_index index;
+ IndexOptInfo *info;
+ int ncolumns;
+ int i;
+
+ /*
+ * Extract info from the relation descriptor for the index.
+ */
+ index = indexRelation->rd_index;
+
+ info = makeNode(IndexOptInfo);
+
+ info->indexoid = InvalidOid;
+ info->rel = rel;
+ info->ncolumns = ncolumns = cand->ncols;
+
+ /*
+ * Allocate per-column info arrays. To save a few palloc cycles
+ * we allocate all the Oid-type arrays in one request. Note that
+ * the opfamily array needs an extra, terminating zero at the end.
+ * We pre-zero the ordering info in case the index is unordered.
+ */
+ info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
+ info->opfamily = (Oid *) palloc0(sizeof(Oid) * (4 * ncolumns + 1));
+ info->opcintype = info->opfamily + (ncolumns + 1);
+ info->fwdsortop = info->opcintype + ncolumns;
+ info->revsortop = info->fwdsortop + ncolumns;
+ info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns);
+
+ for (i = 0; i < ncolumns; i++)
+ {
+ info->indexkeys[i] = cand->varattno[i];
+ info->opfamily[i] = InvalidOid;
+ info->opcintype[i] = InvalidOid;
+ }
+
+ info->relam = InvalidOid;
+ info->amcostestimate = InvalidOid;
+ info->amoptionalkey = false;
+ info->amsearchnulls = false;
+
+ for (i = 0; i < ncolumns; i++)
+ {
+ if (opt & INDOPTION_DESC)
+ {
+ fwdstrat = BTGreaterStrategyNumber;
+ revstrat = BTLessStrategyNumber;
+ }
+ else
+ {
+ fwdstrat = BTLessStrategyNumber;
+ revstrat = BTGreaterStrategyNumber;
+ }
+ /*
+ * Index AM must have a fixed set of strategies for it
+ * to make sense to specify amcanorder, so we
+ * need not allow the case amstrategies == 0.
+ */
+ if (fwdstrat > 0)
+ {
+ Assert(fwdstrat <= nstrat);
+ info->fwdsortop[i] = indexRelation->rd_operator[i * nstrat + fwdstrat - 1];
+ }
+ if (revstrat > 0)
+ {
+ Assert(revstrat <= nstrat);
+ info->revsortop[i] = indexRelation->rd_operator[i * nstrat + revstrat - 1];
+ }
+ info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
+ }
+
+ /*
+ * Fetch the index expressions and predicate, if any. We must
+ * modify the copies we obtain from the relcache to have the
+ * correct varno for the parent relation, so that they match up
+ * correctly against qual clauses.
+ */
+ info->indexprs = RelationGetIndexExpressions(indexRelation);
+ info->indpred = RelationGetIndexPredicate(indexRelation);
+ if (info->indexprs && varno != 1)
+ ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
+ if (info->indpred && varno != 1)
+ ChangeVarNodes((Node *) info->indpred, 1, varno, 0);
+ info->predOK = false; /* set later in indxpath.c */
+ info->unique = false;
+
+ /*
+ * Estimate the index size. If it's not a partial index, we lock
+ * the number-of-tuples estimate to equal the parent table; if it
+ * is partial then we have to use the same methods as we would for
+ * a table, except we can be sure that the index is not larger
+ * than the table.
+ */
+ if (info->indpred == NIL)
+ {
+ info->pages = RelationGetNumberOfBlocks(indexRelation);
+ info->tuples = rel->tuples;
+ }
+ else
+ {
+ estimate_rel_size(indexRelation, NULL,
+ &info->pages, &info->tuples);
+ if (info->tuples > rel->tuples)
+ info->tuples = rel->tuples;
+ }
+
+ index_close(indexRelation, NoLock);
+
+ indexinfos = lcons(info, indexinfos);
+ }
+#endif
+}
+
+/* Use this function to reset the hooks that are not required to be registered
+ * all the time; these may have been left registered by the previous call, in
+ * case of an ERROR.
+ */
+static void
+resetSecondaryHooks()
+{
+ get_relation_info_hook = NULL;
+ explain_get_index_name_hook = NULL;
+}
+
+static bool
+is_virtual_index( Oid oid, IndexCandidate **cand_out )
+{
+ ListCell *cell1;
+
+ foreach( cell1, index_candidates )
+ {
+ IndexCandidate *cand = (IndexCandidate*)lfirst( cell1 );
+
+ if( cand->idxoid == oid )
+ {
+ if( cand_out )
+ *cand_out = cand;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static const char *
+explain_get_index_name_callback(Oid indexId)
+{
+ StringInfoData buf;
+ IndexCandidate *cand;
+
+ if( is_virtual_index( indexId, &cand ) )
+ {
+ initStringInfo(&buf);
+
+ appendStringInfo( &buf, "<V-Index>:%d", cand->idxoid );
+
+ return buf.data;
+ }
+
+ return NULL; /* allow default behavior */
+}
+
+/**
+ * save_advise
+ * for every candidate insert an entry into IND_ADV_TABL
+ */
+static void
+save_advise( List* candidates )
+{
+ StringInfoData query; /* string for Query */
+ StringInfoData cols; /* string for Columns */
+ Oid advise_oid;
+ ListCell *cell;
+
+ elog( DEBUG3, "IND ADV: save_advise: ENTER" );
+
+ Assert( list_length(candidates) != 0 );
+
+ /*
+ * Minimal check: check that IND_ADV_TABL is at least visible to us. There
+ * are a lot more checks we should do in order to not let the INSERT fail,
+ * like permissions, datatype mismatch, etc., but we leave those checks
+ * up to the executor.
+ */
+
+ /* find a relation named IND_ADV_TABL on the search path */
+ advise_oid = RelnameGetRelid( IND_ADV_TABL );
+
+ if (advise_oid != InvalidOid)
+ {
+#if 1
+ Relation advise_rel = relation_open(advise_oid, AccessShareLock);
+
+ if (advise_rel->rd_rel->relkind != RELKIND_RELATION
+ && advise_rel->rd_rel->relkind != RELKIND_VIEW)
+ {
+ relation_close(advise_rel, AccessShareLock);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg( IND_ADV_ERROR_NTV )));
+ }
+
+ relation_close(advise_rel, AccessShareLock);
+#else
+ /*
+ * heap_open() makes sure that the oid does not represent an INDEX or a
+ * COMPOSITE type, else it will raise an ERROR, which is exactly what we
+ * want. The comments above heap_open() ask the caller not to assume any
+ * storage since the returned relation may be a VIEW; but we don't mind,
+ * since the user may have defined some rules on it to make the INSERTs
+ * work smoothly! If not, we leave it up to the executor to raise ERROR.
+ */
+ PG_TRY();
+ {
+ heap_close(heap_open(advise_oid, AccessShareLock), AccessShareLock);
+ }
+ PG_CATCH();
+ {
+ errmsg( IND_ADV_ERROR_NTV );
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+#endif
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg( IND_ADV_ERROR_NE )));
+ }
+
+ initStringInfo( &query );
+ initStringInfo( &cols );
+
+ foreach( cell, candidates )
+ {
+ int i;
+ IndexCandidate* idxcd = (IndexCandidate*)lfirst( cell );
+
+ if( !idxcd->idxused )
+ continue;
+
+ if( cols.len > 0 ) pfree( cols.data );
+
+ initStringInfo( &cols );
+
+ for (i = 0; i < idxcd->ncols; ++i)
+ appendStringInfo( &cols, "%s%d", (i>0?",":""), idxcd->varattno[i]);
+
+ appendStringInfo( &query, "insert into \""IND_ADV_TABL"\" values"
+ "( %d, array[%s], %f, %d, %d, now());",
+ idxcd->reloid,
+ cols.data,
+ idxcd->profit,
+ idxcd->pages * BLCKSZ/1024, /* in KBs */
+ MyProcPid );
+ } /* foreach cell in candidates */
+
+ if( query.len > 0 ) /* if we generated any SQL */
+ {
+ if( SPI_connect() == SPI_OK_CONNECT )
+ {
+ if( SPI_execute( query.data, false, 0 ) != SPI_OK_INSERT )
+ elog( WARNING, "IND ADV: SPI could not INSERT." );
+
+ if( SPI_finish() != SPI_OK_FINISH )
+ elog( WARNING, "IND ADV: SPI did not FINISH properly." );
+ }
+ else
+ elog( WARNING, "IND ADV: SPI could not CONNECT." );
+ }
+
+ if ( query.len > 0 ) pfree( query.data );
+ if ( cols.len > 0 ) pfree( cols.data );
+
+ elog( DEBUG3, "IND ADV: save_advise: EXIT" );
+}
+
+/**
+ * remove_irrelevant_candidates
+ *
+ * A candidate is irrelevant if it complies with at least one of the following
+ * conditions:
+ * (a) it indexes an unsupported relation (system-relations or temp-relations)
+ * (b) it matches an already present index.
+ *
+ * TODO1 Log the candidates as they are pruned, and remove the call to
+ * log_candidates() in index_adviser() after this function is called.
+ *
+ */
+static List*
+remove_irrelevant_candidates( List* candidates )
+{
+ ListCell *cell = list_head(candidates);
+ ListCell *prev = NULL;
+
+ while(cell != NULL)
+ {
+ ListCell *old_cell = cell;
+
+ Oid base_rel_oid = ((IndexCandidate*)lfirst( cell ))->reloid;
+ Relation base_rel = heap_open( base_rel_oid, AccessShareLock );
+
+ /* decide if the relation is unsupported. This check is now done before
+ * creating a candidate in scan_generic_node(); but still keeping the
+ * code here.
+ */
+ if((base_rel->rd_istemp == true)
+ || IsSystemRelation(base_rel))
+ {
+ ListCell *cell2;
+ ListCell *prev2;
+ ListCell *next;
+
+ /* remove all candidates indexing currently unsupported relations */
+ elog( DEBUG1,
+ "Index candidate(s) on an unsupported relation (%d) found!",
+ base_rel_oid );
+
+ /* Remove all candidates with same unsupported relation */
+ for(cell2 = cell, prev2 = prev; cell2 != NULL; cell2 = next)
+ {
+ next = lnext(cell2);
+
+ if(((IndexCandidate*)lfirst(cell2))->reloid == base_rel_oid)
+ {
+ pfree((IndexCandidate*)lfirst(cell2));
+ candidates = list_delete_cell( candidates, cell2, prev2 );
+
+ if(cell2 == cell)
+ cell = next;
+ }
+ else
+ {
+ prev2 = cell2;
+ }
+ }
+ }
+ else
+ {
+ /* remove candidates that match any of already defined indexes */
+
+ /* get all index Oids */
+ ListCell *index_cell;
+ List *old_index_oids = RelationGetIndexList( base_rel );
+
+ foreach( index_cell, old_index_oids )
+ {
+ /* open index relation and get the index info */
+ Oid old_index_oid = lfirst_oid( index_cell );
+ Relation old_index_rel = index_open( old_index_oid,
+ AccessShareLock );
+ IndexInfo *old_index_info = BuildIndexInfo( old_index_rel );
+
+ /* We ignore expressional indexes and partial indexes */
+ if( old_index_rel->rd_index->indisvalid
+ && old_index_info->ii_Expressions == NIL
+ && old_index_info->ii_Predicate == NIL )
+ {
+ ListCell *cell2;
+ ListCell *prev2;
+ ListCell *next;
+
+ Assert( old_index_info->ii_Expressions == NIL );
+ Assert( old_index_info->ii_Predicate == NIL );
+
+ /* search for a matching candidate */
+ for(cell2 = cell, prev2 = prev;
+ cell2 != NULL;
+ cell2 = next)
+ {next = lnext(cell2);{
+
+ IndexCandidate* cand = (IndexCandidate*)lfirst(cell2);
+
+ signed int cmp = (signed int)cand->ncols
+ - old_index_info->ii_NumIndexAttrs;
+
+ if(cmp == 0)
+ {
+ int i = 0;
+ do
+ {
+ cmp =
+ cand->varattno[i]
+ - old_index_info->ii_KeyAttrNumbers[i];
+ ++i;
+ } while((cmp == 0) && (i < cand->ncols));
+ }
+
+ if(cmp != 0)
+ {
+
+ /* current candidate does not match the current
+ * index, so go to next candidate.
+ */
+ prev2 = cell2;
+ }
+ else
+ {
+ elog( DEBUG1,
+ "A candidate matches the index oid of : %d;"
+ "hence ignoring it.",
+ old_index_oid );
+
+ /* remove the candidate from the list */
+ candidates = list_delete_cell(candidates,
+ cell2, prev2);
+ pfree( cand );
+
+ if (cell2 == cell)
+ cell = next;
+
+ break; /* while */
+ }
+ }} /* for */
+ }
+
+ /* close index relation and free index info */
+ index_close( old_index_rel, AccessShareLock );
+ pfree( old_index_info );
+ }
+
+ /* free the list of existing index Oids */
+ list_free( old_index_oids );
+
+ /* clear the index-list, else the planner can not see the
+ * virtual-indexes
+ * TODO: Really?? Verify this.
+ */
+ base_rel->rd_indexlist = NIL;
+ base_rel->rd_indexvalid = 0;
+ }
+
+ /* close the relation */
+ heap_close( base_rel, AccessShareLock );
+
+ /*
+ * Move the pointer forward only if the logic above did not already do it;
+ * otherwise, cell is already pointing to a new list-element that needs
+ * processing.
+ */
+ if(cell == old_cell)
+ {
+ prev = cell;
+ cell = lnext(cell);
+ }
+ }
+
+ return candidates;
+}
+
+/**
+ * mark_used_candidates
+ *		runs through the plan to find the virtual indexes used by the planner
+ */
+static void
+mark_used_candidates(const Node* const node, List* const candidates)
+{
+ const ListCell *cell;
+ bool planNode = true; /* is it a plan-node */
+
+ elog( DEBUG3, "IND ADV: mark_used_candidates: ENTER" );
+
+ switch( nodeTag( node ) )
+ {
+ /* if the node is an indexscan */
+ case T_IndexScan:
+ {
+ /* are there any used virtual-indexes? */
+ const IndexScan* const idxScan = (const IndexScan*)node;
+
+ foreach( cell, candidates )
+ {
+
+ /* is virtual-index-oid in the IndexScan-list? */
+ IndexCandidate* const idxcd = (IndexCandidate*)lfirst( cell );
+ const bool used = idxcd->idxoid == idxScan->indexid;
+
+				/* OR this result into the existing value */
+ idxcd->idxused = idxcd->idxused || used;
+ }
+ }
+ break;
+
+ /* if the node is a bitmap-index-scan */
+ case T_BitmapIndexScan:
+ {
+ /* are there any used virtual-indexes? */
+ const BitmapIndexScan* const bmiScan = (const BitmapIndexScan*)node;
+ foreach( cell, candidates )
+ {
+ /* is virtual-index-oid in the BMIndexScan-list? */
+ IndexCandidate* const idxcd = (IndexCandidate*)lfirst( cell );
+ const bool used = idxcd->idxoid == bmiScan->indexid;
+
+				/* OR this result into the existing value */
+ idxcd->idxused = idxcd->idxused || used;
+ }
+ }
+ break;
+
+ /* if the node is a bitmap-and */
+ case T_BitmapAnd:
+ {
+ /* are there any used virtual-indexes? */
+ const BitmapAnd* const bmiAndScan = (const BitmapAnd*)node;
+
+ foreach( cell, bmiAndScan->bitmapplans )
+ mark_used_candidates( (Node*)lfirst( cell ), candidates );
+ }
+ break;
+
+ /* if the node is a bitmap-or */
+ case T_BitmapOr:
+ {
+ /* are there any used virtual-indexes? */
+ const BitmapOr* const bmiOrScan = (const BitmapOr*)node;
+
+ foreach( cell, bmiOrScan->bitmapplans )
+ mark_used_candidates( (Node*)lfirst( cell ), candidates );
+ }
+ break;
+
+ case T_SubqueryScan:
+ {
+ /* scan subqueryplan */
+ const SubqueryScan* const subScan = (const SubqueryScan*)node;
+
+ mark_used_candidates( (const Node*)subScan->subplan, candidates );
+ }
+ break;
+
+ case T_NestLoop:
+ case T_MergeJoin:
+ case T_HashJoin:
+ case T_Join:
+ {
+ /* scan join-quals */
+ const Join* const join = (const Join*)node;
+
+ foreach( cell, join->joinqual )
+ {
+ const Node* const qualPlan = (const Node*)lfirst( cell );
+ mark_used_candidates( qualPlan, candidates );
+ }
+ }
+ break;
+
+ case T_OpExpr:
+ {
+			const OpExpr* const expr = (const OpExpr*)node;
+
+			planNode = false;
+ foreach( cell, expr->args )
+ mark_used_candidates( (const Node*)lfirst( cell ), candidates );
+ }
+ break;
+
+ case T_SubPlan:
+ {
+ /* scan the subplan */
+			const SubPlan* const subPlan = (const SubPlan*)node;
+
+			planNode = false;
+
+			mark_used_candidates( (const Node*)exec_subplan_get_plan(
+													plannedStmtGlobal,
+													(SubPlan*)subPlan ),
+								candidates );
+ }
+ break;
+
+ case T_BoolExpr:
+ {
+			const BoolExpr* const expr = (const BoolExpr*)node;
+
+			planNode = false;
+ foreach( cell, expr->args )
+ {
+ const Node* const nodeBool = (const Node*)lfirst( cell );
+ mark_used_candidates( nodeBool, candidates );
+ }
+ }
+ break;
+
+ case T_FunctionScan:
+ case T_Result:
+ case T_Append:
+ case T_TidScan:
+ case T_Material:
+ case T_Sort:
+ case T_Group:
+ case T_Agg:
+ case T_Unique:
+ case T_Hash:
+ case T_SetOp:
+ case T_Limit:
+ case T_Scan:
+ case T_SeqScan:
+ case T_BitmapHeapScan:
+ break;
+
+ case T_FuncExpr:
+ case T_Const:
+ case T_Var:
+ planNode = false;
+ break;
+
+ /* report non-considered parse-node types */
+ default:
+ {
+ elog( NOTICE, "IND ADV: unhandled plan-node type: %d; Query: %s\n",
+ (int)nodeTag( node ), debug_query_string );
+ planNode = false; /* stop scanning the tree here */
+ }
+ break;
+ }
+
+ if( planNode )
+ {
+ const Plan* const plan = (Plan *) node;
+
+ if( plan->initPlan )
+ {
+ ListCell *cell;
+
+ foreach( cell, ((Plan*)node)->initPlan )
+ {
+ SubPlan *subPlan = (SubPlan*)lfirst( cell );
+
+ mark_used_candidates( (Node*)exec_subplan_get_plan(
+ plannedStmtGlobal,
+ subPlan),
+ candidates );
+ }
+ }
+
+ if( IsA(((Node*)plan), Append) )
+ {
+ Append *appendplan = (Append *)node;
+ ListCell *cell;
+
+ foreach( cell, appendplan->appendplans )
+ {
+ Plan *child = (Plan*)lfirst( cell );
+
+ mark_used_candidates( (Node*)child, candidates );
+ }
+ }
+
+
+ /* scan left- and right-tree */
+ if( outerPlan(plan) )
+ mark_used_candidates( (const Node*)outerPlan(plan), candidates );
+
+ if( innerPlan(plan) )
+ mark_used_candidates( (const Node*)innerPlan(plan), candidates );
+
+ /* walk through the qual-list */
+ foreach( cell, plan->qual )
+ {
+ const Node* const nodeQual = (const Node*)lfirst( cell );
+ mark_used_candidates( nodeQual, candidates );
+ }
+ }
+
+ elog( DEBUG3, "IND ADV: mark_used_candidates: EXIT" );
+}
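+
+/* For illustration: mark_used_candidates() walks the chosen plan tree
+ * recursively.  If, for example, the plan contains an IndexScan (or
+ * BitmapIndexScan) node whose indexid equals the idxoid of one of the
+ * virtual indexes, the matching candidate's idxused flag is set to true;
+ * candidates whose virtual index never shows up in the plan keep
+ * idxused == false.
+ */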
+
+/**
+ * scan_query
+ *		runs through the whole query
+ */
+static List*
+scan_query( const Query* const query,
+ List* const opnos,
+ List* rangeTableStack )
+{
+ const ListCell* cell;
+ List* candidates = NIL;
+ List* newCandidates = NIL;
+
+ elog( DEBUG3, "IND ADV: scan_query: ENTER" );
+
+ /* add the current rangetable to the stack */
+ rangeTableStack = lcons( query->rtable, rangeTableStack );
+
+ /* scan sub-queries */
+ foreach( cell, query->rtable )
+ {
+ const RangeTblEntry* const rte = (const RangeTblEntry*)lfirst( cell );
+
+ if( rte->subquery )
+ {
+ candidates = merge_candidates( candidates, scan_query(
+ rte->subquery,
+ opnos,
+ rangeTableStack));
+ }
+ }
+
+ /* scan "where" from the current query */
+ if( query->jointree->quals != NULL )
+ {
+ newCandidates = scan_generic_node( query->jointree->quals, opnos,
+ rangeTableStack );
+ }
+
+	/* if no index candidate was found in "where", scan "group by" */
+ if( ( newCandidates == NIL ) && ( query->groupClause != NULL ) )
+ {
+ newCandidates = scan_group_clause( query->groupClause,
+ query->targetList,
+ opnos,
+ rangeTableStack );
+ }
+
+	/* if no index candidate was found in "group by", scan "order by" */
+ if( ( newCandidates == NIL ) && ( query->sortClause != NULL ) )
+ {
+ newCandidates = scan_group_clause( query->sortClause,
+ query->targetList,
+ opnos,
+ rangeTableStack );
+ }
+
+ /* remove the current rangetable from the stack */
+ rangeTableStack = list_delete_ptr( rangeTableStack, query->rtable );
+
+	/* merge index candidates */
+ candidates = merge_candidates( candidates, newCandidates );
+
+ elog( DEBUG3, "IND ADV: scan_query: EXIT" );
+
+ return candidates;
+}
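+
+/* For illustration: given a query like
+ *     SELECT a FROM t WHERE b = 1 GROUP BY a ORDER BY a;
+ * the WHERE clause is scanned first and, assuming "=" is among the supported
+ * operators and t is a plain user table, yields a candidate on t(b).  Since
+ * that list is non-empty, the GROUP BY and ORDER BY clauses are not scanned
+ * for further candidates.
+ */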
+
+/**
+ * scan_group_clause
+ *		runs through the GROUP BY (or ORDER BY) clause entries
+ */
+static List*
+scan_group_clause( List* const groupList,
+ List* const targetList,
+ List* const opnos,
+ List* const rangeTableStack )
+{
+ const ListCell* cell;
+ List* candidates = NIL;
+
+ elog( DEBUG3, "IND ADV: scan_group_clause: ENTER" );
+
+ /* scan every entry in the group-list */
+ foreach( cell, groupList )
+ {
+ /* convert to group-element */
+ const GroupClause* const groupElm = (const GroupClause*)lfirst( cell );
+
+ /* get the column the group-clause is for */
+ const TargetEntry* const targetElm = list_nth( targetList,
+ groupElm->tleSortGroupRef - 1);
+
+ /* scan the node and get candidates */
+ const Node* const node = (const Node*)targetElm->expr;
+
+ candidates = merge_candidates( candidates, scan_generic_node( node,
+ opnos,
+ rangeTableStack));
+ }
+
+ elog( DEBUG3, "IND ADV: scan_group_clause: EXIT" );
+
+ return candidates;
+}
+
+/**
+ * scan_generic_node
+ *		runs through the given node tree
+ */
+static List*
+scan_generic_node( const Node* const root,
+ List* const opnos,
+ List* const rangeTableStack )
+{
+ ListCell* cell;
+ List* candidates = NIL;
+
+ elog( DEBUG3, "IND ADV: scan_generic_node: ENTER" );
+
+ Assert( root != NULL );
+
+ switch( nodeTag( root ) )
+ {
+ /* if the node is an aggregate */
+ case T_Aggref:
+ {
+ const Aggref* const aggref = (const Aggref*)root;
+ const Node* const list = (const Node*)aggref->args;
+
+ /* The arg list may be NIL in case of count(*) */
+ if( list != NULL )
+ candidates = scan_generic_node( list, opnos, rangeTableStack );
+ }
+ break;
+
+ /* if the node is a boolean-expression */
+ case T_BoolExpr:
+ {
+ const BoolExpr* const expr = (const BoolExpr*)root;
+
+ if( expr->boolop != AND_EXPR )
+ {
+ /* non-AND expression */
+ foreach( cell, expr->args )
+ {
+ const Node* const node = (const Node*)lfirst( cell );
+ candidates = merge_candidates( candidates,
+ scan_generic_node( node, opnos,
+ rangeTableStack));
+ }
+ }
+ else
+ {
+ /* AND expression */
+ List* compositeCandidates = NIL;
+
+ foreach( cell, expr->args )
+ {
+ const Node* const node = (const Node*)lfirst( cell );
+ List *icList;
+ List *cicList;
+
+ icList = scan_generic_node( node, opnos, rangeTableStack );
+
+ cicList = build_composite_candidates(candidates, icList);
+
+ candidates = merge_candidates(candidates, icList);
+
+ compositeCandidates = merge_candidates(compositeCandidates,
+ cicList);
+ }
+
+ /* now append the composite (multi-col) indexes to the list */
+ candidates = merge_candidates(candidates, compositeCandidates);
+ }
+ }
+ break;
+
+ /* if the node is list of other nodes (e.g. group-by expressions) */
+ case T_List:
+ {
+ List* const list = (List*)root;
+ foreach( cell, list )
+ {
+ const Node* const node = (const Node*)lfirst( cell );
+
+ candidates = merge_candidates( candidates,
+ scan_generic_node( node, opnos,
+ rangeTableStack));
+ }
+ }
+ break;
+ /* if the node is an operator */
+ case T_OpExpr:
+ {
+ /* get candidates if operator is supported */
+ const OpExpr* const expr = (const OpExpr*)root;
+
+ if( list_member_oid( opnos, expr->opno ) )
+ {
+ foreach( cell, expr->args )
+ {
+ const Node* const node = (const Node*)lfirst( cell );
+
+ candidates = merge_candidates( candidates,
+ scan_generic_node( node, opnos,
+ rangeTableStack));
+ }
+ }
+ }
+ break;
+
+ /* if this case is reached, the variable is an index-candidate */
+ case T_Var:
+ {
+ const Var* const expr = (const Var*)root;
+ List* rt = list_nth( rangeTableStack, expr->varlevelsup );
+ const RangeTblEntry* rte = list_nth( rt, expr->varno - 1 );
+
+ /* only relations are source for index-candidates */
+ if( rte->rtekind == RTE_RELATION )
+ {
+ Relation base_rel = heap_open( rte->relid, AccessShareLock );
+
+ /* We do not support catalog tables and temporary tables */
+ if( base_rel->rd_istemp != true
+ && !IsSystemRelation(base_rel)
+ /* and don't recommend indexes on hidden/system columns */
+ && expr->varattno > 0
+ /* and it should have at least two tuples */
+ && base_rel->rd_rel->relpages > 1
+ && base_rel->rd_rel->reltuples > 1 )
+ {
+ /* create index-candidate and build a new list */
+ int i;
+ IndexCandidate *cand = (IndexCandidate*)palloc0(
+ sizeof(IndexCandidate));
+
+ cand->varno = expr->varno;
+ cand->varlevelsup = expr->varlevelsup;
+ cand->ncols = 1;
+ cand->vartype[ 0 ] = expr->vartype;
+ cand->varattno[ 0 ] = expr->varattno;
+ for( i = 1; i < INDEX_MAX_KEYS; ++i ) cand->varattno[i] = 0;
+ cand->reloid = rte->relid;
+ cand->idxused = false;
+
+ candidates = list_make1( cand );
+ }
+
+ heap_close( base_rel, AccessShareLock );
+ }
+ }
+ break;
+
+ /* subquery in where-clause */
+ case T_SubLink:
+ {
+ /* convert it to sublink-expression */
+ const SubLink* const expr = (const SubLink*)root;
+
+ candidates = scan_generic_node( expr->subselect, opnos,
+ rangeTableStack );
+
+ /* scan lefthand expression (if any); [NOT] EXISTS do not have it */
+ if( expr->testexpr )
+ candidates = merge_candidates(candidates,
+ scan_generic_node( expr->testexpr,
+ opnos,
+ rangeTableStack));
+ }
+ break;
+
+ case T_RelabelType:
+ {
+ const RelabelType* const relabeltype = (const RelabelType*)root;
+ const Node* const node = (const Node*)relabeltype->arg;
+
+ candidates = scan_generic_node( node, opnos, rangeTableStack );
+ }
+ break;
+
+ /* Query found */
+ case T_Query:
+ {
+ const Query* const query = (const Query*)root;
+
+ candidates = scan_query( query, opnos, rangeTableStack );
+ }
+ break;
+
+ /* ignore some types */
+ case T_FuncExpr:
+ case T_Param:
+ case T_Const:
+ break;
+
+ /* report non-considered parse-node types */
+ default:
+ elog( NOTICE, "IND ADV: unhandled parse-node type: %d; Query: %s\n",
+ (int)nodeTag( root ), debug_query_string );
+ break;
+ }
+
+ elog( DEBUG3, "IND ADV: scan_generic_node: EXIT" );
+
+ return candidates;
+}
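+
+/* For illustration: for a qual like (a = 1 AND b = 2) on a supported table t,
+ * the two Var leaves yield the single-column candidates t(a) and t(b).
+ * Because the expression is an AND, build_composite_candidates() is called as
+ * well, so the multi-column candidates t(a,b) and t(b,a) also end up in the
+ * returned list.
+ */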
+
+
+
+/**
+ * compare_candidates
+ */
+static int
+compare_candidates( const IndexCandidate* _ic1,
+ const IndexCandidate* _ic2 )
+{
+ int result = (signed int)_ic1->reloid - (signed int)_ic2->reloid;
+
+ if( result == 0 )
+ {
+ result = _ic1->ncols - _ic2->ncols;
+
+ if( result == 0 )
+ {
+ int i = 0;
+
+ do
+ {
+ result = _ic1->varattno[ i ] - _ic2->varattno[ i ];
+ ++i;
+ } while( ( result == 0 ) && ( i < _ic1->ncols ) );
+ }
+ }
+
+ return result;
+}
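+
+/* For illustration: candidates compare first on the table oid, then on the
+ * number of columns, then column by column on the attribute numbers.  So
+ * t(a) sorts before t(a,b), and t(a,b) sorts before t(b,a) when a's attno is
+ * smaller than b's.  This is the ordering assumed by merge_candidates()
+ * below.
+ */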
+
+/**
+ * log_candidates
+ */
+static void
+log_candidates( const char* prefix, List* list )
+{
+ ListCell *cell;
+ StringInfoData str;/* output string */
+
+ /* don't do anything unless we are going to log it */
+ if( log_min_messages < DEBUG1 )
+ return;
+
+ t_continue( tLogCandidates );
+
+ initStringInfo( &str );
+
+ foreach( cell, list )
+ {
+ int i;
+ const IndexCandidate* const cand = (IndexCandidate*)lfirst( cell );
+
+ appendStringInfo( &str, " %d_(", cand->reloid );
+
+ for( i = 0; i < cand->ncols; ++i )
+ appendStringInfo( &str, "%s%d", (i>0?",":""), cand->varattno[ i ] );
+
+ appendStringInfo( &str, ")%c", ((lnext( cell ) != NULL)?',':' ') );
+ }
+
+ elog( DEBUG1, "IND ADV: %s: |%d| {%s}", prefix, list_length(list),
+ str.len ? str.data : "" );
+
+ if( str.len > 0 ) pfree( str.data );
+
+ t_stop( tLogCandidates );
+}
+
+/**
+ * merge_candidates
+ *		Builds a new list out of the two lists passed in, and then frees them
+ */
+static List*
+merge_candidates( List* list1, List* list2 )
+{
+ List *ret;
+ ListCell *cell1;
+ ListCell *cell2;
+ ListCell *prev2;
+
+ if( list_length( list1 ) == 0 && list_length( list2 ) == 0 )
+ return NIL;
+
+ elog( DEBUG3, "IND ADV: merge_candidates: ENTER" );
+
+ /* list1 and list2 are sorted lists of candidates in ascending order */
+
+ elog( DEBUG1, "IND ADV: ---merge_candidates---" );
+ log_candidates( "idxcd-list1", list1 );
+ log_candidates( "idxcd-list2", list2 );
+
+ if( list_length( list1 ) == 0 )
+ return list2;
+
+ if( list_length( list2 ) == 0 )
+ return list1;
+
+ ret = NIL;
+ prev2 = NULL;
+
+ for( cell1 = list_head(list1), cell2 = list_head(list2);
+ (cell1 != NULL) && (cell2 != NULL);
+ )
+ {
+ const int cmp = compare_candidates( (IndexCandidate*)lfirst( cell1 ),
+ (IndexCandidate*)lfirst( cell2 ) );
+
+ if( cmp <= 0 )
+ {
+ /* next candidate comes from list 1 */
+ ret = lappend( ret, lfirst( cell1 ) );
+
+ cell1 = lnext( cell1 );
+
+ /* if we have found two identical candidates then we remove the
+ * candidate from list 2
+ */
+ if( cmp == 0 )
+ {
+ ListCell *next = lnext( cell2 );
+
+ pfree( (IndexCandidate*)lfirst( cell2 ) );
+ list2 = list_delete_cell( list2, cell2, prev2 );
+
+ cell2 = next;
+ }
+ }
+ else
+ {
+ /* next candidate comes from list 2 */
+ ret = lappend( ret, lfirst( cell2 ) );
+
+ prev2 = cell2;
+ cell2 = lnext( cell2 );
+ }
+ }
+
+ for( ; cell1; cell1 = lnext(cell1) )
+ ret = lappend( ret, lfirst(cell1) );
+
+ for( ; cell2; cell2 = lnext(cell2) )
+ ret = lappend( ret, lfirst(cell2) );
+
+ list_free( list1 );
+ list_free( list2 );
+
+ log_candidates( "merged-list", ret );
+
+ elog( DEBUG3, "IND ADV: merge_candidates: EXIT" );
+
+ return ret;
+}
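+
+/* For illustration: merging the sorted lists { t(a), t(c) } and
+ * { t(b), t(c) } returns { t(a), t(b), t(c) }; the duplicate t(c) coming from
+ * the second list is pfree'd, and the two input lists themselves are released
+ * with list_free().
+ */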
+
+/**
+ * build_composite_candidates.
+ *
+ * @param [IN] list1 is a sorted list of candidates in ascending order.
+ * @param [IN] list2 is a sorted list of candidates in ascending order.
+ *
+ * @returns A new sorted list containing composite candidates.
+ */
+static List*
+build_composite_candidates( List* list1, List* list2 )
+{
+ ListCell *cell1 = list_head( list1 );
+ ListCell *cell2 = list_head( list2 );
+ IndexCandidate *cand1;
+ IndexCandidate *cand2;
+
+ List* compositeCandidates = NIL;
+
+ elog( DEBUG3, "IND ADV: build_composite_candidates: ENTER" );
+
+ if( cell1 == NULL || cell2 == NULL )
+ goto DoneCleanly;
+
+ elog( DEBUG1, "IND ADV: ---build_composite_candidates---" );
+ log_candidates( "idxcd-list1", list1 );
+ log_candidates( "idxcd-list2", list2 );
+
+	/* build list with composite candidates */
+ while( ( cell1 != NULL ) && ( cell2 != NULL ) )
+ {
+ int cmp ;
+
+ cand1 = ((IndexCandidate*)lfirst( cell1 ));
+ cand2 = ((IndexCandidate*)lfirst( cell2 ));
+
+ cmp = cand1->reloid - cand2->reloid;
+
+ if( cmp != 0 )
+ {
+ Oid relOid;
+
+ if( cmp < 0 )
+ {
+ /* advance in list 1 */
+ relOid = cand2->reloid;
+
+ do
+ cell1 = lnext( cell1 );
+ while( cell1 != NULL && (relOid > cand1->reloid));
+ }
+ else
+ {
+ /* advance in list 2 */
+ relOid = cand1->reloid;
+
+ do
+ cell2 = lnext( cell2 );
+ while( cell2 != NULL && ( relOid > cand2->reloid ));
+ }
+ }
+ else
+ {
+ /* build composite candidates */
+ Oid relationOid = ((IndexCandidate*)lfirst(cell1))->reloid;
+ ListCell* l1b;
+
+ do
+ {
+ cand2 = lfirst( cell2 );
+
+ l1b = cell1;
+ do
+ {
+ cand1 = lfirst( l1b );
+
+ /* do not build a composite candidate if the number of
+ * attributes would exceed INDEX_MAX_KEYS
+ */
+ if( ( cand1->ncols + cand2->ncols ) < INDEX_MAX_KEYS )
+ {
+
+						/* check that the two candidates do not share any
+						 * attribute; foundCommon is set to true as soon as
+						 * a common attribute is found.
+						 */
+ int i1, i2;
+ bool foundCommon = false;
+
+ for(i1 = 0; i1 < cand1->ncols && !foundCommon; ++i1)
+ for(i2 = 0; i2 < cand2->ncols && !foundCommon; ++i2)
+ if(cand1->varattno[i1] == cand2->varattno[i2])
+ foundCommon = true;
+
+ /* build composite candidates if the previous test
+ * succeeded
+ */
+ if( !foundCommon )
+ {
+ /* composite candidate 1 is a combination of
+ * candidates 1,2 AND
+ * composite candidate 2 is a combination of
+ * candidate 2,1
+ */
+ IndexCandidate* cic1
+ = (IndexCandidate*)palloc(
+ sizeof(IndexCandidate));
+ IndexCandidate* cic2
+ = (IndexCandidate*)palloc(
+ sizeof(IndexCandidate));
+
+ /* init some members of composite candidate 1 */
+ cic1->varno = -1;
+ cic1->varlevelsup = -1;
+ cic1->ncols = cand1->ncols + cand2->ncols;
+ cic1->reloid = relationOid;
+ cic1->idxused = false;
+
+ /* init some members of composite candidate 2 */
+ cic2->varno = -1;
+ cic2->varlevelsup = -1;
+ cic2->ncols = cand1->ncols + cand2->ncols;
+ cic2->reloid = relationOid;
+ cic2->idxused = false;
+
+ /* copy attributes of candidate 1 to attributes of
+ * composite candidates 1,2
+ */
+ for( i1 = 0; i1 < cand1->ncols; ++i1)
+ {
+ cic1->vartype[ i1 ]
+ = cic2->vartype[cand2->ncols + i1]
+ = cand1->vartype[ i1 ];
+
+ cic1->varattno[ i1 ]
+ = cic2->varattno[cand2->ncols + i1]
+ = cand1->varattno[ i1 ];
+ }
+
+ /* copy attributes of candidate 2 to attributes of
+ * composite candidates 2,1
+ */
+ for( i1 = 0; i1 < cand2->ncols; ++i1)
+ {
+ cic1->vartype[cand1->ncols + i1]
+ = cic2->vartype[ i1 ]
+ = cand2->vartype[ i1 ];
+
+ cic1->varattno[cand1->ncols + i1]
+ = cic2->varattno[ i1 ]
+ = cand2->varattno[ i1 ];
+ }
+
+ /* set remaining attributes to null */
+ for( i1 = cand1->ncols + cand2->ncols;
+ i1 < INDEX_MAX_KEYS;
+ ++i1 )
+ {
+ cic1->varattno[ i1 ] = 0;
+ cic2->varattno[ i1 ] = 0;
+ }
+
+ /* add new composite candidates to list */
+ signed int cmp = compare_candidates(cic1, cic2);
+
+ if( cmp == 0 )
+ {
+ compositeCandidates =
+ merge_candidates( list_make1( cic1 ),
+ compositeCandidates );
+ pfree( cic2 );
+ }
+ else
+ {
+ List* l;
+
+ if( cmp < 0 )
+ l = lcons( cic1, list_make1( cic2 ) );
+ else
+ l = lcons( cic2, list_make1( cic1 ) );
+
+ compositeCandidates =
+ merge_candidates(l, compositeCandidates);
+ }
+ }
+ }
+
+ l1b = lnext( l1b );
+
+ } while( ( l1b != NULL ) &&
+ ( relationOid == ((IndexCandidate*)lfirst( l1b ))->reloid));
+
+ cell2 = lnext( cell2 );
+
+ } while( ( cell2 != NULL ) &&
+ ( relationOid == ((IndexCandidate*)lfirst( cell2 ))->reloid ) );
+ cell1 = l1b;
+ }
+ }
+
+ log_candidates( "composite-l", compositeCandidates );
+
+DoneCleanly:
+ elog( DEBUG3, "IND ADV: build_composite_candidates: EXIT" );
+
+ return compositeCandidates;
+}
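+
+/* For illustration: given the single-column candidates t(a) in list1 and t(b)
+ * in list2 on the same relation, this builds the composite candidates t(a,b)
+ * and t(b,a).  Candidate pairs on different relations, pairs that share an
+ * attribute, and combinations whose total column count would reach
+ * INDEX_MAX_KEYS produce no composite.
+ */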
+
+#if CREATE_V_INDEXES
+/**
+ * create_virtual_indexes
+ * creates an index for every entry in the index-candidate-list.
+ *
+ * It may delete some candidates from the list passed in to it.
+ */
+static List*
+create_virtual_indexes( List* candidates )
+{
+ ListCell *cell; /* an entry from the candidate-list */
+ ListCell *prev, *next; /* for list manipulation */
+ char idx_name[ 16 ]; /* contains the name of the current index */
+ int idx_count = 0; /* number of the current index */
+ IndexInfo* indexInfo;
+ Oid op_class[INDEX_MAX_KEYS];/* needed for creating indexes */
+
+ elog( DEBUG3, "IND ADV: create_virtual_indexes: ENTER" );
+
+ /* fill index-info */
+ indexInfo = makeNode( IndexInfo );
+
+ indexInfo->ii_Expressions = NIL;
+ indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_Predicate = NIL;
+ indexInfo->ii_PredicateState = NIL;
+ indexInfo->ii_Unique = false;
+ indexInfo->ii_Concurrent = true;
+
+ /* create index for every list entry */
+	for( prev = NULL, cell = list_head(candidates);
+		cell != NULL;
+		cell = next)
+	{
+		int i;
+
+		IndexCandidate* const cand = (IndexCandidate*)lfirst( cell );
+
+		/* fetch the next cell now, since we may delete the current one below */
+		next = lnext(cell);
+
+ indexInfo->ii_NumIndexAttrs = cand->ncols;
+
+ for( i = 0; i < cand->ncols; ++i )
+ {
+ /* prepare op_class[] */
+ op_class[i] = GetDefaultOpClass( cand->vartype[ i ], BTREE_AM_OID );
+
+ if( op_class[i] == InvalidOid )
+ /* don't create this index if couldn't find a default operator*/
+ break;
+
+ /* ... and set indexed attribute number */
+ indexInfo->ii_KeyAttrNumbers[i] = cand->varattno[i];
+ }
+
+ /* if we decided not to create the index above, try next candidate */
+ if( i < cand->ncols )
+ {
+ candidates = list_delete_cell( candidates, cell, prev );
+ continue;
+ }
+
+ /* generate indexname */
+ sprintf( idx_name, "idx_adv_%d", idx_count );
+
+ /* create the index without data */
+ cand->idxoid = index_create( cand->reloid, idx_name,
+ InvalidOid, indexInfo, BTREE_AM_OID,
+ InvalidOid, op_class, NULL, (Datum)0,
+ false, false, false, true, true );
+
+ elog( DEBUG1, "IND ADV: virtual index created: oid=%d name=%s size=%d",
+ cand->idxoid, idx_name, cand->pages );
+
+ /* increase count for the next index */
+ ++idx_count;
+ prev = cell;
+ }
+
+ pfree( indexInfo );
+
+ /* do CCI to make the new metadata changes "visible" */
+ CommandCounterIncrement();
+
+ elog( DEBUG3, "IND ADV: create_virtual_indexes: EXIT" );
+
+ return candidates;
+}
+#endif
+
+#if CREATE_V_INDEXES
+/**
+ * drop_virtual_indexes
+ * drops all virtual-indexes
+ */
+static void
+drop_virtual_indexes( List* candidates )
+{
+	ListCell*	cell;	/* an entry from the index-candidate-list */
+
+ elog( DEBUG3, "IND ADV: drop_virtual_indexes: ENTER" );
+
+	/* drop the index for every list entry */
+ foreach( cell, candidates )
+ {
+ /* TODO: have a look at implementation of index_drop! citation:
+ * "NOTE: this routine should now only be called through
+ * performDeletion(), else associated dependencies won't be cleaned up."
+ */
+
+ /* disabling index_drop() call, since it acquires AccessExclusiveLock
+ * on the base table, and hence causing a deadlock when multiple
+ * clients are running the same query
+ */
+
+/* IndexCandidate* cand = (IndexCandidate*)lfirst( cell );
+
+ index_drop( cand->idxoid );
+ elog( DEBUG1, "IND ADV: virtual index dropped: oid=%d", cand->idxoid );
+*/ }
+
+ /* do CCI to make the new metadata changes "visible" */
+ CommandCounterIncrement();
+
+ elog( DEBUG3, "IND ADV: drop_virtual_indexes: EXIT" );
+}
+#endif
+
+static int4
+estimate_index_pages(Oid rel_oid, Oid ind_oid )
+{
+ Size data_length;
+ int i;
+ int natts;
+ int2 var_att_count;
+	int4		rel_pages;	/* disk pages in the heap relation */
+	float4		rel_tuples; /* tuples in the heap relation */
+	double		idx_pages;	/* disk pages in the index relation */
+
+ TupleDesc ind_tup_desc;
+ Relation base_rel;
+ Relation index_rel;
+ Form_pg_attribute *atts;
+
+ base_rel = heap_open( rel_oid, AccessShareLock );
+ index_rel = index_open( ind_oid, AccessShareLock );
+
+ rel_pages = base_rel->rd_rel->relpages;
+ rel_tuples = base_rel->rd_rel->reltuples;
+
+ ind_tup_desc = RelationGetDescr( index_rel );
+
+ atts = ind_tup_desc->attrs;
+ natts = ind_tup_desc->natts;
+
+ /*
+	 * These calculations are heavily borrowed from index_form_tuple() and
+	 * heap_compute_data_size(). The only difference is that they have a
+	 * real tuple being inserted, and hence all the values are available,
+	 * whereas we don't have any of them available here.
+ */
+
+ /*
+	 * First, let's calculate the contribution of the fixed-size columns to the
+	 * size of the index tuple
+ */
+ var_att_count = 0;
+ data_length = 0;
+ for( i = 0; i < natts; ++i)
+ {
+ /* the following is based on att_addlength() macro */
+ if( atts[i]->attlen > 0 )
+ {
+			/* plain assignment, not +=: att_align_nominal() already includes data_length in its result */
+ data_length = att_align_nominal(data_length, atts[i]->attalign);
+
+ data_length += atts[i]->attlen;
+ }
+ else
+ if( atts[i]->attlen == -1 )
+ {
+ data_length += atts[i]->atttypmod + VARHDRSZ;
+ }
+ else
+ { /* null terminated data */
+ Assert( atts[i]->attlen == -2 );
+ ++var_att_count;
+ }
+ }
+
+ /*
+ * Now, estimate the average space occupied by variable-length columns, per
+ * tuple. This is calculated as:
+ * Total 'available' space
+ * minus space consumed by ItemIdData
+ * minus space consumed by fixed-length columns
+ */
+ if( var_att_count )
+ data_length += (((float)rel_pages * (BLCKSZ - (sizeof(PageHeaderData)
+ - sizeof(ItemIdData)
+ ) ) )
+ - (rel_tuples * sizeof(ItemIdData))
+ - (data_length * rel_tuples)
+ )
+ /rel_tuples;
+
+ /* Take into account the possibility that we might have NULL values */
+ data_length += IndexInfoFindDataOffset( INDEX_NULL_MASK );
+
+ idx_pages = (rel_tuples * (data_length + sizeof(ItemIdData)))
+ /((BLCKSZ - SizeOfPageHeaderData
+ - sizeof(BTPageOpaqueData)
+ )
+ * ((float)BTREE_DEFAULT_FILLFACTOR/100));
+
+ idx_pages = ceil( idx_pages );
+
+ heap_close( base_rel, AccessShareLock );
+ index_close( index_rel, AccessShareLock );
+
+ return (int4)idx_pages;
+}
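+
+/* Rough, illustrative example of the estimate above (numbers approximate):
+ * for a single int4 key column there are no variable-length attributes, so
+ * data_length is 4 bytes plus the index-tuple header overhead returned by
+ * IndexInfoFindDataOffset().  With rel_tuples = 100000 and BLCKSZ = 8192,
+ * the formula works out to a few hundred 8 KB pages once the default btree
+ * fill factor is applied.
+ */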
diff --git a/index_adviser/index_adviser.h b/index_adviser/index_adviser.h
new file mode 100644
index 0000000..8b7b07c
--- /dev/null
+++ b/index_adviser/index_adviser.h
@@ -0,0 +1,41 @@
+/*-------------------------------------------------------------------------
+ *
+ * index_adviser.h
+ *	  Prototypes for the Index Adviser (index_adviser.c)
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef INDEX_ADVISER_H
+#define INDEX_ADVISER_H 1
+
+
+#include "postgres.h"
+
+#include "nodes/print.h"
+#include "parser/parsetree.h"
+#include "catalog/namespace.h"
+#include "executor/executor.h"
+
+typedef struct {
+
+ Index varno; /* index into the rangetable */
+ Index varlevelsup; /* points to the correct rangetable */
+ int2 ncols; /* number of indexed columns */
+ Oid vartype[INDEX_MAX_KEYS];/* type of the column(s) */
+ AttrNumber varattno[INDEX_MAX_KEYS];/* attribute number of the column(s) */
+ Oid reloid; /* the table oid */
+//TODO1 remove this member
+ Oid idxoid; /* the virtual index oid */
+ BlockNumber pages; /* the estimated size of index */
+ bool idxused; /* was this used by the planner? */
+ float4 profit; /* profit made by using this cand */
+
+} IndexCandidate;
+
+extern void _PG_init(void);
+extern void _PG_fini(void);
+
+#define compile_assert(x) extern int _compile_assert_array[(x)?1:-1]
+
+#endif /* INDEX_ADVISER_H */
diff --git a/pg_advise/Makefile b/pg_advise/Makefile
new file mode 100644
index 0000000..2574ae8
--- /dev/null
+++ b/pg_advise/Makefile
@@ -0,0 +1,12 @@
+
+subdir = contrib/pgadviser/pg_advise
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+PROGRAM = pg_advise
+OBJS = advise_index.o util_funcs.o
+
+PG_CPPFLAGS = -I$(libpq_srcdir)
+PG_LIBS = $(libpq)
+
+include $(top_srcdir)/contrib/contrib-global.mk
diff --git a/pg_advise/advise_index.c b/pg_advise/advise_index.c
new file mode 100644
index 0000000..76dee63
--- /dev/null
+++ b/pg_advise/advise_index.c
@@ -0,0 +1,452 @@
+/*
+ * pg_advise: the frontend to the indexadvisor
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "libpq-fe.h"
+#include "advise_index.h"
+
+#define ADV_MAX_COLS 32
+
+static PGconn *init_connection(const char *dbname, const char *host, int port,
+ const char *user, const char *password)
+{
+ char conn_str[1024];
+ PGconn *conn;
+
+ sprintf(conn_str, "dbname = %s host = %s port = %d user = %s password = %s"
+ " options = '-c local_preload_libraries=libpg_index_adviser'",
+ dbname, host, port, user, password);
+
+ conn = PQconnectdb(conn_str);
+
+ if (PQstatus(conn) != CONNECTION_OK)
+ {
+ fprintf(stderr, "ERROR: %s", PQerrorMessage(conn));
+ return NULL;
+ }
+
+ return conn;
+}
+
+static int prepare_advisor(PGconn *conn)
+{
+#if 0
+ PGresult *res;
+
+ res = PQexec(conn, "SET enable_advise_index TO true");
+
+ if (res == NULL)
+ {
+ fprintf(stderr, "%s", PQerrorMessage(conn));
+ return -1;
+ }
+
+ res = PQexec(conn,
+ "DELETE FROM advise_index WHERE backend_pid = pg_backend_pid()");
+
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ {
+ fprintf(stderr, "ERROR: %s", PQerrorMessage(conn));
+ return -1;
+ }
+ else
+ PQclear(res);
+#endif
+ return 0;
+}
+
+static int analyse_workload(PGconn *conn, FILE *file)
+{
+ PGresult *res;
+ char *query = NULL;
+ char line[1024];
+
+ printf("Analyzing queries ");
+
+ for(;;)
+ {
+ if (fgets(line, 1024, file) == NULL)
+ break;
+ if (query == NULL)
+ {
+ query = (char *)malloc(10*1024);
+ strcpy(query, "EXPLAIN ");
+ strcat(query, line);
+ }
+ else
+ {
+ if (strlen(query) + strlen(line) > 10*1024)
+ {
+ fprintf(stderr, "ERROR: Query string too long.\n");
+ return -1;
+ }
+
+ strcat(query, line);
+ }
+
+ if (strchr(query, ';') != NULL)
+ {
+ // printf("query \#%d: %s\n", ++lno, query);
+ res = PQexec(conn, query);
+ printf(".");
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ fprintf(stderr, "ERROR: %s", PQerrorMessage(conn));
+ return -1;
+ }
+ else
+ PQclear(res);
+
+ free(query);
+ query = NULL;
+ }
+ }
+ printf(" done.\n");
+ return 0;
+}
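+
+/*
+ * For illustration: the workload file is plain SQL text; a statement may span
+ * several lines and is taken as complete when a ';' is seen, e.g.
+ *
+ *     select * from t
+ *     where a = 100;
+ *
+ * Each complete statement is sent to the server prefixed with "EXPLAIN", so
+ * the workload queries are only planned, never executed.
+ */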
+
+static int read_advisor_output(PGconn *conn, AdvIndexList *index_list)
+{
+ PGresult *res;
+ int i;
+ int num_indexes = 0;
+ char stmt[1024];
+
+ res = PQexec(conn, "BEGIN");
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ {
+ fprintf(stderr, "ERROR: BEGIN failed:\n %s", PQerrorMessage(conn));
+ PQclear(res);
+ return 0;
+ }
+
+ snprintf(stmt, sizeof(stmt),
+ "SELECT c.relname,"
+ "attrs AS colids,"
+ "MAX(index_size) AS size_in_pages,"
+ "SUM(profit) AS benefit,"
+ "SUM(profit)/MAX(index_size) AS gain "
+ "FROM advise_index a,"
+ "pg_class c "
+ "WHERE a.backend_pid = pg_backend_pid() "
+ "AND a.reloid = c.oid "
+ "GROUP BY c.relname, colids "
+ "ORDER BY gain"
+ " DESC");
+
+ res = PQexec(conn, stmt);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ fprintf(stderr, "ERROR: %s", PQerrorMessage(conn));
+ PQclear(res);
+ return 0;
+ }
+
+ *index_list = (AdvIndexInfo **)malloc(PQntuples(res)*sizeof(AdvIndexInfo*));
+
+ for (i = 0; i < PQntuples(res); ++i)
+ {
+ AdvIndexInfo *index = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo));
+
+ index->table = strdup(PQgetvalue( res, i, 0));
+ index->col_ids = strdup(PQgetvalue( res, i, 1));
+ /*
+		 * size returned by the query is in KBs.
+		 * (TODO: change the backend to dump size in KBs. -- Done.)
+ */
+ index->size = atol(PQgetvalue( res, i, 2));
+ index->benefit = atof(PQgetvalue( res, i, 3));
+ index->used = false;
+
+ (*index_list)[i] = index;
+
+ printf("size = %d KB, benefit = %f\n", index->size, index->benefit);
+ ++num_indexes;
+ }
+
+ PQclear(res);
+ res = PQexec(conn, "END");
+ PQclear(res);
+
+ return num_indexes;
+}
+
+static char* get_column_names(PGconn *conn, const char *table, char *column_ids)
+{
+ PGresult *res;
+ int len, colno;
+ char stmt[512];
+ char *idxdef;
+ char *colnames[ADV_MAX_COLS] = {0};
+ char *tok;
+
+ res = PQexec(conn, "BEGIN");
+
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ {
+ fprintf(stderr, "ERROR: BEGIN failed.\n %s", PQerrorMessage(conn));
+ PQclear(res);
+ return NULL;
+ }
+
+ for( colno = 0, tok = strtok(column_ids, "{,}");
+ tok;
+ ++colno, tok = strtok(NULL, "{,}"))
+ {
+ snprintf(stmt, sizeof(stmt),
+ "SELECT a.attname,"
+ "a.attnum "
+ "FROM pg_class c,"
+ "pg_attribute a "
+ "WHERE c.relname = '%s' "
+ "AND a.attrelid = c.oid "
+ "AND a.attnum = %s",
+ table, tok);
+
+ res = PQexec(conn, stmt);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ fprintf(stderr, "ERROR: %s", PQerrorMessage(conn));
+ PQclear(res);
+ return NULL;
+ }
+
+ if (PQntuples(res) != 1)
+ {
+ fprintf(stderr, "ERROR: an internal query failed at %s:%d.",
+ __FILE__, __LINE__ );
+ PQclear(res);
+ return NULL;
+ }
+
+ colnames[colno] = strdup(PQgetvalue(res, 0, 0));
+ }
+
+ res = PQexec(conn, "END");
+ PQclear(res);
+
+ len = 1; /* 1 for null terminator */
+ for (colno = 0; colno < ADV_MAX_COLS && colnames[colno]; ++colno)
+ {
+ len += strlen(colnames[colno]);
+ if (colno > 0) len += 1; /* for a ',' */
+ }
+
+ idxdef = (char *)malloc(len);
+ idxdef[0] = '\0';
+
+ for (colno = 0; colno < ADV_MAX_COLS && colnames[colno]; ++colno)
+ {
+ if (colno > 0) strcat(idxdef, ",");
+ strcat(idxdef, colnames[colno]);
+ free(colnames[colno]);
+ }
+
+ return idxdef;
+}
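+
+/*
+ * For illustration: for a table "t" with columns (a int, b int) and a col_ids
+ * string of "{1,2}", the per-attnum lookups against pg_attribute resolve to
+ * "a" and "b", and the function returns the malloc'd string "a,b", which the
+ * caller frees.
+ */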
+
+static void output_recommendation(PGconn *conn, AdvIndexList index_list,
+ int len, FILE *sqlfile)
+{
+ int i;
+ long size = 0;
+
+ for (i = 0; i < len; ++i)
+ {
+ AdvIndexInfo *info = index_list[i];
+
+ if (!info->used)
+ continue;
+
+ char *idxdef = get_column_names(conn, info->table, info->col_ids);
+
+ printf("/* %d. %s(%s): size=%d KB, profit=%.2f */\n",
+ i+1, info->table, idxdef, info->size, info->benefit);
+
+ size += info->size;
+
+ if (sqlfile)
+ fprintf(sqlfile, "create index idx_%s_%d on %s (%s);\n",
+ info->table, i+1, info->table, idxdef);
+ free(idxdef);
+ }
+
+ printf("/* Total size = %ldKB */\n", size);
+}
+
+static void usage()
+{
+ puts("This is pg_advise_index, the PostgreSQL index advisor frontend.\n");
+ puts("Usage:\n\tadvise_index [options] [workload file]\n");
+ puts("Options:");
+ puts("\t-d DBNAME specify database name to connect to");
+ puts("\t-h HOSTNAME database server host or socket directory "
+ "(default: \"local socket\")");
+ puts("\t-p PORT database server port");
+ puts("\t-U NAME database user name");
+ puts("\t-o FILENAME name of output file for create index statements");
+ puts("\t-s SIZE specify max size of space to be used for indexes "
+ "(in bytes, opt. with G, M or K)");
+}
+
+/* return the size (-s option) converted into KBs */
+static long strtosize(const char *s)
+{
+ long size = 0;
+ char l = s[strlen(s) - 1];
+
+ if (l == 'G' || l == 'M' || l == 'k' || l == 'K')
+ {
+		char *ns = (char *)malloc(strlen(s));
+		strncpy(ns, s, strlen(s) - 1);
+		ns[strlen(s) - 1] = '\0';
+		size = atol(ns);
+		free(ns);
+	}
+	else
+		size = atol(s);	/* no suffix: the value is given in bytes */
+
+ switch(l)
+ {
+ case 'G':
+ size *= 1024;
+ case 'M':
+ size *= 1024;
+ case 'k':
+ case 'K':
+ break;
+ default:
+ size /= 1024;
+ }
+
+ return size;
+}
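+
+/*
+ * For illustration, with the parsing above: "-s 2G" gives 2*1024*1024 =
+ * 2097152 KB, "-s 512M" gives 524288 KB, "-s 64k" gives 64 KB, and a plain
+ * byte count such as "-s 500000" gives 500000/1024 = 488 KB.
+ */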
+
+int main(int argc, char **argv)
+{
+ char *dbname = NULL,
+ *host = NULL,
+ *user = NULL,
+ *password = NULL;
+
+ int port = 5432;
+ PGconn *conn;
+ long pool_size = 0;
+ FILE *workload = stdin,
+ *sqlfile = NULL;
+ int i,
+ num_indexes;
+ char *output_filename = NULL;
+
+ AdvIndexList suggested_indexes;
+
+ /* check arguments */
+ int ch;
+
+ while ((ch = getopt(argc, argv, "d:h:p:U:s:o:W:")) != -1)
+ switch(ch)
+ {
+ case 'd': /* database name */
+ dbname = optarg;
+ break;
+ case 'h': /* database server host */
+ host = optarg;
+ break;
+ case 'p': /* port */
+ port = atoi(optarg);
+ break;
+ case 'U': /* username */
+ user = optarg;
+ break;
+ case 's': /* index pool size */
+ pool_size = strtosize(optarg);
+ printf("poolsize = %ld KB\n", pool_size);
+ break;
+ case 'o': /* output file */
+ output_filename = optarg;
+ break;
+ case 'W': /* TODO: prompt for password */
+ break;
+ case '?':
+ usage();
+ return 0;
+ break;
+ default:
+ usage();
+ return 1;
+ break;
+ }
+
+ if (dbname == NULL || user == NULL)
+ {
+ usage();
+ return 1;
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc == 1)
+ {
+ workload = fopen(argv[argc-1], "r");
+
+ if (workload == NULL)
+ {
+ fprintf(stderr, "ERROR: cannot open file %s\n", argv[argc-1]);
+ return 1;
+ }
+
+ printf("load workload from file '%s'\n", argv[argc-1]);
+ }
+
+ /* connect to the backend */
+ conn = init_connection(dbname, host, port, user, password);
+
+ if (conn == NULL)
+ return 1;
+
+ if (prepare_advisor(conn) != 0)
+ {
+ fprintf(stderr, "ERROR: this PostgreSQL server doesn't support the "
+ "index advisor.\n");
+ PQfinish(conn);
+ return 1;
+ }
+
+ analyse_workload(conn, workload);
+
+ if (workload != stdin)
+ fclose(workload);
+
+ num_indexes = read_advisor_output(conn, &suggested_indexes);
+
+ if (pool_size > 0 &&
+ compute_config_size(suggested_indexes, num_indexes) > pool_size)
+ {
+ find_optimal_configuration_greedy(suggested_indexes, num_indexes,
+ pool_size / 4);
+ }
+ else
+ {
+ for (i = 0; i < num_indexes; ++i)
+ suggested_indexes[i]->used = true;
+ }
+
+ if (output_filename != NULL)
+ sqlfile = fopen(output_filename, "w");
+ else
+ sqlfile = stdout;
+
+ output_recommendation(conn, suggested_indexes, num_indexes, sqlfile);
+
+ if (output_filename != NULL)
+ fclose(sqlfile);
+
+ PQfinish(conn);
+
+ return 0;
+}
diff --git a/pg_advise/advise_index.h b/pg_advise/advise_index.h
new file mode 100644
index 0000000..c1bb987
--- /dev/null
+++ b/pg_advise/advise_index.h
@@ -0,0 +1,27 @@
+
+#ifndef ADVISE_INDEX_H
+#define ADVISE_INDEX_H
+
+typedef unsigned char bool;
+#define true 1
+#define false 0
+
+typedef struct {
+ char *table;
+	char   *col_ids;	/* column numbers in integer[] text form, e.g. "{1,2}" */
+ int size; /* in KBs */
+ double benefit;
+ bool used;
+} AdvIndexInfo;
+
+typedef AdvIndexInfo** AdvIndexList;
+
+extern long compute_config_size(AdvIndexList index_list, int len);
+
+extern void find_optimal_configuration_greedy(AdvIndexList index_list, int len,
+ long size_limit);
+
+extern void find_optimal_configuration_dp(AdvIndexList index_list, int len,
+ long size_limit);
+
+#endif /* ADVISE_INDEX_H */
diff --git a/pg_advise/sample_advise_index_session.txt b/pg_advise/sample_advise_index_session.txt
new file mode 100644
index 0000000..0714482
--- /dev/null
+++ b/pg_advise/sample_advise_index_session.txt
@@ -0,0 +1,24 @@
+
+$ cat workload.sql
+select * from t where a = 100;
+select * from t where b = 100;
+select * from t where a = 100 and b = 100;
+select * from t where a = 100 or b = 100;
+select * from t1 where a = 100;
+select * from t1 where b = 100;
+select * from t1 where a = 100 and b = 100;
+select * from t1 where a = 100 or b = 100;
+select * from t, t1 where t.a = 100 and t1.a = 100 or t.b = 100 and t1.b = 100;
+
+$ db/bin/pg_advise_index.exe -d postgres -h localhost -p 5432 -U test -o create_index.sql workload.sql
+load workload from file 'workload.sql'
+Analyzing queries ......... done.
+size = 2608 KB, benefit = 184483000.000000
+size = 2608 KB, benefit = 184481000.000000
+/* 1. t(b): size=2608 KB, profit=184483000.00 */
+/* 2. t(a): size=2608 KB, profit=184481000.00 */
+/* Total size = 5216KB */
+
+$ cat create_index.sql
+create index idx_t_1 on t (b);
+create index idx_t_2 on t (a);
diff --git a/pg_advise/util_funcs.c b/pg_advise/util_funcs.c
new file mode 100644
index 0000000..76a868b
--- /dev/null
+++ b/pg_advise/util_funcs.c
@@ -0,0 +1,110 @@
+/*
+ * util_funcs.c
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "advise_index.h"
+
+long compute_config_size(AdvIndexList index_list, int len)
+{
+	long size = 0;
+ int i;
+
+ for (i = 0; i < len; ++i)
+ size += index_list[i]->size;
+
+ return size;
+}
+
+/* Note: it sets the 'used' member */
+void find_optimal_configuration_greedy(AdvIndexList index_list, int len,
+ long size_limit)
+{
+ int current_size = 0;
+ double sum_benefit = 0.0;
+ int i = 0;
+
+ for (i = 0; i < len && current_size <= size_limit; ++i)
+ if (current_size + index_list[i]->size <= size_limit)
+ {
+ index_list[i]->used = true;
+ sum_benefit += index_list[i]->benefit;
+ current_size += index_list[i]->size;
+ }
+}
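+
+/*
+ * For illustration: with candidates of (size KB, benefit) = (1, 60), (2, 100)
+ * and (3, 120), in that order, and size_limit = 5, the greedy pass marks the
+ * first two as used (total size 3); the third no longer fits and stays
+ * unused.  The list is expected to arrive sorted by gain (benefit/size) in
+ * descending order, as produced by the ORDER BY in read_advisor_output().
+ */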
+
+/* TODO: not called from anywhere. Find a use for it */
+void find_optimal_configuration_dp(AdvIndexList index_list, int len,
+ long size_limit)
+{
+ int **cost;
+ int w, i;
+
+ printf("w = %ld\n", size_limit);
+	cost = (int **)malloc(sizeof(int *) * (len + 1));
+
+	for (i = 0; i < len+1; ++i)
+		cost[i] = (int *)malloc(sizeof(int) * (size_limit + 1));
+ for (w = 0; w <= size_limit; ++w)
+ cost[0][w] = 0;
+ for (i = 1; i <= len; ++i)
+ {
+ cost[i][0] = 0;
+
+ for (w = 1; w <= size_limit; ++w)
+ {
+ if (index_list[i-1]->size <= w)
+ {
+ if (index_list[i-1]->benefit
+ + cost[i-1][w - index_list[i-1]->size]
+ > cost[i-1][w])
+ {
+ cost[i][w] = index_list[i-1]->benefit +
+ cost[i-1][w - index_list[i-1]->size];
+ }
+ else
+ cost[i][w] = cost[i-1][w];
+ }
+ else
+ cost[i][w] = cost[i-1][w];
+ }
+ }
+
+ for ( i = len, w = size_limit;
+ i > 0 && w > 0;
+ --i)
+ {
+ if (cost[i][w] != cost[i-1][w])
+ {
+ index_list[i-1]->used = true;
+ w -= index_list[i-1]->size;
+ }
+ }
+ /*
+ for (i = 0; i < len+1; ++i)
+ free(cost[i]);
+ free(cost);
+ */
+}
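+
+/*
+ * For illustration: this is a 0/1 knapsack over the suggested indexes, with
+ * index size as the weight and benefit as the value.  For the same three
+ * candidates (1, 60), (2, 100) and (3, 120) with size_limit = 5, the DP keeps
+ * the second and third entries (total size 5, benefit 220), which beats the
+ * greedy pick above (benefit 160).
+ */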
+
+#if DEBUG
+void test_optimize() {
+ AdvIndexList conf = (AdvIndexInfo **)malloc(3 * sizeof(AdvIndexInfo *));
+
+ conf[0] = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo));
+ conf[0]->benefit = 60;
+ conf[0]->size = 1;
+ conf[0]->used = false;
+ conf[1] = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo));
+ conf[1]->benefit = 100;
+ conf[1]->size = 2;
+ conf[1]->used = false;
+ conf[2] = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo));
+ conf[2]->benefit = 120;
+ conf[2]->size = 3;
+ conf[2]->used = false;
+	find_optimal_configuration_dp(conf, 3, 5);
+}
+#endif
diff --git a/resources/Makefile b/resources/Makefile
new file mode 100644
index 0000000..1c09a91
--- /dev/null
+++ b/resources/Makefile
@@ -0,0 +1,14 @@
+
+DATA = index_advisory.create.sql \
+ show_index_advisory.create.sql \
+ select_index_advisory.create.sql
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pgadviser/resources
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/resources/index_advisory.create.sql b/resources/index_advisory.create.sql
new file mode 100644
index 0000000..1317107
--- /dev/null
+++ b/resources/index_advisory.create.sql
@@ -0,0 +1,10 @@
+
+create table index_advisory( reloid oid,
+ attrs integer[],
+ profit real,
+ index_size integer,
+ backend_pid integer,
+ timestamp timestamptz);
+
+create index IA_reloid on index_advisory( reloid );
+create index IA_backend_pid on index_advisory( backend_pid );
diff --git a/resources/sample_error_messages.sql b/resources/sample_error_messages.sql
new file mode 100644
index 0000000..2ef008d
--- /dev/null
+++ b/resources/sample_error_messages.sql
@@ -0,0 +1,80 @@
+
+\c postgres test
+
+drop table if exists advise_index;
+drop view if exists advise_index;
+
+/* create the advise_index same as provided in the contrib module */;
+create table advise_index( reloid oid, attrs integer[], profit real,
+ index_size integer, backend_pid integer,
+ timestamp timestamptz);
+
+/* set the client to see the log messages generated by the Adviser */;
+set client_min_messages to log;
+
+/* As expected, the EXPLAIN will work */;
+explain select * from t where a = 100;
+
+select * from advise_index;
+
+/* Now lets drop the advise_index and see what ERROR it throws */;
+drop table if exists advise_index;
+drop view if exists advise_index;
+
+explain select * from t where a = 100;
+
+/* create another object by the same name (in the same namespace) */;
+create index advise_index on t1(a);
+
+/* advise_index does exist, but its not a table or view! */;
+explain select * from t where a = 100;
+
+/* now create a table named advise_index, but with a different signature! */;
+drop index advise_index;
+
+create table advise_index(a int);
+
+/* This ERROR comes from the executor, but we still see our DETAIL and HINT */;
+explain select * from t where a = 100;
+
+/* create a table with same signature but different name */;
+drop table if exists advise_index;
+drop view if exists advise_index;
+
+drop table if exists advise_index_data cascade;
+
+create table advise_index_data( reloid oid, attrs integer[], profit real,
+ index_size integer, backend_pid integer,
+ timestamp timestamptz);
+
+/* and a view on that table */;
+create view advise_index as select * from advise_index_data;
+
+/* now try to insert into the view, and notice the ERROR, DETAIL and HINT from executor */;
+explain select * from t where a = 100;
+
+/* now create a RULE on the view that redirects the INSERTs into the table */;
+create or replace rule advise_index_insert as
+ON INSERT to advise_index
+do instead
+INSERT into advise_index_data values (new.reloid, new.attrs, new.profit,
+ new.index_size, new.backend_pid,
+ new.timestamp) ;
+
+/* and voila, (internal) INSERT into the view succeeds! */;
+explain select * from t where a = 100;
+
+/* Now, lets try what happens under a read-only transaction */;
+begin;
+
+set transaction_read_only=t;
+
+show transaction_read_only;
+
+explain select * from t where a = 100;
+
+end;
+
+select * from advise_index;
+
+select * from advise_index_data;
diff --git a/resources/sample_error_messages.txt b/resources/sample_error_messages.txt
new file mode 100644
index 0000000..db6dc77
--- /dev/null
+++ b/resources/sample_error_messages.txt
@@ -0,0 +1,192 @@
+
+postgres=> \c postgres test
+You are now connected to database "postgres".
+postgres=>
+postgres=> drop table if exists advise_index;
+NOTICE: table "advise_index" does not exist, skipping
+DROP TABLE
+postgres=> drop view if exists advise_index;
+NOTICE: view "advise_index" does not exist, skipping
+DROP VIEW
+postgres=>
+postgres=> /* create the advise_index same as provided in the contrib module */;
+postgres=> create table advise_index( reloid oid, attrs integer[], profit real,
+postgres(> index_size integer, backend_pid integer,
+postgres(> timestamp timestamptz);
+CREATE TABLE
+postgres=>
+postgres=> /* set the client to see the log messages generated by the Adviser */;
+postgres=> set client_min_messages to log;
+SET
+postgres=>
+postgres=> /* As expected, the EXPLAIN will work */;
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ QUERY PLAN
+----------------------------------------------------
+ Seq Scan on t (cost=0.00..1791.00 rows=1 width=8)
+ Filter: (a = 100)
+(2 rows)
+
+postgres=>
+postgres=> select * from advise_index;
+ reloid | attrs | profit | index_size | backend_pid | timestamp
+--------+-------+---------+------------+-------------+-------------------------------
+ 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:53.593+05:30
+(1 row)
+
+postgres=>
+postgres=> /* Now lets drop the advise_index and see what ERROR it throws */;
+postgres=> drop table if exists advise_index;
+DROP TABLE
+postgres=> drop view if exists advise_index;
+NOTICE: view "advise_index" does not exist, skipping
+DROP VIEW
+postgres=>
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ERROR: relation "advise_index" does not exist.
+DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction.
+HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module.
+postgres=>
+postgres=> /* create another object by the same name (in the same namespace) */;
+
+postgres=> create index advise_index on t1(a);
+CREATE INDEX
+postgres=>
+postgres=> /* advise_index does exist, but its not a table or view! */;
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ERROR: "advise_index" is not a table or view.
+DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction.
+HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module.
+postgres=>
+postgres=> /* now create a table named advise_index, but with a different signature! */;
+postgres=> drop index advise_index;
+DROP INDEX
+postgres=>
+postgres=> create table advise_index(a int);
+CREATE TABLE
+postgres=>
+postgres=> /* This ERROR comes from the executor, but we still see our DETAIL and HINT */;
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ERROR: INSERT has more expressions than target columns
+DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction.
+HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module.
+CONTEXT: SQL statement "insert into "advise_index" values( 16395, array[1], 1782.719482, 2608, 5256, now());"
+postgres=>
+postgres=> /* create a table with same signature but different name */;
+postgres=> drop table if exists advise_index;
+DROP TABLE
+postgres=> drop view if exists advise_index;
+NOTICE: view "advise_index" does not exist, skipping
+DROP VIEW
+postgres=>
+postgres=> drop table if exists advise_index_data cascade;
+NOTICE: table "advise_index_data" does not exist, skipping
+DROP TABLE
+postgres=>
+postgres=> create table advise_index_data( reloid oid, attrs integer[], profit real,
+postgres(> index_size integer, backend_pid integer,
+postgres(> timestamp timestamptz);
+CREATE TABLE
+postgres=>
+postgres=> /* and a view on that table */;
+postgres=> create view advise_index as select * from advise_index_data;
+CREATE VIEW
+postgres=>
+postgres=> /* now try to insert into the view, and notice the ERROR, DETAIL and HINT from executor */;
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ERROR: cannot insert into a view
+DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction.
+HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module.
+CONTEXT: SQL statement "insert into "advise_index" values( 16395, array[1], 1782.719482, 2608, 5256, now());"
+postgres=>
+postgres=> /* now create a RULE on the view that redirects the INSERTs into the
+table */;
+postgres=> create or replace rule advise_index_insert as
+postgres-> ON INSERT to advise_index
+postgres-> do instead
+postgres-> INSERT into advise_index_data values (new.reloid, new.attrs, new.profit,
+postgres(> new.index_size, new.backend_pid,
+postgres(> new.timestamp) ;
+CREATE RULE
+postgres=>
+postgres=> /* and voila, (internal) INSERT into the view succeeds! */;
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ QUERY PLAN
+----------------------------------------------------
+ Seq Scan on t (cost=0.00..1791.00 rows=1 width=8)
+ Filter: (a = 100)
+(2 rows)
+
+postgres=>
+postgres=> /* Now, lets try what happens under a read-only transaction */;
+postgres=> begin;
+BEGIN
+postgres=>
+postgres=> set transaction_read_only=t;
+SET
+postgres=>
+postgres=> show transaction_read_only;
+ transaction_read_only
+-----------------------
+ on
+(1 row)
+
+postgres=>
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ERROR: transaction is read-only
+DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction.
+HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module.
+CONTEXT: SQL statement "insert into "advise_index" values( 16395, array[1], 1782.719482, 2608, 5256, now());"
+postgres=>
+postgres=> end;
+ROLLBACK
+postgres=>
+postgres=> select * from advise_index;
+ reloid | attrs | profit | index_size | backend_pid | timestamp
+--------+-------+---------+------------+-------------+------------------------------
+ 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:54.25+05:30
+(1 row)
+
+postgres=>
+postgres=> select * from advise_index_data;
+ reloid | attrs | profit | index_size | backend_pid | timestamp
+--------+-------+---------+------------+-------------+------------------------------
+ 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:54.25+05:30
+(1 row)
+
+postgres=>
diff --git a/resources/sample_psql_session.sql b/resources/sample_psql_session.sql
new file mode 100644
index 0000000..f3ba473
--- /dev/null
+++ b/resources/sample_psql_session.sql
@@ -0,0 +1,80 @@
+
+create language plpgsql;
+
+drop schema if exists test cascade;
+
+drop user if exists test;
+
+create user test;
+
+create schema test authorization test;
+
+\c postgres test
+
+load '$libdir/plugins/index_adviser';
+
+drop view if exists select_index_advisory;
+drop function if exists show_index_advisory(index_advisory.backend_pid%type);
+drop table if exists index_advisory;
+
+\i contrib/pgadviser/resources/index_advisory.create.sql
+
+\i contrib/pgadviser/resources/show_index_advisory.create.sql
+
+\i contrib/pgadviser/resources/select_index_advisory.create.sql
+
+drop table if exists t, t1;
+
+create table t( a int, b int );
+
+insert into t select s, 99999-s from generate_series(0,99999) as s;
+
+analyze t;
+
+create table t1 as select * from t;
+
+/* notice no ANALYZE for T1 */;
+
+select count(*) from t;
+
+select count(*) from t1;
+
+/* stop here if you wish to run only pg_advise tool */;
+
+/* set the client to see the log messages generated by the Adviser */;
+set client_min_messages to log;
+
+explain select * from t where a = 100;
+
+explain select * from t where b = 100;
+
+explain select * from t where a = 100 and b = 100;
+
+explain select * from t where a = 100 or b = 100;
+
+/* now we shall test with T1 */;
+
+explain select * from t1 where a = 100;
+
+explain select * from t1 where b = 100;
+
+explain select * from t1 where a = 100 and b = 100;
+
+explain select * from t1 where a = 100 or b = 100;
+
+/* let's do some sensible join over these two tables */;
+explain
+select *
+from t,
+ t1
+where t.a = 100
+and t1.a = 100
+ or t.b = 100
+ and t1.b = 100;
+
+/* following are the contents of the advise_index table */;
+select * from index_advisory;
+
+/* And following is the summary of the advices generated */;
+
+select * from select_index_advisory;
diff --git a/resources/sample_psql_session.txt b/resources/sample_psql_session.txt
new file mode 100644
index 0000000..149451f
--- /dev/null
+++ b/resources/sample_psql_session.txt
@@ -0,0 +1,271 @@
+
+Welcome to psql 8.2.1, the PostgreSQL interactive terminal.
+
+Type: \copyright for distribution terms
+ \h for help with SQL commands
+ \? for help with psql commands
+ \g or terminate with semicolon to execute query
+ \q to quit
+
+Warning: Console code page (437) differs from Windows code page (1252)
+ 8-bit characters may not work correctly. See psql reference
+ page "Notes for Windows users" for details.
+
+postgres=# /*
+postgres*# You must start the session with something like:
+postgres*#
+postgres*# env PGOPTIONS='-c local_preload_libraries=libpg_index_adviser' psql postgres
+postgres*# */;
+postgres=#
+postgres=# create language plpgsql;
+CREATE LANGUAGE
+postgres=#
+postgres=# drop schema if exists test cascade;
+NOTICE: schema "test" does not exist, skipping
+DROP SCHEMA
+postgres=#
+postgres=# drop user if exists test;
+NOTICE: role "test" does not exist, skipping
+DROP ROLE
+postgres=#
+postgres=# create user test;
+CREATE ROLE
+postgres=#
+postgres=# create schema test authorization test;
+CREATE SCHEMA
+postgres=#
+postgres=# \c postgres test
+You are now connected to database "postgres" as user "test".
+postgres=>
+postgres=> drop table if exists advise_index;
+NOTICE: table "advise_index" does not exist, skipping
+DROP TABLE
+postgres=>
+postgres=> \i contrib/pg_index_adviser/advise_index.create.sql
+CREATE TABLE
+postgres=>
+postgres=> \d advise_index;
+ Table "test.advise_index"
+ Column | Type | Modifiers
+-------------+--------------------------+-----------
+ reloid | oid |
+ attrs | integer[] |
+ profit | real |
+ index_size | integer |
+ backend_pid | integer |
+ timestamp | timestamp with time zone |
+
+postgres=>
+postgres=> \i contrib/pg_index_adviser/advise_index_show.create.sql
+CREATE FUNCTION
+postgres=>
+postgres=> select * from advise_index;
+ reloid | attrs | profit | index_size | backend_pid | timestamp
+--------+-------+--------+------------+-------------+-----------
+(0 rows)
+
+postgres=>
+postgres=> drop table if exists t, t1;
+NOTICE: table "t" does not exist, skipping
+NOTICE: table "t1" does not exist, skipping
+DROP TABLE
+postgres=>
+postgres=> create table t( a int, b int );
+CREATE TABLE
+postgres=>
+postgres=> insert into t select s, 99999-s from generate_series(0,99999) as s;
+INSERT 0 100000
+postgres=>
+postgres=> analyze t;
+ANALYZE
+postgres=>
+postgres=> create table t1 as select * from t;
+SELECT
+postgres=>
+postgres=> /* notice no ANALYZE for T1 */;
+postgres=>
+postgres=> select count(*) from t;
+ count
+--------
+ 100000
+(1 row)
+
+postgres=>
+postgres=> select count(*) from t1;
+ count
+--------
+ 100000
+(1 row)
+
+postgres=>
+postgres=> /* stop here if you wish to run only pg_advise_index tool */;
+postgres=>
+postgres=> -- set enable_advise_index to true;
+postgres=> /* set the client to see the log messages generated by the Adviser */
+;
+postgres=> set client_min_messages to log;
+SET
+postgres=>
+postgres=> explain select * from t where a = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (a = 100)
+
+ QUERY PLAN
+----------------------------------------------------
+ Seq Scan on t (cost=0.00..1791.00 rows=1 width=8)
+ Filter: (a = 100)
+(2 rows)
+
+postgres=>
+postgres=> explain select * from t where b = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (b = 100)
+
+ QUERY PLAN
+----------------------------------------------------
+ Seq Scan on t (cost=0.00..1791.00 rows=1 width=8)
+ Filter: (b = 100)
+(2 rows)
+
+postgres=>
+postgres=> explain select * from t where a = 100 and b = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Index Scan using idx_adv_1 on t (cost=0.00..8.28 rows=1 width=8)
+ Index Cond: (b = 100)
+ Filter: (a = 100)
+
+ QUERY PLAN
+----------------------------------------------------
+ Seq Scan on t (cost=0.00..2041.00 rows=1 width=8)
+ Filter: ((a = 100) AND (b = 100))
+(2 rows)
+
+postgres=>
+postgres=> explain select * from t where a = 100 or b = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Bitmap Heap Scan on t (cost=8.54..16.21 rows=2 width=8)
+ Recheck Cond: ((a = 100) OR (b = 100))
+ -> BitmapOr (cost=8.54..8.54 rows=2 width=0)
+ -> Bitmap Index Scan on idx_adv_0 (cost=0.00..4.27 rows=1 width=0)
+ Index Cond: (a = 100)
+ -> Bitmap Index Scan on idx_adv_1 (cost=0.00..4.27 rows=1 width=0)
+ Index Cond: (b = 100)
+
+ QUERY PLAN
+----------------------------------------------------
+ Seq Scan on t (cost=0.00..2041.00 rows=2 width=8)
+ Filter: ((a = 100) OR (b = 100))
+(2 rows)
+
+postgres=>
+postgres=> /* now we shall test with T1 */;
+postgres=>
+postgres=> explain select * from t1 where a = 100;
+ QUERY PLAN
+-------------------------------------------------------
+ Seq Scan on t1 (cost=0.00..1852.93 rows=525 width=8)
+ Filter: (a = 100)
+(2 rows)
+
+postgres=>
+postgres=> explain select * from t1 where b = 100;
+ QUERY PLAN
+-------------------------------------------------------
+ Seq Scan on t1 (cost=0.00..1852.93 rows=525 width=8)
+ Filter: (b = 100)
+(2 rows)
+
+postgres=>
+postgres=> explain select * from t1 where a = 100 and b = 100;
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on t1 (cost=0.00..2115.31 rows=3 width=8)
+ Filter: ((a = 100) AND (b = 100))
+(2 rows)
+
+postgres=>
+postgres=> explain select * from t1 where a = 100 or b = 100;
+ QUERY PLAN
+--------------------------------------------------------
+ Seq Scan on t1 (cost=0.00..2115.31 rows=1047 width=8)
+ Filter: ((a = 100) OR (b = 100))
+(2 rows)
+
+postgres=>
+postgres=> /* let's do some sensible join over these two tables */;
+postgres=> explain
+postgres-> select *
+postgres-> from t,
+postgres-> t1
+postgres-> where t.a = 100
+postgres-> and t1.a = 100
+postgres-> or t.b = 100
+postgres-> and t1.b = 100;
+LOG: Index Adviser: Plan using estimates for suggested indexes:
+
+Nested Loop (cost=8.54..7395.45 rows=1050 width=16)
+ Join Filter: (((t.a = 100) AND (t1.a = 100)) OR ((t.b = 100) AND (t1.b = 100)))
+ -> Bitmap Heap Scan on t (cost=8.54..16.21 rows=2 width=8)
+ Recheck Cond: ((a = 100) OR (b = 100))
+ -> BitmapOr (cost=8.54..8.54 rows=2 width=0)
+ -> Bitmap Index Scan on idx_adv_0 (cost=0.00..4.27 rows=1 width=0)
+ Index Cond: (a = 100)
+ -> Bitmap Index Scan on idx_adv_1 (cost=0.00..4.27 rows=1 width=0)
+ Index Cond: (b = 100)
+ -> Seq Scan on t1 (cost=0.00..1590.54 rows=104954 width=8)
+
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ Nested Loop (cost=0.00..368963541.00 rows=1050 width=16)
+ Join Filter: (((t.a = 100) AND (t1.a = 100)) OR ((t.b = 100) AND (t1.b = 100)))
+ -> Seq Scan on t (cost=0.00..1541.00 rows=100000 width=8)
+ -> Seq Scan on t1 (cost=0.00..1590.54 rows=104954 width=8)
+(4 rows)
+
+postgres=>
+postgres=> /* following are the contents of the advise_index table */;
+postgres=> select * from advise_index;
+ reloid | attrs | profit | index_size | backend_pid | timestamp
+--------+-------+--------------+------------+-------------+-------------------------------
+ 16395 | {1} | 1782.72 | 2608 | 2272 | 2007-01-13 12:30:02.39+05:30
+ 16395 | {2} | 1782.72 | 2608 | 2272 | 2007-01-13 12:30:02.531+05:30
+ 16395 | {2} | 2032.72 | 2608 | 2272 | 2007-01-13 12:30:02.578+05:30
+ 16395 | {1} | 1012.4 | 2608 | 2272 | 2007-01-13 12:30:02.64+05:30
+ 16395 | {2} | 1012.4 | 2608 | 2272 | 2007-01-13 12:30:02.64+05:30
+ 16395 | {1} | 1.84478e+008 | 2608 | 2272 | 2007-01-13 12:30:17.984+05:30
+ 16395 | {2} | 1.84478e+008 | 2608 | 2272 | 2007-01-13 12:30:17.984+05:30
+(7 rows)
+
+postgres=>
+postgres=> /* And following is the summary of the advices generated */;
+postgres=>
+postgres=> select E'backend_pid\n'
+postgres-> || E'===========\n'
+postgres-> || backend_pid,
+postgres-> advise_index_show( backend_pid )
+postgres-> from (select distinct backend_pid
+postgres(> from advise_index as adv
+postgres(> where adv.reloid in (select oid
+postgres(> from pg_class
+postgres(> where relkind = 'r')
+postgres(> ) as v;
+ ?column? | advise_index_show
+-------------+--------------------------------------------------------------------
+ backend_pid | /* Index Adviser */
+ =========== : /* ============= */
+ 2272 :
+ : /* size: 2608 KB, benefit: 1.84483e+008, gain: 70737.3067484663 */
+ : create index idx_t_b on t(b);
+ :
+ : /* size: 2608 KB, benefit: 1.84481e+008, gain: 70736.527607362 */
+ : create index idx_t_a on t(a);
+ :
+(1 row)
+
+postgres=>
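
The session above ends with two suggested CREATE INDEX statements (idx_t_a and idx_t_b). A follow-up sketch, not part of the recorded transcript, showing how one might apply that advice and re-check the earlier plans; the expected plans are noted in comments rather than reproduced as output:

    -- Apply the adviser's suggestions from the summary above.
    create index idx_t_a on t(a);
    create index idx_t_b on t(b);

    -- Re-run two of the earlier queries; with the real indexes in place the
    -- planner is expected to choose them instead of sequential scans.
    explain select * from t where a = 100;            -- expected: Index Scan using idx_t_a
    explain select * from t where a = 100 or b = 100; -- expected: BitmapOr over idx_t_a and idx_t_b
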
diff --git a/resources/select_index_advisory.create.sql b/resources/select_index_advisory.create.sql
new file mode 100644
index 0000000..0a762a2
--- /dev/null
+++ b/resources/select_index_advisory.create.sql
@@ -0,0 +1,13 @@
+
+create view select_index_advisory as
+select E'backend_pid\n'
+ || E'===========\n'
+ || backend_pid,
+ show_index_advisory( backend_pid )
+from (select distinct backend_pid
+ from index_advisory as adv
+ /* do not consider tables that no longer exist */
+ where exists (select oid
+ from pg_class
+ where oid = adv.reloid)
+ ) as v;
diff --git a/resources/show_index_advisory.create.sql b/resources/show_index_advisory.create.sql
new file mode 100644
index 0000000..5764a90
--- /dev/null
+++ b/resources/show_index_advisory.create.sql
@@ -0,0 +1,100 @@
+
+create function show_index_advisory(p_backend_pid index_advisory.backend_pid%type) returns text
+as
+$$
+declare
+ pid p_backend_pid%type;
+ q_advice text;
+ r_advice record;
+ q_column text;
+ r_column record;
+ ret text;
+
+ NAMEDATALEN int := 64;
+
+ collist_w_C text; /* Column name list with commas */
+ collist_w_U text; /* Column name list with underscores */
+ colidlist_w_U text; /* Column id list with underscores */
+begin
+ if p_backend_pid is null then
+ pid = pg_backend_pid();
+ else
+ pid = p_backend_pid;
+ end if;
+
+ ret := '/* Index Adviser */' || E'\n' ||
+ '/* ============= */' || E'\n';
+
+ q_advice := 'SELECT c.relname,
+ c.oid as reloid,
+ a.attrs AS colids,
+ MAX( a.index_size ) AS size_in_KB,
+ SUM( a.profit ) AS benefit,
+ SUM( a.profit )/MAX( a.index_size ) AS gain
+ FROM index_advisory a,
+ pg_class c
+ WHERE a.backend_pid = ' || pid || '
+ AND a.reloid = c.oid
+ GROUP BY c.relname, c.oid, a.attrs
+ ORDER BY gain
+ DESC';
+
+ for r_advice in execute q_advice loop
+
+ ret := ret ||
+ E'\n/* size: ' || r_advice.size_in_KB || ' KB, '
+ || 'benefit: ' || r_advice.benefit || ', '
+ || 'gain: ' || r_advice.gain || E' */\n';
+
+ collist_w_C := '';
+ collist_w_U := '';
+ colidlist_w_U := '';
+
+ for i in array_lower( r_advice.colids, 1 )
+ .. array_upper( r_advice.colids, 1 )
+ loop
+
+ q_column := 'SELECT a.attname as name,
+ a.attnum as id
+ FROM pg_class c,
+ pg_attribute a
+ WHERE c.oid = ' || r_advice.reloid || '
+ AND a.attrelid = c.oid
+ AND a.attnum = ' || r_advice.colids[i] || '
+ ';
+
+ execute q_column into r_column;
+
+-- if ROW_COUNT > 1 then
+-- raise EXCEPTION 'an internal query failed';
+-- end if;
+
+ if i <> 1 then
+ collist_w_C := collist_w_C || ', ';
+ collist_w_U := collist_w_U || '_';
+ colidlist_w_U := colidlist_w_U || '_';
+ end if;
+
+ collist_w_C := collist_w_C || r_column.name;
+ collist_w_U := collist_w_U || r_column.name;
+ colidlist_w_U := colidlist_w_U || r_column.id;
+
+ end loop;
+
+ ret := ret || 'create index ';
+
+ if (length('idx_' || r_advice.relname || '_' || collist_w_U)
+ <= NAMEDATALEN)
+ then
+ ret := ret || 'idx_' || r_advice.relname || '_' || collist_w_U;
+ else
+ ret := ret || 'idx_' || r_advice.reloid || '_' || colidlist_w_U;
+ end if;
+
+ ret := ret || ' on ' || r_advice.relname || '(' || collist_w_C || E');\n';
+
+ end loop;
+
+ return ret;
+end;
+$$ language plpgsql;
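
The function above takes a backend PID (or NULL, meaning the current backend), groups the matching index_advisory rows by relation and column set, and emits CREATE INDEX statements, falling back to OID-based names when the column-based name would exceed NAMEDATALEN. A small usage sketch, assuming index_advisory has been populated as in the sample sessions (the literal PID below is only an example value):

    -- Advice recorded by the current backend.
    select show_index_advisory(NULL);

    -- Advice recorded by a specific backend, e.g. a PID seen in index_advisory.
    select show_index_advisory(5256);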