11 files changed, 2973 insertions, 152 deletions
diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index 64fd69f2cb..0d549f8b6c 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -1,10 +1,10 @@
 # contrib/pg_trgm/Makefile
 
 MODULE_big = pg_trgm
-OBJS = trgm_op.o trgm_gist.o trgm_gin.o
+OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o
 
 EXTENSION = pg_trgm
-DATA = pg_trgm--1.0.sql pg_trgm--unpackaged--1.0.sql
+DATA = pg_trgm--1.1.sql pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
 
 REGRESS = pg_trgm
 
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index e7af7d4890..13b1fde1b8 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -53,8 +53,14 @@ select similarity('wow',' WOW ');
           1
 (1 row)
 
+select similarity('---', '####---');
+ similarity 
+------------
+          0
+(1 row)
+
 CREATE TABLE test_trgm(t text);
-\copy test_trgm from 'data/trgm.data
+\copy test_trgm from 'data/trgm.data'
 select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
       t      |   sml    
 -------------+----------
@@ -3464,6 +3470,7 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
 create table test2(t text);
 insert into test2 values ('abcdef');
 insert into test2 values ('quark');
+insert into test2 values ('  z foo bar');
 create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
@@ -3497,6 +3504,12 @@ select * from test2 where t like '%bcd%';
  abcdef
 (1 row)
 
+select * from test2 where t like E'%\\bcd%';
+   t    
+--------
+ abcdef
+(1 row)
+
 select * from test2 where t ilike '%BCD%';
    t    
 --------
@@ -3509,6 +3522,142 @@ select * from test2 where t ilike 'qua%';
  quark
 (1 row)
 
+select * from test2 where t like '%z foo bar%';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t like '  z foo%';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+explain (costs off)
+  select * from test2 where t ~ '[abc]{3}';
+                 QUERY PLAN                 
+--------------------------------------------
+ Bitmap Heap Scan on test2
+   Recheck Cond: (t ~ '[abc]{3}'::text)
+   ->  Bitmap Index Scan on test2_idx_gin
+         Index Cond: (t ~ '[abc]{3}'::text)
+(4 rows)
+
+explain (costs off)
+  select * from test2 where t ~* 'DEF';
+                QUERY PLAN                
+------------------------------------------
+ Bitmap Heap Scan on test2
+   Recheck Cond: (t ~* 'DEF'::text)
+   ->  Bitmap Index Scan on test2_idx_gin
+         Index Cond: (t ~* 'DEF'::text)
+(4 rows)
+
+select * from test2 where t ~ '[abc]{3}';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ 'a[bc]+d';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ '(abc)*$';
+      t      
+-------------
+ abcdef
+ quark
+   z foo bar
+(3 rows)
+
+select * from test2 where t ~* 'DEF';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ 'dEf';
+ t 
+---
+(0 rows)
+
+select * from test2 where t ~* '^q';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~* '[abc]{3}[def]{3}';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~* 'ab[a-z]{3}';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~* '(^| )qua';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ 'q.*rk$';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ 'q';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ '[a-z]{3}';
+      t      
+-------------
+ abcdef
+ quark
+   z foo bar
+(3 rows)
+
+select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
+ t 
+---
+(0 rows)
+
+select * from test2 where t ~ 'z foo bar';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t ~ ' z foo bar';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t ~ '  z foo bar';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t ~ '  z foo';
+      t      
+-------------
+   z foo bar
+(1 row)
+
 drop index test2_idx_gin;
 create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
 set enable_seqscan=off;
@@ -3539,6 +3688,12 @@ select * from test2 where t like '%bcd%';
  abcdef
 (1 row)
 
+select * from test2 where t like E'%\\bcd%';
+   t    
+--------
+ abcdef
+(1 row)
+
 select * from test2 where t ilike '%BCD%';
    t    
 --------
@@ -3551,3 +3706,135 @@ select * from test2 where t ilike 'qua%';
  quark
 (1 row)
 
+select * from test2 where t like '%z foo bar%';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t like '  z foo%';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+explain (costs off)
+  select * from test2 where t ~ '[abc]{3}';
+                QUERY PLAN                
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+   Index Cond: (t ~ '[abc]{3}'::text)
+(2 rows)
+
+explain (costs off)
+  select * from test2 where t ~* 'DEF';
+                QUERY PLAN                
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+   Index Cond: (t ~* 'DEF'::text)
+(2 rows)
+
+select * from test2 where t ~ '[abc]{3}';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ 'a[bc]+d';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ '(abc)*$';
+      t      
+-------------
+ abcdef
+ quark
+   z foo bar
+(3 rows)
+
+select * from test2 where t ~* 'DEF';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ 'dEf';
+ t 
+---
+(0 rows)
+
+select * from test2 where t ~* '^q';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~* '[abc]{3}[def]{3}';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~* 'ab[a-z]{3}';
+   t    
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~* '(^| )qua';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ 'q.*rk$';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ 'q';
+   t   
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ '[a-z]{3}';
+      t      
+-------------
+ abcdef
+ quark
+   z foo bar
+(3 rows)
+
+select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
+ t 
+---
+(0 rows)
+
+select * from test2 where t ~ 'z foo bar';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t ~ ' z foo bar';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t ~ '  z foo bar';
+      t      
+-------------
+   z foo bar
+(1 row)
+
+select * from test2 where t ~ '  z foo';
+      t      
+-------------
+   z foo bar
+(1 row)
+
diff --git a/contrib/pg_trgm/pg_trgm--1.0--1.1.sql b/contrib/pg_trgm/pg_trgm--1.0--1.1.sql
new file mode 100644
index 0000000000..b4e3e26037
--- /dev/null
+++ b/contrib/pg_trgm/pg_trgm--1.0--1.1.sql
@@ -0,0 +1,12 @@
+/* contrib/pg_trgm/pg_trgm--1.0--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.1'" to load this file. \quit
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+        OPERATOR        5       pg_catalog.~ (text, text),
+        OPERATOR        6       pg_catalog.~* (text, text);
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        OPERATOR        5       pg_catalog.~ (text, text),
+        OPERATOR        6       pg_catalog.~* (text, text);
diff --git a/contrib/pg_trgm/pg_trgm--1.0.sql b/contrib/pg_trgm/pg_trgm--1.1.sql
index 8067bd6033..1fff7af2c4 100644
--- a/contrib/pg_trgm/pg_trgm--1.0.sql
+++ b/contrib/pg_trgm/pg_trgm--1.1.sql
@@ -1,4 +1,4 @@
-/* contrib/pg_trgm/pg_trgm--1.0.sql */
+/* contrib/pg_trgm/pg_trgm--1.1.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit
@@ -132,6 +132,12 @@ ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
         OPERATOR        4       pg_catalog.~~* (text, text),
         FUNCTION        8 (text, text)  gtrgm_distance (internal, text, int, oid);
 
+-- Add operators that are new in 9.3.
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+        OPERATOR        5       pg_catalog.~ (text, text),
+        OPERATOR        6       pg_catalog.~* (text, text);
+
 -- support functions for gin
 CREATE FUNCTION gin_extract_value_trgm(text, internal)
 RETURNS internal
@@ -164,3 +170,9 @@ AS
 ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
         OPERATOR        3       pg_catalog.~~ (text, text),
         OPERATOR        4       pg_catalog.~~* (text, text);
+
+-- Add operators that are new in 9.3.
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        OPERATOR        5       pg_catalog.~ (text, text),
+        OPERATOR        6       pg_catalog.~* (text, text);
diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control
index 70404d881d..2ac51e6890 100644
--- a/contrib/pg_trgm/pg_trgm.control
+++ b/contrib/pg_trgm/pg_trgm.control
@@ -1,5 +1,5 @@
 # pg_trgm extension
 comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.0'
+default_version = '1.1'
 module_pathname = '$libdir/pg_trgm'
 relocatable = true
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index ea902f602f..7b02d98818 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -11,9 +11,11 @@ select show_trgm('a b C0*%^');
 select similarity('wow','WOWa ');
 select similarity('wow',' WOW ');
 
+select similarity('---', '####---');
+
 CREATE TABLE test_trgm(t text);
 
-\copy test_trgm from 'data/trgm.data
+\copy test_trgm from 'data/trgm.data'
 
 select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
 select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
@@ -41,6 +43,7 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
 create table test2(t text);
 insert into test2 values ('abcdef');
 insert into test2 values ('quark');
+insert into test2 values ('  z foo bar');
 create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
@@ -49,8 +52,32 @@ explain (costs off)
   select * from test2 where t ilike '%BCD%';
 select * from test2 where t like '%BCD%';
 select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
 select * from test2 where t ilike '%BCD%';
 select * from test2 where t ilike 'qua%';
+select * from test2 where t like '%z foo bar%';
+select * from test2 where t like '  z foo%';
+explain (costs off)
+  select * from test2 where t ~ '[abc]{3}';
+explain (costs off)
+  select * from test2 where t ~* 'DEF';
+select * from test2 where t ~ '[abc]{3}';
+select * from test2 where t ~ 'a[bc]+d';
+select * from test2 where t ~ '(abc)*$';
+select * from test2 where t ~* 'DEF';
+select * from test2 where t ~ 'dEf';
+select * from test2 where t ~* '^q';
+select * from test2 where t ~* '[abc]{3}[def]{3}';
+select * from test2 where t ~* 'ab[a-z]{3}';
+select * from test2 where t ~* '(^| )qua';
+select * from test2 where t ~ 'q.*rk$';
+select * from test2 where t ~ 'q';
+select * from test2 where t ~ '[a-z]{3}';
+select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
+select * from test2 where t ~ 'z foo bar';
+select * from test2 where t ~ ' z foo bar';
+select * from test2 where t ~ '  z foo bar';
+select * from test2 where t ~ '  z foo';
 drop index test2_idx_gin;
 create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
 set enable_seqscan=off;
@@ -60,5 +87,29 @@ explain (costs off)
   select * from test2 where t ilike '%BCD%';
 select * from test2 where t like '%BCD%';
 select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
 select * from test2 where t ilike '%BCD%';
 select * from test2 where t ilike 'qua%';
+select * from test2 where t like '%z foo bar%';
+select * from test2 where t like '  z foo%';
+explain (costs off)
+  select * from test2 where t ~ '[abc]{3}';
+explain (costs off)
+  select * from test2 where t ~* 'DEF';
+select * from test2 where t ~ '[abc]{3}';
+select * from test2 where t ~ 'a[bc]+d';
+select * from test2 where t ~ '(abc)*$';
+select * from test2 where t ~* 'DEF';
+select * from test2 where t ~ 'dEf';
+select * from test2 where t ~* '^q';
+select * from test2 where t ~* '[abc]{3}[def]{3}';
+select * from test2 where t ~* 'ab[a-z]{3}';
+select * from test2 where t ~* '(^| )qua';
+select * from test2 where t ~ 'q.*rk$';
+select * from test2 where t ~ 'q';
+select * from test2 where t ~ '[a-z]{3}';
+select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
+select * from test2 where t ~ 'z foo bar';
+select * from test2 where t ~ ' z foo bar';
+select * from test2 where t ~ '  z foo bar';
+select * from test2 where t ~ '  z foo';
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index 067f29d4da..ed649b8dcc 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -7,18 +7,20 @@
 #include "access/gist.h"
 #include "access/itup.h"
 #include "storage/bufpage.h"
-#include "utils/builtins.h"
 
-/* options */
+/*
+ * Options ... but note that trgm_regexp.c effectively assumes these values
+ * of LPADDING and RPADDING.
+ */
 #define LPADDING		2
 #define RPADDING		1
 #define KEEPONLYALNUM
 /*
  * Caution: IGNORECASE macro means that trigrams are case-insensitive.
- * If this macro is disabled, the ~~* operator must be removed from the
- * operator classes, because we can't handle case-insensitive wildcard search
- * with case-sensitive trigrams.  Failure to do this will result in "cannot
- * handle ~~* with case-sensitive trigrams" errors.
+ * If this macro is disabled, the ~* and ~~* operators must be removed from
+ * the operator classes, because we can't handle case-insensitive wildcard
+ * search with case-sensitive trigrams.  Failure to do this will result in
+ * "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
  */
 #define IGNORECASE
 #define DIVUNION
@@ -28,6 +30,8 @@
 #define DistanceStrategyNumber		2
 #define LikeStrategyNumber			3
 #define ILikeStrategyNumber			4
+#define RegExpStrategyNumber		5
+#define RegExpICaseStrategyNumber	6
 
 
 typedef char trgm[3];
@@ -42,11 +46,11 @@ typedef char trgm[3];
 	*(((char*)(a))+2) = *(((char*)(b))+2);	\
 } while(0);
 
-uint32		trgm2int(trgm *ptr);
-
 #ifdef KEEPONLYALNUM
+#define ISWORDCHR(c)	(t_isalpha(c) || t_isdigit(c))
 #define ISPRINTABLECHAR(a)	( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
 #else
+#define ISWORDCHR(c)	(!t_isspace(c))
 #define ISPRINTABLECHAR(a)	( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
 #endif
 #define ISPRINTABLETRGM(t)	( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
@@ -99,11 +103,19 @@ typedef char *BITVECP;
 #define GETARR(x)		( (trgm*)( (char*)x+TRGMHDRSIZE ) )
 #define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
 
+typedef struct TrgmPackedGraph TrgmPackedGraph;
+
 extern float4 trgm_limit;
 
-TRGM	   *generate_trgm(char *str, int slen);
-TRGM	   *generate_wildcard_trgm(const char *str, int slen);
-float4		cnt_sml(TRGM *trg1, TRGM *trg2);
-bool		trgm_contained_by(TRGM *trg1, TRGM *trg2);
+extern uint32 trgm2int(trgm *ptr);
+extern void compact_trigram(trgm *tptr, char *str, int bytelen);
+extern TRGM *generate_trgm(char *str, int slen);
+extern TRGM *generate_wildcard_trgm(const char *str, int slen);
+extern float4 cnt_sml(TRGM *trg1, TRGM *trg2);
+extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
+extern bool *trgm_presence_map(TRGM *query, TRGM *key);
+extern TRGM *createTrgmNFA(text *text_re, Oid collation,
+			  TrgmPackedGraph **graph, MemoryContext rcontext);
+extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
 
 #endif   /* __TRGM_H__ */
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 114fb784c4..c59925c575 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -10,16 +10,9 @@
 
 
 PG_FUNCTION_INFO_V1(gin_extract_trgm);
-Datum		gin_extract_trgm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
-Datum		gin_extract_value_trgm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
-Datum		gin_extract_query_trgm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
-Datum		gin_trgm_consistent(PG_FUNCTION_ARGS);
 
 /*
  * This function can only be called if a pre-9.1 version of the GIN operator
@@ -80,13 +73,15 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
 	StrategyNumber strategy = PG_GETARG_UINT16(2);
 
 	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
-	/* Pointer	  *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
+	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+
 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
 	Datum	   *entries = NULL;
 	TRGM	   *trg;
 	int32		trglen;
 	trgm	   *ptr;
+	TrgmPackedGraph *graph;
 	int32		i;
 
 	switch (strategy)
@@ -107,6 +102,34 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
 			 */
 			trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
 			break;
+		case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+			/* FALL THRU */
+		case RegExpStrategyNumber:
+			trg = createTrgmNFA(val, PG_GET_COLLATION(),
+								&graph, CurrentMemoryContext);
+			if (trg && ARRNELEM(trg) > 0)
+			{
+				/*
+				 * Successful regex processing: store NFA-like graph as
+				 * extra_data.  GIN API requires an array of nentries
+				 * Pointers, but we just put the same value in each element.
+				 */
+				trglen = ARRNELEM(trg);
+				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
+				for (i = 0; i < trglen; i++)
+					(*extra_data)[i] = (Pointer) graph;
+			}
+			else
+			{
+				/* No result: have to do full index scan. */
+				*nentries = 0;
+				*searchMode = GIN_SEARCH_MODE_ALL;
+				PG_RETURN_POINTER(entries);
+			}
+			break;
 		default:
 			elog(ERROR, "unrecognized strategy number: %d", strategy);
 			trg = NULL;			/* keep compiler quiet */
@@ -146,8 +169,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
 
 	/* text    *query = PG_GETARG_TEXT_P(2); */
 	int32		nkeys = PG_GETARG_INT32(3);
-
-	/* Pointer	  *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
+	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
 	bool		res;
 	int32		i,
@@ -189,6 +211,21 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
 				}
 			}
 			break;
+		case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+			/* FALL THRU */
+		case RegExpStrategyNumber:
+			if (nkeys < 1)
+			{
+				/* Regex processing gave no result: do full index scan */
+				res = true;
+			}
+			else
+				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
+										 check);
+			break;
 		default:
 			elog(ERROR, "unrecognized strategy number: %d", strategy);
 			res = false;		/* keep compiler quiet */
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index d59c8eb670..69dc7f71f0 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -8,37 +8,35 @@
 #include "access/skey.h"
 
 
-PG_FUNCTION_INFO_V1(gtrgm_in);
-Datum		gtrgm_in(PG_FUNCTION_ARGS);
+typedef struct
+{
+	/* most recent inputs to gtrgm_consistent */
+	StrategyNumber strategy;
+	text	   *query;
+	/* extracted trigrams for query */
+	TRGM	   *trigrams;
+	/* if a regex operator, the extracted graph */
+	TrgmPackedGraph *graph;
 
-PG_FUNCTION_INFO_V1(gtrgm_out);
-Datum		gtrgm_out(PG_FUNCTION_ARGS);
+	/*
+	 * The "query" and "trigrams" are stored in the same palloc block as this
+	 * cache struct, at MAXALIGN'ed offsets.  The graph however isn't.
+	 */
+} gtrgm_consistent_cache;
 
-PG_FUNCTION_INFO_V1(gtrgm_compress);
-Datum		gtrgm_compress(PG_FUNCTION_ARGS);
+#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
 
-PG_FUNCTION_INFO_V1(gtrgm_decompress);
-Datum		gtrgm_decompress(PG_FUNCTION_ARGS);
 
+PG_FUNCTION_INFO_V1(gtrgm_in);
+PG_FUNCTION_INFO_V1(gtrgm_out);
+PG_FUNCTION_INFO_V1(gtrgm_compress);
+PG_FUNCTION_INFO_V1(gtrgm_decompress);
 PG_FUNCTION_INFO_V1(gtrgm_consistent);
-Datum		gtrgm_consistent(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gtrgm_distance);
-Datum		gtrgm_distance(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gtrgm_union);
-Datum		gtrgm_union(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gtrgm_same);
-Datum		gtrgm_same(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gtrgm_penalty);
-Datum		gtrgm_penalty(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gtrgm_picksplit);
-Datum		gtrgm_picksplit(PG_FUNCTION_ARGS);
-
-#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
 
 /* Number of one-bits in an unsigned byte */
 static const uint8 number_of_ones[256] = {
@@ -78,10 +76,10 @@ gtrgm_out(PG_FUNCTION_ARGS)
 static void
 makesign(BITVECP sign, TRGM *a)
 {
-	int4		k,
+	int32		k,
 				len = ARRNELEM(a);
 	trgm	   *ptr = GETARR(a);
-	int4		tmp = 0;
+	int32		tmp = 0;
 
 	MemSet((void *) sign, 0, sizeof(BITVEC));
 	SETBIT(sign, SIGLENBIT);	/* set last unused bit */
@@ -112,7 +110,7 @@ gtrgm_compress(PG_FUNCTION_ARGS)
 	else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
 			 !ISALLTRUE(DatumGetPointer(entry->key)))
 	{
-		int4		i,
+		int32		i,
 					len;
 		TRGM	   *res;
 		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));
@@ -160,14 +158,14 @@ gtrgm_decompress(PG_FUNCTION_ARGS)
 	}
 }
 
-static int4
+static int32
 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
 {
-	int4		count = 0;
-	int4		k,
+	int32		count = 0;
+	int32		k,
 				len = ARRNELEM(qtrg);
 	trgm	   *ptr = GETARR(qtrg);
-	int4		tmp = 0;
+	int32		tmp = 0;
 
 	for (k = 0; k < len; k++)
 	{
@@ -191,24 +189,30 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 	TRGM	   *qtrg;
 	bool		res;
 	Size		querysize = VARSIZE(query);
-	char	   *cache = (char *) fcinfo->flinfo->fn_extra,
-			   *cachedQuery = cache + MAXALIGN(sizeof(StrategyNumber));
+	gtrgm_consistent_cache *cache;
 
 	/*
-	 * Store both the strategy number and extracted trigrams in cache, because
-	 * trigram extraction is relatively CPU-expensive.	We must include
-	 * strategy number because trigram extraction depends on strategy.
+	 * We keep the extracted trigrams in cache, because trigram extraction is
+	 * relatively CPU-expensive.  When trying to reuse a cached value, check
+	 * strategy number not just query itself, because trigram extraction
+	 * depends on strategy.
 	 *
-	 * The cached structure contains the strategy number, then the input query
-	 * (starting at a MAXALIGN boundary), then the TRGM value (also starting
-	 * at a MAXALIGN boundary).
+	 * The cached structure is a single palloc chunk containing the
+	 * gtrgm_consistent_cache header, then the input query (starting at a
+	 * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
+	 * boundary).  However we don't try to include the regex graph (if any) in
+	 * that struct.  (XXX currently, this approach can leak regex graphs
+	 * across index rescans.  Not clear if that's worth fixing.)
 	 */
+	cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
 	if (cache == NULL ||
-		strategy != *((StrategyNumber *) cache) ||
-		VARSIZE(cachedQuery) != querysize ||
-		memcmp(cachedQuery, query, querysize) != 0)
+		cache->strategy != strategy ||
+		VARSIZE(cache->query) != querysize ||
+		memcmp((char *) cache->query, (char *) query, querysize) != 0)
 	{
-		char	   *newcache;
+		gtrgm_consistent_cache *newcache;
+		TrgmPackedGraph *graph = NULL;
+		Size		qtrgsize;
 
 		switch (strategy)
 		{
@@ -225,28 +229,58 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 				qtrg = generate_wildcard_trgm(VARDATA(query),
 											  querysize - VARHDRSZ);
 				break;
+			case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+				elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+				/* FALL THRU */
+			case RegExpStrategyNumber:
+				qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
+									 &graph, fcinfo->flinfo->fn_mcxt);
+				/* just in case an empty array is returned ... */
+				if (qtrg && ARRNELEM(qtrg) <= 0)
+				{
+					pfree(qtrg);
+					qtrg = NULL;
+				}
+				break;
 			default:
 				elog(ERROR, "unrecognized strategy number: %d", strategy);
 				qtrg = NULL;	/* keep compiler quiet */
 				break;
 		}
 
-		newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
-									  MAXALIGN(sizeof(StrategyNumber)) +
-									  MAXALIGN(querysize) +
-									  VARSIZE(qtrg));
-		cachedQuery = newcache + MAXALIGN(sizeof(StrategyNumber));
+		qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
 
-		*((StrategyNumber *) newcache) = strategy;
-		memcpy(cachedQuery, query, querysize);
-		memcpy(cachedQuery + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
+		newcache = (gtrgm_consistent_cache *)
+			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+							   MAXALIGN(sizeof(gtrgm_consistent_cache)) +
+							   MAXALIGN(querysize) +
+							   qtrgsize);
+
+		newcache->strategy = strategy;
+		newcache->query = (text *)
+			((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
+		memcpy((char *) newcache->query, (char *) query, querysize);
+		if (qtrg)
+		{
+			newcache->trigrams = (TRGM *)
+				((char *) newcache->query + MAXALIGN(querysize));
+			memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
+			/* release qtrg in case it was made in fn_mcxt */
+			pfree(qtrg);
+		}
+		else
+			newcache->trigrams = NULL;
+		newcache->graph = graph;
 
 		if (cache)
 			pfree(cache);
-		fcinfo->flinfo->fn_extra = newcache;
+		fcinfo->flinfo->fn_extra = (void *) newcache;
+		cache = newcache;
 	}
 
-	qtrg = (TRGM *) (cachedQuery + MAXALIGN(querysize));
+	qtrg = cache->trigrams;
 
 	switch (strategy)
 	{
@@ -267,8 +301,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 			}
 			else
 			{					/* non-leaf contains signature */
-				int4		count = cnt_sml_sign_common(qtrg, GETSIGN(key));
-				int4		len = ARRNELEM(qtrg);
+				int32		count = cnt_sml_sign_common(qtrg, GETSIGN(key));
+				int32		len = ARRNELEM(qtrg);
 
 				if (len == 0)
 					res = false;
@@ -317,6 +351,63 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 				}
 			}
 			break;
+		case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+			/* FALL THRU */
+		case RegExpStrategyNumber:
+			/* Regexp search is inexact */
+			*recheck = true;
+
+			/* Check regex match as much as we can with available info */
+			if (qtrg)
+			{
+				if (GIST_LEAF(entry))
+				{				/* all leafs contains orig trgm */
+					bool	   *check;
+
+					check = trgm_presence_map(qtrg, key);
+					res = trigramsMatchGraph(cache->graph, check);
+					pfree(check);
+				}
+				else if (ISALLTRUE(key))
+				{				/* non-leaf contains signature */
+					res = true;
+				}
+				else
+				{				/* non-leaf contains signature */
+					int32		k,
+								tmp = 0,
+								len = ARRNELEM(qtrg);
+					trgm	   *ptr = GETARR(qtrg);
+					BITVECP		sign = GETSIGN(key);
+					bool	   *check;
+
+					/*
+					 * GETBIT() tests may give false positives, due to limited
+					 * size of the sign array.  But since trigramsMatchGraph()
+					 * implements a monotone boolean function, false positives
+					 * in the check array can't lead to false negative answer.
+					 * So we can apply trigramsMatchGraph despite uncertainty,
+					 * and that usefully improves the quality of the search.
+					 */
+					check = (bool *) palloc(len * sizeof(bool));
+					for (k = 0; k < len; k++)
+					{
+						CPTRGM(((char *) &tmp), ptr + k);
+						check[k] = GETBIT(sign, HASHVAL(tmp));
+					}
+					res = trigramsMatchGraph(cache->graph, check);
+					pfree(check);
+				}
+			}
+			else
+			{
+				/* trigram-free query must be rechecked everywhere */
+				res = true;
+			}
+			break;
 		default:
 			elog(ERROR, "unrecognized strategy number: %d", strategy);
 			res = false;		/* keep compiler quiet */
@@ -379,8 +470,8 @@ gtrgm_distance(PG_FUNCTION_ARGS)
 			}
 			else
 			{					/* non-leaf contains signature */
-				int4		count = cnt_sml_sign_common(qtrg, GETSIGN(key));
-				int4		len = ARRNELEM(qtrg);
+				int32		count = cnt_sml_sign_common(qtrg, GETSIGN(key));
+				int32		len = ARRNELEM(qtrg);
 
 				res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
 			}
@@ -394,10 +485,10 @@ gtrgm_distance(PG_FUNCTION_ARGS)
 	PG_RETURN_FLOAT8(res);
 }
 
-static int4
+static int32
 unionkey(BITVECP sbase, TRGM *add)
 {
-	int4		i;
+	int32		i;
 
 	if (ISSIGNKEY(add))
 	{
@@ -412,7 +503,7 @@ unionkey(BITVECP sbase, TRGM *add)
 	else
 	{
 		trgm	   *ptr = GETARR(add);
-		int4		tmp = 0;
+		int32		tmp = 0;
 
 		for (i = 0; i < ARRNELEM(add); i++)
 		{
@@ -428,11 +519,11 @@ Datum
 gtrgm_union(PG_FUNCTION_ARGS)
 {
 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
-	int4		len = entryvec->n;
+	int32		len = entryvec->n;
 	int		   *size = (int *) PG_GETARG_POINTER(1);
 	BITVEC		base;
-	int4		i;
-	int4		flag = 0;
+	int32		i;
+	int32		flag = 0;
 	TRGM	   *result;
 
 	MemSet((void *) base, 0, sizeof(BITVEC));
@@ -474,7 +565,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
 			*result = false;
 		else
 		{
-			int4		i;
+			int32		i;
 			BITVECP		sa = GETSIGN(a),
 						sb = GETSIGN(b);
 
@@ -491,7 +582,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
 	}
 	else
 	{							/* a and b ISARRKEY */
-		int4		lena = ARRNELEM(a),
+		int32		lena = ARRNELEM(a),
 					lenb = ARRNELEM(b);
 
 		if (lena != lenb)
@@ -500,7 +591,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
 		{
 			trgm	   *ptra = GETARR(a),
 					   *ptrb = GETARR(b);
-			int4		i;
+			int32		i;
 
 			*result = true;
 			for (i = 0; i < lena; i++)
@@ -515,10 +606,10 @@ gtrgm_same(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(result);
 }
 
-static int4
+static int32
 sizebitvec(BITVECP sign)
 {
-	int4		size = 0,
+	int32		size = 0,
 				i;
 
 	LOOPBYTE
@@ -634,7 +725,7 @@ fillcache(CACHESIGN *item, TRGM *key)
 typedef struct
 {
 	OffsetNumber pos;
-	int4		cost;
+	int32		cost;
 } SPLITCOST;
 
 static int
@@ -675,11 +766,11 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
 			   *datum_r;
 	BITVECP		union_l,
 				union_r;
-	int4		size_alpha,
+	int32		size_alpha,
 				size_beta;
-	int4		size_waste,
+	int32		size_waste,
 				waste = -1;
-	int4		nbytes;
+	int32		nbytes;
 	OffsetNumber seed_1 = 0,
 				seed_2 = 0;
 	OffsetNumber *left,
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 4e32c6f654..c385e09edd 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -9,6 +9,7 @@
 
 #include "catalog/pg_type.h"
 #include "tsearch/ts_locale.h"
+#include "utils/memutils.h"
 
 
 PG_MODULE_MAGIC;
@@ -16,22 +17,11 @@ PG_MODULE_MAGIC;
 float4		trgm_limit = 0.3f;
 
 PG_FUNCTION_INFO_V1(set_limit);
-Datum		set_limit(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(show_limit);
-Datum		show_limit(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(show_trgm);
-Datum		show_trgm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(similarity);
-Datum		similarity(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(similarity_dist);
-Datum		similarity_dist(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(similarity_op);
-Datum		similarity_op(PG_FUNCTION_ARGS);
 
 
 Datum
@@ -77,12 +67,6 @@ unique_array(trgm *a, int len)
 	return curend + 1 - a;
 }
 
-#ifdef KEEPONLYALNUM
-#define iswordchr(c)	(t_isalpha(c) || t_isdigit(c))
-#else
-#define iswordchr(c)	(!t_isspace(c))
-#endif
-
 /*
  * Finds first word in string, returns pointer to the word,
  * endword points to the character after word
@@ -92,7 +76,7 @@ find_word(char *str, int lenstr, char **endword, int *charlen)
 {
 	char	   *beginword = str;
 
-	while (beginword - str < lenstr && !iswordchr(beginword))
+	while (beginword - str < lenstr && !ISWORDCHR(beginword))
 		beginword += pg_mblen(beginword);
 
 	if (beginword - str >= lenstr)
@@ -100,7 +84,7 @@ find_word(char *str, int lenstr, char **endword, int *charlen)
 
 	*endword = beginword;
 	*charlen = 0;
-	while (*endword - str < lenstr && iswordchr(*endword))
+	while (*endword - str < lenstr && ISWORDCHR(*endword))
 	{
 		*endword += pg_mblen(*endword);
 		(*charlen)++;
@@ -109,9 +93,13 @@ find_word(char *str, int lenstr, char **endword, int *charlen)
 	return beginword;
 }
 
-#ifdef USE_WIDE_UPPER_LOWER
-static void
-cnt_trigram(trgm *tptr, char *str, int bytelen)
+/*
+ * Reduce a trigram (three possibly multi-byte characters) to a trgm,
+ * which is always exactly three bytes.  If we have three single-byte
+ * characters, we just use them as-is; otherwise we form a hash value.
+ */
+void
+compact_trigram(trgm *tptr, char *str, int bytelen)
 {
 	if (bytelen == 3)
 	{
@@ -131,7 +119,6 @@ cnt_trigram(trgm *tptr, char *str, int bytelen)
 		CPTRGM(tptr, &crc);
 	}
 }
-#endif
 
 /*
  * Adds trigrams from words (already padded).
@@ -144,16 +131,16 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
 	if (charlen < 3)
 		return tptr;
 
-#ifdef USE_WIDE_UPPER_LOWER
-	if (pg_database_encoding_max_length() > 1)
+	if (bytelen > charlen)
 	{
+		/* Find multibyte character boundaries and apply compact_trigram */
 		int			lenfirst = pg_mblen(str),
 					lenmiddle = pg_mblen(str + lenfirst),
 					lenlast = pg_mblen(str + lenfirst + lenmiddle);
 
 		while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
 		{
-			cnt_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
+			compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
 
 			ptr += lenfirst;
 			tptr++;
@@ -164,8 +151,8 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
 		}
 	}
 	else
-#endif
 	{
+		/* Fast path when there are no multibyte characters */
 		Assert(bytelen == charlen);
 
 		while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
@@ -191,6 +178,18 @@ generate_trgm(char *str, int slen)
 	char	   *bword,
 			   *eword;
 
+	/*
+	 * Guard against possible overflow in the palloc requests below.  (We
+	 * don't worry about the additive constants, since palloc can detect
+	 * requests that are a little above MaxAllocSize --- we just need to
+	 * prevent integer overflow in the multiplications.)
+	 */
+	if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
+		(Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("out of memory")));
+
 	trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3);
 	trg->flag = ARRKEY;
 	SET_VARSIZE(trg, TRGMHDRSIZE);
@@ -200,7 +199,8 @@ generate_trgm(char *str, int slen)
 
 	tptr = GETARR(trg);
 
-	buf = palloc(sizeof(char) * (slen + 4));
+	/* Allocate a buffer for case-folded, blank-padded words */
+	buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
 
 	if (LPADDING > 0)
 	{
@@ -224,6 +224,7 @@ generate_trgm(char *str, int slen)
 #ifdef IGNORECASE
 		pfree(bword);
 #endif
+
 		buf[LPADDING + bytelen] = ' ';
 		buf[LPADDING + bytelen + 1] = ' ';
 
@@ -239,7 +240,10 @@ generate_trgm(char *str, int slen)
 	if ((len = tptr - GETARR(trg)) == 0)
 		return trg;
 
-	if (len > 0)
+	/*
+	 * Make trigrams unique.
+	 */
+	if (len > 1)
 	{
 		qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
 		len = unique_array(GETARR(trg), len);
@@ -272,33 +276,36 @@ get_wildcard_part(const char *str, int lenstr,
 	const char *beginword = str;
 	const char *endword;
 	char	   *s = buf;
-	bool		in_wildcard_meta = false;
+	bool		in_leading_wildcard_meta = false;
+	bool		in_trailing_wildcard_meta = false;
 	bool		in_escape = false;
 	int			clen;
 
 	/*
-	 * Find the first word character remembering whether last character was
-	 * wildcard meta-character.
+	 * Find the first word character, remembering whether preceding character
+	 * was wildcard meta-character.  Note that the in_escape state persists
+	 * from this loop to the next one, since we may exit at a word character
+	 * that is in_escape.
 	 */
 	while (beginword - str < lenstr)
 	{
 		if (in_escape)
 		{
-			in_escape = false;
-			in_wildcard_meta = false;
-			if (iswordchr(beginword))
+			if (ISWORDCHR(beginword))
 				break;
+			in_escape = false;
+			in_leading_wildcard_meta = false;
 		}
 		else
 		{
 			if (ISESCAPECHAR(beginword))
 				in_escape = true;
 			else if (ISWILDCARDCHAR(beginword))
-				in_wildcard_meta = true;
-			else if (iswordchr(beginword))
+				in_leading_wildcard_meta = true;
+			else if (ISWORDCHR(beginword))
 				break;
 			else
-				in_wildcard_meta = false;
+				in_leading_wildcard_meta = false;
 		}
 		beginword += pg_mblen(beginword);
 	}
@@ -310,11 +317,11 @@ get_wildcard_part(const char *str, int lenstr,
 		return NULL;
 
 	/*
-	 * Add left padding spaces if last character wasn't wildcard
+	 * Add left padding spaces if preceding character wasn't wildcard
 	 * meta-character.
 	 */
 	*charlen = 0;
-	if (!in_wildcard_meta)
+	if (!in_leading_wildcard_meta)
 	{
 		if (LPADDING > 0)
 		{
@@ -333,23 +340,29 @@ get_wildcard_part(const char *str, int lenstr,
 	 * string boundary.  Strip escapes during copy.
 	 */
 	endword = beginword;
-	in_wildcard_meta = false;
-	in_escape = false;
 	while (endword - str < lenstr)
 	{
 		clen = pg_mblen(endword);
 		if (in_escape)
 		{
-			in_escape = false;
-			in_wildcard_meta = false;
-			if (iswordchr(endword))
+			if (ISWORDCHR(endword))
 			{
 				memcpy(s, endword, clen);
 				(*charlen)++;
 				s += clen;
 			}
 			else
+			{
+				/*
+				 * Back up endword to the escape character when stopping at an
+				 * escaped char, so that subsequent get_wildcard_part will
+				 * restart from the escape character.  We assume here that
+				 * escape chars are single-byte.
+				 */
+				endword--;
 				break;
+			}
+			in_escape = false;
 		}
 		else
 		{
@@ -357,29 +370,26 @@ get_wildcard_part(const char *str, int lenstr,
 				in_escape = true;
 			else if (ISWILDCARDCHAR(endword))
 			{
-				in_wildcard_meta = true;
+				in_trailing_wildcard_meta = true;
 				break;
 			}
-			else if (iswordchr(endword))
+			else if (ISWORDCHR(endword))
 			{
 				memcpy(s, endword, clen);
 				(*charlen)++;
 				s += clen;
 			}
 			else
-			{
-				in_wildcard_meta = false;
 				break;
-			}
 		}
 		endword += clen;
 	}
 
 	/*
-	 * Add right padding spaces if last character wasn't wildcard
+	 * Add right padding spaces if next character isn't wildcard
 	 * meta-character.
 	 */
-	if (!in_wildcard_meta)
+	if (!in_trailing_wildcard_meta)
 	{
 		if (RPADDING > 0)
 		{
@@ -416,6 +426,18 @@ generate_wildcard_trgm(const char *str, int slen)
 				bytelen;
 	const char *eword;
 
+	/*
+	 * Guard against possible overflow in the palloc requests below.  (We
+	 * don't worry about the additive constants, since palloc can detect
+	 * requests that are a little above MaxAllocSize --- we just need to
+	 * prevent integer overflow in the multiplications.)
+	 */
+	if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
+		(Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("out of memory")));
+
 	trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3);
 	trg->flag = ARRKEY;
 	SET_VARSIZE(trg, TRGMHDRSIZE);
@@ -425,6 +447,7 @@ generate_wildcard_trgm(const char *str, int slen)
 
 	tptr = GETARR(trg);
 
+	/* Allocate a buffer for blank-padded, but not yet case-folded, words */
 	buf = palloc(sizeof(char) * (slen + 4));
 
 	/*
@@ -445,6 +468,7 @@ generate_wildcard_trgm(const char *str, int slen)
 		 * count trigrams
 		 */
 		tptr = make_trigrams(tptr, buf2, bytelen, charlen);
+
 #ifdef IGNORECASE
 		pfree(buf2);
 #endif
@@ -458,7 +482,7 @@ generate_wildcard_trgm(const char *str, int slen)
 	/*
 	 * Make trigrams unique.
 	 */
-	if (len > 0)
+	if (len > 1)
 	{
 		qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
 		len = unique_array(GETARR(trg), len);
@@ -547,6 +571,10 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
 	len1 = ARRNELEM(trg1);
 	len2 = ARRNELEM(trg2);
 
+	/* explicit test is needed to avoid 0/0 division when both lengths are 0 */
+	if (len1 <= 0 || len2 <= 0)
+		return (float4) 0.0;
+
 	while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
 	{
 		int			res = CMPTRGM(ptr1, ptr2);
@@ -564,9 +592,9 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
 	}
 
 #ifdef DIVUNION
-	return ((((float4) count) / ((float4) (len1 + len2 - count))));
+	return ((float4) count) / ((float4) (len1 + len2 - count));
 #else
-	return (((float) count) / ((float) ((len1 > len2) ? len1 : len2)));
+	return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2));
 #endif
 
 }
@@ -609,6 +637,50 @@ trgm_contained_by(TRGM *trg1, TRGM *trg2)
 		return true;
 }
 
+/*
+ * Return a palloc'd boolean array showing, for each trigram in "query",
+ * whether it is present in the trigram array "key".
+ * This relies on the "key" array being sorted, but "query" need not be.
+ */
+bool *
+trgm_presence_map(TRGM *query, TRGM *key)
+{
+	bool	   *result;
+	trgm	   *ptrq = GETARR(query),
+			   *ptrk = GETARR(key);
+	int			lenq = ARRNELEM(query),
+				lenk = ARRNELEM(key),
+				i;
+
+	result = (bool *) palloc0(lenq * sizeof(bool));
+
+	/* for each query trigram, do a binary search in the key array */
+	for (i = 0; i < lenq; i++)
+	{
+		int			lo = 0;
+		int			hi = lenk;
+
+		while (lo < hi)
+		{
+			int			mid = (lo + hi) / 2;
+			int			res = CMPTRGM(ptrq, ptrk + mid);
+
+			if (res < 0)
+				hi = mid;
+			else if (res > 0)
+				lo = mid + 1;
+			else
+			{
+				result[i] = true;
+				break;
+			}
+		}
+		ptrq++;
+	}
+
+	return result;
+}
+
 Datum
 similarity(PG_FUNCTION_ARGS)
 {
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
new file mode 100644
index 0000000000..9f050533c5
--- /dev/null
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -0,0 +1,2247 @@
+/*-------------------------------------------------------------------------
+ *
+ * trgm_regexp.c
+ *	  Regular expression matching using trigrams.
+ *
+ * The general idea of trigram index support for a regular expression (regex)
+ * search is to transform the regex into a logical expression on trigrams.
+ * For example:
+ *
+ *	 (ab|cd)efg  =>  ((abe & bef) | (cde & def)) & efg
+ *
+ * If a string matches the regex, then it must match the logical expression on
+ * trigrams.  The opposite is not necessarily true, however: a string that
+ * matches the logical expression might not match the original regex.  Such
+ * false positives are removed via recheck, by running the regular regex match
+ * operator on the retrieved heap tuple.
+ *
+ * Since the trigram expression involves both AND and OR operators, we can't
+ * expect the core index machinery to evaluate it completely.  Instead, the
+ * result of regex analysis is a list of trigrams to be sought in the index,
+ * plus a simplified graph that is used by trigramsMatchGraph() to determine
+ * whether a particular indexed value matches the expression.
+ *
+ * Converting a regex to a trigram expression is based on analysis of an
+ * automaton corresponding to the regex.  The algorithm consists of four
+ * stages:
+ *
+ * 1) Compile the regexp to NFA form.  This is handled by the PostgreSQL
+ *	  regexp library, which provides accessors for its opaque regex_t struct
+ *	  to expose the NFA state graph and the "colors" (sets of equivalent
+ *	  characters) used as state transition labels.
+ *
+ * 2) Transform the original NFA into an expanded graph, where arcs
+ *	  are labeled with trigrams that must be present in order to move from
+ *	  one state to another via the arcs.  The trigrams used in this stage
+ *	  consist of colors, not characters, as in the original NFA.
+ *
+ * 3) Expand the color trigrams into regular trigrams consisting of
+ *	  characters.  If too many distinct trigrams are produced, trigrams are
+ *	  eliminated and the graph is simplified until it's simple enough.
+ *
+ * 4) Finally, the resulting graph is packed into a TrgmPackedGraph struct,
+ *	  and returned to the caller.
+ *
+ * 1) Compile the regexp to NFA form
+ * ---------------------------------
+ * The automaton returned by the regexp compiler is a graph where vertices
+ * are "states" and arcs are labeled with colors.  Each color represents
+ * a set of characters, so that all characters assigned to the same color
+ * are interchangeable, so far as matching the regexp is concerned.  There
+ * are two special states: "initial" and "final".  A state can have multiple
+ * outgoing arcs labeled with the same color, which makes the automaton
+ * non-deterministic, because it can be in many states simultaneously.
+ *
+ * Note that this NFA is already lossy compared to the original regexp,
+ * since it ignores some regex features such as lookahead constraints and
+ * backref matching.  This is OK for our purposes since it's still the case
+ * that only strings matching the NFA can possibly satisfy the regexp.
+ *
+ * 2) Transform the original NFA into an expanded graph
+ * ----------------------------------------------------
+ * In the 2nd stage, the automaton is transformed into a graph based on the
+ * original NFA.  Each state in the expanded graph represents a state from
+ * the original NFA, plus a prefix identifying the last two characters
+ * (colors, to be precise) seen before entering the state.  There can be
+ * multiple states in the expanded graph for each state in the original NFA,
+ * depending on what characters can precede it.  A prefix position can be
+ * "unknown" if it's uncertain what the preceding character was, or "blank"
+ * if the character was a non-word character (we don't need to distinguish
+ * which non-word character it was, so just think of all of them as blanks).
+ *
+ * For convenience in description, call an expanded-state identifier
+ * (two prefix colors plus a state number from the original NFA) an
+ * "enter key".
+ *
+ * Each arc of the expanded graph is labelled with a trigram that must be
+ * present in the string to match.  We can construct this from an out-arc of
+ * the underlying NFA state by combining the expanded state's prefix with the
+ * color label of the underlying out-arc, if neither prefix position is
+ * "unknown".  But note that some of the colors in the trigram might be
+ * "blank".  This is OK since we want to generate word-boundary trigrams as
+ * the regular trigram machinery would, if we know that some word characters
+ * must be adjacent to a word boundary in all strings matching the NFA.
+ *
+ * The expanded graph can also have fewer states than the original NFA,
+ * because we don't bother to make a separate state entry unless the state
+ * is reachable by a valid arc.  When an enter key is reachable from a state
+ * of the expanded graph, but we do not know a complete trigram associated
+ * with that transition, we cannot make a valid arc; instead we insert the
+ * enter key into the enterKeys list of the source state.  This effectively
+ * means that the two expanded states are not reliably distinguishable based
+ * on examining trigrams.
+ *
+ * So the expanded graph resembles the original NFA, but the arcs are
+ * labeled with trigrams instead of individual characters, and there may be
+ * more or fewer states.  It is a lossy representation of the original NFA:
+ * any string that matches the original regexp must match the expanded graph,
+ * but the reverse is not true.
+ *
+ * We build the expanded graph through a breadth-first traversal of states
+ * reachable from the initial state.  At each reachable state, we identify the
+ * states reachable from it without traversing a predictable trigram, and add
+ * those states' enter keys to the current state.  Then we generate all
+ * out-arcs leading out of this collection of states that have predictable
+ * trigrams, adding their target states to the queue of states to examine.
+ *
+ * When building the graph, if the number of states or arcs exceed pre-defined
+ * limits, we give up and simply mark any states not yet processed as final
+ * states.  Roughly speaking, that means that we make use of some portion from
+ * the beginning of the regexp.  Also, any colors that have too many member
+ * characters are treated as "unknown", so that we can't derive trigrams
+ * from them.
+ *
+ * 3) Expand the color trigrams into regular trigrams
+ * --------------------------------------------------
+ * The trigrams in the expanded graph are "color trigrams", consisting
+ * of three consecutive colors that must be present in the string. But for
+ * search, we need regular trigrams consisting of characters. In the 3rd
+ * stage, the color trigrams are expanded into regular trigrams. Since each
+ * color can represent many characters, the total number of regular trigrams
+ * after expansion could be very large. Because searching the index for
+ * thousands of trigrams would be slow, and would likely produce so many
+ * false positives that we would have to traverse a large fraction of the
+ * index, the graph is simplified further in a lossy fashion by removing
+ * color trigrams. When a color trigram is removed, the states connected by
+ * any arcs labelled with that trigram are merged.
+ *
+ * Trigrams do not all have equivalent value for searching: some of them are
+ * more frequent and some of them are less frequent. Ideally, we would like
+ * to know the distribution of trigrams, but we don't. But because of padding
+ * we know for sure that the empty character is more frequent than others,
+ * so we can penalize trigrams according to presence of whitespace. The
+ * penalty assigned to each color trigram is the number of simple trigrams
+ * it would produce, times the penalties[] multiplier associated with its
+ * whitespace content. (The penalties[] constants were calculated by analysis
+ * of some real-life text.) We eliminate color trigrams starting with the
+ * highest-penalty one, until we get to a total penalty of no more than
+ * WISH_TRGM_PENALTY. However, we cannot remove a color trigram if that would
+ * lead to merging the initial and final states, so we may not be able to
+ * reach WISH_TRGM_PENALTY. It's still okay so long as we have no more than
+ * MAX_TRGM_COUNT simple trigrams in total, otherwise we fail.
+ *
+ * 4) Pack the graph into a compact representation
+ * -----------------------------------------------
+ * The 2nd and 3rd stages might have eliminated or merged many of the states
+ * and trigrams created earlier, so in this final stage, the graph is
+ * compacted and packed into a simpler struct that contains only the
+ * information needed to evaluate it.
+ *
+ * ALGORITHM EXAMPLE:
+ *
+ * Consider the example regex "ab[cd]".  This regex is transformed into the
+ * following NFA (for simplicity we show colors as their single members):
+ *
+ *					  4#
+ *					c/
+ *		 a	   b	/
+ *	 1* --- 2 ---- 3
+ *					\
+ *					d\
+ *					  5#
+ *
+ * We use * to mark initial state and # to mark final state. It's not depicted,
+ * but states 1, 4, 5 have self-referencing arcs for all possible characters,
+ * because this pattern can match to any part of a string.
+ *
+ * As the result of stage 2 we will have the following graph:
+ *
+ *		  abc	 abd
+ *	 2# <---- 1* ----> 3#
+ *
+ * The process for generating this graph is:
+ * 1) Create state 1 with enter key (UNKNOWN, UNKNOWN, 1).
+ * 2) Add key (UNKNOWN, "a", 2) to state 1.
+ * 3) Add key ("a", "b", 3) to state 1.
+ * 4) Create new state 2 with enter key ("b", "c", 4).  Add an arc
+ *	  from state 1 to state 2 with label trigram "abc".
+ * 5) Mark state 2 final because state 4 of source NFA is marked as final.
+ * 6) Create new state 3 with enter key ("b", "d", 5).  Add an arc
+ *	  from state 1 to state 3 with label trigram "abd".
+ * 7) Mark state 3 final because state 5 of source NFA is marked as final.
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  contrib/pg_trgm/trgm_regexp.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "trgm.h"
+
+#include "regex/regexport.h"
+#include "tsearch/ts_locale.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
+
+
+/*
+ * Uncomment to print intermediate stages, for exploring and debugging the
+ * algorithm implementation.  This produces three graph files in /tmp,
+ * in Graphviz .dot format.
+ */
+/* #define TRGM_REGEXP_DEBUG */
+
+/*
+ * These parameters are used to limit the amount of work done.
+ * Otherwise regex processing could be too slow and memory-consuming.
+ *
+ *	MAX_EXPANDED_STATES - How many states we allow in expanded graph
+ *	MAX_EXPANDED_ARCS - How many arcs we allow in expanded graph
+ *	MAX_TRGM_COUNT - How many simple trigrams we allow to be extracted
+ *	WISH_TRGM_PENALTY - Maximum desired sum of color trigram penalties
+ *	COLOR_COUNT_LIMIT - Maximum number of characters per color
+ */
+#define MAX_EXPANDED_STATES 128
+#define MAX_EXPANDED_ARCS	1024
+#define MAX_TRGM_COUNT		256
+#define WISH_TRGM_PENALTY	16
+#define COLOR_COUNT_LIMIT	256
+
+/*
+ * Penalty multipliers for trigram counts depending on whitespace contents.
+ * Numbers based on analysis of real-life texts.
+ */
+const float4 penalties[8] = {
+	1.0f,						/* "aaa" */
+	3.5f,						/* "aa " */
+	0.0f,						/* "a a" (impossible) */
+	0.0f,						/* "a  " (impossible) */
+	4.2f,						/* " aa" */
+	2.1f,						/* " a " */
+	25.0f,						/* "  a" */
+	0.0f						/* "   " (impossible) */
+};
+
+/* Struct representing a single pg_wchar, converted back to multibyte form */
+typedef struct
+{
+	char		bytes[MAX_MULTIBYTE_CHAR_LEN];
+} trgm_mb_char;
+
+/*
+ * Attributes of NFA colors:
+ *
+ *	expandable				- we know the character expansion of this color
+ *	containsNonWord			- color contains non-word characters
+ *							  (which will not be extracted into trigrams)
+ *	wordCharsCount			- count of word characters in color
+ *	wordChars				- array of this color's word characters
+ *							  (which can be extracted into trigrams)
+ *
+ * When expandable is false, the other attributes don't matter; we just
+ * assume this color represents unknown character(s).
+ */
+typedef struct
+{
+	bool		expandable;
+	bool		containsNonWord;
+	int			wordCharsCount;
+	trgm_mb_char *wordChars;
+} TrgmColorInfo;
+
+/*
+ * A "prefix" is information about the colors of the last two characters read
+ * before reaching a specific NFA state.  These colors can have special values
+ * COLOR_UNKNOWN and COLOR_BLANK.  COLOR_UNKNOWN means that we have no
+ * information, for example because we read some character of an unexpandable
+ * color.  COLOR_BLANK means that we read a non-word character.
+ *
+ * We call a prefix ambiguous if at least one of its colors is unknown.  It's
+ * fully ambiguous if both are unknown, partially ambiguous if only the first
+ * is unknown.  (The case of first color known, second unknown is not valid.)
+ *
+ * Wholly- or partly-blank prefixes are mostly handled the same as regular
+ * color prefixes.  This allows us to generate appropriate partly-blank
+ * trigrams when the NFA requires word character(s) to appear adjacent to
+ * non-word character(s).
+ */
+typedef int TrgmColor;
+
+/* We assume that colors returned by the regexp engine cannot be these: */
+#define COLOR_UNKNOWN	(-1)
+#define COLOR_BLANK		(-2)
+
+typedef struct
+{
+	TrgmColor	colors[2];
+} TrgmPrefix;
+
+/*
+ * Color-trigram data type.  Note that some elements of the trigram can be
+ * COLOR_BLANK, but we don't allow COLOR_UNKNOWN.
+ */
+typedef struct
+{
+	TrgmColor	colors[3];
+} ColorTrgm;
+
+/*
+ * Key identifying a state of our expanded graph: color prefix, and number
+ * of the corresponding state in the underlying regex NFA.  The color prefix
+ * shows how we reached the regex state (to the extent that we know it).
+ */
+typedef struct
+{
+	TrgmPrefix	prefix;
+	int			nstate;
+} TrgmStateKey;
+
+/*
+ * One state of the expanded graph.
+ *
+ *	stateKey - ID of this state
+ *	arcs	 - outgoing arcs of this state (List of TrgmArc)
+ *	enterKeys - enter keys reachable from this state without reading any
+ *			   predictable trigram (List of TrgmStateKey)
+ *	fin		 - flag indicating this state is final
+ *	init	 - flag indicating this state is initial
+ *	parent	 - parent state, if this state has been merged into another
+ *	children - child states (states that have been merged into this one)
+ *	number	 - number of this state (used at the packaging stage)
+ */
+typedef struct TrgmState
+{
+	TrgmStateKey stateKey;		/* hashtable key: must be first field */
+	List	   *arcs;
+	List	   *enterKeys;
+	bool		fin;
+	bool		init;
+	struct TrgmState *parent;
+	List	   *children;
+	int			number;
+} TrgmState;
+
+/*
+ * One arc in the expanded graph.
+ */
+typedef struct
+{
+	ColorTrgm	ctrgm;			/* trigram needed to traverse arc */
+	TrgmState  *target;			/* next state */
+} TrgmArc;
+
+/*
+ * Information about arc of specific color trigram (used in stage 3)
+ *
+ * Contains pointers to the source and target states.
+ */
+typedef struct
+{
+	TrgmState  *source;
+	TrgmState  *target;
+} TrgmArcInfo;
+
+/*
+ * Information about color trigram (used in stage 3)
+ *
+ * ctrgm	- trigram itself
+ * number	- number of this trigram (used in the packaging stage)
+ * count	- number of simple trigrams created from this color trigram
+ * expanded - indicates this color trigram is expanded into simple trigrams
+ * arcs		- list of all arcs labeled with this color trigram.
+ */
+typedef struct
+{
+	ColorTrgm	ctrgm;
+	int			number;
+	int			count;
+	float4		penalty;
+	bool		expanded;
+	List	   *arcs;
+} ColorTrgmInfo;
+
+/*
+ * Data structure representing all the data we need during regex processing.
+ *
+ *	regex			- compiled regex
+ *	colorInfo		- extracted information about regex's colors
+ *	ncolors			- number of colors in colorInfo[]
+ *	states			- hashtable of TrgmStates (states of expanded graph)
+ *	initState		- pointer to initial state of expanded graph
+ *	queue			- queue of to-be-processed TrgmStates
+ *	keysQueue		- queue of to-be-processed TrgmStateKeys
+ *	arcsCount		- total number of arcs of expanded graph (for resource
+ *					  limiting)
+ *	overflowed		- we have exceeded resource limit for transformation
+ *	colorTrgms		- array of all color trigrams present in graph
+ *	colorTrgmsCount - count of those color trigrams
+ *	totalTrgmCount	- total count of extracted simple trigrams
+ */
+typedef struct
+{
+	/* Source regexp, and color information extracted from it (stage 1) */
+	regex_t    *regex;
+	TrgmColorInfo *colorInfo;
+	int			ncolors;
+
+	/* Expanded graph (stage 2) */
+	HTAB	   *states;
+	TrgmState  *initState;
+
+	/* Workspace for stage 2 */
+	List	   *queue;
+	List	   *keysQueue;
+	int			arcsCount;
+	bool		overflowed;
+
+	/* Information about distinct color trigrams in the graph (stage 3) */
+	ColorTrgmInfo *colorTrgms;
+	int			colorTrgmsCount;
+	int			totalTrgmCount;
+} TrgmNFA;
+
+/*
+ * Final, compact representation of expanded graph.
+ */
+typedef struct
+{
+	int			targetState;	/* index of target state (zero-based) */
+	int			colorTrgm;		/* index of color trigram for transition */
+} TrgmPackedArc;
+
+typedef struct
+{
+	int			arcsCount;		/* number of out-arcs for this state */
+	TrgmPackedArc *arcs;		/* array of arcsCount packed arcs */
+} TrgmPackedState;
+
+/* "typedef struct TrgmPackedGraph TrgmPackedGraph" appears in trgm.h */
+struct TrgmPackedGraph
+{
+	/*
+	 * colorTrigramsCount and colorTrigramsGroups contain information about
+	 * how trigrams are grouped into color trigrams.  "colorTrigramsCount" is
+	 * the count of color trigrams and "colorTrigramGroups" contains number of
+	 * simple trigrams for each color trigram.  The array of simple trigrams
+	 * (stored separately from this struct) is ordered so that the simple
+	 * trigrams for each color trigram are consecutive, and they're in order
+	 * by color trigram number.
+	 */
+	int			colorTrigramsCount;
+	int		   *colorTrigramGroups;		/* array of size colorTrigramsCount */
+
+	/*
+	 * The states of the simplified NFA.  State number 0 is always initial
+	 * state and state number 1 is always final state.
+	 */
+	int			statesCount;
+	TrgmPackedState *states;	/* array of size statesCount */
+
+	/* Temporary work space for trigramsMatchGraph() */
+	bool	   *colorTrigramsActive;	/* array of size colorTrigramsCount */
+	bool	   *statesActive;	/* array of size statesCount */
+	int		   *statesQueue;	/* array of size statesCount */
+};
+
+/*
+ * Temporary structure for representing an arc during packaging.
+ */
+typedef struct
+{
+	int			sourceState;
+	int			targetState;
+	int			colorTrgm;
+} TrgmPackArcInfo;
+
+
+/* prototypes for private functions */
+static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
+					  MemoryContext rcontext);
+static void RE_compile(regex_t *regex, text *text_re,
+		   int cflags, Oid collation);
+static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
+static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
+static void transformGraph(TrgmNFA *trgmNFA);
+static void processState(TrgmNFA *trgmNFA, TrgmState *state);
+static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
+static void addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key);
+static void addArcs(TrgmNFA *trgmNFA, TrgmState *state);
+static void addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key,
+	   TrgmColor co, TrgmStateKey *destKey);
+static bool validArcLabel(TrgmStateKey *key, TrgmColor co);
+static TrgmState *getState(TrgmNFA *trgmNFA, TrgmStateKey *key);
+static bool prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2);
+static bool selectColorTrigrams(TrgmNFA *trgmNFA);
+static TRGM *expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext);
+static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3]);
+static void mergeStates(TrgmState *state1, TrgmState *state2);
+static int	colorTrgmInfoCmp(const void *p1, const void *p2);
+static int	colorTrgmInfoPenaltyCmp(const void *p1, const void *p2);
+static TrgmPackedGraph *packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext);
+static int	packArcInfoCmp(const void *a1, const void *a2);
+
+#ifdef TRGM_REGEXP_DEBUG
+static void printSourceNFA(regex_t *regex, TrgmColorInfo *colors, int ncolors);
+static void printTrgmNFA(TrgmNFA *trgmNFA);
+static void printTrgmColor(StringInfo buf, TrgmColor co);
+static void printTrgmPackedGraph(TrgmPackedGraph *packedGraph, TRGM *trigrams);
+#endif
+
+
+/*
+ * Main entry point to process a regular expression.
+ *
+ * Returns an array of trigrams required by the regular expression, or NULL if
+ * the regular expression was too complex to analyze.  In addition, a packed
+ * graph representation of the regex is returned into *graph.  The results
+ * must be allocated in rcontext (which might or might not be the current
+ * context).
+ */
+TRGM *
+createTrgmNFA(text *text_re, Oid collation,
+			  TrgmPackedGraph **graph, MemoryContext rcontext)
+{
+	TRGM	   *trg;
+	regex_t		regex;
+	MemoryContext tmpcontext;
+	MemoryContext oldcontext;
+
+	/*
+	 * This processing generates a great deal of cruft, which we'd like to
+	 * clean up before returning (since this function may be called in a
+	 * query-lifespan memory context).  Make a temp context we can work in so
+	 * that cleanup is easy.
+	 */
+	tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+									   "createTrgmNFA temporary context",
+									   ALLOCSET_DEFAULT_MINSIZE,
+									   ALLOCSET_DEFAULT_INITSIZE,
+									   ALLOCSET_DEFAULT_MAXSIZE);
+	oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+	/*
+	 * Stage 1: Compile the regexp into a NFA, using the regexp library.
+	 */
+#ifdef IGNORECASE
+	RE_compile(&regex, text_re, REG_ADVANCED | REG_ICASE, collation);
+#else
+	RE_compile(&regex, text_re, REG_ADVANCED, collation);
+#endif
+
+	/*
+	 * Since the regexp library allocates its internal data structures with
+	 * malloc, we need to use a PG_TRY block to ensure that pg_regfree() gets
+	 * done even if there's an error.
+	 */
+	PG_TRY();
+	{
+		trg = createTrgmNFAInternal(&regex, graph, rcontext);
+	}
+	PG_CATCH();
+	{
+		pg_regfree(&regex);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	pg_regfree(&regex);
+
+	/* Clean up all the cruft we created */
+	MemoryContextSwitchTo(oldcontext);
+	MemoryContextDelete(tmpcontext);
+
+	return trg;
+}
+
+/*
+ * Body of createTrgmNFA, exclusive of regex compilation/freeing.
+ */
+static TRGM *
+createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
+					  MemoryContext rcontext)
+{
+	TRGM	   *trg;
+	TrgmNFA		trgmNFA;
+
+	trgmNFA.regex = regex;
+
+	/* Collect color information from the regex */
+	getColorInfo(regex, &trgmNFA);
+
+#ifdef TRGM_REGEXP_DEBUG
+	printSourceNFA(regex, trgmNFA.colorInfo, trgmNFA.ncolors);
+#endif
+
+	/*
+	 * Stage 2: Create an expanded graph from the source NFA.
+	 */
+	transformGraph(&trgmNFA);
+
+#ifdef TRGM_REGEXP_DEBUG
+	printTrgmNFA(&trgmNFA);
+#endif
+
+	/*
+	 * Fail if we were unable to make a nontrivial graph, ie it is possible to
+	 * get from the initial state to the final state without reading any
+	 * predictable trigram.
+	 */
+	if (trgmNFA.initState->fin)
+		return NULL;
+
+	/*
+	 * Stage 3: Select color trigrams to expand.  Fail if too many trigrams.
+	 */
+	if (!selectColorTrigrams(&trgmNFA))
+		return NULL;
+
+	/*
+	 * Stage 4: Expand color trigrams and pack graph into final
+	 * representation.
+	 */
+	trg = expandColorTrigrams(&trgmNFA, rcontext);
+
+	*graph = packGraph(&trgmNFA, rcontext);
+
+#ifdef TRGM_REGEXP_DEBUG
+	printTrgmPackedGraph(*graph, trg);
+#endif
+
+	return trg;
+}
+
+/*
+ * Main entry point for evaluating a graph during index scanning.
+ *
+ * The check[] array is indexed by trigram number (in the array of simple
+ * trigrams returned by createTrgmNFA), and holds TRUE for those trigrams
+ * that are present in the index entry being checked.
+ */
+bool
+trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
+{
+	int			i,
+				j,
+				k,
+				queueIn,
+				queueOut;
+
+	/*
+	 * Reset temporary working areas.
+	 */
+	memset(graph->colorTrigramsActive, 0,
+		   sizeof(bool) * graph->colorTrigramsCount);
+	memset(graph->statesActive, 0, sizeof(bool) * graph->statesCount);
+
+	/*
+	 * Check which color trigrams were matched.  A match for any simple
+	 * trigram associated with a color trigram counts as a match of the color
+	 * trigram.
+	 */
+	j = 0;
+	for (i = 0; i < graph->colorTrigramsCount; i++)
+	{
+		int			cnt = graph->colorTrigramGroups[i];
+
+		for (k = j; k < j + cnt; k++)
+		{
+			if (check[k])
+			{
+				/*
+				 * Found one matched trigram in the group. Can skip the rest
+				 * of them and go to the next group.
+				 */
+				graph->colorTrigramsActive[i] = true;
+				break;
+			}
+		}
+		j = j + cnt;
+	}
+
+	/*
+	 * Initialize the statesQueue to hold just the initial state.  Note:
+	 * statesQueue has room for statesCount entries, which is certainly enough
+	 * since no state will be put in the queue more than once. The
+	 * statesActive array marks which states have been queued.
+	 */
+	graph->statesActive[0] = true;
+	graph->statesQueue[0] = 0;
+	queueIn = 0;
+	queueOut = 1;
+
+	/* Process queued states as long as there are any. */
+	while (queueIn < queueOut)
+	{
+		int			stateno = graph->statesQueue[queueIn++];
+		TrgmPackedState *state = &graph->states[stateno];
+		int			cnt = state->arcsCount;
+
+		/* Loop over state's out-arcs */
+		for (i = 0; i < cnt; i++)
+		{
+			TrgmPackedArc *arc = &state->arcs[i];
+
+			/*
+			 * If corresponding color trigram is present then activate the
+			 * corresponding state.  We're done if that's the final state,
+			 * otherwise queue the state if it's not been queued already.
+			 */
+			if (graph->colorTrigramsActive[arc->colorTrgm])
+			{
+				int			nextstate = arc->targetState;
+
+				if (nextstate == 1)
+					return true;	/* success: final state is reachable */
+
+				if (!graph->statesActive[nextstate])
+				{
+					graph->statesActive[nextstate] = true;
+					graph->statesQueue[queueOut++] = nextstate;
+				}
+			}
+		}
+	}
+
+	/* Queue is empty, so match fails. */
+	return false;
+}
+
+/*
+ * Compile regex string into struct at *regex.
+ * NB: pg_regfree must be applied to regex if this completes successfully.
+ */
+static void
+RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation)
+{
+	int			text_re_len = VARSIZE_ANY_EXHDR(text_re);
+	char	   *text_re_val = VARDATA_ANY(text_re);
+	pg_wchar   *pattern;
+	int			pattern_len;
+	int			regcomp_result;
+	char		errMsg[100];
+
+	/* Convert pattern string to wide characters */
+	pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
+	pattern_len = pg_mb2wchar_with_len(text_re_val,
+									   pattern,
+									   text_re_len);
+
+	/* Compile regex */
+	regcomp_result = pg_regcomp(regex,
+								pattern,
+								pattern_len,
+								cflags,
+								collation);
+
+	pfree(pattern);
+
+	if (regcomp_result != REG_OKAY)
+	{
+		/* re didn't compile (no need for pg_regfree, if so) */
+		pg_regerror(regcomp_result, regex, errMsg, sizeof(errMsg));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("invalid regular expression: %s", errMsg)));
+	}
+}
+
+
+/*---------------------
+ * Subroutines for pre-processing the color map (stage 1).
+ *---------------------
+ */
+
+/*
+ * Fill TrgmColorInfo structure for each color using regex export functions.
+ */
+static void
+getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
+{
+	int			colorsCount = pg_reg_getnumcolors(regex);
+	int			i;
+
+	trgmNFA->ncolors = colorsCount;
+	trgmNFA->colorInfo = (TrgmColorInfo *)
+		palloc0(colorsCount * sizeof(TrgmColorInfo));
+
+	/*
+	 * Loop over colors, filling TrgmColorInfo about each.
+	 */
+	for (i = 0; i < colorsCount; i++)
+	{
+		TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[i];
+		int			charsCount = pg_reg_getnumcharacters(regex, i);
+		pg_wchar   *chars;
+		int			j;
+
+		if (charsCount < 0 || charsCount > COLOR_COUNT_LIMIT)
+		{
+			/* Non expandable, or too large to work with */
+			colorInfo->expandable = false;
+			continue;
+		}
+
+		colorInfo->expandable = true;
+		colorInfo->containsNonWord = false;
+		colorInfo->wordChars = (trgm_mb_char *)
+			palloc(sizeof(trgm_mb_char) * charsCount);
+		colorInfo->wordCharsCount = 0;
+
+		/* Extract all the chars in this color */
+		chars = (pg_wchar *) palloc(sizeof(pg_wchar) * charsCount);
+		pg_reg_getcharacters(regex, i, chars, charsCount);
+
+		/*
+		 * Convert characters back to multibyte form, and save only those that
+		 * are word characters.  Set "containsNonWord" if any non-word
+		 * character.  (Note: it'd probably be nicer to keep the chars in
+		 * pg_wchar format for now, but ISWORDCHR wants to see multibyte.)
+		 */
+		for (j = 0; j < charsCount; j++)
+		{
+			trgm_mb_char c;
+
+			if (!convertPgWchar(chars[j], &c))
+				continue;		/* ok to ignore it altogether */
+			if (ISWORDCHR(c.bytes))
+				colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
+			else
+				colorInfo->containsNonWord = true;
+		}
+
+		pfree(chars);
+	}
+}
+
+/*
+ * Convert pg_wchar to multibyte format.
+ * Returns false if the character should be ignored completely.
+ */
+static bool
+convertPgWchar(pg_wchar c, trgm_mb_char *result)
+{
+	/* "s" has enough space for a multibyte character and a trailing NUL */
+	char		s[MAX_MULTIBYTE_CHAR_LEN + 1];
+
+	/*
+	 * We can ignore the NUL character, since it can never appear in a PG text
+	 * string.  This avoids the need for various special cases when
+	 * reconstructing trigrams.
+	 */
+	if (c == 0)
+		return false;
+
+	/* Do the conversion, making sure the result is NUL-terminated */
+	memset(s, 0, sizeof(s));
+	pg_wchar2mb_with_len(&c, s, 1);
+
+	/*
+	 * In IGNORECASE mode, we can ignore uppercase characters.  We assume that
+	 * the regex engine generated both uppercase and lowercase equivalents
+	 * within each color, since we used the REG_ICASE option; so there's no
+	 * need to process the uppercase version.
+	 *
+	 * XXX this code is dependent on the assumption that lowerstr() works the
+	 * same as the regex engine's internal case folding machinery.  Might be
+	 * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
+	 * On the other hand, the trigrams in the index were created using
+	 * lowerstr(), so we're probably screwed if there's any incompatibility
+	 * anyway.
+	 */
+#ifdef IGNORECASE
+	{
+		char	   *lowerCased = lowerstr(s);
+
+		if (strcmp(lowerCased, s) != 0)
+		{
+			pfree(lowerCased);
+			return false;
+		}
+		pfree(lowerCased);
+	}
+#endif
+
+	/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
+	strncpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
+	return true;
+}
+
+
+/*---------------------
+ * Subroutines for expanding original NFA graph into a trigram graph (stage 2).
+ *---------------------
+ */
+
+/*
+ * Transform the graph, given a regex and extracted color information.
+ *
+ * We create and process a queue of expanded-graph states until all the states
+ * are processed.
+ *
+ * This algorithm may be stopped due to resource limitation. In this case we
+ * force every unprocessed branch to immediately finish with matching (this
+ * can give us false positives but no false negatives) by marking all
+ * unprocessed states as final.
+ */
+static void
+transformGraph(TrgmNFA *trgmNFA)
+{
+	HASHCTL		hashCtl;
+	TrgmStateKey initkey;
+	TrgmState  *initstate;
+
+	/* Initialize this stage's workspace in trgmNFA struct */
+	trgmNFA->queue = NIL;
+	trgmNFA->keysQueue = NIL;
+	trgmNFA->arcsCount = 0;
+	trgmNFA->overflowed = false;
+
+	/* Create hashtable for states */
+	hashCtl.keysize = sizeof(TrgmStateKey);
+	hashCtl.entrysize = sizeof(TrgmState);
+	hashCtl.hcxt = CurrentMemoryContext;
+	hashCtl.hash = tag_hash;
+	trgmNFA->states = hash_create("Trigram NFA",
+								  1024,
+								  &hashCtl,
+								  HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION);
+
+	/* Create initial state: ambiguous prefix, NFA's initial state */
+	MemSet(&initkey, 0, sizeof(initkey));
+	initkey.prefix.colors[0] = COLOR_UNKNOWN;
+	initkey.prefix.colors[1] = COLOR_UNKNOWN;
+	initkey.nstate = pg_reg_getinitialstate(trgmNFA->regex);
+
+	initstate = getState(trgmNFA, &initkey);
+	initstate->init = true;
+	trgmNFA->initState = initstate;
+
+	/*
+	 * Recursively build the expanded graph by processing queue of states
+	 * (breadth-first search).  getState already put initstate in the queue.
+	 */
+	while (trgmNFA->queue != NIL)
+	{
+		TrgmState  *state = (TrgmState *) linitial(trgmNFA->queue);
+
+		trgmNFA->queue = list_delete_first(trgmNFA->queue);
+
+		/*
+		 * If we overflowed then just mark state as final.  Otherwise do
+		 * actual processing.
+		 */
+		if (trgmNFA->overflowed)
+			state->fin = true;
+		else
+			processState(trgmNFA, state);
+
+		/* Did we overflow? */
+		if (trgmNFA->arcsCount > MAX_EXPANDED_ARCS ||
+			hash_get_num_entries(trgmNFA->states) > MAX_EXPANDED_STATES)
+			trgmNFA->overflowed = true;
+	}
+}
+
+/*
+ * Process one state: add enter keys and then add outgoing arcs.
+ */
+static void
+processState(TrgmNFA *trgmNFA, TrgmState *state)
+{
+	/* keysQueue should be NIL already, but make sure */
+	trgmNFA->keysQueue = NIL;
+
+	/*
+	 * Add state's own key, and then process all keys added to keysQueue until
+	 * queue is empty.  But we can quit if the state gets marked final.
+	 */
+	addKey(trgmNFA, state, &state->stateKey);
+	while (trgmNFA->keysQueue != NIL && !state->fin)
+	{
+		TrgmStateKey *key = (TrgmStateKey *) linitial(trgmNFA->keysQueue);
+
+		trgmNFA->keysQueue = list_delete_first(trgmNFA->keysQueue);
+		addKey(trgmNFA, state, key);
+	}
+
+	/*
+	 * Add outgoing arcs only if state isn't final (we have no interest in
+	 * outgoing arcs if we already match)
+	 */
+	if (!state->fin)
+		addArcs(trgmNFA, state);
+}
+
+/*
+ * Add the given enter key into the state's enterKeys list, and determine
+ * whether this should result in any further enter keys being added.
+ * If so, add those keys to keysQueue so that processState will handle them.
+ *
+ * If the enter key is for the NFA's final state, set state->fin = TRUE.
+ * This situation means that we can reach the final state from this expanded
+ * state without reading any predictable trigram, so we must consider this
+ * state as an accepting one.
+ *
+ * The given key could be a duplicate of one already in enterKeys, or be
+ * redundant with some enterKeys.  So we check that before doing anything.
+ *
+ * Note that we don't generate any actual arcs here.  addArcs will do that
+ * later, after we have identified all the enter keys for this state.
+ */
+static void
+addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
+{
+	regex_arc_t *arcs;
+	TrgmStateKey destKey;
+	ListCell   *cell,
+			   *prev,
+			   *next;
+	int			i,
+				arcsCount;
+
+	/*
+	 * Ensure any pad bytes in destKey are zero, since it may get used as a
+	 * hashtable key by getState.
+	 */
+	MemSet(&destKey, 0, sizeof(destKey));
+
+	/*
+	 * Compare key to each existing enter key of the state to check for
+	 * redundancy.  We can drop either old key(s) or the new key if we find
+	 * redundancy.
+	 */
+	prev = NULL;
+	cell = list_head(state->enterKeys);
+	while (cell)
+	{
+		TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
+
+		next = lnext(cell);
+		if (existingKey->nstate == key->nstate)
+		{
+			if (prefixContains(&existingKey->prefix, &key->prefix))
+			{
+				/* This old key already covers the new key. Nothing to do */
+				return;
+			}
+			if (prefixContains(&key->prefix, &existingKey->prefix))
+			{
+				/*
+				 * The new key covers this old key. Remove the old key, it's
+				 * no longer needed once we add this key to the list.
+				 */
+				state->enterKeys = list_delete_cell(state->enterKeys,
+													cell, prev);
+			}
+			else
+				prev = cell;
+		}
+		else
+			prev = cell;
+		cell = next;
+	}
+
+	/* No redundancy, so add this key to the state's list */
+	state->enterKeys = lappend(state->enterKeys, key);
+
+	/* If state is now known final, mark it and we're done */
+	if (key->nstate == pg_reg_getfinalstate(trgmNFA->regex))
+	{
+		state->fin = true;
+		return;
+	}
+
+	/*
+	 * Loop through all outgoing arcs of the corresponding state in the
+	 * original NFA.
+	 */
+	arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
+	arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
+	pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
+
+	for (i = 0; i < arcsCount; i++)
+	{
+		regex_arc_t *arc = &arcs[i];
+
+		if (pg_reg_colorisbegin(trgmNFA->regex, arc->co))
+		{
+			/*
+			 * Start of line/string (^).  Trigram extraction treats start of
+			 * line same as start of word: double space prefix is added.
+			 * Hence, make an enter key showing we can reach the arc
+			 * destination with all-blank prefix.
+			 */
+			destKey.prefix.colors[0] = COLOR_BLANK;
+			destKey.prefix.colors[1] = COLOR_BLANK;
+			destKey.nstate = arc->to;
+
+			/* Add enter key to this state */
+			addKeyToQueue(trgmNFA, &destKey);
+		}
+		else if (pg_reg_colorisend(trgmNFA->regex, arc->co))
+		{
+			/*
+			 * End of line/string ($).  We must consider this arc as a
+			 * transition that doesn't read anything.  The reason for adding
+			 * this enter key to the state is that if the arc leads to the
+			 * NFA's final state, we must mark this expanded state as final.
+			 */
+			destKey.prefix.colors[0] = COLOR_UNKNOWN;
+			destKey.prefix.colors[1] = COLOR_UNKNOWN;
+			destKey.nstate = arc->to;
+
+			/* Add enter key to this state */
+			addKeyToQueue(trgmNFA, &destKey);
+		}
+		else
+		{
+			/* Regular color */
+			TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
+
+			if (colorInfo->expandable)
+			{
+				if (colorInfo->containsNonWord &&
+					!validArcLabel(key, COLOR_BLANK))
+				{
+					/*
+					 * We can reach the arc destination after reading a
+					 * non-word character, but the prefix is not something
+					 * that addArc will accept with COLOR_BLANK, so no trigram
+					 * arc can get made for this transition.  We must make an
+					 * enter key to show that the arc destination is
+					 * reachable.  Set it up with an all-blank prefix, since
+					 * that corresponds to what the trigram extraction code
+					 * will do at a word starting boundary.
+					 */
+					destKey.prefix.colors[0] = COLOR_BLANK;
+					destKey.prefix.colors[1] = COLOR_BLANK;
+					destKey.nstate = arc->to;
+					addKeyToQueue(trgmNFA, &destKey);
+				}
+
+				if (colorInfo->wordCharsCount > 0 &&
+					!validArcLabel(key, arc->co))
+				{
+					/*
+					 * We can reach the arc destination after reading a word
+					 * character, but the prefix is not something that addArc
+					 * will accept, so no trigram arc can get made for this
+					 * transition.  We must make an enter key to show that the
+					 * arc destination is reachable.  The prefix for the enter
+					 * key should reflect the info we have for this arc.
+					 */
+					destKey.prefix.colors[0] = key->prefix.colors[1];
+					destKey.prefix.colors[1] = arc->co;
+					destKey.nstate = arc->to;
+					addKeyToQueue(trgmNFA, &destKey);
+				}
+			}
+			else
+			{
+				/*
+				 * Unexpandable color.  Add enter key with ambiguous prefix,
+				 * showing we can reach the destination from this state, but
+				 * the preceding colors will be uncertain.  (We do not set the
+				 * first prefix color to key->prefix.colors[1], because a
+				 * prefix of known followed by unknown is invalid.)
+				 */
+				destKey.prefix.colors[0] = COLOR_UNKNOWN;
+				destKey.prefix.colors[1] = COLOR_UNKNOWN;
+				destKey.nstate = arc->to;
+				addKeyToQueue(trgmNFA, &destKey);
+			}
+		}
+	}
+
+	pfree(arcs);
+}
+
+/*
+ * Add copy of given key to keysQueue for later processing.
+ */
+static void
+addKeyToQueue(TrgmNFA *trgmNFA, TrgmStateKey *key)
+{
+	TrgmStateKey *keyCopy = (TrgmStateKey *) palloc(sizeof(TrgmStateKey));
+
+	memcpy(keyCopy, key, sizeof(TrgmStateKey));
+	trgmNFA->keysQueue = lappend(trgmNFA->keysQueue, keyCopy);
+}
+
+/*
+ * Add outgoing arcs from given state, whose enter keys are all now known.
+ */
+static void
+addArcs(TrgmNFA *trgmNFA, TrgmState *state)
+{
+	TrgmStateKey destKey;
+	ListCell   *cell;
+	regex_arc_t *arcs;
+	int			arcsCount,
+				i;
+
+	/*
+	 * Ensure any pad bytes in destKey are zero, since it may get used as a
+	 * hashtable key by getState.
+	 */
+	MemSet(&destKey, 0, sizeof(destKey));
+
+	/*
+	 * Iterate over enter keys associated with this expanded-graph state. This
+	 * includes both the state's own stateKey, and any enter keys we added to
+	 * it during addKey (which represent expanded-graph states that are not
+	 * distinguishable from this one by means of trigrams).  For each such
+	 * enter key, examine all the out-arcs of the key's underlying NFA state,
+	 * and try to make a trigram arc leading to where the out-arc leads.
+	 * (addArc will deal with whether the arc is valid or not.)
+	 */
+	foreach(cell, state->enterKeys)
+	{
+		TrgmStateKey *key = (TrgmStateKey *) lfirst(cell);
+
+		arcsCount = pg_reg_getnumoutarcs(trgmNFA->regex, key->nstate);
+		arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
+		pg_reg_getoutarcs(trgmNFA->regex, key->nstate, arcs, arcsCount);
+
+		for (i = 0; i < arcsCount; i++)
+		{
+			regex_arc_t *arc = &arcs[i];
+			TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
+
+			/*
+			 * Ignore non-expandable colors; addKey already handled the case.
+			 *
+			 * We need no special check for begin/end pseudocolors here.  We
+			 * don't need to do any processing for them, and they will be
+			 * marked non-expandable since the regex engine will have reported
+			 * them that way.
+			 */
+			if (!colorInfo->expandable)
+				continue;
+
+			if (colorInfo->containsNonWord)
+			{
+				/*
+				 * Color includes non-word character(s).
+				 *
+				 * Generate an arc, treating this transition as occurring on
+				 * BLANK.  This allows word-ending trigrams to be manufactured
+				 * if possible.
+				 */
+				destKey.prefix.colors[0] = key->prefix.colors[1];
+				destKey.prefix.colors[1] = COLOR_BLANK;
+				destKey.nstate = arc->to;
+
+				addArc(trgmNFA, state, key, COLOR_BLANK, &destKey);
+			}
+
+			if (colorInfo->wordCharsCount > 0)
+			{
+				/*
+				 * Color includes word character(s).
+				 *
+				 * Generate an arc.  Color is pushed into prefix of target
+				 * state.
+				 */
+				destKey.prefix.colors[0] = key->prefix.colors[1];
+				destKey.prefix.colors[1] = arc->co;
+				destKey.nstate = arc->to;
+
+				addArc(trgmNFA, state, key, arc->co, &destKey);
+			}
+		}
+
+		pfree(arcs);
+	}
+}
+
+/*
+ * Generate an out-arc of the expanded graph, if it's valid and not redundant.
+ *
+ * state: expanded-graph state we want to add an out-arc to
+ * key: provides prefix colors (key->nstate is not used)
+ * co: transition color
+ * destKey: identifier for destination state of expanded graph
+ */
+static void
+addArc(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key,
+	   TrgmColor co, TrgmStateKey *destKey)
+{
+	TrgmArc    *arc;
+	ListCell   *cell;
+
+	/* Do nothing if this wouldn't be a valid arc label trigram */
+	if (!validArcLabel(key, co))
+		return;
+
+	/*
+	 * Check if we are going to reach key which is covered by a key which is
+	 * already listed in this state.  If so arc is useless: the NFA can bypass
+	 * it through a path that doesn't require any predictable trigram, so
+	 * whether the arc's trigram is present or not doesn't really matter.
+	 */
+	foreach(cell, state->enterKeys)
+	{
+		TrgmStateKey *existingKey = (TrgmStateKey *) lfirst(cell);
+
+		if (existingKey->nstate == destKey->nstate &&
+			prefixContains(&existingKey->prefix, &destKey->prefix))
+			return;
+	}
+
+	/* Checks were successful, add new arc */
+	arc = (TrgmArc *) palloc(sizeof(TrgmArc));
+	arc->target = getState(trgmNFA, destKey);
+	arc->ctrgm.colors[0] = key->prefix.colors[0];
+	arc->ctrgm.colors[1] = key->prefix.colors[1];
+	arc->ctrgm.colors[2] = co;
+
+	state->arcs = lappend(state->arcs, arc);
+	trgmNFA->arcsCount++;
+}
+
+/*
+ * Can we make a valid trigram arc label from the given prefix and arc color?
+ *
+ * This is split out so that tests in addKey and addArc will stay in sync.
+ */
+static bool
+validArcLabel(TrgmStateKey *key, TrgmColor co)
+{
+	/*
+	 * We have to know full trigram in order to add outgoing arc.  So we can't
+	 * do it if prefix is ambiguous.
+	 */
+	if (key->prefix.colors[0] == COLOR_UNKNOWN)
+		return false;
+
+	/* If key->prefix.colors[0] isn't unknown, its second color isn't either */
+	Assert(key->prefix.colors[1] != COLOR_UNKNOWN);
+	/* And we should not be called with an unknown arc color anytime */
+	Assert(co != COLOR_UNKNOWN);
+
+	/*
+	 * We don't bother with making arcs representing three non-word
+	 * characters, since that's useless for trigram extraction.
+	 */
+	if (key->prefix.colors[0] == COLOR_BLANK &&
+		key->prefix.colors[1] == COLOR_BLANK &&
+		co == COLOR_BLANK)
+		return false;
+
+	/*
+	 * We also reject nonblank-blank-anything.  The nonblank-blank-nonblank
+	 * case doesn't correspond to any trigram the trigram extraction code
+	 * would make.  The nonblank-blank-blank case is also not possible with
+	 * RPADDING = 1.  (Note that in many cases we'd fail to generate such a
+	 * trigram even if it were valid, for example processing "foo bar" will
+	 * not result in considering the trigram "o  ".  So if you want to support
+	 * RPADDING = 2, there's more to do than just twiddle this test.)
+	 */
+	if (key->prefix.colors[0] != COLOR_BLANK &&
+		key->prefix.colors[1] == COLOR_BLANK)
+		return false;
+
+	/*
+	 * Other combinations involving blank are valid, in particular we assume
+	 * blank-blank-nonblank is valid, which presumes that LPADDING is 2.
+	 *
+	 * Note: Using again the example "foo bar", we will not consider the
+	 * trigram "  b", though this trigram would be found by the trigram
+	 * extraction code.  Since we will find " ba", it doesn't seem worth
+	 * trying to hack the algorithm to generate the additional trigram.
+	 */
+
+	/* arc label is valid */
+	return true;
+}
+
+/*
+ * Get state of expanded graph for given state key,
+ * and queue the state for processing if it didn't already exist.
+ */
+static TrgmState *
+getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
+{
+	TrgmState  *state;
+	bool		found;
+
+	state = (TrgmState *) hash_search(trgmNFA->states, key, HASH_ENTER,
+									  &found);
+	if (!found)
+	{
+		/* New state: initialize and queue it */
+		state->arcs = NIL;
+		state->enterKeys = NIL;
+		state->init = false;
+		state->fin = false;
+		state->parent = NULL;
+		state->children = NIL;
+		state->number = -1;
+
+		trgmNFA->queue = lappend(trgmNFA->queue, state);
+	}
+	return state;
+}
+
+/*
+ * Check if prefix1 "contains" prefix2.
+ *
+ * "contains" means that any exact prefix (with no ambiguity) that satisfies
+ * prefix2 also satisfies prefix1.
+ */
+static bool
+prefixContains(TrgmPrefix *prefix1, TrgmPrefix *prefix2)
+{
+	if (prefix1->colors[1] == COLOR_UNKNOWN)
+	{
+		/* Fully ambiguous prefix contains everything */
+		return true;
+	}
+	else if (prefix1->colors[0] == COLOR_UNKNOWN)
+	{
+		/*
+		 * Prefix with only first unknown color contains every prefix with
+		 * same second color.
+		 */
+		if (prefix1->colors[1] == prefix2->colors[1])
+			return true;
+		else
+			return false;
+	}
+	else
+	{
+		/* Exact prefix contains only the exact same prefix */
+		if (prefix1->colors[0] == prefix2->colors[0] &&
+			prefix1->colors[1] == prefix2->colors[1])
+			return true;
+		else
+			return false;
+	}
+}
+
+
+/*---------------------
+ * Subroutines for expanding color trigrams into regular trigrams (stage 3).
+ *---------------------
+ */
+
+/*
+ * Get vector of all color trigrams in graph and select which of them
+ * to expand into simple trigrams.
+ *
+ * Returns TRUE if OK, FALSE if exhausted resource limits.
+ */
+static bool
+selectColorTrigrams(TrgmNFA *trgmNFA)
+{
+	HASH_SEQ_STATUS scan_status;
+	int			arcsCount = trgmNFA->arcsCount,
+				i;
+	TrgmState  *state;
+	ColorTrgmInfo *colorTrgms;
+	int64		totalTrgmCount;
+	float4		totalTrgmPenalty;
+	int			number;
+
+	/* Collect color trigrams from all arcs */
+	colorTrgms = (ColorTrgmInfo *) palloc(sizeof(ColorTrgmInfo) * arcsCount);
+	trgmNFA->colorTrgms = colorTrgms;
+
+	i = 0;
+	hash_seq_init(&scan_status, trgmNFA->states);
+	while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
+	{
+		ListCell   *cell;
+
+		foreach(cell, state->arcs)
+		{
+			TrgmArc    *arc = (TrgmArc *) lfirst(cell);
+			TrgmArcInfo *arcInfo = (TrgmArcInfo *) palloc(sizeof(TrgmArcInfo));
+
+			arcInfo->source = state;
+			arcInfo->target = arc->target;
+			colorTrgms[i].arcs = list_make1(arcInfo);
+			colorTrgms[i].expanded = true;
+			colorTrgms[i].ctrgm = arc->ctrgm;
+			i++;
+		}
+	}
+	Assert(i == arcsCount);
+
+	/* Remove duplicates, merging their arcs lists */
+	if (arcsCount >= 2)
+	{
+		ColorTrgmInfo *p1,
+				   *p2;
+
+		/* Sort trigrams to ease duplicate detection */
+		qsort(colorTrgms, arcsCount, sizeof(ColorTrgmInfo), colorTrgmInfoCmp);
+
+		/* p1 is probe point, p2 is last known non-duplicate. */
+		p2 = colorTrgms;
+		for (p1 = colorTrgms + 1; p1 < colorTrgms + arcsCount; p1++)
+		{
+			if (colorTrgmInfoCmp(p1, p2) > 0)
+			{
+				p2++;
+				*p2 = *p1;
+			}
+			else
+			{
+				p2->arcs = list_concat(p2->arcs, p1->arcs);
+			}
+		}
+		trgmNFA->colorTrgmsCount = (p2 - colorTrgms) + 1;
+	}
+	else
+	{
+		trgmNFA->colorTrgmsCount = arcsCount;
+	}
+
+	/*
+	 * Count number of simple trigrams generated by each color trigram, and
+	 * also compute a penalty value, which is the number of simple trigrams
+	 * times a multiplier that depends on its whitespace content.
+	 *
+	 * Note: per-color-trigram counts cannot overflow an int so long as
+	 * COLOR_COUNT_LIMIT is not more than the cube root of INT_MAX, ie about
+	 * 1290.  However, the grand total totalTrgmCount might conceivably
+	 * overflow an int, so we use int64 for that within this routine.  Also,
+	 * penalties are calculated in float4 arithmetic to avoid any overflow
+	 * worries.
+	 */
+	totalTrgmCount = 0;
+	totalTrgmPenalty = 0.0f;
+	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
+	{
+		ColorTrgmInfo *trgmInfo = &colorTrgms[i];
+		int			j,
+					count = 1,
+					typeIndex = 0;
+
+		for (j = 0; j < 3; j++)
+		{
+			TrgmColor	c = trgmInfo->ctrgm.colors[j];
+
+			typeIndex *= 2;
+			if (c == COLOR_BLANK)
+				typeIndex++;
+			else
+				count *= trgmNFA->colorInfo[c].wordCharsCount;
+		}
+		trgmInfo->count = count;
+		totalTrgmCount += count;
+		trgmInfo->penalty = penalties[typeIndex] * (float4) count;
+		totalTrgmPenalty += trgmInfo->penalty;
+	}
+
+	/* Sort color trigrams in descending order of their penalties */
+	qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
+		  colorTrgmInfoPenaltyCmp);
+
+	/*
+	 * Remove color trigrams from the graph so long as total penalty of color
+	 * trigrams exceeds WISH_TRGM_PENALTY.  (If we fail to get down to
+	 * WISH_TRGM_PENALTY, it's OK so long as total count is no more than
+	 * MAX_TRGM_COUNT.)  We prefer to remove color trigrams with higher
+	 * penalty, since those are the most promising for reducing the total
+	 * penalty.  When removing a color trigram we have to merge states
+	 * connected by arcs labeled with that trigram.  It's necessary to not
+	 * merge initial and final states, because our graph becomes useless if
+	 * that happens; so we cannot always remove the trigram we'd prefer to.
+	 */
+	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
+	{
+		ColorTrgmInfo *trgmInfo = &colorTrgms[i];
+		bool		canRemove = true;
+		ListCell   *cell;
+
+		/* Done if we've reached the target */
+		if (totalTrgmPenalty <= WISH_TRGM_PENALTY)
+			break;
+
+		/*
+		 * Does any arc of this color trigram connect initial and final
+		 * states?	If so we can't remove it.
+		 */
+		foreach(cell, trgmInfo->arcs)
+		{
+			TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
+			TrgmState  *source = arcInfo->source,
+					   *target = arcInfo->target;
+
+			/* examine parent states, if any merging has already happened */
+			while (source->parent)
+				source = source->parent;
+			while (target->parent)
+				target = target->parent;
+
+			if ((source->init || target->init) &&
+				(source->fin || target->fin))
+			{
+				canRemove = false;
+				break;
+			}
+		}
+		if (!canRemove)
+			continue;
+
+		/* OK, merge states linked by each arc labeled by the trigram */
+		foreach(cell, trgmInfo->arcs)
+		{
+			TrgmArcInfo *arcInfo = (TrgmArcInfo *) lfirst(cell);
+			TrgmState  *source = arcInfo->source,
+					   *target = arcInfo->target;
+
+			while (source->parent)
+				source = source->parent;
+			while (target->parent)
+				target = target->parent;
+			if (source != target)
+				mergeStates(source, target);
+		}
+
+		/* Mark trigram unexpanded, and update totals */
+		trgmInfo->expanded = false;
+		totalTrgmCount -= trgmInfo->count;
+		totalTrgmPenalty -= trgmInfo->penalty;
+	}
+
+	/* Did we succeed in fitting into MAX_TRGM_COUNT? */
+	if (totalTrgmCount > MAX_TRGM_COUNT)
+		return false;
+
+	trgmNFA->totalTrgmCount = (int) totalTrgmCount;
+
+	/*
+	 * Sort color trigrams by colors (will be useful for bsearch in packGraph)
+	 * and enumerate the color trigrams that are expanded.
+	 */
+	number = 0;
+	qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
+		  colorTrgmInfoCmp);
+	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
+	{
+		if (colorTrgms[i].expanded)
+		{
+			colorTrgms[i].number = number;
+			number++;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Expand selected color trigrams into regular trigrams.
+ *
+ * Returns the TRGM array to be passed to the index machinery.
+ * The array must be allocated in rcontext.
+ */
+static TRGM *
+expandColorTrigrams(TrgmNFA *trgmNFA, MemoryContext rcontext)
+{
+	TRGM	   *trg;
+	trgm	   *p;
+	int			i;
+	TrgmColorInfo blankColor;
+	trgm_mb_char blankChar;
+
+	/* Set up "blank" color structure containing a single zero character */
+	memset(blankChar.bytes, 0, sizeof(blankChar.bytes));
+	blankColor.wordCharsCount = 1;
+	blankColor.wordChars = &blankChar;
+
+	/* Construct the trgm array */
+	trg = (TRGM *)
+		MemoryContextAllocZero(rcontext,
+							   TRGMHDRSIZE +
+							   trgmNFA->totalTrgmCount * sizeof(trgm));
+	trg->flag = ARRKEY;
+	SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, trgmNFA->totalTrgmCount));
+	p = GETARR(trg);
+	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
+	{
+		ColorTrgmInfo *colorTrgm = &trgmNFA->colorTrgms[i];
+		TrgmColorInfo *c[3];
+		trgm_mb_char s[3];
+		int			j,
+					i1,
+					i2,
+					i3;
+
+		/* Ignore any unexpanded trigrams ... */
+		if (!colorTrgm->expanded)
+			continue;
+
+		/* Get colors, substituting the dummy struct for COLOR_BLANK */
+		for (j = 0; j < 3; j++)
+		{
+			if (colorTrgm->ctrgm.colors[j] != COLOR_BLANK)
+				c[j] = &trgmNFA->colorInfo[colorTrgm->ctrgm.colors[j]];
+			else
+				c[j] = &blankColor;
+		}
+
+		/* Iterate over all possible combinations of colors' characters */
+		for (i1 = 0; i1 < c[0]->wordCharsCount; i1++)
+		{
+			s[0] = c[0]->wordChars[i1];
+			for (i2 = 0; i2 < c[1]->wordCharsCount; i2++)
+			{
+				s[1] = c[1]->wordChars[i2];
+				for (i3 = 0; i3 < c[2]->wordCharsCount; i3++)
+				{
+					s[2] = c[2]->wordChars[i3];
+					fillTrgm(p, s);
+					p++;
+				}
+			}
+		}
+	}
+
+	return trg;
+}
+
+/*
+ * Convert trigram into trgm datatype.
+ */
+static void
+fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
+{
+	char		str[3 * MAX_MULTIBYTE_CHAR_LEN],
+			   *p;
+	int			i,
+				j;
+
+	/* Write multibyte string into "str" (we don't need null termination) */
+	p = str;
+
+	for (i = 0; i < 3; i++)
+	{
+		if (s[i].bytes[0] != 0)
+		{
+			for (j = 0; j < MAX_MULTIBYTE_CHAR_LEN && s[i].bytes[j]; j++)
+				*p++ = s[i].bytes[j];
+		}
+		else
+		{
+			/* Emit a space in place of COLOR_BLANK */
+			*p++ = ' ';
+		}
+	}
+
+	/* Convert "str" to a standard trigram (possibly hashing it) */
+	compact_trigram(ptrgm, str, p - str);
+}
+
+/*
+ * Merge two states of graph.
+ */
+static void
+mergeStates(TrgmState *state1, TrgmState *state2)
+{
+	ListCell   *cell;
+
+	Assert(state1 != state2);
+	Assert(!state1->parent);
+	Assert(!state2->parent);
+
+	/* state1 absorbs state2's init/fin flags */
+	state1->init |= state2->init;
+	state1->fin |= state2->fin;
+
+	/* state2, and all its children, become children of state1 */
+	foreach(cell, state2->children)
+	{
+		TrgmState  *state = (TrgmState *) lfirst(cell);
+
+		state->parent = state1;
+	}
+	state2->parent = state1;
+	state1->children = list_concat(state1->children, state2->children);
+	state1->children = lappend(state1->children, state2);
+	state2->children = NIL;
+}
+
+/*
+ * Compare function for sorting of color trigrams by their colors.
+ */
+static int
+colorTrgmInfoCmp(const void *p1, const void *p2)
+{
+	const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
+	const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
+
+	return memcmp(&c1->ctrgm, &c2->ctrgm, sizeof(ColorTrgm));
+}
+
+/*
+ * Compare function for sorting color trigrams in descending order of
+ * their penalty fields.
+ */
+static int
+colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
+{
+	float4		penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
+	float4		penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
+
+	if (penalty1 < penalty2)
+		return 1;
+	else if (penalty1 == penalty2)
+		return 0;
+	else
+		return -1;
+}
+
+
+/*---------------------
+ * Subroutines for packing the graph into final representation (stage 4).
+ *---------------------
+ */
+
+/*
+ * Pack expanded graph into final representation.
+ *
+ * The result data must be allocated in rcontext.
+ */
+static TrgmPackedGraph *
+packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
+{
+	int			number = 2,
+				arcIndex,
+				arcsCount;
+	HASH_SEQ_STATUS scan_status;
+	TrgmState  *state;
+	TrgmPackArcInfo *arcs,
+			   *p1,
+			   *p2;
+	TrgmPackedArc *packedArcs;
+	TrgmPackedGraph *result;
+	int			i,
+				j;
+
+	/* Enumerate surviving states, giving init and fin reserved numbers */
+	hash_seq_init(&scan_status, trgmNFA->states);
+	while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
+	{
+		while (state->parent)
+			state = state->parent;
+
+		if (state->number < 0)
+		{
+			if (state->init)
+				state->number = 0;
+			else if (state->fin)
+				state->number = 1;
+			else
+			{
+				state->number = number;
+				number++;
+			}
+		}
+	}
+
+	/* Collect array of all arcs */
+	arcs = (TrgmPackArcInfo *)
+		palloc(sizeof(TrgmPackArcInfo) * trgmNFA->arcsCount);
+	arcIndex = 0;
+	hash_seq_init(&scan_status, trgmNFA->states);
+	while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
+	{
+		TrgmState  *source = state;
+		ListCell   *cell;
+
+		while (source->parent)
+			source = source->parent;
+
+		foreach(cell, state->arcs)
+		{
+			TrgmArc    *arc = (TrgmArc *) lfirst(cell);
+			TrgmState  *target = arc->target;
+
+			while (target->parent)
+				target = target->parent;
+
+			if (source->number != target->number)
+			{
+				ColorTrgmInfo *ctrgm;
+
+				ctrgm = (ColorTrgmInfo *) bsearch(&arc->ctrgm,
+												  trgmNFA->colorTrgms,
+												  trgmNFA->colorTrgmsCount,
+												  sizeof(ColorTrgmInfo),
+												  colorTrgmInfoCmp);
+				Assert(ctrgm != NULL);
+				Assert(ctrgm->expanded);
+
+				arcs[arcIndex].sourceState = source->number;
+				arcs[arcIndex].targetState = target->number;
+				arcs[arcIndex].colorTrgm = ctrgm->number;
+				arcIndex++;
+			}
+		}
+	}
+
+	/* Sort arcs to ease duplicate detection */
+	qsort(arcs, arcIndex, sizeof(TrgmPackArcInfo), packArcInfoCmp);
+
+	/* We could have duplicates because states were merged. Remove them. */
+	/* p1 is probe point, p2 is last known non-duplicate. */
+	p2 = arcs;
+	for (p1 = arcs + 1; p1 < arcs + arcIndex; p1++)
+	{
+		if (packArcInfoCmp(p1, p2) > 0)
+		{
+			p2++;
+			*p2 = *p1;
+		}
+	}
+	arcsCount = (p2 - arcs) + 1;
+
+	/* Create packed representation */
+	result = (TrgmPackedGraph *)
+		MemoryContextAlloc(rcontext, sizeof(TrgmPackedGraph));
+
+	/* Pack color trigrams information */
+	result->colorTrigramsCount = 0;
+	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
+	{
+		if (trgmNFA->colorTrgms[i].expanded)
+			result->colorTrigramsCount++;
+	}
+	result->colorTrigramGroups = (int *)
+		MemoryContextAlloc(rcontext, sizeof(int) * result->colorTrigramsCount);
+	j = 0;
+	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
+	{
+		if (trgmNFA->colorTrgms[i].expanded)
+		{
+			result->colorTrigramGroups[j] = trgmNFA->colorTrgms[i].count;
+			j++;
+		}
+	}
+
+	/* Pack states and arcs information */
+	result->statesCount = number;
+	result->states = (TrgmPackedState *)
+		MemoryContextAlloc(rcontext, number * sizeof(TrgmPackedState));
+	packedArcs = (TrgmPackedArc *)
+		MemoryContextAlloc(rcontext, arcsCount * sizeof(TrgmPackedArc));
+	j = 0;
+	for (i = 0; i < number; i++)
+	{
+		int			cnt = 0;
+
+		result->states[i].arcs = &packedArcs[j];
+		while (j < arcsCount && arcs[j].sourceState == i)
+		{
+			packedArcs[j].targetState = arcs[j].targetState;
+			packedArcs[j].colorTrgm = arcs[j].colorTrgm;
+			cnt++;
+			j++;
+		}
+		result->states[i].arcsCount = cnt;
+	}
+
+	/* Allocate working memory for trigramsMatchGraph() */
+	result->colorTrigramsActive = (bool *)
+		MemoryContextAlloc(rcontext, sizeof(bool) * result->colorTrigramsCount);
+	result->statesActive = (bool *)
+		MemoryContextAlloc(rcontext, sizeof(bool) * result->statesCount);
+	result->statesQueue = (int *)
+		MemoryContextAlloc(rcontext, sizeof(int) * result->statesCount);
+
+	return result;
+}
+
+/*
+ * Comparison function for sorting TrgmPackArcInfos.
+ *
+ * Compares arcs in following order: sourceState, colorTrgm, targetState.
+ */
+static int
+packArcInfoCmp(const void *a1, const void *a2)
+{
+	const TrgmPackArcInfo *p1 = (const TrgmPackArcInfo *) a1;
+	const TrgmPackArcInfo *p2 = (const TrgmPackArcInfo *) a2;
+
+	if (p1->sourceState < p2->sourceState)
+		return -1;
+	if (p1->sourceState > p2->sourceState)
+		return 1;
+	if (p1->colorTrgm < p2->colorTrgm)
+		return -1;
+	if (p1->colorTrgm > p2->colorTrgm)
+		return 1;
+	if (p1->targetState < p2->targetState)
+		return -1;
+	if (p1->targetState > p2->targetState)
+		return 1;
+	return 0;
+}
+
+
+/*---------------------
+ * Debugging functions
+ *
+ * These are designed to emit GraphViz files.
+ *---------------------
+ */
+
+#ifdef TRGM_REGEXP_DEBUG
+
+/*
+ * Print initial NFA, in regexp library's representation
+ */
+static void
+printSourceNFA(regex_t *regex, TrgmColorInfo *colors, int ncolors)
+{
+	StringInfoData buf;
+	int			nstates = pg_reg_getnumstates(regex);
+	int			state;
+	int			i;
+
+	initStringInfo(&buf);
+
+	appendStringInfoString(&buf, "\ndigraph sourceNFA {\n");
+
+	for (state = 0; state < nstates; state++)
+	{
+		regex_arc_t *arcs;
+		int			i,
+					arcsCount;
+
+		appendStringInfo(&buf, "s%d", state);
+		if (pg_reg_getfinalstate(regex) == state)
+			appendStringInfoString(&buf, " [shape = doublecircle]");
+		appendStringInfoString(&buf, ";\n");
+
+		arcsCount = pg_reg_getnumoutarcs(regex, state);
+		arcs = (regex_arc_t *) palloc(sizeof(regex_arc_t) * arcsCount);
+		pg_reg_getoutarcs(regex, state, arcs, arcsCount);
+
+		for (i = 0; i < arcsCount; i++)
+		{
+			appendStringInfo(&buf, "  s%d -> s%d [label = \"%d\"];\n",
+							 state, arcs[i].to, arcs[i].co);
+		}
+
+		pfree(arcs);
+	}
+
+	appendStringInfoString(&buf, " node [shape = point ]; initial;\n");
+	appendStringInfo(&buf, " initial -> s%d;\n",
+					 pg_reg_getinitialstate(regex));
+
+	/* Print colors */
+	appendStringInfoString(&buf, " { rank = sink;\n");
+	appendStringInfoString(&buf, "  Colors [shape = none, margin=0, label=<\n");
+
+	for (i = 0; i < ncolors; i++)
+	{
+		TrgmColorInfo *color = &colors[i];
+		int			j;
+
+		appendStringInfo(&buf, "<br/>Color %d: ", i);
+		if (color->expandable)
+		{
+			for (j = 0; j < color->wordCharsCount; j++)
+			{
+				char		s[MAX_MULTIBYTE_CHAR_LEN + 1];
+
+				memcpy(s, color->wordChars[j].bytes, MAX_MULTIBYTE_CHAR_LEN);
+				s[MAX_MULTIBYTE_CHAR_LEN] = '\0';
+				appendStringInfoString(&buf, s);
+			}
+		}
+		else
+			appendStringInfoString(&buf, "not expandable");
+		appendStringInfoChar(&buf, '\n');
+	}
+
+	appendStringInfoString(&buf, "  >];\n");
+	appendStringInfoString(&buf, " }\n");
+	appendStringInfoString(&buf, "}\n");
+
+	{
+		/* dot -Tpng -o /tmp/source.png < /tmp/source.dot */
+		FILE	   *fp = fopen("/tmp/source.dot", "w");
+
+		fprintf(fp, "%s", buf.data);
+		fclose(fp);
+	}
+
+	pfree(buf.data);
+}
+
+/*
+ * Print expanded graph.
+ */
+static void
+printTrgmNFA(TrgmNFA *trgmNFA)
+{
+	StringInfoData buf;
+	HASH_SEQ_STATUS scan_status;
+	TrgmState  *state;
+	TrgmState  *initstate = NULL;
+
+	initStringInfo(&buf);
+
+	appendStringInfoString(&buf, "\ndigraph transformedNFA {\n");
+
+	hash_seq_init(&scan_status, trgmNFA->states);
+	while ((state = (TrgmState *) hash_seq_search(&scan_status)) != NULL)
+	{
+		ListCell   *cell;
+
+		appendStringInfo(&buf, "s%p", (void *) state);
+		if (state->fin)
+			appendStringInfoString(&buf, " [shape = doublecircle]");
+		if (state->init)
+			initstate = state;
+		appendStringInfo(&buf, " [label = \"%d\"]", state->stateKey.nstate);
+		appendStringInfoString(&buf, ";\n");
+
+		foreach(cell, state->arcs)
+		{
+			TrgmArc    *arc = (TrgmArc *) lfirst(cell);
+
+			appendStringInfo(&buf, "  s%p -> s%p [label = \"",
+							 (void *) state, (void *) arc->target);
+			printTrgmColor(&buf, arc->ctrgm.colors[0]);
+			appendStringInfoChar(&buf, ' ');
+			printTrgmColor(&buf, arc->ctrgm.colors[1]);
+			appendStringInfoChar(&buf, ' ');
+			printTrgmColor(&buf, arc->ctrgm.colors[2]);
+			appendStringInfoString(&buf, "\"];\n");
+		}
+	}
+
+	if (initstate)
+	{
+		appendStringInfoString(&buf, " node [shape = point ]; initial;\n");
+		appendStringInfo(&buf, " initial -> s%p;\n", (void *) initstate);
+	}
+
+	appendStringInfoString(&buf, "}\n");
+
+	{
+		/* dot -Tpng -o /tmp/transformed.png < /tmp/transformed.dot */
+		FILE	   *fp = fopen("/tmp/transformed.dot", "w");
+
+		fprintf(fp, "%s", buf.data);
+		fclose(fp);
+	}
+
+	pfree(buf.data);
+}
+
+/*
+ * Print a TrgmColor readably.
+ */
+static void
+printTrgmColor(StringInfo buf, TrgmColor co)
+{
+	if (co == COLOR_UNKNOWN)
+		appendStringInfoChar(buf, 'u');
+	else if (co == COLOR_BLANK)
+		appendStringInfoChar(buf, 'b');
+	else
+		appendStringInfo(buf, "%d", (int) co);
+}
+
+/*
+ * Print final packed representation of trigram-based expanded graph.
+ */
+static void
+printTrgmPackedGraph(TrgmPackedGraph *packedGraph, TRGM *trigrams)
+{
+	StringInfoData buf;
+	trgm	   *p;
+	int			i;
+
+	initStringInfo(&buf);
+
+	appendStringInfoString(&buf, "\ndigraph packedGraph {\n");
+
+	for (i = 0; i < packedGraph->statesCount; i++)
+	{
+		TrgmPackedState *state = &packedGraph->states[i];
+		int			j;
+
+		appendStringInfo(&buf, " s%d", i);
+		if (i == 1)
+			appendStringInfoString(&buf, " [shape = doublecircle]");
+
+		appendStringInfo(&buf, " [label = <s%d>];\n", i);
+
+		for (j = 0; j < state->arcsCount; j++)
+		{
+			TrgmPackedArc *arc = &state->arcs[j];
+
+			appendStringInfo(&buf, "  s%d -> s%d [label = \"trigram %d\"];\n",
+							 i, arc->targetState, arc->colorTrgm);
+		}
+	}
+
+	appendStringInfoString(&buf, " node [shape = point ]; initial;\n");
+	appendStringInfo(&buf, " initial -> s%d;\n", 0);
+
+	/* Print trigrams */
+	appendStringInfoString(&buf, " { rank = sink;\n");
+	appendStringInfoString(&buf, "  Trigrams [shape = none, margin=0, label=<\n");
+
+	p = GETARR(trigrams);
+	for (i = 0; i < packedGraph->colorTrigramsCount; i++)
+	{
+		int			count = packedGraph->colorTrigramGroups[i];
+		int			j;
+
+		appendStringInfo(&buf, "<br/>Trigram %d: ", i);
+
+		for (j = 0; j < count; j++)
+		{
+			if (j > 0)
+				appendStringInfoString(&buf, ", ");
+
+			/*
+			 * XXX This representation is nice only for all-ASCII trigrams.
+			 */
+			appendStringInfo(&buf, "\"%c%c%c\"", (*p)[0], (*p)[1], (*p)[2]);
+			p++;
+		}
+	}
+
+	appendStringInfoString(&buf, "  >];\n");
+	appendStringInfoString(&buf, " }\n");
+	appendStringInfoString(&buf, "}\n");
+
+	{
+		/* dot -Tpng -o /tmp/packed.png < /tmp/packed.dot */
+		FILE	   *fp = fopen("/tmp/packed.dot", "w");
+
+		fprintf(fp, "%s", buf.data);
+		fclose(fp);
+	}
+
+	pfree(buf.data);
+}
+
+#endif   /* TRGM_REGEXP_DEBUG */