summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorÁlvaro Herrera2025-06-12 12:21:21 +0000
committerÁlvaro Herrera2025-06-12 12:21:21 +0000
commit0f65f3eec478db8ac4f217a608b4478fed023bac (patch)
treeb792a68d36cd2e559c3e579214833a9348ab16f9 /src
parentf7b11414e939a00faa63e27feae69ceef5bf4b02 (diff)
Fix squashing algorithm for query texts
The algorithm to squash lists of constants added by commit 62d712ecfd94 was a bit too simplistic; we wanted to avoid adding unnecessary complexity, but cases like direct function calls of typecasting functions (and others) were missed, and bogus SQL syntax was being shown in pg_stat_statements normalized query text field. To fix normalization for those cases, we need the parser to transmit information about were each list of constant values starts and ends, so add that to a couple of nodes. Also add a few more test cases to make sure we're doing the right thing. The patch initially submitted by Sami added a new private struct in gram.y to carry the start/end information for A_Expr, but I (Álvaro) decided that a better fix was to remove the parser indirection via the in_expr production, and instead create separate components in the a_expr rule. I'm surprised that this works and doesn't require more changes, but I assume (without checking) that the grammar used to be more complex and got simplified at some point. Bump catversion. Author: Sami Imseih <samimseih@gmail.com> Author: Dmitry Dolgov <9erthalion6@gmail.com> Reviewed-by: Michael Paquier <michael@paquier.xyz> Discussion: https://postgr.es/m/CAA5RZ0tRXoPG2y6bMgBCWNDt0Tn=unRerbzYM=oW0syi1=C1OA@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/nodes/gen_node_support.pl2
-rw-r--r--src/backend/nodes/outfuncs.c2
-rw-r--r--src/backend/nodes/queryjumblefuncs.c153
-rw-r--r--src/backend/nodes/readfuncs.c2
-rw-r--r--src/backend/parser/gram.y104
-rw-r--r--src/backend/parser/parse_expr.c4
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/nodes/parsenodes.h10
-rw-r--r--src/include/nodes/primnodes.h4
9 files changed, 158 insertions, 125 deletions
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index c8595109b0e..9ecddb14231 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1329,7 +1329,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Node type. Squash constants if requested.
if ($query_jumble_squash)
{
- print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ print $jff "\tJUMBLE_ELEMENTS($f, node);\n"
unless $query_jumble_ignore;
}
else
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 25e08ba3426..eaf391fc2ab 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -653,6 +653,8 @@ _outA_Expr(StringInfo str, const A_Expr *node)
WRITE_NODE_FIELD(lexpr);
WRITE_NODE_FIELD(rexpr);
+ WRITE_LOCATION_FIELD(rexpr_list_start);
+ WRITE_LOCATION_FIELD(rexpr_list_end);
WRITE_LOCATION_FIELD(location);
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index ac3cb3d9caf..fb33e6931ad 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -61,9 +61,9 @@ static void AppendJumble(JumbleState *jstate,
const unsigned char *value, Size size);
static void FlushPendingNulls(JumbleState *jstate);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool squashed);
+ int location, int len);
static void _jumbleNode(JumbleState *jstate, Node *node);
-static void _jumbleElements(JumbleState *jstate, List *elements);
+static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -373,15 +373,17 @@ FlushPendingNulls(JumbleState *jstate)
/*
- * Record location of constant within query string of query tree that is
- * currently being walked.
+ * Record the location of some kind of constant within a query string.
+ * These are not only bare constants but also expressions that ultimately
+ * constitute a constant, such as those inside casts and simple function
+ * calls.
*
- * 'squashed' signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * If length is -1, it indicates a single such constant element. If
+ * it's a positive integer, it indicates the length of a squashable
+ * list of them.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool squashed)
+RecordConstLocation(JumbleState *jstate, int location, int len)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -396,9 +398,14 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
- /* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].squashed = squashed;
- jstate->clocations[jstate->clocations_count].length = -1;
+
+ /*
+ * Lengths are either positive integers (indicating a squashable
+ * list), or -1.
+ */
+ Assert(len > -1 || len == -1);
+ jstate->clocations[jstate->clocations_count].length = len;
+ jstate->clocations[jstate->clocations_count].squashed = (len > -1);
jstate->clocations_count++;
}
}
@@ -408,12 +415,12 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
* deduce that the expression is a constant:
*
* - Ignore a possible wrapping RelabelType and CoerceViaIO.
- * - If it's a FuncExpr, check that the function is an implicit
+ * - If it's a FuncExpr, check that the function is a builtin
* cast and its arguments are Const.
* - Otherwise test if the expression is a simple Const.
*/
static bool
-IsSquashableConst(Node *element)
+IsSquashableConstant(Node *element)
{
if (IsA(element, RelabelType))
element = (Node *) ((RelabelType *) element)->arg;
@@ -421,32 +428,50 @@ IsSquashableConst(Node *element)
if (IsA(element, CoerceViaIO))
element = (Node *) ((CoerceViaIO *) element)->arg;
- if (IsA(element, FuncExpr))
+ switch (nodeTag(element))
{
- FuncExpr *func = (FuncExpr *) element;
- ListCell *temp;
+ case T_FuncExpr:
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
- if (func->funcformat != COERCE_IMPLICIT_CAST &&
- func->funcformat != COERCE_EXPLICIT_CAST)
- return false;
+ if (func->funcformat != COERCE_IMPLICIT_CAST &&
+ func->funcformat != COERCE_EXPLICIT_CAST)
+ return false;
- if (func->funcid > FirstGenbkiObjectId)
- return false;
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
- foreach(temp, func->args)
- {
- Node *arg = lfirst(temp);
+ /*
+ * We can check function arguments recursively, being careful
+ * about recursing too deep. At each recursion level it's
+ * enough to test the stack on the first element. (Note that
+ * I wasn't able to hit this without bloating the stack
+ * artificially in this function: the parser errors out before
+ * stack size becomes a problem here.)
+ */
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ {
+ if (foreach_current_index(temp) == 0 &&
+ stack_is_too_deep())
+ return false;
+ else if (!IsSquashableConstant(arg))
+ return false;
+ }
+ }
+
+ return true;
+ }
- if (!IsA(arg, Const)) /* XXX we could recurse here instead */
+ default:
+ if (!IsA(element, Const))
return false;
- }
-
- return true;
}
- if (!IsA(element, Const))
- return false;
-
return true;
}
@@ -461,35 +486,29 @@ IsSquashableConst(Node *element)
* expressions.
*/
static bool
-IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+IsSquashableConstantList(List *elements)
{
ListCell *temp;
- /*
- * If squashing is disabled, or the list is too short, we don't try to
- * squash it.
- */
+ /* If the list is too short, we don't try to squash it. */
if (list_length(elements) < 2)
return false;
foreach(temp, elements)
{
- if (!IsSquashableConst(lfirst(temp)))
+ if (!IsSquashableConstant(lfirst(temp)))
return false;
}
- *firstExpr = linitial(elements);
- *lastExpr = llast(elements);
-
return true;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_ELEMENTS(list) \
- _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_ELEMENTS(list, node) \
+ _jumbleElements(jstate, (List *) expr->list, node)
#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location, false)
+ RecordConstLocation(jstate, expr->location, -1)
#define JUMBLE_FIELD(item) \
do { \
if (sizeof(expr->item) == 8) \
@@ -517,36 +536,36 @@ do { \
#include "queryjumblefuncs.funcs.c"
/*
- * We jumble lists of constant elements as one individual item regardless
- * of how many elements are in the list. This means different queries
- * jumble to the same query_id, if the only difference is the number of
- * elements in the list.
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is know as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow constants to be squashed. To normalize such queries, we use
+ * the start and end locations of the list of elements in a list.
*/
static void
-_jumbleElements(JumbleState *jstate, List *elements)
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
{
- Node *first,
- *last;
+ bool normalize_list = false;
- if (IsSquashableConstList(elements, &first, &last))
+ if (IsSquashableConstantList(elements))
{
- /*
- * If this list of elements is squashable, keep track of the location
- * of its first and last elements. When reading back the locations
- * array, we'll see two consecutive locations with ->squashed set to
- * true, indicating the location of initial and final elements of this
- * list.
- *
- * For the limited set of cases we support now (implicit coerce via
- * FuncExpr, Const) it's fine to use exprLocation of the 'last'
- * expression, but if more complex composite expressions are to be
- * supported (e.g., OpExpr or FuncExpr as an explicit call), more
- * sophisticated tracking will be needed.
- */
- RecordConstLocation(jstate, exprLocation(first), true);
- RecordConstLocation(jstate, exprLocation(last), true);
+ if (IsA(node, ArrayExpr))
+ {
+ ArrayExpr *aexpr = (ArrayExpr *) node;
+
+ if (aexpr->list_start > 0 && aexpr->list_end > 0)
+ {
+ RecordConstLocation(jstate,
+ aexpr->list_start + 1,
+ (aexpr->list_end - aexpr->list_start) - 1);
+ normalize_list = true;
+ }
+ }
}
- else
+
+ if (!normalize_list)
{
_jumbleNode(jstate, (Node *) elements);
}
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 8c90ab54af8..48b5d13b9b6 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -526,6 +526,8 @@ _readA_Expr(void)
READ_NODE_FIELD(lexpr);
READ_NODE_FIELD(rexpr);
+ READ_LOCATION_FIELD(rexpr_list_start);
+ READ_LOCATION_FIELD(rexpr_list_end);
READ_LOCATION_FIELD(location);
READ_DONE();
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 7a9b2ddddec..50f53159d58 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -183,7 +183,7 @@ static void doNegateFloat(Float *v);
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeNotExpr(Node *expr, int location);
-static Node *makeAArrayExpr(List *elements, int location);
+static Node *makeAArrayExpr(List *elements, int location, int end_location);
static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
int location);
static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@@ -522,7 +522,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <defelt> def_elem reloption_elem old_aggr_elem operator_def_elem
%type <node> def_arg columnElem where_clause where_or_current_clause
a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
- columnref in_expr having_clause func_table xmltable array_expr
+ columnref having_clause func_table xmltable array_expr
OptWhereClause operator_def_arg
%type <list> opt_column_and_period_list
%type <list> rowsfrom_item rowsfrom_list opt_col_def_list
@@ -15264,49 +15264,50 @@ a_expr: c_expr { $$ = $1; }
(Node *) list_make2($5, $7),
@2);
}
- | a_expr IN_P in_expr
+ | a_expr IN_P select_with_parens
{
- /* in_expr returns a SubLink or a list of a_exprs */
- if (IsA($3, SubLink))
- {
- /* generate foo = ANY (subquery) */
- SubLink *n = (SubLink *) $3;
+ /* generate foo = ANY (subquery) */
+ SubLink *n = makeNode(SubLink);
- n->subLinkType = ANY_SUBLINK;
- n->subLinkId = 0;
- n->testexpr = $1;
- n->operName = NIL; /* show it's IN not = ANY */
- n->location = @2;
- $$ = (Node *) n;
- }
- else
- {
- /* generate scalar IN expression */
- $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
- }
+ n->subselect = $3;
+ n->subLinkType = ANY_SUBLINK;
+ n->subLinkId = 0;
+ n->testexpr = $1;
+ n->operName = NIL; /* show it's IN not = ANY */
+ n->location = @2;
+ $$ = (Node *) n;
}
- | a_expr NOT_LA IN_P in_expr %prec NOT_LA
+ | a_expr IN_P '(' expr_list ')'
{
- /* in_expr returns a SubLink or a list of a_exprs */
- if (IsA($4, SubLink))
- {
- /* generate NOT (foo = ANY (subquery)) */
- /* Make an = ANY node */
- SubLink *n = (SubLink *) $4;
-
- n->subLinkType = ANY_SUBLINK;
- n->subLinkId = 0;
- n->testexpr = $1;
- n->operName = NIL; /* show it's IN not = ANY */
- n->location = @2;
- /* Stick a NOT on top; must have same parse location */
- $$ = makeNotExpr((Node *) n, @2);
- }
- else
- {
- /* generate scalar NOT IN expression */
- $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
- }
+ /* generate scalar IN expression */
+ A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "=", $1, (Node *) $4, @2);
+
+ n->rexpr_list_start = @3;
+ n->rexpr_list_end = @5;
+ $$ = (Node *) n;
+ }
+ | a_expr NOT_LA IN_P select_with_parens %prec NOT_LA
+ {
+ /* generate NOT (foo = ANY (subquery)) */
+ SubLink *n = makeNode(SubLink);
+
+ n->subselect = $4;
+ n->subLinkType = ANY_SUBLINK;
+ n->subLinkId = 0;
+ n->testexpr = $1;
+ n->operName = NIL; /* show it's IN not = ANY */
+ n->location = @2;
+ /* Stick a NOT on top; must have same parse location */
+ $$ = makeNotExpr((Node *) n, @2);
+ }
+ | a_expr NOT_LA IN_P '(' expr_list ')'
+ {
+ /* generate scalar NOT IN expression */
+ A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "<>", $1, (Node *) $5, @2);
+
+ n->rexpr_list_start = @4;
+ n->rexpr_list_end = @6;
+ $$ = (Node *) n;
}
| a_expr subquery_Op sub_type select_with_parens %prec Op
{
@@ -16741,15 +16742,15 @@ type_list: Typename { $$ = list_make1($1); }
array_expr: '[' expr_list ']'
{
- $$ = makeAArrayExpr($2, @1);
+ $$ = makeAArrayExpr($2, @1, @3);
}
| '[' array_expr_list ']'
{
- $$ = makeAArrayExpr($2, @1);
+ $$ = makeAArrayExpr($2, @1, @3);
}
| '[' ']'
{
- $$ = makeAArrayExpr(NIL, @1);
+ $$ = makeAArrayExpr(NIL, @1, @2);
}
;
@@ -16871,17 +16872,6 @@ trim_list: a_expr FROM expr_list { $$ = lappend($3, $1); }
| expr_list { $$ = $1; }
;
-in_expr: select_with_parens
- {
- SubLink *n = makeNode(SubLink);
-
- n->subselect = $1;
- /* other fields will be filled later */
- $$ = (Node *) n;
- }
- | '(' expr_list ')' { $$ = (Node *) $2; }
- ;
-
/*
* Define SQL-style CASE clause.
* - Full specification
@@ -19232,12 +19222,14 @@ makeNotExpr(Node *expr, int location)
}
static Node *
-makeAArrayExpr(List *elements, int location)
+makeAArrayExpr(List *elements, int location, int location_end)
{
A_ArrayExpr *n = makeNode(A_ArrayExpr);
n->elements = elements;
n->location = location;
+ n->list_start = location;
+ n->list_end = location_end;
return (Node *) n;
}
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 1f8e2d54673..d66276801c6 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1223,6 +1223,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
newa->element_typeid = scalar_type;
newa->elements = aexprs;
newa->multidims = false;
+ newa->list_start = a->rexpr_list_start;
+ newa->list_end = a->rexpr_list_end;
newa->location = -1;
result = (Node *) make_scalar_array_op(pstate,
@@ -2165,6 +2167,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
/* array_collid will be set by parse_collate.c */
newa->element_typeid = element_type;
newa->elements = newcoercedelems;
+ newa->list_start = a->list_start;
+ newa->list_end = a->list_end;
newa->location = a->location;
return (Node *) newa;
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index f2971485d8f..67fbe9c9292 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202506021
+#define CATALOG_VERSION_NO 202506121
#endif
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 0c1120beff3..ba12678d1cb 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -351,6 +351,14 @@ typedef struct A_Expr
List *name; /* possibly-qualified name of operator */
Node *lexpr; /* left argument, or NULL if none */
Node *rexpr; /* right argument, or NULL if none */
+
+ /*
+ * If rexpr is a list of some kind, we separately track its starting and
+ * ending location; it's not the same as the starting and ending location
+ * of the token itself.
+ */
+ ParseLoc rexpr_list_start;
+ ParseLoc rexpr_list_end;
ParseLoc location; /* token location, or -1 if unknown */
} A_Expr;
@@ -506,6 +514,8 @@ typedef struct A_ArrayExpr
{
NodeTag type;
List *elements; /* array element expressions */
+ ParseLoc list_start; /* start of the element list */
+ ParseLoc list_end; /* end of the elements list */
ParseLoc location; /* token location, or -1 if unknown */
} A_ArrayExpr;
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 7d3b4198f26..01510b01b64 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1397,6 +1397,10 @@ typedef struct ArrayExpr
List *elements pg_node_attr(query_jumble_squash);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
+ /* location of the start of the elements list */
+ ParseLoc list_start;
+ /* location of the end of the elements list */
+ ParseLoc list_end;
/* token location, or -1 if unknown */
ParseLoc location;
} ArrayExpr;