summaryrefslogtreecommitdiff
path: root/src/backend/parser
diff options
context:
space:
mode:
authorMichael Paquier2024-10-24 00:28:51 +0000
committerMichael Paquier2024-10-24 00:29:54 +0000
commit499edb09741b8fad2de038361fb342aae6e6007f (patch)
tree03e40bbc6cf03c6f6442525865391e2b7066de57 /src/backend/parser
parent4b096c67e0eed81e287094b9692fff72b9ea3eef (diff)
Track more precisely query locations for nested statements
Previously, a Query generated through the transform phase would have an unset stmt_location, the field tracking the starting point of a statement in the query string. Extensions relying on the statement location to extract the relevant part of the source text would fall back to using the whole query string instead, leading to confusing results, for example in pg_stat_statements, for statements containing nested queries, like: - EXPLAIN, with the top-level and nested queries using the same query string, and a query ID coming from the nested query for the non-top-level entry. - Multi-statements, with only partial portions of queries being normalized. - COPY TO with a query, SELECT or DMLs. This patch improves things by keeping track of the statement locations and propagating them to the Query during transform, allowing pg_stat_statements (PGSS) to show only the relevant part of the query for nested queries. This leads to less bloat for non-top-level entries, as queries can now be grouped within the same (toplevel, queryid) pairs in pg_stat_statements. The result is a stricter one-to-one mapping between query IDs and their query strings. The regression tests introduced in 45e0ba30fc40 produce differences reflecting the new logic. Author: Anthonin Bonnefoy Reviewed-by: Michael Paquier, Jian He Discussion: https://postgr.es/m/CAO6_XqqM6S9bQ2qd=75W+yKATwoazxSNhv5sjW06fjGAtHbTUA@mail.gmail.com
Diffstat (limited to 'src/backend/parser')
-rw-r--r--src/backend/parser/analyze.c95
-rw-r--r--src/backend/parser/gram.y80
2 files changed, 164 insertions, 11 deletions
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 8a6ba1692e8..506e0631615 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -239,23 +239,107 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
}
/*
+ * setQueryLocationAndLength
+ *		Set query's location and length from statement and ParseState
+ *
+ * pstate carries the location (p_stmt_location) and length (p_stmt_len) of
+ * the top level RawStmt; qry is the Query to update; parseTree is the raw
+ * statement that qry was built from.
+ *
+ * Some statements, like PreparableStmt, can be located within parentheses.
+ * For example "(SELECT 1)" or "COPY (UPDATE ...) to x;".  For those, we
+ * cannot use the whole string from the statement's location, or the
+ * extracted SQL text would include characters that are not part of the
+ * statement (such as the closing parenthesis).  The parser will set
+ * stmt_len, reflecting the size of the statement within the parentheses.
+ * Thus, when stmt_len is available, we need to use it for the Query's
+ * stmt_len.
+ *
+ * For other cases, the parser can't provide the length of individual
+ * statements.  However, we have the statement's location plus the length
+ * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt,
+ * stored in pstate.  Thus, the statement's length is the RawStmt's length
+ * minus how much we've advanced in the RawStmt's string.
+ *
+ * If neither length is known (both are 0), qry->stmt_len is left untouched,
+ * a length of 0 meaning that the rest of the string is used.  The location
+ * is set in every case.
+ */
+static void
+setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree)
+{
+	ParseLoc	stmt_len = 0;
+
+	/*
+	 * Always record the statement's location, even when the top RawStmt's
+	 * length is unknown (p_stmt_len == 0).  Returning early in that case
+	 * would leave qry->stmt_location unset although the RawStmt's location
+	 * is known, and would also discard a stmt_len provided by the parser.
+	 */
+	switch (nodeTag(parseTree))
+	{
+		case T_InsertStmt:
+			qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location;
+			stmt_len = ((InsertStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_DeleteStmt:
+			qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location;
+			stmt_len = ((DeleteStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_UpdateStmt:
+			qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location;
+			stmt_len = ((UpdateStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_MergeStmt:
+			qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location;
+			stmt_len = ((MergeStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_SelectStmt:
+			qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location;
+			stmt_len = ((SelectStmt *) parseTree)->stmt_len;
+			break;
+
+		case T_PLAssignStmt:
+			/* Only a location is read here; the length is derived below */
+			qry->stmt_location = ((PLAssignStmt *) parseTree)->location;
+			break;
+
+		default:
+			/* No per-statement data, use the top RawStmt's location */
+			qry->stmt_location = pstate->p_stmt_location;
+			break;
+	}
+
+	if (stmt_len > 0)
+	{
+		/* Statement's length is known, use it */
+		qry->stmt_len = stmt_len;
+	}
+	else if (pstate->p_stmt_len > 0)
+	{
+		/*
+		 * The top RawStmt's length is known, so compute the statement's
+		 * length from the statement's location and the RawStmt's length and
+		 * location.
+		 */
+		qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location);
+	}
+
+	/* The resulting statement length should never be negative. */
+	Assert(qry->stmt_len >= 0);
+}
+
+/*
* transformTopLevelStmt -
* transform a Parse tree into a Query tree.
*
- * This function is just responsible for transferring statement location data
- * from the RawStmt into the finished Query.
+ * This function is just responsible for storing location data
+ * from the RawStmt into the ParseState.
+ *
+ * The transformation itself is delegated to transformOptionalSelectInto().
+ * The returned Query's stmt_location and stmt_len are not assigned here:
+ * transformStmt() calls setQueryLocationAndLength(), which reads the
+ * p_stmt_location and p_stmt_len values saved in pstate below.
*/
Query *
transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
{
Query *result;
+ /* Store RawStmt's length and location in pstate */
+ pstate->p_stmt_len = parseTree->stmt_len;
+ pstate->p_stmt_location = parseTree->stmt_location;
+
/* We're at top level, so allow SELECT INTO */
result = transformOptionalSelectInto(pstate, parseTree->stmt);
- result->stmt_location = parseTree->stmt_location;
- result->stmt_len = parseTree->stmt_len;
-
return result;
}
@@ -424,6 +508,7 @@ transformStmt(ParseState *pstate, Node *parseTree)
/* Mark as original query until we learn differently */
result->querySource = QSRC_ORIGINAL;
result->canSetTag = true;
+ setQueryLocationAndLength(pstate, result, parseTree);
return result;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 4bab2117d96..baca4059d2e 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -153,6 +153,7 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
const char *msg);
static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
static void updateRawStmtEnd(RawStmt *rs, int end_location);
+static void updatePreparableStmtEnd(Node *n, int end_location);
static Node *makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner);
static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -176,7 +177,7 @@ static void insertSelectOptions(SelectStmt *stmt,
SelectLimit *limitClause,
WithClause *withClause,
core_yyscan_t yyscanner);
-static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
+static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location);
static Node *doNegate(Node *n, int location);
static void doNegateFloat(Float *v);
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
@@ -3383,6 +3384,7 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list
{
CopyStmt *n = makeNode(CopyStmt);
+ updatePreparableStmtEnd($3, @4);
n->relation = NULL;
n->query = $3;
n->attlist = NIL;
@@ -12150,6 +12152,7 @@ InsertStmt:
$5->onConflictClause = $6;
$5->returningList = $7;
$5->withClause = $1;
+ $5->stmt_location = @$;
$$ = (Node *) $5;
}
;
@@ -12303,6 +12306,7 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias
n->whereClause = $6;
n->returningList = $7;
n->withClause = $1;
+ n->stmt_location = @$;
$$ = (Node *) n;
}
;
@@ -12377,6 +12381,7 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
n->whereClause = $7;
n->returningList = $8;
n->withClause = $1;
+ n->stmt_location = @$;
$$ = (Node *) n;
}
;
@@ -12454,6 +12459,7 @@ MergeStmt:
m->joinCondition = $8;
m->mergeWhenClauses = $9;
m->returningList = $10;
+ m->stmt_location = @$;
$$ = (Node *) m;
}
@@ -12694,7 +12700,20 @@ SelectStmt: select_no_parens %prec UMINUS
;
select_with_parens:
- '(' select_no_parens ')' { $$ = $2; }
+ '(' select_no_parens ')'
+ {
+ SelectStmt *n = (SelectStmt *) $2;
+
+ /*
+ * As SelectStmt's location starts at the SELECT keyword,
+ * we need to track the length of the SelectStmt within
+ * parentheses to be able to extract the relevant part
+ * of the query. Without this, the RawStmt's length would
+ * be used and would include the closing parenthesis.
+ *
+ * @2 is the location where select_no_parens starts and @3
+ * the location of the closing parenthesis, so their
+ * difference is the length of the text between the
+ * parentheses.
+ */
+ n->stmt_len = @3 - @2;
+ $$ = $2;
+ }
| '(' select_with_parens ')' { $$ = $2; }
;
@@ -12816,6 +12835,7 @@ simple_select:
n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
+ n->stmt_location = @1;
$$ = (Node *) n;
}
| SELECT distinct_clause target_list
@@ -12833,6 +12853,7 @@ simple_select:
n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
+ n->stmt_location = @1;
$$ = (Node *) n;
}
| values_clause { $$ = $1; }
@@ -12853,19 +12874,20 @@ simple_select:
n->targetList = list_make1(rt);
n->fromClause = list_make1($2);
+ n->stmt_location = @1;
$$ = (Node *) n;
}
| select_clause UNION set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
+ $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
}
| select_clause INTERSECT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
+ $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
}
| select_clause EXCEPT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
+ $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
}
;
@@ -13423,6 +13445,7 @@ values_clause:
{
SelectStmt *n = makeNode(SelectStmt);
+ n->stmt_location = @1;
n->valuesLists = list_make1($3);
$$ = (Node *) n;
}
@@ -18565,6 +18588,47 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
rs->stmt_len = end_location - rs->stmt_location;
}
+/*
+ * updatePreparableStmtEnd
+ *		Record where a PreparableStmt stops when it does not extend to the
+ *		end of the string.
+ *
+ * The statement's stmt_len becomes the distance between its stmt_location
+ * and end_location.  SelectStmt, InsertStmt, UpdateStmt, DeleteStmt and
+ * MergeStmt nodes are handled; any other node type raises an error.
+ */
+static void
+updatePreparableStmtEnd(Node *n, int end_location)
+{
+	switch (nodeTag(n))
+	{
+		case T_SelectStmt:
+			{
+				SelectStmt *sel = (SelectStmt *) n;
+
+				sel->stmt_len = end_location - sel->stmt_location;
+			}
+			break;
+
+		case T_InsertStmt:
+			{
+				InsertStmt *ins = (InsertStmt *) n;
+
+				ins->stmt_len = end_location - ins->stmt_location;
+			}
+			break;
+
+		case T_UpdateStmt:
+			{
+				UpdateStmt *upd = (UpdateStmt *) n;
+
+				upd->stmt_len = end_location - upd->stmt_location;
+			}
+			break;
+
+		case T_DeleteStmt:
+			{
+				DeleteStmt *del = (DeleteStmt *) n;
+
+				del->stmt_len = end_location - del->stmt_location;
+			}
+			break;
+
+		case T_MergeStmt:
+			{
+				MergeStmt  *mrg = (MergeStmt *) n;
+
+				mrg->stmt_len = end_location - mrg->stmt_location;
+			}
+			break;
+
+		default:
+			/* Not one of the PreparableStmt node types handled above */
+			elog(ERROR, "unexpected node type %d", (int) n->type);
+			break;
+	}
+}
+
static Node *
makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner)
@@ -18943,11 +19007,14 @@ insertSelectOptions(SelectStmt *stmt,
errmsg("multiple WITH clauses not allowed"),
parser_errposition(exprLocation((Node *) withClause))));
stmt->withClause = withClause;
+
+ /* Update SelectStmt's location to the start of the WITH clause */
+ stmt->stmt_location = withClause->location;
}
}
static Node *
-makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
+makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
{
SelectStmt *n = makeNode(SelectStmt);
@@ -18955,6 +19022,7 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
n->all = all;
n->larg = (SelectStmt *) larg;
n->rarg = (SelectStmt *) rarg;
+ n->stmt_location = location;
return (Node *) n;
}