summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Paquier2025-06-12 01:08:55 +0000
committerMichael Paquier2025-06-12 01:08:55 +0000
commitf85f6ab051b7cf6950247e5fa6072c4130613555 (patch)
tree690848a22a978346b92b7b744651e218dc570c50 /src
parentdd2ce3792754e0705884aed15d1fd2ae43cc1500 (diff)
Revert support for improved tracking of nested queries
This commit reverts the two following commits: - 499edb09741b, track more precisely query locations for nested statements. - 06450c7b8c70, a follow-up fix of 499edb09741b with query locations. The test introduced in this commit is not reverted. This is proving useful to track a problem that only pgaudit was able to detect. These prove to have issues with the tracking of SELECT statements, when these use multiple parenthesis which is something supported by the grammar. Incorrect location and lengths are causing pg_stat_statements to become confused, failing its job in query normalization with potential out-of-bound writes because the location and the length may not match with what can be handled. A lot of the query patterns discussed when this issue was reported have no test coverage in the main regression test suite, or the recovery test 027_stream_regress.pl would have caught the problems as pg_stat_statements is loaded by the node running the regression tests. A first step would be to improve the test coverage to stress more the query normalization logic. A different portion of this work was done in 45e0ba30fc40, with the addition of tests for nested queries. These can be left in the tree. They are useful to track the way inner queries are currently tracked by PGSS with non-top-level entries, and will be useful when reconsidering in the future the work reverted here. Reported-by: Alexander Kozhemyakin <a.kozhemyakin@postgrespro.ru> Discussion: https://postgr.es/m/18947-cdd2668beffe02bf@postgresql.org
Diffstat (limited to 'src')
-rw-r--r--src/backend/parser/analyze.c90
-rw-r--r--src/backend/parser/gram.y80
-rw-r--r--src/include/nodes/parsenodes.h10
-rw-r--r--src/include/parser/parse_node.h16
4 files changed, 11 insertions, 185 deletions
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index a16fdd65601..34f7c17f576 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -239,102 +239,23 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
}
/*
- * setQueryLocationAndLength
- * Set query's location and length from statement and ParseState
- *
- * Some statements, like PreparableStmt, can be located within parentheses.
- * For example "(SELECT 1)" or "COPY (UPDATE ...) to x;". For those, we
- * cannot use the whole string from the statement's location or the SQL
- * string would yield incorrectly. The parser will set stmt_len, reflecting
- * the size of the statement within the parentheses. Thus, when stmt_len is
- * available, we need to use it for the Query's stmt_len.
- *
- * For other cases, the parser can't provide the length of individual
- * statements. However, we have the statement's location plus the length
- * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt,
- * stored in pstate. Thus, the statement's length is the RawStmt's length
- * minus how much we've advanced in the RawStmt's string. If p_stmt_len
- * is 0, the SQL string is used up to its end.
- */
-static void
-setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree)
-{
- ParseLoc stmt_len = 0;
-
- switch (nodeTag(parseTree))
- {
- case T_InsertStmt:
- qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location;
- stmt_len = ((InsertStmt *) parseTree)->stmt_len;
- break;
-
- case T_DeleteStmt:
- qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location;
- stmt_len = ((DeleteStmt *) parseTree)->stmt_len;
- break;
-
- case T_UpdateStmt:
- qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location;
- stmt_len = ((UpdateStmt *) parseTree)->stmt_len;
- break;
-
- case T_MergeStmt:
- qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location;
- stmt_len = ((MergeStmt *) parseTree)->stmt_len;
- break;
-
- case T_SelectStmt:
- qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location;
- stmt_len = ((SelectStmt *) parseTree)->stmt_len;
- break;
-
- case T_PLAssignStmt:
- qry->stmt_location = ((PLAssignStmt *) parseTree)->location;
- break;
-
- default:
- qry->stmt_location = pstate->p_stmt_location;
- break;
- }
-
- if (stmt_len > 0)
- {
- /* Statement's length is known, use it */
- qry->stmt_len = stmt_len;
- }
- else if (pstate->p_stmt_len > 0)
- {
- /*
- * The top RawStmt's length is known, so calculate the statement's
- * length from the statement's location and the RawStmt's length and
- * location.
- */
- qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location);
- }
-
- /* The calculated statement length should be calculated as positive. */
- Assert(qry->stmt_len >= 0);
-}
-
-/*
* transformTopLevelStmt -
* transform a Parse tree into a Query tree.
*
- * This function is just responsible for storing location data
- * from the RawStmt into the ParseState.
+ * This function is just responsible for transferring statement location data
+ * from the RawStmt into the finished Query.
*/
Query *
transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
{
Query *result;
- /* Store RawStmt's length and location in pstate */
- pstate->p_stmt_len = parseTree->stmt_len;
- pstate->p_stmt_location = parseTree->stmt_location;
-
/* We're at top level, so allow SELECT INTO */
result = transformOptionalSelectInto(pstate, parseTree->stmt);
+ result->stmt_location = parseTree->stmt_location;
+ result->stmt_len = parseTree->stmt_len;
+
return result;
}
@@ -503,7 +424,6 @@ transformStmt(ParseState *pstate, Node *parseTree)
/* Mark as original query until we learn differently */
result->querySource = QSRC_ORIGINAL;
result->canSetTag = true;
- setQueryLocationAndLength(pstate, result, parseTree);
return result;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0b5652071d1..7a9b2ddddec 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -154,7 +154,6 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
const char *msg);
static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
static void updateRawStmtEnd(RawStmt *rs, int end_location);
-static void updatePreparableStmtEnd(Node *n, int end_location);
static Node *makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner);
static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -178,7 +177,7 @@ static void insertSelectOptions(SelectStmt *stmt,
SelectLimit *limitClause,
WithClause *withClause,
core_yyscan_t yyscanner);
-static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location);
+static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
static Node *doNegate(Node *n, int location);
static void doNegateFloat(Float *v);
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
@@ -3417,7 +3416,6 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list
{
CopyStmt *n = makeNode(CopyStmt);
- updatePreparableStmtEnd($3, @4);
n->relation = NULL;
n->query = $3;
n->attlist = NIL;
@@ -12240,7 +12238,6 @@ InsertStmt:
$5->onConflictClause = $6;
$5->returningClause = $7;
$5->withClause = $1;
- $5->stmt_location = @$;
$$ = (Node *) $5;
}
;
@@ -12431,7 +12428,6 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias
n->whereClause = $6;
n->returningClause = $7;
n->withClause = $1;
- n->stmt_location = @$;
$$ = (Node *) n;
}
;
@@ -12506,7 +12502,6 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
n->whereClause = $7;
n->returningClause = $8;
n->withClause = $1;
- n->stmt_location = @$;
$$ = (Node *) n;
}
;
@@ -12584,7 +12579,6 @@ MergeStmt:
m->joinCondition = $8;
m->mergeWhenClauses = $9;
m->returningClause = $10;
- m->stmt_location = @$;
$$ = (Node *) m;
}
@@ -12825,20 +12819,7 @@ SelectStmt: select_no_parens %prec UMINUS
;
select_with_parens:
- '(' select_no_parens ')'
- {
- SelectStmt *n = (SelectStmt *) $2;
-
- /*
- * As SelectStmt's location starts at the SELECT keyword,
- * we need to track the length of the SelectStmt within
- * parentheses to be able to extract the relevant part
- * of the query. Without this, the RawStmt's length would
- * be used and would include the closing parenthesis.
- */
- n->stmt_len = @3 - @2;
- $$ = $2;
- }
+ '(' select_no_parens ')' { $$ = $2; }
| '(' select_with_parens ')' { $$ = $2; }
;
@@ -12960,7 +12941,6 @@ simple_select:
n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
- n->stmt_location = @1;
$$ = (Node *) n;
}
| SELECT distinct_clause target_list
@@ -12978,7 +12958,6 @@ simple_select:
n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
- n->stmt_location = @1;
$$ = (Node *) n;
}
| values_clause { $$ = $1; }
@@ -12999,20 +12978,19 @@ simple_select:
n->targetList = list_make1(rt);
n->fromClause = list_make1($2);
- n->stmt_location = @1;
$$ = (Node *) n;
}
| select_clause UNION set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+ $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
| select_clause INTERSECT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+ $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
| select_clause EXCEPT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+ $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
;
@@ -13590,7 +13568,6 @@ values_clause:
{
SelectStmt *n = makeNode(SelectStmt);
- n->stmt_location = @1;
n->valuesLists = list_make1($3);
$$ = (Node *) n;
}
@@ -18748,47 +18725,6 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
rs->stmt_len = end_location - rs->stmt_location;
}
-/*
- * Adjust a PreparableStmt to reflect that it doesn't run to the end of the
- * string.
- */
-static void
-updatePreparableStmtEnd(Node *n, int end_location)
-{
- if (IsA(n, SelectStmt))
- {
- SelectStmt *stmt = (SelectStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, InsertStmt))
- {
- InsertStmt *stmt = (InsertStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, UpdateStmt))
- {
- UpdateStmt *stmt = (UpdateStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, DeleteStmt))
- {
- DeleteStmt *stmt = (DeleteStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, MergeStmt))
- {
- MergeStmt *stmt = (MergeStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else
- elog(ERROR, "unexpected node type %d", (int) n->type);
-}
-
static Node *
makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner)
@@ -19167,14 +19103,11 @@ insertSelectOptions(SelectStmt *stmt,
errmsg("multiple WITH clauses not allowed"),
parser_errposition(exprLocation((Node *) withClause))));
stmt->withClause = withClause;
-
- /* Update SelectStmt's location to the start of the WITH clause */
- stmt->stmt_location = withClause->location;
}
}
static Node *
-makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
+makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
{
SelectStmt *n = makeNode(SelectStmt);
@@ -19182,7 +19115,6 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
n->all = all;
n->larg = (SelectStmt *) larg;
n->rarg = (SelectStmt *) rarg;
- n->stmt_location = location;
return (Node *) n;
}
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index dd00ab420b8..0c1120beff3 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2100,8 +2100,6 @@ typedef struct InsertStmt
ReturningClause *returningClause; /* RETURNING clause */
WithClause *withClause; /* WITH clause */
OverridingKind override; /* OVERRIDING clause */
- ParseLoc stmt_location; /* start location, or -1 if unknown */
- ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */
} InsertStmt;
/* ----------------------
@@ -2116,8 +2114,6 @@ typedef struct DeleteStmt
Node *whereClause; /* qualifications */
ReturningClause *returningClause; /* RETURNING clause */
WithClause *withClause; /* WITH clause */
- ParseLoc stmt_location; /* start location, or -1 if unknown */
- ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */
} DeleteStmt;
/* ----------------------
@@ -2133,8 +2129,6 @@ typedef struct UpdateStmt
List *fromClause; /* optional from clause for more tables */
ReturningClause *returningClause; /* RETURNING clause */
WithClause *withClause; /* WITH clause */
- ParseLoc stmt_location; /* start location, or -1 if unknown */
- ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */
} UpdateStmt;
/* ----------------------
@@ -2150,8 +2144,6 @@ typedef struct MergeStmt
List *mergeWhenClauses; /* list of MergeWhenClause(es) */
ReturningClause *returningClause; /* RETURNING clause */
WithClause *withClause; /* WITH clause */
- ParseLoc stmt_location; /* start location, or -1 if unknown */
- ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */
} MergeStmt;
/* ----------------------
@@ -2221,8 +2213,6 @@ typedef struct SelectStmt
bool all; /* ALL specified? */
struct SelectStmt *larg; /* left child */
struct SelectStmt *rarg; /* right child */
- ParseLoc stmt_location; /* start location, or -1 if unknown */
- ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */
/* Eventually add fields for CORRESPONDING spec here */
} SelectStmt;
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 994284019fb..f7d07c84542 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -108,20 +108,6 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
* byte-wise locations in parse structures to character-wise cursor
* positions.)
*
- * p_stmt_location: location of the top level RawStmt's start. During
- * transformation, the Query's location will be set to the statement's
- * location if available. Otherwise, the RawStmt's start location will
- * be used. Propagating the location through ParseState is needed for
- * the Query length calculation (see p_stmt_len below).
- *
- * p_stmt_len: length of the top level RawStmt. Most of the time, the
- * statement's length is not provided by the parser, with the exception
- * of SelectStmt within parentheses and PreparableStmt in COPY. If the
- * statement's location is provided by the parser, the top-level location
- * and length are needed to accurately compute the Query's length. If the
- * statement's location is not provided, the RawStmt's length can be used
- * directly.
- *
* p_rtable: list of RTEs that will become the rangetable of the query.
* Note that neither relname nor refname of these entries are necessarily
* unique; searching the rtable by name is a bad idea.
@@ -207,8 +193,6 @@ struct ParseState
{
ParseState *parentParseState; /* stack link */
const char *p_sourcetext; /* source text, or NULL if not available */
- ParseLoc p_stmt_location; /* start location, or -1 if unknown */
- ParseLoc p_stmt_len; /* length in bytes; 0 means "rest of string" */
List *p_rtable; /* range table so far */
List *p_rteperminfos; /* list of RTEPermissionInfo nodes for each
* RTE_RELATION entry in rtable */