Skip to content

Commit e17fa2c

Browse files
author
Commitfest Bot
committed
[CF 50/4573] v6 - pg_ctl start may return 0 even if the postmaster has been already started on Windows
This commit was automatically generated by a robot at cfbot.cputube.org. It is based on patches submitted to the PostgreSQL mailing lists and registered in the PostgreSQL Commitfest application. This branch will be overwritten each time a new patch version is posted to the email thread, and also periodically to check for bitrot caused by changes on the master branch. Commitfest entry: https://commitfest.postgresql.org/50/4573 Patch(es): https://www.postgresql.org/message-id/20240111.173322.1809044112677090191.horikyota.ntt@gmail.com Author(s): Kyotaro Horiguchi
2 parents 2e7c4ab + 4751505 commit e17fa2c

File tree

2 files changed

+96
-14
lines changed

2 files changed

+96
-14
lines changed

src/bin/pg_ctl/pg_ctl.c

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ static void adjust_data_dir(void);
132132

133133
#ifdef WIN32
134134
#include <versionhelpers.h>
135+
#include <tlhelp32.h>
135136
static bool pgwin32_IsInstalled(SC_HANDLE);
136137
static char *pgwin32_CommandLine(bool);
137138
static void pgwin32_doRegister(void);
@@ -142,6 +143,7 @@ static void WINAPI pgwin32_ServiceMain(DWORD, LPTSTR *);
142143
static void pgwin32_doRunAsService(void);
143144
static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo, bool as_service);
144145
static PTOKEN_PRIVILEGES GetPrivilegesToDelete(HANDLE hToken);
146+
static pid_t pgwin32_find_postmaster_pid(pid_t shell_pid);
145147
#endif
146148

147149
static pid_t get_pgpid(bool is_status_request);
@@ -609,7 +611,11 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
609611
/* File is complete enough for us, parse it */
610612
pid_t pmpid;
611613
time_t pmstart;
612-
614+
#ifndef WIN32
615+
pid_t wait_pid = pm_pid;
616+
#else
617+
pid_t wait_pid = pgwin32_find_postmaster_pid(pm_pid);
618+
#endif
613619
/*
614620
* Make sanity checks. If it's for the wrong PID, or the recorded
615621
* start time is before pg_ctl started, then either we are looking
@@ -619,14 +625,8 @@ wait_for_postmaster_start(pid_t pm_pid, bool do_checkpoint)
619625
*/
620626
pmpid = atol(optlines[LOCK_FILE_LINE_PID - 1]);
621627
pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
622-
if (pmstart >= start_time - 2 &&
623-
#ifndef WIN32
624-
pmpid == pm_pid
625-
#else
626-
/* Windows can only reject standalone-backend PIDs */
627-
pmpid > 0
628-
#endif
629-
)
628+
629+
if (pmstart >= start_time - 2 && pmpid == wait_pid)
630630
{
631631
/*
632632
* OK, seems to be a valid pidfile from our child. Check the
@@ -1959,6 +1959,93 @@ GetPrivilegesToDelete(HANDLE hToken)
19591959

19601960
return tokenPrivs;
19611961
}
1962+
1963+
/*
1964+
* Find the PID of the launched postmaster.
1965+
*
1966+
* On Windows, the cmd.exe doesn't support the exec command. As a result, we
1967+
* don't directly get the postmaster's PID. This function identifies the PID of
1968+
* the postmaster started by the child cmd.exe.
1969+
*
1970+
* Returns the postmaster's PID. If the shell is alive but the postmaster is
1971+
* missing, returns 0. Otherwise terminates this command with an error.
1972+
*
1973+
* This function uses PID 0 as an invalid value, assuming the system idle
1974+
* process occupies it and it won't be a PID for a shell or postmaster.
1975+
*/
1976+
static pid_t
1977+
pgwin32_find_postmaster_pid(pid_t shell_pid)
1978+
{
1979+
HANDLE hSnapshot;
1980+
PROCESSENTRY32 ppe;
1981+
pid_t pm_pid = 0; /* abusing 0 as an invalid value */
1982+
bool multiple_children = false;
1983+
DWORD last_error;
1984+
1985+
/* create a process snapshot */
1986+
hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
1987+
if (hSnapshot == INVALID_HANDLE_VALUE)
1988+
{
1989+
write_stderr(_("%s: CreateToolhelp32Snapshot failed: error code %lu\n"),
1990+
progname, (unsigned long) GetLastError());
1991+
exit(1);
1992+
}
1993+
1994+
/* start iterating on the snapshot */
1995+
ppe.dwSize = sizeof(PROCESSENTRY32);
1996+
if (!Process32First(hSnapshot, &ppe))
1997+
{
1998+
write_stderr(_("%s: Process32First failed: error code %lu\n"),
1999+
progname, (unsigned long) GetLastError());
2000+
exit(1);
2001+
}
2002+
2003+
/*
2004+
* Iterate over the snapshot
2005+
*
2006+
* Check for duplicate processes to ensure reliability.
2007+
*
2008+
* The ppe entry to be examined is identified by th32ParentProcessID, which
2009+
* should correspond to the cmd.exe process that executes the postgres.exe
2010+
* binary. Additionally, th32ProcessID in the same entry should be the PID
2011+
* of the launched postgres.exe. However, even though we have launched the
2012+
* parent cmd.exe with the /D option specified, it is sometimes observed
2013+
* that another cmd.exe is launched for unknown reasons. Therefore, it is
2014+
* crucial to verify the program file name to avoid returning the wrong
2015+
* PID.
2016+
*/
2017+
do
2018+
{
2019+
if (ppe.th32ParentProcessID == shell_pid &&
2020+
strcmp("postgres.exe", ppe.szExeFile) == 0)
2021+
{
2022+
if (pm_pid != ppe.th32ProcessID && pm_pid != 0)
2023+
multiple_children = true;
2024+
pm_pid = ppe.th32ProcessID;
2025+
}
2026+
}
2027+
while (Process32Next(hSnapshot, &ppe));
2028+
2029+
/* avoid multiple calls primary for clarity, not out of necessity */
2030+
last_error = GetLastError();
2031+
if (last_error != ERROR_NO_MORE_FILES)
2032+
{
2033+
write_stderr(_("%s: Process32Next failed: error code %lu\n"),
2034+
progname, (unsigned long) last_error);
2035+
exit(1);
2036+
}
2037+
CloseHandle(hSnapshot);
2038+
2039+
/* assuming the launching shell executes a single process */
2040+
if (multiple_children)
2041+
{
2042+
write_stderr(_("%s: multiple postmasters found\n"),
2043+
progname);
2044+
exit(1);
2045+
}
2046+
2047+
return pm_pid;
2048+
}
19622049
#endif /* WIN32 */
19632050

19642051
static void

src/bin/pg_ctl/t/001_start_stop.pl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@
4646
];
4747
command_like($ctlcmd, qr/done.*server started/s, 'pg_ctl start');
4848

49-
# sleep here is because Windows builds can't check postmaster.pid exactly,
50-
# so they may mistake a pre-existing postmaster.pid for one created by the
51-
# postmaster they start. Waiting more than the 2 seconds slop time allowed
52-
# by wait_for_postmaster() prevents that mistake.
53-
sleep 3 if ($windows_os);
5449
command_fails([ 'pg_ctl', 'start', '-D', "$tempdir/data" ],
5550
'second pg_ctl start fails');
5651
command_ok([ 'pg_ctl', 'stop', '-D', "$tempdir/data" ], 'pg_ctl stop');

0 commit comments

Comments
 (0)