Code review for postmaster.pid contents changes.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Jan 2011 00:01:28 +0000 (19:01 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Jan 2011 00:01:28 +0000 (19:01 -0500)
Fix broken test for pre-existing postmaster, caused by wrong code for
appending lines to the lockfile; don't write a failed listen_address
setting into the lockfile; don't arbitrarily change the location of the
data directory in the lockfile compared to previous releases; provide more
consistent and useful definitions of the socket path and listen_address
entries; avoid assuming that pg_ctl has the same DEFAULT_PGSOCKET_DIR as
the postmaster; assorted code style improvements.

doc/src/sgml/storage.sgml
src/backend/port/ipc_test.c
src/backend/port/sysv_shmem.c
src/backend/postmaster/postmaster.c
src/backend/utils/init/miscinit.c
src/bin/pg_ctl/pg_ctl.c
src/include/miscadmin.h

index 746a48219fd37b7c9c334625f938193cab745004..ad9fba2d0f3f556a80ec79184d8f13b7a2459c8b 100644 (file)
@@ -117,9 +117,14 @@ last started with</entry>
 <row>
  <entry><filename>postmaster.pid</></entry>
  <entry>A lock file recording the current postmaster process id (PID),
- postmaster start time, cluster data directory, port number, user-specified
- Unix domain socket directory, first valid listen_address host, and
- shared memory segment ID</entry>
+  cluster data directory path,
+  postmaster start timestamp,
+  port number,
+  Unix-domain socket directory path (empty on Windows),
+  first valid listen_address (IP address or <literal>*</>, or empty if
+  not listening on TCP),
+  and shared memory segment ID
+  (this file is not present after server shutdown)</entry>
 </row>
 
 </tbody>
index 2518007c4b6441266cda8aa0104d7c76d7f15aaf..b4bcf40a7aba6dfdc85f75950436137486fd820d 100644 (file)
@@ -104,7 +104,7 @@ on_exit_reset(void)
 }
 
 void
-AddToLockFile(int target_line, const char *str)
+AddToDataDirLockFile(int target_line, const char *str)
 {
 }
 
@@ -135,7 +135,7 @@ errcode_for_file_access(void)
 
 bool
 errstart(int elevel, const char *filename, int lineno,
-        const char *funcname)
+        const char *funcname, const char *domain)
 {
    return (elevel >= ERROR);
 }
index 474c6142ebeb164027f8a6a4c93c4b7ea897a155..aece026ec649c057c40261f7668b447bf18273cf 100644 (file)
@@ -199,15 +199,16 @@ InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
    on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
 
    /*
-    * Append record key and ID in lockfile for data directory. Format
-    * to try to keep it the same length.
+    * Store shmem key and ID in data directory lockfile.  Format to try to
+    * keep it the same length always (trailing junk in the lockfile won't
+    * hurt, but might confuse humans).
     */
    {
-       char line[32];
+       char line[64];
 
-       sprintf(line, "%9lu %9lu\n", (unsigned long) memKey,
-                                    (unsigned long) shmid);
-       AddToLockFile(LOCK_FILE_LINES, line);
+       sprintf(line, "%9lu %9lu",
+               (unsigned long) memKey, (unsigned long) shmid);
+       AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
    }
 
    return memAddress;
index d158ded390be29c6352ad6d7b79372a38833b3b8..179048f32d16ef6af08207b51018b6291c748d86 100644 (file)
@@ -482,9 +482,9 @@ PostmasterMain(int argc, char *argv[])
    int         opt;
    int         status;
    char       *userDoption = NULL;
+   bool        listen_addr_saved = false;
    int         i;
-   bool        connection_line_output = false;
-   
+
    MyProcPid = PostmasterPid = getpid();
 
    MyStartTime = time(NULL);
@@ -861,24 +861,21 @@ PostmasterMain(int argc, char *argv[])
                                          UnixSocketDir,
                                          ListenSocket, MAXLISTEN);
            else
-           {
                status = StreamServerPort(AF_UNSPEC, curhost,
                                          (unsigned short) PostPortNumber,
                                          UnixSocketDir,
                                          ListenSocket, MAXLISTEN);
-               /* must supply a valid listen_address for PQping() */
-               if (!connection_line_output)
-               {
-                   char line[MAXPGPATH + 2];
 
-                   sprintf(line, "%s\n", curhost);
-                   AddToLockFile(LOCK_FILE_LINES - 1, line);
-                   connection_line_output = true;
-               }
-           }
-               
            if (status == STATUS_OK)
+           {
                success++;
+               /* record the first successful host addr in lockfile */
+               if (!listen_addr_saved)
+               {
+                   AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
+                   listen_addr_saved = true;
+               }
+           }
            else
                ereport(WARNING,
                        (errmsg("could not create listen socket for \"%s\"",
@@ -893,10 +890,6 @@ PostmasterMain(int argc, char *argv[])
        pfree(rawstring);
    }
 
-   /* Supply an empty listen_address line for PQping() */
-   if (!connection_line_output)
-       AddToLockFile(LOCK_FILE_LINES - 1, "\n");
-
 #ifdef USE_BONJOUR
    /* Register for Bonjour only if we opened TCP socket(s) */
    if (enable_bonjour && ListenSocket[0] != PGINVALID_SOCKET)
@@ -952,6 +945,14 @@ PostmasterMain(int argc, char *argv[])
        ereport(FATAL,
                (errmsg("no socket created for listening")));
 
+   /*
+    * If no valid TCP ports, write an empty line for listen address,
+    * indicating the Unix socket must be used.  Note that this line is not
+    * added to the lock file until there is a socket backing it.
+    */
+   if (!listen_addr_saved)
+       AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
+
    /*
     * Set up shared memory and semaphores.
     */
index 4f708352daa5e6fcc73ddb018329bbbe60a2d784..ef6422e75c35872f520133b796be4a932b1562d7 100644 (file)
 
 
 #define DIRECTORY_LOCK_FILE        "postmaster.pid"
-/*
- * The lock file contents are:
- *
- * line #
- *     1   pid
- *     2   postmaster start time
- *     3   data directory
- *     4   port #
- *     5   user-specified socket directory
- *         (the lines below are added later)
- *     6   first valid listen_address
- *     7   shared memory key
- */
-   
+
 ProcessingMode Mode = InitProcessing;
 
 /* Note: we rely on this to initialize as zeroes */
@@ -244,7 +231,6 @@ static int  SecurityRestrictionContext = 0;
 static bool SetRoleIsActive = false;
 
 
-
 /*
  * GetUserId - get the current effective user ID.
  *
@@ -691,7 +677,7 @@ CreateLockFile(const char *filename, bool amPostmaster,
               bool isDDLock, const char *refName)
 {
    int         fd;
-   char        buffer[MAXPGPATH * 3 + 256];
+   char        buffer[MAXPGPATH * 2 + 256];
    int         ntries;
    int         len;
    int         encoded_pid;
@@ -852,26 +838,26 @@ CreateLockFile(const char *filename, bool amPostmaster,
         * looking to see if there is an associated shmem segment that is
         * still in use.
         *
-        * Note: because postmaster.pid is written in two steps, we might not
-        * find the shmem ID values in it; we can't treat that as an error.
+        * Note: because postmaster.pid is written in multiple steps, we might
+        * not find the shmem ID values in it; we can't treat that as an
+        * error.
         */
        if (isDDLock)
        {
            char       *ptr = buffer;
-           unsigned long id1, id2;
-           int lineno;
+           unsigned long id1,
+                       id2;
+           int         lineno;
 
-           for (lineno = 1; lineno <= LOCK_FILE_LINES - 1; lineno++)
+           for (lineno = 1; lineno < LOCK_FILE_LINE_SHMEM_KEY; lineno++)
            {
                if ((ptr = strchr(ptr, '\n')) == NULL)
-               {
-                   elog(LOG, "bogus data in \"%s\"", DIRECTORY_LOCK_FILE);
                    break;
-               }
                ptr++;
            }
 
-           if (ptr && sscanf(ptr, "%lu %lu", &id1, &id2) == 2)
+           if (ptr != NULL &&
+               sscanf(ptr, "%lu %lu", &id1, &id2) == 2)
            {
                if (PGSharedMemoryIsInUse(id1, id2))
                    ereport(FATAL,
@@ -903,12 +889,23 @@ CreateLockFile(const char *filename, bool amPostmaster,
    }
 
    /*
-    * Successfully created the file, now fill it.
+    * Successfully created the file, now fill it.  See comment in miscadmin.h
+    * about the contents.  Note that we write the same info into both datadir
+    * and socket lockfiles; although more stuff may get added to the datadir
+    * lockfile later.
     */
-   snprintf(buffer, sizeof(buffer), "%d\n%ld\n%s\n%d\n%s\n",
+   snprintf(buffer, sizeof(buffer), "%d\n%s\n%ld\n%d\n%s\n",
             amPostmaster ? (int) my_pid : -((int) my_pid),
-            (long) MyStartTime, DataDir, PostPortNumber,
-            UnixSocketDir);
+            DataDir,
+            (long) MyStartTime,
+            PostPortNumber,
+#ifdef HAVE_UNIX_SOCKETS
+            (*UnixSocketDir != '\0') ? UnixSocketDir : DEFAULT_PGSOCKET_DIR
+#else
+            ""
+#endif
+            );
+
    errno = 0;
    if (write(fd, buffer, strlen(buffer)) != strlen(buffer))
    {
@@ -1019,17 +1016,20 @@ TouchSocketLockFile(void)
 
 
 /*
- * Add lines to the data directory lock file.  This erases all following
- * lines, but that is OK because lines are added in order.
+ * Add (or replace) a line in the data directory lock file.
+ * The given string should not include a trailing newline.
+ *
+ * Caution: this erases all following lines.  In current usage that is OK
+ * because lines are added in order.  We could improve it if needed.
  */
 void
-AddToLockFile(int target_line, const char *str)
+AddToDataDirLockFile(int target_line, const char *str)
 {
    int         fd;
    int         len;
    int         lineno;
    char       *ptr;
-   char        buffer[MAXPGPATH * 3 + 256];
+   char        buffer[BLCKSZ];
 
    fd = open(DIRECTORY_LOCK_FILE, O_RDWR | PG_BINARY, 0);
    if (fd < 0)
@@ -1040,7 +1040,7 @@ AddToLockFile(int target_line, const char *str)
                        DIRECTORY_LOCK_FILE)));
        return;
    }
-   len = read(fd, buffer, sizeof(buffer) - 100);
+   len = read(fd, buffer, sizeof(buffer) - 1);
    if (len < 0)
    {
        ereport(LOG,
@@ -1053,7 +1053,7 @@ AddToLockFile(int target_line, const char *str)
    buffer[len] = '\0';
 
    /*
-    * Skip over first four lines (PID, pgdata, portnum, socketdir).
+    * Skip over lines we are not supposed to rewrite.
     */
    ptr = buffer;
    for (lineno = 1; lineno < target_line; lineno++)
@@ -1066,8 +1066,11 @@ AddToLockFile(int target_line, const char *str)
        }
        ptr++;
    }
-   
-   strlcat(buffer, str, sizeof(buffer));
+
+   /*
+    * Write or rewrite the target line.
+    */
+   snprintf(ptr, buffer + sizeof(buffer) - ptr, "%s\n", str);
 
    /*
     * And rewrite the data.  Since we write in a single kernel call, this
index 03a932970d1ae21fd6f9e0aef1f1f1d7c4599a4f..6c87f158f3f3e8215620cf17455cbdf77dc93382 100644 (file)
@@ -22,7 +22,6 @@
 
 #include <locale.h>
 #include <signal.h>
-#include <stdlib.h>
 #include <time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -91,6 +90,24 @@ static char *register_username = NULL;
 static char *register_password = NULL;
 static char *argv0 = NULL;
 static bool allow_core_files = false;
+static time_t start_time;
+
+static char postopts_file[MAXPGPATH];
+static char pid_file[MAXPGPATH];
+static char backup_file[MAXPGPATH];
+static char recovery_file[MAXPGPATH];
+
+#if defined(WIN32) || defined(__CYGWIN__)
+static DWORD pgctl_start_type = SERVICE_AUTO_START;
+static SERVICE_STATUS status;
+static SERVICE_STATUS_HANDLE hStatus = (SERVICE_STATUS_HANDLE) 0;
+static HANDLE shutdownHandles[2];
+static pid_t postmasterPID = -1;
+
+#define shutdownEvent    shutdownHandles[0]
+#define postmasterProcess shutdownHandles[1]
+#endif
+
 
 static void
 write_stderr(const char *fmt,...)
@@ -122,15 +139,6 @@ static void WINAPI pgwin32_ServiceHandler(DWORD);
 static void WINAPI pgwin32_ServiceMain(DWORD, LPTSTR *);
 static void pgwin32_doRunAsService(void);
 static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo, bool as_service);
-
-static DWORD pgctl_start_type = SERVICE_AUTO_START;
-static SERVICE_STATUS status;
-static SERVICE_STATUS_HANDLE hStatus = (SERVICE_STATUS_HANDLE) 0;
-static HANDLE shutdownHandles[2];
-static pid_t postmasterPID = -1;
-
-#define shutdownEvent    shutdownHandles[0]
-#define postmasterProcess shutdownHandles[1]
 #endif
 
 static pgpid_t get_pgpid(void);
@@ -140,12 +148,6 @@ static void read_post_opts(void);
 
 static PGPing test_postmaster_connection(bool);
 static bool postmaster_is_alive(pid_t pid);
-static time_t start_time;
-
-static char postopts_file[MAXPGPATH];
-static char pid_file[MAXPGPATH];
-static char backup_file[MAXPGPATH];
-static char recovery_file[MAXPGPATH];
 
 #if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE)
 static void unlimit_core_size(void);
@@ -406,14 +408,10 @@ start_postmaster(void)
 static PGPing
 test_postmaster_connection(bool do_checkpoint)
 {
-   int         portnum = 0;
-   char        host_str[MAXPGPATH];
-   char        connstr[MAXPGPATH + 256];
    PGPing      ret = PQPING_OK;    /* assume success for wait == zero */
-   char      **optlines;
+   char        connstr[MAXPGPATH * 2 + 256];
    int         i;
 
-   host_str[0] = '\0';
    connstr[0] = '\0';
    
    for (i = 0; i < wait_seconds; i++)
@@ -421,108 +419,111 @@ test_postmaster_connection(bool do_checkpoint)
        /* Do we need a connection string? */
        if (connstr[0] == '\0')
        {
-           /*
-            *  The number of lines in postmaster.pid tells us several things:
+           /*----------
+            * The number of lines in postmaster.pid tells us several things:
             *
-            *  # of lines
+            * # of lines
             *      0   lock file created but status not written
             *      2   pre-9.1 server, shared memory not created
             *      3   pre-9.1 server, shared memory created
-            *      5   9.1+ server, listen host not created
+            *      5   9.1+ server, ports not opened
             *      6   9.1+ server, shared memory not created
             *      7   9.1+ server, shared memory created
             *
-            *  For pre-9.1 Unix servers, we grab the port number from the
-            *  shmem key (first value on line 3).  Pre-9.1 Win32 has no
-            *  written shmem key, so we fail.  9.1+ writes connection
-            *  information in the file for us to use.
-            *  (PG_VERSION could also have told us the major version.)
+            * This code does not support pre-9.1 servers.  On Unix machines
+            * we could consider extracting the port number from the shmem
+            * key, but that (a) is not robust, and (b) doesn't help with
+            * finding out the socket directory.  And it wouldn't work anyway
+            * on Windows.
+            *
+            * If we see less than 6 lines in postmaster.pid, just keep
+            * waiting.
+            *----------
             */
-       
-           /* Try to read a completed postmaster.pid file */
+           char      **optlines;
+
+           /* Try to read the postmaster.pid file */
            if ((optlines = readfile(pid_file)) != NULL &&
                optlines[0] != NULL &&
                optlines[1] != NULL &&
-               optlines[2] != NULL &&
-               /* pre-9.1 server or listen_address line is present? */
-               (optlines[3] == NULL ||
-                optlines[5] != NULL))
-           {               
-               /* A 3-line file? */
+               optlines[2] != NULL)
+           {
                if (optlines[3] == NULL)
                {
-                   /*
-                    *  Pre-9.1:  on Unix, we get the port number by
-                    *  deriving it from the shmem key (the first number on
-                    *  on the line);  see
-                    *  miscinit.c::RecordSharedMemoryInLockFile().
-                    */
-                   portnum = atoi(optlines[2]) / 1000;
-                   /* Win32 does not give us a shmem key, so we fail. */
-                   if (portnum == 0)
-                   {
-                       write_stderr(_("%s: -w option is not supported on this platform\nwhen connecting to a pre-9.1 server\n"),
-                                    progname);
-                       return PQPING_NO_ATTEMPT;
-                   }
+                   /* File is exactly three lines, must be pre-9.1 */
+                   write_stderr(_("%s: -w option is not supported when starting a pre-9.1 server\n"),
+                                progname);
+                   return PQPING_NO_ATTEMPT;
                }
-               else
+               else if (optlines[4] != NULL &&
+                        optlines[5] != NULL)
                {
+                   /* File is complete enough for us, parse it */
+                   time_t  pmstart;
+                   int     portnum;
+                   char   *sockdir;
+                   char   *hostaddr;
+                   char    host_str[MAXPGPATH];
+
                    /*
-                    *  Easy check to see if we are looking at the right
-                    *  data directory:  Is the postmaster older than this
-                    *  execution of pg_ctl?  Subtract 2 seconds to account
-                    *  for possible clock skew between pg_ctl and the
-                    *  postmaster.
+                    * Easy cross-check that we are looking at the right data
+                    * directory: is the postmaster older than this execution
+                    * of pg_ctl?  Subtract 2 seconds to allow for possible
+                    * clock skew between pg_ctl and the postmaster.
                     */
-                   if (atol(optlines[1]) < start_time - 2)
+                   pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
+                   if (pmstart < start_time - 2)
                    {
-                       write_stderr(_("%s: this data directory is running an older postmaster\n"),
+                       write_stderr(_("%s: this data directory is running a pre-existing postmaster\n"),
                                     progname);
                        return PQPING_NO_ATTEMPT;
                    }
-                       
-                   portnum = atoi(optlines[3]);
 
                    /*
-                    *  Determine the proper host string to use.
-                    */
-#ifdef HAVE_UNIX_SOCKETS
-                   /*
-                    *  Use socket directory, if specified.  We assume if we
-                    *  have unix sockets, the server does too because we
-                    *  just started the postmaster.
+                    * OK, extract port number and host string to use.
+                    * Prefer using Unix socket if available.
                     */
+                   portnum = atoi(optlines[LOCK_FILE_LINE_PORT - 1]);
+
+                   sockdir = optlines[LOCK_FILE_LINE_SOCKET_DIR - 1];
+                   hostaddr = optlines[LOCK_FILE_LINE_LISTEN_ADDR - 1];
+
                    /*
-                    *  While unix_socket_directory can accept relative
-                    *  directories, libpq's host must have a leading slash
-                    *  to indicate a socket directory.
+                    * While unix_socket_directory can accept relative
+                    * directories, libpq's host parameter must have a leading
+                    * slash to indicate a socket directory.  So, ignore
+                    * sockdir if it's relative, and try to use TCP instead.
                     */
-                   if (optlines[4][0] != '\n' && optlines[4][0] != '/')
+                   if (sockdir[0] == '/')
+                       strlcpy(host_str, sockdir, sizeof(host_str));
+                   else
+                       strlcpy(host_str, hostaddr, sizeof(host_str));
+
+                   /* remove trailing newline */
+                   if (strchr(host_str, '\n') != NULL)
+                       *strchr(host_str, '\n') = '\0';
+
+                   /* Fail if we couldn't get either sockdir or host addr */
+                   if (host_str[0] == '\0')
                    {
                        write_stderr(_("%s: -w option cannot use a relative socket directory specification\n"),
                                     progname);
                        return PQPING_NO_ATTEMPT;
                    }
-                   strlcpy(host_str, optlines[4], sizeof(host_str));
-#else
-                   strlcpy(host_str, optlines[5], sizeof(host_str));
-#endif
-                   /* remove newline */
-                   if (strchr(host_str, '\n') != NULL)
-                       *strchr(host_str, '\n') = '\0';
-               }
-               
-               /*
-                * We need to set connect_timeout otherwise on Windows the
-                * Service Control Manager (SCM) will probably timeout first.
-                */
-               snprintf(connstr, sizeof(connstr),
-                        "dbname=postgres port=%d connect_timeout=5", portnum);
 
-               if (host_str[0] != '\0')
-                   snprintf(connstr + strlen(connstr), sizeof(connstr) - strlen(connstr),
-                       " host='%s'", host_str);
+                   /* If postmaster is listening on "*", use "localhost" */
+                   if (strcmp(host_str, "*") == 0)
+                       strcpy(host_str, "localhost");
+
+                   /*
+                    * We need to set connect_timeout otherwise on Windows the
+                    * Service Control Manager (SCM) will probably timeout
+                    * first.
+                    */
+                   snprintf(connstr, sizeof(connstr),
+                            "dbname=postgres port=%d host='%s' connect_timeout=5",
+                            portnum, host_str);
+               }
            }
        }
 
@@ -544,7 +545,7 @@ test_postmaster_connection(bool do_checkpoint)
             * startup time is changing, otherwise it'll usually send a
             * stop signal after 20 seconds, despite incrementing the
             * checkpoint counter.
-                */
+            */
            status.dwWaitHint += 6000;
            status.dwCheckPoint++;
            SetServiceStatus(hStatus, (LPSERVICE_STATUS) &status);
index 382a5deced44b245a724c026e25ec62ed3465281..aa8cce5ca816d19cd0b8149f3abf4d6d5f09229f 100644 (file)
@@ -348,11 +348,35 @@ extern PGDLLIMPORT bool process_shared_preload_libraries_in_progress;
 extern char *shared_preload_libraries_string;
 extern char *local_preload_libraries_string;
 
-#define LOCK_FILE_LINES        7
+/*
+ * As of 9.1, the contents of the data-directory lock file are:
+ *
+ * line #
+ *     1   postmaster PID (or negative of a standalone backend's PID)
+ *     2   data directory path
+ *     3   postmaster start timestamp (time_t representation)
+ *     4   port number
+ *     5   socket directory path (empty on Windows)
+ *     6   first listen_address (IP address or "*"; empty if no TCP port)
+ *     7   shared memory key (not present on Windows)
+ *
+ * Lines 6 and up are added via AddToDataDirLockFile() after initial file
+ * creation; they have to be ordered according to time of addition.
+ *
+ * The socket lock file, if used, has the same contents as lines 1-5.
+ */
+#define LOCK_FILE_LINE_PID         1
+#define LOCK_FILE_LINE_DATA_DIR        2
+#define LOCK_FILE_LINE_START_TIME  3
+#define LOCK_FILE_LINE_PORT            4
+#define LOCK_FILE_LINE_SOCKET_DIR  5
+#define LOCK_FILE_LINE_LISTEN_ADDR 6
+#define LOCK_FILE_LINE_SHMEM_KEY   7
+
 extern void CreateDataDirLockFile(bool amPostmaster);
 extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster);
 extern void TouchSocketLockFile(void);
-extern void AddToLockFile(int target_line, const char *str);
+extern void AddToDataDirLockFile(int target_line, const char *str);
 extern void ValidatePgVersion(const char *path);
 extern void process_shared_preload_libraries(void);
 extern void process_local_preload_libraries(void);