Extend format() to handle field width and left/right alignment.
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 15 Mar 2013 02:56:56 +0000 (22:56 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 15 Mar 2013 02:56:56 +0000 (22:56 -0400)
This change adds some more standard sprintf() functionality to format().

Pavel Stehule, reviewed by Dean Rasheed and Kyotaro Horiguchi

doc/src/sgml/func.sgml
src/backend/utils/adt/varlena.c
src/test/regress/expected/text.out
src/test/regress/sql/text.sql

index 372e2b65751c45c581838f651d2ddac6c382d387..896c08c09cff7c50a1064a77e940e7b9731c23ad 100644 (file)
          <primary>format</primary>
         </indexterm>
         <literal><function>format</function>(<parameter>formatstr</parameter> <type>text</type>
-        [, <parameter>str</parameter> <type>"any"</type> [, ...] ])</literal>
+        [, <parameter>formatarg</parameter> <type>"any"</type> [, ...] ])</literal>
        </entry>
        <entry><type>text</type></entry>
        <entry>
          Format arguments according to a format string.
-         This function is similar to the C function
-         <function>sprintf</>, but only the following conversion specifications
-         are recognized: <literal>%s</literal> interpolates the corresponding
-         argument as a string; <literal>%I</literal> escapes its argument as
-         an SQL identifier; <literal>%L</literal> escapes its argument as an
-         SQL literal; <literal>%%</literal> outputs a literal <literal>%</>.
-         A conversion can reference an explicit parameter position by preceding
-         the conversion specifier with <literal><replaceable>n</>$</>, where
-         <replaceable>n</replaceable> is the argument position.
-         See also <xref linkend="plpgsql-quote-literal-example">.
+         This function is similar to the C function <function>sprintf</>.
+         See <xref linkend="functions-string-format">.
        </entry>
        <entry><literal>format('Hello %s, %1$s', 'World')</literal></entry>
        <entry><literal>Hello World, World</literal></entry>
     </tgroup>
    </table>
 
+   <sect2 id="functions-string-format">
+    <title><function>format</function></title>
+
+    <indexterm>
+     <primary>format</primary>
+    </indexterm>
+
+    <para>
+     The function <function>format</> produces output formatted according to
+     a format string, in a style similar to the C function
+     <function>sprintf</>.
+    </para>
+
+    <para>
+<synopsis>
+<function>format</>(<parameter>formatstr</> <type>text</> [, <parameter>formatarg</> <type>"any"</> [, ...] ])
+</synopsis>
+     <replaceable>formatstr</> is a format string that specifies how the
+     result should be formatted.  Text in the format string is copied
+     directly to the result, except where <firstterm>format specifiers</> are
+     used.  Format specifiers act as placeholders in the string, defining how
+     subsequent function arguments should be formatted and inserted into the
+     result.  Each <replaceable>formatarg</> argument is converted to text
+     according to the usual output rules for its data type, and then formatted
+     and inserted into the result string according to the format specifier(s).
+    </para>
+
+    <para>
+     Format specifiers are introduced by a <literal>%</> character and have
+     the form
+<synopsis>
+%[<replaceable>position</>][<replaceable>flags</>][<replaceable>width</>]<replaceable>type</>
+</synopsis>
+     where the component fields are:
+
+     <variablelist>
+      <varlistentry>
+       <term><replaceable>position</replaceable> (optional)</term>
+       <listitem>
+        <para>
+         A string of the form <literal><replaceable>n</>$</> where
+         <replaceable>n</> is the index of the argument to print.
+         Index 1 means the first argument after
+         <replaceable>formatstr</>.  If the <replaceable>position</> is
+         omitted, the default is to use the next argument in sequence.
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term><replaceable>flags</replaceable> (optional)</term>
+       <listitem>
+        <para>
+         Additional options controlling how the format specifier's output is
+         formatted.  Currently the only supported flag is a minus sign
+         (<literal>-</>) which will cause the format specifier's output to be
+         left-justified.  This has no effect unless the <replaceable>width</>
+         field is also specified.
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term><replaceable>width</replaceable> (optional)</term>
+       <listitem>
+        <para>
+         Specifies the <emphasis>minimum</> number of characters to use to
+         display the format specifier's output.  The output is padded on the
+         left or right (depending on the <literal>-</> flag) with spaces as
+         needed to fill the width.  A too-small width does not cause
+         truncation of the output, but is simply ignored.  The width may be
+         specified using any of the following: a positive integer; an
+         asterisk (<literal>*</>) to use the next function argument as the
+         width; or a string of the form <literal>*<replaceable>n</>$</> to
+         use the <replaceable>n</>th function argument as the width.
+        </para>
+
+        <para>
+         If the width comes from a function argument, that argument is
+         consumed before the argument that is used for the format specifier's
+         value.  If the width argument is negative, the result is left
+         aligned (as if the <literal>-</> flag had been specified) within a
+         field of length <function>abs</>(<replaceable>width</replaceable>).
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term><replaceable>type</replaceable> (required)</term>
+       <listitem>
+        <para>
+         The type of format conversion to use to produce the format
+         specifier's output.  The following types are supported:
+         <itemizedlist>
+          <listitem>
+           <para>
+            <literal>s</literal> formats the argument value as a simple
+            string.  A null value is treated as an empty string.
+           </para>
+          </listitem>
+          <listitem>
+           <para>
+            <literal>I</literal> treats the argument value as an SQL
+            identifier, double-quoting it if necessary.
+            It is an error for the value to be null.
+           </para>
+          </listitem>
+          <listitem>
+           <para>
+            <literal>L</literal> quotes the argument value as an SQL literal.
+            A null value is displayed as the string <literal>NULL</>, without
+            quotes.
+           </para>
+          </listitem>
+         </itemizedlist>
+        </para>
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </para>
+
+    <para>
+     In addition to the format specifiers described above, the special sequence
+     <literal>%%</> may be used to output a literal <literal>%</> character.
+    </para>
+
+    <para>
+     Here are some examples of the basic format conversions:
+
+<screen>
+SELECT format('Hello %s', 'World');
+<lineannotation>Result: </lineannotation><computeroutput>Hello World</computeroutput>
+
+SELECT format('Testing %s, %s, %s, %%', 'one', 'two', 'three');
+<lineannotation>Result: </><computeroutput>Testing one, two, three, %</>
+
+SELECT format('INSERT INTO %I VALUES(%L)', 'Foo bar', E'O\'Reilly');
+<lineannotation>Result: </lineannotation><computeroutput>INSERT INTO "Foo bar" VALUES('O''Reilly')</computeroutput>
+
+SELECT format('INSERT INTO %I VALUES(%L)', 'locations', E'C:\\Program Files');
+<lineannotation>Result: </lineannotation><computeroutput>INSERT INTO locations VALUES(E'C:\\Program Files')</computeroutput>
+</screen>
+    </para>
+
+    <para>
+     Here are examples using <replaceable>width</replaceable> fields
+     and the <literal>-</> flag:
+
+<screen>
+SELECT format('|%10s|', 'foo');
+<lineannotation>Result: </><computeroutput>|       foo|</>
+
+SELECT format('|%-10s|', 'foo');
+<lineannotation>Result: </><computeroutput>|foo       |</>
+
+SELECT format('|%*s|', 10, 'foo');
+<lineannotation>Result: </><computeroutput>|       foo|</>
+
+SELECT format('|%*s|', -10, 'foo');
+<lineannotation>Result: </><computeroutput>|foo       |</>
+
+SELECT format('|%-*s|', 10, 'foo');
+<lineannotation>Result: </><computeroutput>|foo       |</>
+
+SELECT format('|%-*s|', -10, 'foo');
+<lineannotation>Result: </><computeroutput>|foo       |</>
+</screen>
+    </para>
+
+    <para>
+     These examples show use of <replaceable>position</> fields:
+
+<screen>
+SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three');
+<lineannotation>Result: </><computeroutput>Testing three, two, one</>
+
+SELECT format('|%*2$s|', 'foo', 10, 'bar');
+<lineannotation>Result: </><computeroutput>|       bar|</>
+
+SELECT format('|%1$*2$s|', 'foo', 10, 'bar');
+<lineannotation>Result: </><computeroutput>|       foo|</>
+</screen>
+    </para>
+
+    <para>
+     Unlike the standard C function <function>sprintf</>,
+     <productname>PostgreSQL</>'s <function>format</> function allows format
+     specifiers with and without <replaceable>position</> fields to be mixed
+     in the same format string.  A format specifier without a
+     <replaceable>position</> field always uses the next argument after the
+     last argument consumed.
+     In addition, the <function>format</> function does not require all
+     function arguments to be used in the format string.
+     For example:
+
+<screen>
+SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
+<lineannotation>Result: </><computeroutput>Testing three, two, three</>
+</screen>
+    </para>
+
+    <para>
+     The <literal>%I</> and <literal>%L</> format specifiers are particularly
+     useful for safely constructing dynamic SQL statements.  See
+     <xref linkend="plpgsql-quote-literal-example">.
+    </para>
+   </sect2>
+
   </sect1>
 
 
index e69b7dd3e6b6d5f966fda7a9ccffbd898af519e5..f41abe3b2e71558b4703cbadccbf70a24772b406 100644 (file)
@@ -56,32 +56,41 @@ typedef struct
 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_UNKNOWN_P(x)         PG_RETURN_POINTER(x)
 
-static int     text_cmp(text *arg1, text *arg2, Oid collid);
 static int32 text_length(Datum str);
-static int     text_position(text *t1, text *t2);
-static void text_position_setup(text *t1, text *t2, TextPositionState *state);
-static int     text_position_next(int start_pos, TextPositionState *state);
-static void text_position_cleanup(TextPositionState *state);
 static text *text_catenate(text *t1, text *t2);
 static text *text_substring(Datum str,
                           int32 start,
                           int32 length,
                           bool length_not_specified);
 static text *text_overlay(text *t1, text *t2, int sp, int sl);
-static void appendStringInfoText(StringInfo str, const text *t);
+static int     text_position(text *t1, text *t2);
+static void text_position_setup(text *t1, text *t2, TextPositionState *state);
+static int     text_position_next(int start_pos, TextPositionState *state);
+static void text_position_cleanup(TextPositionState *state);
+static int     text_cmp(text *arg1, text *arg2, Oid collid);
 static bytea *bytea_catenate(bytea *t1, bytea *t2);
 static bytea *bytea_substring(Datum str,
                                int S,
                                int L,
                                bool length_not_specified);
 static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
-static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
-static void text_format_string_conversion(StringInfo buf, char conversion,
-                                                         FmgrInfo *typOutputInfo,
-                                                         Datum value, bool isNull);
+static void appendStringInfoText(StringInfo str, const text *t);
 static Datum text_to_array_internal(PG_FUNCTION_ARGS);
 static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
                                           const char *fldsep, const char *null_string);
+static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
+static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
+                                                int *value);
+static const char *text_format_parse_format(const char *start_ptr,
+                                                const char *end_ptr,
+                                                int *argpos, int *widthpos,
+                                                int *flags, int *width);
+static void text_format_string_conversion(StringInfo buf, char conversion,
+                                                         FmgrInfo *typOutputInfo,
+                                                         Datum value, bool isNull,
+                                                         int flags, int width);
+static void text_format_append_string(StringInfo buf, const char *str,
+                                                 int flags, int width);
 
 
 /*****************************************************************************
@@ -3996,8 +4005,22 @@ text_reverse(PG_FUNCTION_ARGS)
        PG_RETURN_TEXT_P(result);
 }
 
+
 /*
- * Returns a formated string
+ * Support macros for text_format()
+ */
+#define TEXT_FORMAT_FLAG_MINUS 0x0001  /* is minus flag present? */
+
+#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
+       do { \
+               if (++(ptr) >= (end_ptr)) \
+                       ereport(ERROR, \
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+                                        errmsg("unterminated format specifier"))); \
+       } while (0)
+
+/*
+ * Returns a formatted string
  */
 Datum
 text_format(PG_FUNCTION_ARGS)
@@ -4008,16 +4031,18 @@ text_format(PG_FUNCTION_ARGS)
        const char *start_ptr;
        const char *end_ptr;
        text       *result;
-       int                     arg = 0;
+       int                     arg;
        bool            funcvariadic;
        int                     nargs;
        Datum      *elements = NULL;
        bool       *nulls = NULL;
        Oid                     element_type = InvalidOid;
        Oid                     prev_type = InvalidOid;
+       Oid                     prev_width_type = InvalidOid;
        FmgrInfo        typoutputfinfo;
+       FmgrInfo        typoutputinfo_width;
 
-       /* When format string is null, returns null */
+       /* When format string is null, immediately return null */
        if (PG_ARGISNULL(0))
                PG_RETURN_NULL();
 
@@ -4081,10 +4106,15 @@ text_format(PG_FUNCTION_ARGS)
        start_ptr = VARDATA_ANY(fmt);
        end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
        initStringInfo(&str);
+       arg = 1;                                        /* next argument position to print */
 
        /* Scan format string, looking for conversion specifiers. */
        for (cp = start_ptr; cp < end_ptr; cp++)
        {
+               int                     argpos;
+               int                     widthpos;
+               int                     flags;
+               int                     width;
                Datum           value;
                bool            isNull;
                Oid                     typid;
@@ -4099,11 +4129,7 @@ text_format(PG_FUNCTION_ARGS)
                        continue;
                }
 
-               /* Did we run off the end of the string? */
-               if (++cp >= end_ptr)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("unterminated conversion specifier")));
+               ADVANCE_PARSE_POINTER(cp, end_ptr);
 
                /* Easy case: %% outputs a single % */
                if (*cp == '%')
@@ -4112,69 +4138,89 @@ text_format(PG_FUNCTION_ARGS)
                        continue;
                }
 
+               /* Parse the optional portions of the format specifier */
+               cp = text_format_parse_format(cp, end_ptr,
+                                                                         &argpos, &widthpos,
+                                                                         &flags, &width);
+
                /*
-                * If the user hasn't specified an argument position, we just advance
-                * to the next one.  If they have, we must parse it.
+                * Next we should see the main conversion specifier.  Whether or not
+                * an argument position was present, it's known that at least one
+                * character remains in the string at this point.  Experience suggests
+                * that it's worth checking that that character is one of the expected
+                * ones before we try to fetch arguments, so as to produce the least
+                * confusing response to a mis-formatted specifier.
                 */
-               if (*cp < '0' || *cp > '9')
+               if (strchr("sIL", *cp) == NULL)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("unrecognized conversion type specifier \"%c\"",
+                                                       *cp)));
+
+               /* If indirect width was specified, get its value */
+               if (widthpos >= 0)
                {
-                       ++arg;
-                       if (arg <= 0)           /* overflow? */
-                       {
-                               /*
-                                * Should not happen, as you can't pass billions of arguments
-                                * to a function, but better safe than sorry.
-                                */
+                       /* Collect the specified or next argument position */
+                       if (widthpos > 0)
+                               arg = widthpos;
+                       if (arg >= nargs)
                                ereport(ERROR,
-                                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                                                errmsg("argument number is out of range")));
-                       }
-               }
-               else
-               {
-                       bool            unterminated = false;
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("too few arguments for format")));
 
-                       /* Parse digit string. */
-                       arg = 0;
-                       do
+                       /* Get the value and type of the selected argument */
+                       if (!funcvariadic)
                        {
-                               int                     newarg = arg * 10 + (*cp - '0');
+                               value = PG_GETARG_DATUM(arg);
+                               isNull = PG_ARGISNULL(arg);
+                               typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
+                       }
+                       else
+                       {
+                               value = elements[arg - 1];
+                               isNull = nulls[arg - 1];
+                               typid = element_type;
+                       }
+                       if (!OidIsValid(typid))
+                               elog(ERROR, "could not determine data type of format() input");
 
-                               if (newarg / 10 != arg) /* overflow? */
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                                                        errmsg("argument number is out of range")));
-                               arg = newarg;
-                               ++cp;
-                       } while (cp < end_ptr && *cp >= '0' && *cp <= '9');
+                       arg++;
 
-                       /*
-                        * If we ran off the end, or if there's not a $ next, or if the $
-                        * is the last character, the conversion specifier is improperly
-                        * terminated.
-                        */
-                       if (cp == end_ptr || *cp != '$')
-                               unterminated = true;
+                       /* We can treat NULL width the same as zero */
+                       if (isNull)
+                               width = 0;
+                       else if (typid == INT4OID)
+                               width = DatumGetInt32(value);
+                       else if (typid == INT2OID)
+                               width = DatumGetInt16(value);
                        else
                        {
-                               ++cp;
-                               if (cp == end_ptr)
-                                       unterminated = true;
-                       }
-                       if (unterminated)
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                                errmsg("unterminated conversion specifier")));
+                               /* For less-usual datatypes, convert to text then to int */
+                               char       *str;
 
-                       /* There's no argument 0. */
-                       if (arg == 0)
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                                errmsg("conversion specifies argument 0, but arguments are numbered from 1")));
+                               if (typid != prev_width_type)
+                               {
+                                       Oid                     typoutputfunc;
+                                       bool            typIsVarlena;
+
+                                       getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+                                       fmgr_info(typoutputfunc, &typoutputinfo_width);
+                                       prev_width_type = typid;
+                               }
+
+                               str = OutputFunctionCall(&typoutputinfo_width, value);
+
+                               /* pg_atoi will complain about bad data or overflow */
+                               width = pg_atoi(str, sizeof(int), '\0');
+
+                               pfree(str);
+                       }
                }
 
-               /* Not enough arguments?  Deduct 1 to avoid counting format string. */
-               if (arg > nargs - 1)
+               /* Collect the specified or next argument position */
+               if (argpos > 0)
+                       arg = argpos;
+               if (arg >= nargs)
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                         errmsg("too few arguments for format")));
@@ -4195,6 +4241,8 @@ text_format(PG_FUNCTION_ARGS)
                if (!OidIsValid(typid))
                        elog(ERROR, "could not determine data type of format() input");
 
+               arg++;
+
                /*
                 * Get the appropriate typOutput function, reusing previous one if
                 * same type as previous argument.  That's particularly useful in the
@@ -4211,9 +4259,7 @@ text_format(PG_FUNCTION_ARGS)
                }
 
                /*
-                * At this point, we should see the main conversion specifier. Whether
-                * or not an argument position was present, it's known that at least
-                * one character remains in the string at this point.
+                * And now we can format the value.
                 */
                switch (*cp)
                {
@@ -4221,13 +4267,16 @@ text_format(PG_FUNCTION_ARGS)
                        case 'I':
                        case 'L':
                                text_format_string_conversion(&str, *cp, &typoutputfinfo,
-                                                                                         value, isNull);
+                                                                                         value, isNull,
+                                                                                         flags, width);
                                break;
                        default:
+                               /* should not get here, because of previous check */
                                ereport(ERROR,
                                                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                                errmsg("unrecognized conversion specifier \"%c\"",
+                                                errmsg("unrecognized conversion type specifier \"%c\"",
                                                                *cp)));
+                               break;
                }
        }
 
@@ -4244,19 +4293,157 @@ text_format(PG_FUNCTION_ARGS)
        PG_RETURN_TEXT_P(result);
 }
 
-/* Format a %s, %I, or %L conversion. */
+/*
+ * Parse contiguous digits as a decimal number.
+ *
+ * Returns true if some digits could be parsed.
+ * The value is returned into *value, and *ptr is advanced to the next
+ * character to be parsed.  With no digits, *value is 0 and *ptr is unchanged.
+ * Raises ERROR on overflow, or if the string ends right after the digits.
+ * Note parsing invariant: at least one character is known available before
+ * string end (end_ptr) at entry, and this is still true at exit.
+ */
+static bool
+text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
+{
+       bool            found = false;  /* becomes true once a digit is consumed */
+       const char *cp = *ptr;  /* local scan position, stored back at exit */
+       int                     val = 0;        /* value accumulated so far */
+
+       while (*cp >= '0' && *cp <= '9')
+       {
+               int                     newval = val * 10 + (*cp - '0');
+
+               if (newval / 10 != val) /* overflow? (result does not round-trip) */
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                        errmsg("number is out of range")));
+               val = newval;
+               ADVANCE_PARSE_POINTER(cp, end_ptr);     /* ERROR if string ends here */
+               found = true;
+       }
+
+       *ptr = cp;      /* report where parsing stopped */
+       *value = val;
+
+       return found;
+}
+
+/*
+ * Parse a format specifier (generally following the SUS printf spec).
+ *
+ * We have already advanced over the initial '%', and we are looking for
+ * [argpos][flags][width]type (but the type character is not consumed here).
+ *
+ * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
+ * Output parameters:
+ *     argpos: argument position for value to be printed.  -1 means unspecified.
+ *     widthpos: argument position for width.  Zero means the argument position
+ *                     was unspecified (ie, take the next arg) and -1 means no width
+ *                     argument (width was omitted or specified as a constant).
+ *     flags: bitmask of flags (only TEXT_FORMAT_FLAG_MINUS is set here).
+ *     width: directly-specified width value.  Zero means the width was omitted
+ *                     (note it's not necessary to distinguish this case from an explicit
+ *                     zero width value).
+ *
+ * The function result is the next character position to be parsed, ie, the
+ * location where the type character is/should be.
+ *
+ * Note parsing invariant: at least one character is known available before
+ * string end (end_ptr) at entry, and this is still true at exit.
+ */
+static const char *
+text_format_parse_format(const char *start_ptr, const char *end_ptr,
+                                                int *argpos, int *widthpos,
+                                                int *flags, int *width)
+{
+       const char *cp = start_ptr;
+       int                     n;      /* number most recently parsed */
+
+       /* set defaults for output parameters */
+       *argpos = -1;
+       *widthpos = -1;
+       *flags = 0;
+       *width = 0;
+
+       /* try to identify first number */
+       if (text_format_parse_digits(&cp, end_ptr, &n))
+       {
+               if (*cp != '$')
+               {
+                       /* Must be just a width and a type, so we're done */
+                       *width = n;
+                       return cp;
+               }
+               /* The number was argument position */
+               *argpos = n;
+               /* Explicit 0 for argument index is immediately refused */
+               if (n == 0)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("format specifies argument 0, but arguments are numbered from 1")));
+               ADVANCE_PARSE_POINTER(cp, end_ptr);     /* step over the '$' */
+       }
+
+       /* Handle flags (only minus is supported now) */
+       while (*cp == '-')      /* repeated '-' characters are accepted */
+       {
+               *flags |= TEXT_FORMAT_FLAG_MINUS;
+               ADVANCE_PARSE_POINTER(cp, end_ptr);
+       }
+
+       if (*cp == '*')
+       {
+               /* Handle indirect width */
+               ADVANCE_PARSE_POINTER(cp, end_ptr);
+               if (text_format_parse_digits(&cp, end_ptr, &n))
+               {
+                       /* number in this position must be closed by $ */
+                       if (*cp != '$')
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                 errmsg("width argument position must be ended by \"$\"")));
+                       /* The number was width argument position */
+                       *widthpos = n;
+                       /* Explicit 0 for argument index is immediately refused */
+                       if (n == 0)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("format specifies argument 0, but arguments are numbered from 1")));
+                       ADVANCE_PARSE_POINTER(cp, end_ptr);     /* step over the '$' */
+               }
+               else
+                       *widthpos = 0;          /* width's argument position is unspecified */
+       }
+       else
+       {
+               /* Check for direct width specification */
+               if (text_format_parse_digits(&cp, end_ptr, &n))
+                       *width = n;
+       }
+
+       /* cp should now be pointing at type character */
+       return cp;
+}
+
+/*
+ * Format a %s, %I, or %L conversion
+ */
 static void
 text_format_string_conversion(StringInfo buf, char conversion,
                                                          FmgrInfo *typOutputInfo,
-                                                         Datum value, bool isNull)
+                                                         Datum value, bool isNull,
+                                                         int flags, int width)
 {
        char       *str;
 
        /* Handle NULL arguments before trying to stringify the value. */
        if (isNull)
        {
-               if (conversion == 'L')
-                       appendStringInfoString(buf, "NULL");
+               if (conversion == 's')
+                       text_format_append_string(buf, "", flags, width);
+               else if (conversion == 'L')
+                       text_format_append_string(buf, "NULL", flags, width);
                else if (conversion == 'I')
                        ereport(ERROR,
                                        (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
@@ -4271,23 +4458,71 @@ text_format_string_conversion(StringInfo buf, char conversion,
        if (conversion == 'I')
        {
                /* quote_identifier may or may not allocate a new string. */
-               appendStringInfoString(buf, quote_identifier(str));
+               text_format_append_string(buf, quote_identifier(str), flags, width);
        }
        else if (conversion == 'L')
        {
                char       *qstr = quote_literal_cstr(str);
 
-               appendStringInfoString(buf, qstr);
+               text_format_append_string(buf, qstr, flags, width);
                /* quote_literal_cstr() always allocates a new string */
                pfree(qstr);
        }
        else
-               appendStringInfoString(buf, str);
+               text_format_append_string(buf, str, flags, width);
 
        /* Cleanup. */
        pfree(str);
 }
 
+/*
+ * Append str to buf, space-padded to at least |width|; never truncates str
+ */
+static void
+text_format_append_string(StringInfo buf, const char *str,
+                                                 int flags, int width)
+{
+       bool            align_to_left = false;  /* true => pad after str */
+       int                     len;    /* length of str per pg_mbstrlen() */
+
+       /* fast path for typical easy case: zero width means no padding */
+       if (width == 0)
+       {
+               appendStringInfoString(buf, str);
+               return;
+       }
+
+       if (width < 0)
+       {
+               /* Negative width: implicit '-' flag, then take absolute value */
+               align_to_left = true;
+               /* -INT_MIN is undefined (signed overflow), so reject it up front */
+               if (width <= INT_MIN)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                        errmsg("number is out of range")));
+               width = -width;
+       }
+       else if (flags & TEXT_FORMAT_FLAG_MINUS)
+               align_to_left = true;
+
+       len = pg_mbstrlen(str);
+       if (align_to_left)
+       {
+               /* left justify: str first, then padding if str is shorter than width */
+               appendStringInfoString(buf, str);
+               if (len < width)
+                       appendStringInfoSpaces(buf, width - len);
+       }
+       else
+       {
+               /* right justify: padding first if str is shorter than width, then str */
+               if (len < width)
+                       appendStringInfoSpaces(buf, width - len);
+               appendStringInfoString(buf, str);
+       }
+}
+
 /*
  * text_format_nv - nonvariadic wrapper for text_format function.
  *
index b7565830d6f165035625b9c856c6a15118d2f731..4b1c62bf53c9f92c0e01ed111fa2710e8d7658a7 100644 (file)
@@ -209,7 +209,7 @@ ERROR:  too few arguments for format
 select format('Hello %s');
 ERROR:  too few arguments for format
 select format('Hello %x', 20);
-ERROR:  unrecognized conversion specifier "x"
+ERROR:  unrecognized conversion type specifier "x"
 -- check literal and sql identifiers
 select format('INSERT INTO %I VALUES(%L,%L)', 'mytab', 10, 'Hello');
                  format                 
@@ -256,12 +256,14 @@ select format('%1$s %4$s', 1, 2, 3);
 ERROR:  too few arguments for format
 select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
 ERROR:  too few arguments for format
-select format('%1s', 1);
-ERROR:  unterminated conversion specifier
+select format('%0$s', 'Hello');
+ERROR:  format specifies argument 0, but arguments are numbered from 1
+select format('%*0$s', 'Hello');
+ERROR:  format specifies argument 0, but arguments are numbered from 1
 select format('%1$', 1);
-ERROR:  unterminated conversion specifier
+ERROR:  unterminated format specifier
 select format('%1$1', 1);
-ERROR:  unrecognized conversion specifier "1"
+ERROR:  unterminated format specifier
 -- check mix of positional and ordered placeholders
 select format('Hello %s %1$s %s', 'World', 'Hello again');
             format             
@@ -328,3 +330,106 @@ from generate_series(1,200) g(i);
  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200
 (1 row)
 
+-- check field widths and left, right alignment
+select format('>>%10s<<', 'Hello');
+     format     
+----------------
+ >>     Hello<<
+(1 row)
+
+select format('>>%10s<<', NULL);
+     format     
+----------------
+ >>          <<
+(1 row)
+
+select format('>>%10s<<', '');
+     format     
+----------------
+ >>          <<
+(1 row)
+
+select format('>>%-10s<<', '');
+     format     
+----------------
+ >>          <<
+(1 row)
+
+select format('>>%-10s<<', 'Hello');
+     format     
+----------------
+ >>Hello     <<
+(1 row)
+
+select format('>>%-10s<<', NULL);
+     format     
+----------------
+ >>          <<
+(1 row)
+
+select format('>>%1$10s<<', 'Hello');
+     format     
+----------------
+ >>     Hello<<
+(1 row)
+
+select format('>>%1$-10I<<', 'Hello');
+     format     
+----------------
+ >>"Hello"   <<
+(1 row)
+
+select format('>>%2$*1$L<<', 10, 'Hello');
+     format     
+----------------
+ >>   'Hello'<<
+(1 row)
+
+select format('>>%2$*1$L<<', 10, NULL);
+     format     
+----------------
+ >>      NULL<<
+(1 row)
+
+select format('>>%2$*1$L<<', -10, NULL);
+     format     
+----------------
+ >>NULL      <<
+(1 row)
+
+select format('>>%*s<<', 10, 'Hello');
+     format     
+----------------
+ >>     Hello<<
+(1 row)
+
+select format('>>%*1$s<<', 10, 'Hello');
+     format     
+----------------
+ >>     Hello<<
+(1 row)
+
+select format('>>%-s<<', 'Hello');
+  format   
+-----------
+ >>Hello<<
+(1 row)
+
+select format('>>%10L<<', NULL);
+     format     
+----------------
+ >>      NULL<<
+(1 row)
+
+select format('>>%2$*1$L<<', NULL, 'Hello');
+   format    
+-------------
+ >>'Hello'<<
+(1 row)
+
+select format('>>%2$*1$L<<', 0, 'Hello');
+   format    
+-------------
+ >>'Hello'<<
+(1 row)
+
index a96e9f7d1e7e4e3817e67f1223f74ed6a3d7142c..c4ed74b39d4424627f6456feeef03a5193ba8999 100644 (file)
@@ -78,7 +78,8 @@ select format('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
 -- should fail
 select format('%1$s %4$s', 1, 2, 3);
 select format('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
-select format('%1s', 1);
+select format('%0$s', 'Hello');
+select format('%*0$s', 'Hello');
 select format('%1$', 1);
 select format('%1$1', 1);
 -- check mix of positional and ordered placeholders
@@ -97,3 +98,21 @@ select format('Hello', variadic NULL);
 -- variadic argument allows simulating more than FUNC_MAX_ARGS parameters
 select format(string_agg('%s',','), variadic array_agg(i))
 from generate_series(1,200) g(i);
+-- check field widths and left, right alignment
+select format('>>%10s<<', 'Hello');
+select format('>>%10s<<', NULL);
+select format('>>%10s<<', '');
+select format('>>%-10s<<', '');
+select format('>>%-10s<<', 'Hello');
+select format('>>%-10s<<', NULL);
+select format('>>%1$10s<<', 'Hello');
+select format('>>%1$-10I<<', 'Hello');
+select format('>>%2$*1$L<<', 10, 'Hello');
+select format('>>%2$*1$L<<', 10, NULL);
+select format('>>%2$*1$L<<', -10, NULL);
+select format('>>%*s<<', 10, 'Hello');
+select format('>>%*1$s<<', 10, 'Hello');
+select format('>>%-s<<', 'Hello');
+select format('>>%10L<<', NULL);
+select format('>>%2$*1$L<<', NULL, 'Hello');
+select format('>>%2$*1$L<<', 0, 'Hello');