diff options
author | Pavan Deolasee | 2017-06-14 05:42:18 +0000 |
---|---|---|
committer | Pavan Deolasee | 2017-06-14 05:42:18 +0000 |
commit | 15dd5274c323fb93e4e3ea9ad2185aaaec10f79c (patch) | |
tree | 9dafb4c7f735d9429ea461dc792933af87493c33 /src/common | |
parent | dfbb88e3bbb526dcb204b456b9e5cfd9d10d0d0a (diff) | |
parent | d5cb3bab564e0927ffac7c8729eacf181a12dd40 (diff) |
Merge from PG master upto d5cb3bab564e0927ffac7c8729eacf181a12dd40
This is the result of the "git merge remotes/PGSQL/master" upto the said commit
point. We have done some basic analysis, fixed compilation problems etc, but
bulk of the logical problems in conflict resolution etc will be handled by
subsequent commits.
Diffstat (limited to 'src/common')
30 files changed, 4836 insertions, 38 deletions
diff --git a/src/common/Makefile b/src/common/Makefile index 84735f36f3..b9c9a05fa1 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -6,11 +6,15 @@ # This makefile generates two outputs: # # libpgcommon.a - contains object files with FRONTEND defined, -# for use by client application and libraries +# for use by client applications # # libpgcommon_srv.a - contains object files without FRONTEND defined, # for use only by the backend binaries # +# You can also symlink/copy individual source files from this directory, +# to compile with different options. (libpq does that, because it needs +# to use -fPIC on some platforms.) +# # IDENTIFICATION # src/common/Makefile # @@ -36,11 +40,18 @@ override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\"" override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\"" override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\"" -OBJS_COMMON = config_info.o controldata_utils.o exec.o keywords.o \ - pg_lzcompress.o pgfnames.o psprintf.o relpath.o rmtree.o \ - string.o username.o wait_error.o +OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o ip.o \ + keywords.o md5.o pg_lzcompress.o pgfnames.o psprintf.o relpath.o \ + rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \ + username.o wait_error.o + +ifeq ($(with_openssl),yes) +OBJS_COMMON += sha2_openssl.o +else +OBJS_COMMON += sha2.o +endif -OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o restricted_token.o +OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o file_utils.o restricted_token.o OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o) diff --git a/src/common/base64.c b/src/common/base64.c new file mode 100644 index 0000000000..e8e28ecca4 --- /dev/null +++ b/src/common/base64.c @@ -0,0 +1,199 @@ +/*------------------------------------------------------------------------- + * + * base64.c + * Encoding and decoding routines for base64 without whitespace. + * + * Copyright (c) 2001-2017, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/common/base64.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/base64.h" + +/* + * BASE64 + */ + +static const char _base64[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static const int8 b64lookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, +}; + +/* + * pg_b64_encode + * + * Encode into base64 the given string. Returns the length of the encoded + * string. + */ +int +pg_b64_encode(const char *src, int len, char *dst) +{ + char *p; + const char *s, + *end = src + len; + int pos = 2; + uint32 buf = 0; + + s = src; + p = dst; + + while (s < end) + { + buf |= (unsigned char) *s << (pos << 3); + pos--; + s++; + + /* write it out */ + if (pos < 0) + { + *p++ = _base64[(buf >> 18) & 0x3f]; + *p++ = _base64[(buf >> 12) & 0x3f]; + *p++ = _base64[(buf >> 6) & 0x3f]; + *p++ = _base64[buf & 0x3f]; + + pos = 2; + buf = 0; + } + } + if (pos != 2) + { + *p++ = _base64[(buf >> 18) & 0x3f]; + *p++ = _base64[(buf >> 12) & 0x3f]; + *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '='; + *p++ = '='; + } + + return p - dst; +} + +/* + * pg_b64_decode + * + * Decode the given base64 string. Returns the length of the decoded + * string on success, and -1 in the event of an error. + */ +int +pg_b64_decode(const char *src, int len, char *dst) +{ + const char *srcend = src + len, + *s = src; + char *p = dst; + char c; + int b = 0; + uint32 buf = 0; + int pos = 0, + end = 0; + + while (s < srcend) + { + c = *s++; + + /* Leave if a whitespace is found */ + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + return -1; + + if (c == '=') + { + /* end sequence */ + if (!end) + { + if (pos == 2) + end = 1; + else if (pos == 3) + end = 2; + else + { + /* + * Unexpected "=" character found while decoding base64 + * sequence. + */ + return -1; + } + } + b = 0; + } + else + { + b = -1; + if (c > 0 && c < 127) + b = b64lookup[(unsigned char) c]; + if (b < 0) + { + /* invalid symbol found */ + return -1; + } + } + /* add it to buffer */ + buf = (buf << 6) + b; + pos++; + if (pos == 4) + { + *p++ = (buf >> 16) & 255; + if (end == 0 || end > 1) + *p++ = (buf >> 8) & 255; + if (end == 0 || end > 2) + *p++ = buf & 255; + buf = 0; + pos = 0; + } + } + + if (pos != 0) + { + /* + * base64 end sequence is invalid. Input data is missing padding, is + * truncated or is otherwise corrupted. + */ + return -1; + } + + return p - dst; +} + +/* + * pg_b64_enc_len + * + * Returns to caller the length of the string if it were encoded with + * base64 based on the length provided by caller. This is useful to + * estimate how large a buffer allocation needs to be done before doing + * the actual encoding. + */ +int +pg_b64_enc_len(int srclen) +{ + /* 3 bytes will be converted to 4 */ + return (srclen + 2) * 4 / 3; +} + +/* + * pg_b64_dec_len + * + * Returns to caller the length of the string if it were to be decoded + * with base64, based on the length given by caller. This is useful to + * estimate how large a buffer allocation needs to be done before doing + * the actual decoding. + */ +int +pg_b64_dec_len(int srclen) +{ + return (srclen * 3) >> 2; +} diff --git a/src/common/config_info.c b/src/common/config_info.c index 0fab3ab527..ad506be9ca 100644 --- a/src/common/config_info.c +++ b/src/common/config_info.c @@ -4,7 +4,7 @@ * Common code for pg_config output * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c index 5592fe7039..f1a097a974 100644 --- a/src/common/controldata_utils.c +++ b/src/common/controldata_utils.c @@ -4,7 +4,7 @@ * Common code for control data file output. * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -29,19 +29,24 @@ #include "port/pg_crc32c.h" /* - * get_controlfile(char *DataDir, const char *progname) + * get_controlfile(char *DataDir, const char *progname, bool *crc_ok_p) * - * Get controlfile values. The caller is responsible - * for pfreeing the result. + * Get controlfile values. The result is returned as a palloc'd copy of the + * control file data. + * + * crc_ok_p can be used by the caller to see whether the CRC of the control + * file data is correct. */ ControlFileData * -get_controlfile(char *DataDir, const char *progname) +get_controlfile(const char *DataDir, const char *progname, bool *crc_ok_p) { ControlFileData *ControlFile; int fd; char ControlFilePath[MAXPGPATH]; pg_crc32c crc; + AssertArg(crc_ok_p); + ControlFile = palloc(sizeof(ControlFileData)); snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); @@ -81,14 +86,7 @@ get_controlfile(char *DataDir, const char *progname) offsetof(ControlFileData, crc)); FIN_CRC32C(crc); - if (!EQ_CRC32C(crc, ControlFile->crc)) -#ifndef FRONTEND - elog(ERROR, _("calculated CRC checksum does not match value stored in file")); -#else - printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n" - "Either the file is corrupt, or it has a different layout than this program\n" - "is expecting. The results below are untrustworthy.\n\n")); -#endif + *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); /* Make sure the control file is valid byte order. */ if (ControlFile->pg_control_version % 65536 == 0 && diff --git a/src/common/exec.c b/src/common/exec.c index d736b02280..bd01c2d9a2 100644 --- a/src/common/exec.c +++ b/src/common/exec.c @@ -4,7 +4,7 @@ * Functions for finding and validating executable files * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -35,7 +35,7 @@ #define log_error4(str, param, arg1) (fprintf(stderr, str, param, arg1), fputc('\n', stderr)) #endif -#ifdef WIN32_ONLY_COMPILER +#ifdef _MSC_VER #define getcwd(cwd,len) GetCurrentDirectory(len, cwd) #endif @@ -553,6 +553,7 @@ set_pglocale_pgservice(const char *argv0, const char *app) char my_exec_path[MAXPGPATH]; char env_path[MAXPGPATH + sizeof("PGSYSCONFDIR=")]; /* longer than * PGLOCALEDIR */ + char *dup_path; /* don't set LC_ALL in the backend */ if (strcmp(app, PG_TEXTDOMAIN("postgres")) != 0) @@ -583,7 +584,9 @@ set_pglocale_pgservice(const char *argv0, const char *app) /* set for libpq to use */ snprintf(env_path, sizeof(env_path), "PGLOCALEDIR=%s", path); canonicalize_path(env_path + 12); - putenv(strdup(env_path)); + dup_path = strdup(env_path); + if (dup_path) + putenv(dup_path); } #endif @@ -594,7 +597,9 @@ set_pglocale_pgservice(const char *argv0, const char *app) /* set for libpq to use */ snprintf(env_path, sizeof(env_path), "PGSYSCONFDIR=%s", path); canonicalize_path(env_path + 13); - putenv(strdup(env_path)); + dup_path = strdup(env_path); + if (dup_path) + putenv(dup_path); } } diff --git a/src/common/fe_memutils.c b/src/common/fe_memutils.c index 58c5c4c352..fb38067d97 100644 --- a/src/common/fe_memutils.c +++ b/src/common/fe_memutils.c @@ -3,7 +3,7 @@ * fe_memutils.c * memory management support for frontend code * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/common/file_utils.c b/src/common/file_utils.c new file mode 100644 index 0000000000..7f24325b8a --- /dev/null +++ b/src/common/file_utils.c @@ -0,0 +1,403 @@ +/*------------------------------------------------------------------------- + * + * File-processing utility routines. + * + * Assorted utility functions to work on files. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/file_utils.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include <dirent.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "common/file_utils.h" + + +/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ +#if defined(HAVE_SYNC_FILE_RANGE) +#define PG_FLUSH_DATA_WORKS 1 +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) +#define PG_FLUSH_DATA_WORKS 1 +#endif + +/* + * pg_xlog has been renamed to pg_wal in version 10. + */ +#define MINIMUM_VERSION_FOR_PG_WAL 100000 + +#ifdef PG_FLUSH_DATA_WORKS +static int pre_sync_fname(const char *fname, bool isdir, + const char *progname); +#endif +static void walkdir(const char *path, + int (*action) (const char *fname, bool isdir, const char *progname), + bool process_symlinks, const char *progname); + +/* + * Issue fsync recursively on PGDATA and all its contents. + * + * We fsync regular files and directories wherever they are, but we follow + * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc. + * Other symlinks are presumed to point at files we're not responsible for + * fsyncing, and might not have privileges to write at all. + * + * serverVersion indicates the version of the server to be fsync'd. + * + * Errors are reported but not considered fatal. + */ +void +fsync_pgdata(const char *pg_data, + const char *progname, + int serverVersion) +{ + bool xlog_is_symlink; + char pg_wal[MAXPGPATH]; + char pg_tblspc[MAXPGPATH]; + + /* handle renaming of pg_xlog to pg_wal in post-10 clusters */ + snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data, + serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal"); + snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); + + /* + * If pg_wal is a symlink, we'll need to recurse into it separately, + * because the first walkdir below will ignore it. + */ + xlog_is_symlink = false; + +#ifndef WIN32 + { + struct stat st; + + if (lstat(pg_wal, &st) < 0) + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, pg_wal, strerror(errno)); + else if (S_ISLNK(st.st_mode)) + xlog_is_symlink = true; + } +#else + if (pgwin32_is_junction(pg_wal)) + xlog_is_symlink = true; +#endif + + /* + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. + */ +#ifdef PG_FLUSH_DATA_WORKS + walkdir(pg_data, pre_sync_fname, false, progname); + if (xlog_is_symlink) + walkdir(pg_wal, pre_sync_fname, false, progname); + walkdir(pg_tblspc, pre_sync_fname, true, progname); +#endif + + /* + * Now we do the fsync()s in the same order. + * + * The main call ignores symlinks, so in addition to specially processing + * pg_wal if it's a symlink, pg_tblspc has to be visited separately with + * process_symlinks = true. Note that if there are any plain directories + * in pg_tblspc, they'll get fsync'd twice. That's not an expected case + * so we don't worry about optimizing it. + */ + walkdir(pg_data, fsync_fname, false, progname); + if (xlog_is_symlink) + walkdir(pg_wal, fsync_fname, false, progname); + walkdir(pg_tblspc, fsync_fname, true, progname); +} + +/* + * Issue fsync recursively on the given directory and all its contents. + * + * This is a convenient wrapper on top of walkdir(). + */ +void +fsync_dir_recurse(const char *dir, const char *progname) +{ + /* + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. + */ +#ifdef PG_FLUSH_DATA_WORKS + walkdir(dir, pre_sync_fname, false, progname); +#endif + + walkdir(dir, fsync_fname, false, progname); +} + +/* + * walkdir: recursively walk a directory, applying the action to each + * regular file and directory (including the named directory itself). + * + * If process_symlinks is true, the action and recursion are also applied + * to regular files and directories that are pointed to by symlinks in the + * given directory; otherwise symlinks are ignored. Symlinks are always + * ignored in subdirectories, ie we intentionally don't pass down the + * process_symlinks flag to recursive calls. + * + * Errors are reported but not considered fatal. + * + * See also walkdir in fd.c, which is a backend version of this logic. + */ +static void +walkdir(const char *path, + int (*action) (const char *fname, bool isdir, const char *progname), + bool process_symlinks, const char *progname) +{ + DIR *dir; + struct dirent *de; + + dir = opendir(path); + if (dir == NULL) + { + fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), + progname, path, strerror(errno)); + return; + } + + while (errno = 0, (de = readdir(dir)) != NULL) + { + char subpath[MAXPGPATH * 2]; + struct stat fst; + int sret; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); + + if (process_symlinks) + sret = stat(subpath, &fst); + else + sret = lstat(subpath, &fst); + + if (sret < 0) + { + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, subpath, strerror(errno)); + continue; + } + + if (S_ISREG(fst.st_mode)) + (*action) (subpath, false, progname); + else if (S_ISDIR(fst.st_mode)) + walkdir(subpath, action, false, progname); + } + + if (errno) + fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), + progname, path, strerror(errno)); + + (void) closedir(dir); + + /* + * It's important to fsync the destination directory itself as individual + * file fsyncs don't guarantee that the directory entry for the file is + * synced. Recent versions of ext4 have made the window much wider but + * it's been an issue for ext3 and other filesystems in the past. + */ + (*action) (path, true, progname); +} + +/* + * Hint to the OS that it should get ready to fsync() this file. + * + * Ignores errors trying to open unreadable files, and reports other errors + * non-fatally. + */ +#ifdef PG_FLUSH_DATA_WORKS + +static int +pre_sync_fname(const char *fname, bool isdir, const char *progname) +{ + int fd; + + fd = open(fname, O_RDONLY | PG_BINARY); + + if (fd < 0) + { + if (errno == EACCES || (isdir && errno == EISDIR)) + return 0; + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + progname, fname, strerror(errno)); + return -1; + } + + /* + * We do what pg_flush_data() would do in the backend: prefer to use + * sync_file_range, but fall back to posix_fadvise. We ignore errors + * because this is only a hint. + */ +#if defined(HAVE_SYNC_FILE_RANGE) + (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) + (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); +#else +#error PG_FLUSH_DATA_WORKS should not have been defined +#endif + + (void) close(fd); + return 0; +} + +#endif /* PG_FLUSH_DATA_WORKS */ + +/* + * fsync_fname -- Try to fsync a file or directory + * + * Ignores errors trying to open unreadable files, or trying to fsync + * directories on systems where that isn't allowed/required. Reports + * other errors non-fatally. + */ +int +fsync_fname(const char *fname, bool isdir, const char *progname) +{ + int fd; + int flags; + int returncode; + + /* + * Some OSs require directories to be opened read-only whereas other + * systems don't allow us to fsync files opened read-only; so we need both + * cases here. Using O_RDWR will cause us to fail to fsync files that are + * not writable by our userid, but we assume that's OK. + */ + flags = PG_BINARY; + if (!isdir) + flags |= O_RDWR; + else + flags |= O_RDONLY; + + /* + * Open the file, silently ignoring errors about unreadable files (or + * unsupported operations, e.g. opening a directory under Windows), and + * logging others. + */ + fd = open(fname, flags); + if (fd < 0) + { + if (errno == EACCES || (isdir && errno == EISDIR)) + return 0; + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + progname, fname, strerror(errno)); + return -1; + } + + returncode = fsync(fd); + + /* + * Some OSes don't allow us to fsync directories at all, so we can ignore + * those errors. Anything else needs to be reported. + */ + if (returncode != 0 && !(isdir && errno == EBADF)) + { + fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), + progname, fname, strerror(errno)); + (void) close(fd); + return -1; + } + + (void) close(fd); + return 0; +} + +/* + * fsync_parent_path -- fsync the parent path of a file or directory + * + * This is aimed at making file operations persistent on disk in case of + * an OS crash or power failure. + */ +int +fsync_parent_path(const char *fname, const char *progname) +{ + char parentpath[MAXPGPATH]; + + strlcpy(parentpath, fname, MAXPGPATH); + get_parent_directory(parentpath); + + /* + * get_parent_directory() returns an empty string if the input argument is + * just a file name (see comments in path.c), so handle that as being the + * current directory. + */ + if (strlen(parentpath) == 0) + strlcpy(parentpath, ".", MAXPGPATH); + + if (fsync_fname(parentpath, true, progname) != 0) + return -1; + + return 0; +} + +/* + * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability + * + * Wrapper around rename, similar to the backend version. + */ +int +durable_rename(const char *oldfile, const char *newfile, const char *progname) +{ + int fd; + + /* + * First fsync the old and target path (if it exists), to ensure that they + * are properly persistent on disk. Syncing the target file is not + * strictly necessary, but it makes it easier to reason about crashes; + * because it's then guaranteed that either source or target file exists + * after a crash. + */ + if (fsync_fname(oldfile, false, progname) != 0) + return -1; + + fd = open(newfile, PG_BINARY | O_RDWR, 0); + if (fd < 0) + { + if (errno != ENOENT) + { + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + progname, newfile, strerror(errno)); + return -1; + } + } + else + { + if (fsync(fd) != 0) + { + fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), + progname, newfile, strerror(errno)); + close(fd); + return -1; + } + close(fd); + } + + /* Time to do the real deal... */ + if (rename(oldfile, newfile) != 0) + { + fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"), + progname, oldfile, newfile, strerror(errno)); + return -1; + } + + /* + * To guarantee renaming the file is persistent, fsync the file with its + * new name, and its containing directory. + */ + if (fsync_fname(newfile, false, progname) != 0) + return -1; + + if (fsync_parent_path(newfile, progname) != 0) + return -1; + + return 0; +} diff --git a/src/common/ip.c b/src/common/ip.c new file mode 100644 index 0000000000..80711dbb98 --- /dev/null +++ b/src/common/ip.c @@ -0,0 +1,259 @@ +/*------------------------------------------------------------------------- + * + * ip.c + * IPv6-aware network access. + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/ip.c + * + * This file and the IPV6 implementation were initially provided by + * Nigel Kukard <nkukard@lbsd.net>, Linux Based Systems Design + * http://www.lbsd.net. + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <unistd.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <netdb.h> +#include <netinet/in.h> +#ifdef HAVE_NETINET_TCP_H +#include <netinet/tcp.h> +#endif +#include <arpa/inet.h> +#include <sys/file.h> + +#include "common/ip.h" + + + +#ifdef HAVE_UNIX_SOCKETS +static int getaddrinfo_unix(const char *path, + const struct addrinfo * hintsp, + struct addrinfo ** result); + +static int getnameinfo_unix(const struct sockaddr_un * sa, int salen, + char *node, int nodelen, + char *service, int servicelen, + int flags); +#endif + + +/* + * pg_getaddrinfo_all - get address info for Unix, IPv4 and IPv6 sockets + */ +int +pg_getaddrinfo_all(const char *hostname, const char *servname, + const struct addrinfo * hintp, struct addrinfo ** result) +{ + int rc; + + /* not all versions of getaddrinfo() zero *result on failure */ + *result = NULL; + +#ifdef HAVE_UNIX_SOCKETS + if (hintp->ai_family == AF_UNIX) + return getaddrinfo_unix(servname, hintp, result); +#endif + + /* NULL has special meaning to getaddrinfo(). */ + rc = getaddrinfo((!hostname || hostname[0] == '\0') ? NULL : hostname, + servname, hintp, result); + + return rc; +} + + +/* + * pg_freeaddrinfo_all - free addrinfo structures for IPv4, IPv6, or Unix + * + * Note: the ai_family field of the original hint structure must be passed + * so that we can tell whether the addrinfo struct was built by the system's + * getaddrinfo() routine or our own getaddrinfo_unix() routine. Some versions + * of getaddrinfo() might be willing to return AF_UNIX addresses, so it's + * not safe to look at ai_family in the addrinfo itself. + */ +void +pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo * ai) +{ +#ifdef HAVE_UNIX_SOCKETS + if (hint_ai_family == AF_UNIX) + { + /* struct was built by getaddrinfo_unix (see pg_getaddrinfo_all) */ + while (ai != NULL) + { + struct addrinfo *p = ai; + + ai = ai->ai_next; + free(p->ai_addr); + free(p); + } + } + else +#endif /* HAVE_UNIX_SOCKETS */ + { + /* struct was built by getaddrinfo() */ + if (ai != NULL) + freeaddrinfo(ai); + } +} + + +/* + * pg_getnameinfo_all - get name info for Unix, IPv4 and IPv6 sockets + * + * The API of this routine differs from the standard getnameinfo() definition + * in two ways: first, the addr parameter is declared as sockaddr_storage + * rather than struct sockaddr, and second, the node and service fields are + * guaranteed to be filled with something even on failure return. + */ +int +pg_getnameinfo_all(const struct sockaddr_storage * addr, int salen, + char *node, int nodelen, + char *service, int servicelen, + int flags) +{ + int rc; + +#ifdef HAVE_UNIX_SOCKETS + if (addr && addr->ss_family == AF_UNIX) + rc = getnameinfo_unix((const struct sockaddr_un *) addr, salen, + node, nodelen, + service, servicelen, + flags); + else +#endif + rc = getnameinfo((const struct sockaddr *) addr, salen, + node, nodelen, + service, servicelen, + flags); + + if (rc != 0) + { + if (node) + strlcpy(node, "???", nodelen); + if (service) + strlcpy(service, "???", servicelen); + } + + return rc; +} + + +#if defined(HAVE_UNIX_SOCKETS) + +/* ------- + * getaddrinfo_unix - get unix socket info using IPv6-compatible API + * + * Bugs: only one addrinfo is set even though hintsp is NULL or + * ai_socktype is 0 + * AI_CANONNAME is not supported. + * ------- + */ +static int +getaddrinfo_unix(const char *path, const struct addrinfo * hintsp, + struct addrinfo ** result) +{ + struct addrinfo hints; + struct addrinfo *aip; + struct sockaddr_un *unp; + + *result = NULL; + + MemSet(&hints, 0, sizeof(hints)); + + if (strlen(path) >= sizeof(unp->sun_path)) + return EAI_FAIL; + + if (hintsp == NULL) + { + hints.ai_family = AF_UNIX; + hints.ai_socktype = SOCK_STREAM; + } + else + memcpy(&hints, hintsp, sizeof(hints)); + + if (hints.ai_socktype == 0) + hints.ai_socktype = SOCK_STREAM; + + if (hints.ai_family != AF_UNIX) + { + /* shouldn't have been called */ + return EAI_FAIL; + } + + aip = calloc(1, sizeof(struct addrinfo)); + if (aip == NULL) + return EAI_MEMORY; + + unp = calloc(1, sizeof(struct sockaddr_un)); + if (unp == NULL) + { + free(aip); + return EAI_MEMORY; + } + + aip->ai_family = AF_UNIX; + aip->ai_socktype = hints.ai_socktype; + aip->ai_protocol = hints.ai_protocol; + aip->ai_next = NULL; + aip->ai_canonname = NULL; + *result = aip; + + unp->sun_family = AF_UNIX; + aip->ai_addr = (struct sockaddr *) unp; + aip->ai_addrlen = sizeof(struct sockaddr_un); + + strcpy(unp->sun_path, path); + +#ifdef HAVE_STRUCT_SOCKADDR_STORAGE_SS_LEN + unp->sun_len = sizeof(struct sockaddr_un); +#endif + + return 0; +} + +/* + * Convert an address to a hostname. + */ +static int +getnameinfo_unix(const struct sockaddr_un * sa, int salen, + char *node, int nodelen, + char *service, int servicelen, + int flags) +{ + int ret = -1; + + /* Invalid arguments. */ + if (sa == NULL || sa->sun_family != AF_UNIX || + (node == NULL && service == NULL)) + return EAI_FAIL; + + if (node) + { + ret = snprintf(node, nodelen, "%s", "[local]"); + if (ret == -1 || ret > nodelen) + return EAI_MEMORY; + } + + if (service) + { + ret = snprintf(service, servicelen, "%s", sa->sun_path); + if (ret == -1 || ret > servicelen) + return EAI_MEMORY; + } + + return 0; +} +#endif /* HAVE_UNIX_SOCKETS */ diff --git a/src/common/keywords.c b/src/common/keywords.c index 485dd02e89..266c29205f 100644 --- a/src/common/keywords.c +++ b/src/common/keywords.c @@ -4,7 +4,7 @@ * lexical token lookup for key words in PostgreSQL * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/common/md5.c b/src/common/md5.c new file mode 100644 index 0000000000..6e70b218d2 --- /dev/null +++ b/src/common/md5.c @@ -0,0 +1,348 @@ +/* + * md5.c + * + * Implements the MD5 Message-Digest Algorithm as specified in + * RFC 1321. This implementation is a simple one, in that it + * needs every input byte to be buffered before doing any + * calculations. I do not expect this file to be used for + * general purpose MD5'ing of large amounts of data, only for + * generating hashed passwords from limited input. + * + * Sverre H. Huseby <sverrehu@online.no> + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/common/md5.c + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/md5.h" + + +/* + * PRIVATE FUNCTIONS + */ + + +/* + * The returned array is allocated using malloc. the caller should free it + * when it is no longer needed. + */ +static uint8 * +createPaddedCopyWithLength(const uint8 *b, uint32 *l) +{ + uint8 *ret; + uint32 q; + uint32 len, + newLen448; + uint32 len_high, + len_low; /* 64-bit value split into 32-bit sections */ + + len = ((b == NULL) ? 0 : *l); + newLen448 = len + 64 - (len % 64) - 8; + if (newLen448 <= len) + newLen448 += 64; + + *l = newLen448 + 8; + if ((ret = (uint8 *) malloc(sizeof(uint8) * *l)) == NULL) + return NULL; + + if (b != NULL) + memcpy(ret, b, sizeof(uint8) * len); + + /* pad */ + ret[len] = 0x80; + for (q = len + 1; q < newLen448; q++) + ret[q] = 0x00; + + /* append length as a 64 bit bitcount */ + len_low = len; + /* split into two 32-bit values */ + /* we only look at the bottom 32-bits */ + len_high = len >> 29; + len_low <<= 3; + q = newLen448; + ret[q++] = (len_low & 0xff); + len_low >>= 8; + ret[q++] = (len_low & 0xff); + len_low >>= 8; + ret[q++] = (len_low & 0xff); + len_low >>= 8; + ret[q++] = (len_low & 0xff); + ret[q++] = (len_high & 0xff); + len_high >>= 8; + ret[q++] = (len_high & 0xff); + len_high >>= 8; + ret[q++] = (len_high & 0xff); + len_high >>= 8; + ret[q] = (len_high & 0xff); + + return ret; +} + +#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) +#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) +#define ROT_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +static void +doTheRounds(uint32 X[16], uint32 state[4]) +{ + uint32 a, + b, + c, + d; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + + /* round 1 */ + a = b + ROT_LEFT((a + F(b, c, d) + X[0] + 0xd76aa478), 7); /* 1 */ + d = a + ROT_LEFT((d + F(a, b, c) + X[1] + 0xe8c7b756), 12); /* 2 */ + c = d + ROT_LEFT((c + F(d, a, b) + X[2] + 0x242070db), 17); /* 3 */ + b = c + ROT_LEFT((b + F(c, d, a) + X[3] + 0xc1bdceee), 22); /* 4 */ + a = b + ROT_LEFT((a + F(b, c, d) + X[4] + 0xf57c0faf), 7); /* 5 */ + d = a + ROT_LEFT((d + F(a, b, c) + X[5] + 0x4787c62a), 12); /* 6 */ + c = d + ROT_LEFT((c + F(d, a, b) + X[6] + 0xa8304613), 17); /* 7 */ + b = c + ROT_LEFT((b + F(c, d, a) + X[7] + 0xfd469501), 22); /* 8 */ + a = b + ROT_LEFT((a + F(b, c, d) + X[8] + 0x698098d8), 7); /* 9 */ + d = a + ROT_LEFT((d + F(a, b, c) + X[9] + 0x8b44f7af), 12); /* 10 */ + c = d + ROT_LEFT((c + F(d, a, b) + X[10] + 0xffff5bb1), 17); /* 11 */ + b = c + ROT_LEFT((b + F(c, d, a) + X[11] + 0x895cd7be), 22); /* 12 */ + a = b + ROT_LEFT((a + F(b, c, d) + X[12] + 0x6b901122), 7); /* 13 */ + d = a + ROT_LEFT((d + F(a, b, c) + X[13] + 0xfd987193), 12); /* 14 */ + c = d + ROT_LEFT((c + F(d, a, b) + X[14] + 0xa679438e), 17); /* 15 */ + b = c + ROT_LEFT((b + F(c, d, a) + X[15] + 0x49b40821), 22); /* 16 */ + + /* round 2 */ + a = b + ROT_LEFT((a + G(b, c, d) + X[1] + 0xf61e2562), 5); /* 17 */ + d = a + ROT_LEFT((d + G(a, b, c) + X[6] + 0xc040b340), 9); /* 18 */ + c = d + ROT_LEFT((c + G(d, a, b) + X[11] + 0x265e5a51), 14); /* 19 */ + b = c + ROT_LEFT((b + G(c, d, a) + X[0] + 0xe9b6c7aa), 20); /* 20 */ + a = b + ROT_LEFT((a + G(b, c, d) + X[5] + 0xd62f105d), 5); /* 21 */ + d = a + ROT_LEFT((d + G(a, b, c) + X[10] + 0x02441453), 9); /* 22 */ + c = d + ROT_LEFT((c + G(d, a, b) + X[15] + 0xd8a1e681), 14); /* 23 */ + b = c + ROT_LEFT((b + G(c, d, a) + X[4] + 0xe7d3fbc8), 20); /* 24 */ + a = b + ROT_LEFT((a + G(b, c, d) + X[9] + 0x21e1cde6), 5); /* 25 */ + d = a + ROT_LEFT((d + G(a, b, c) + X[14] + 0xc33707d6), 9); /* 26 */ + c = d + ROT_LEFT((c + G(d, a, b) + X[3] + 0xf4d50d87), 14); /* 27 */ + b = c + ROT_LEFT((b + G(c, d, a) + X[8] + 0x455a14ed), 20); /* 28 */ + a = b + ROT_LEFT((a + G(b, c, d) + X[13] + 0xa9e3e905), 5); /* 29 */ + d = a + ROT_LEFT((d + G(a, b, c) + X[2] + 0xfcefa3f8), 9); /* 30 */ + c = d + ROT_LEFT((c + G(d, a, b) + X[7] + 0x676f02d9), 14); /* 31 */ + b = c + ROT_LEFT((b + G(c, d, a) + X[12] + 0x8d2a4c8a), 20); /* 32 */ + + /* round 3 */ + a = b + ROT_LEFT((a + H(b, c, d) + X[5] + 0xfffa3942), 4); /* 33 */ + d = a + ROT_LEFT((d + H(a, b, c) + X[8] + 0x8771f681), 11); /* 34 */ + c = d + ROT_LEFT((c + H(d, a, b) + X[11] + 0x6d9d6122), 16); /* 35 */ + b = c + ROT_LEFT((b + H(c, d, a) + X[14] + 0xfde5380c), 23); /* 36 */ + a = b + ROT_LEFT((a + H(b, c, d) + X[1] + 0xa4beea44), 4); /* 37 */ + d = a + ROT_LEFT((d + H(a, b, c) + X[4] + 0x4bdecfa9), 11); /* 38 */ + c = d + ROT_LEFT((c + H(d, a, b) + X[7] + 0xf6bb4b60), 16); /* 39 */ + b = c + ROT_LEFT((b + H(c, d, a) + X[10] + 0xbebfbc70), 23); /* 40 */ + a = b + ROT_LEFT((a + H(b, c, d) + X[13] + 0x289b7ec6), 4); /* 41 */ + d = a + ROT_LEFT((d + H(a, b, c) + X[0] + 0xeaa127fa), 11); /* 42 */ + c = d + ROT_LEFT((c + H(d, a, b) + X[3] + 0xd4ef3085), 16); /* 43 */ + b = c + ROT_LEFT((b + H(c, d, a) + X[6] + 0x04881d05), 23); /* 44 */ + a = b + ROT_LEFT((a + H(b, c, d) + X[9] + 0xd9d4d039), 4); /* 45 */ + d = a + ROT_LEFT((d + H(a, b, c) + X[12] + 0xe6db99e5), 11); /* 46 */ + c = d + ROT_LEFT((c + H(d, a, b) + X[15] + 0x1fa27cf8), 16); /* 47 */ + b = c + ROT_LEFT((b + H(c, d, a) + X[2] + 0xc4ac5665), 23); /* 48 */ + + /* round 4 */ + a = b + ROT_LEFT((a + I(b, c, d) + X[0] + 0xf4292244), 6); /* 49 */ + d = a + ROT_LEFT((d + I(a, b, c) + X[7] + 0x432aff97), 10); /* 50 */ + c = d + ROT_LEFT((c + I(d, a, b) + X[14] + 0xab9423a7), 15); /* 51 */ + b = c + ROT_LEFT((b + I(c, d, a) + X[5] + 0xfc93a039), 21); /* 52 */ + a = b + ROT_LEFT((a + I(b, c, d) + X[12] + 0x655b59c3), 6); /* 53 */ + d = a + ROT_LEFT((d + I(a, b, c) + X[3] + 0x8f0ccc92), 10); /* 54 */ + c = d + ROT_LEFT((c + I(d, a, b) + X[10] + 0xffeff47d), 15); /* 55 */ + b = c + ROT_LEFT((b + I(c, d, a) + X[1] + 0x85845dd1), 21); /* 56 */ + a = b + ROT_LEFT((a + I(b, c, d) + X[8] + 0x6fa87e4f), 6); /* 57 */ + d = a + ROT_LEFT((d + I(a, b, c) + X[15] + 0xfe2ce6e0), 10); /* 58 */ + c = d + ROT_LEFT((c + I(d, a, b) + X[6] + 0xa3014314), 15); /* 59 */ + b = c + ROT_LEFT((b + I(c, d, a) + X[13] + 0x4e0811a1), 21); /* 60 */ + a = b + ROT_LEFT((a + I(b, c, d) + X[4] + 0xf7537e82), 6); /* 61 */ + d = a + ROT_LEFT((d + I(a, b, c) + X[11] + 0xbd3af235), 10); /* 62 */ + c = d + ROT_LEFT((c + I(d, a, b) + X[2] + 0x2ad7d2bb), 15); /* 63 */ + b = c + ROT_LEFT((b + I(c, d, a) + X[9] + 0xeb86d391), 21); /* 64 */ + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; +} + +static int +calculateDigestFromBuffer(const uint8 *b, uint32 len, uint8 sum[16]) +{ + register uint32 i, + j, + k, + newI; + uint32 l; + uint8 *input; + register uint32 *wbp; + uint32 workBuff[16], + state[4]; + + l = len; + + state[0] = 0x67452301; + state[1] = 0xEFCDAB89; + state[2] = 0x98BADCFE; + state[3] = 0x10325476; + + if ((input = createPaddedCopyWithLength(b, &l)) == NULL) + return 0; + + for (i = 0;;) + { + if ((newI = i + 16 * 4) > l) + break; + k = i + 3; + for (j = 0; j < 16; j++) + { + wbp = (workBuff + j); + *wbp = input[k--]; + *wbp <<= 8; + *wbp |= input[k--]; + *wbp <<= 8; + *wbp |= input[k--]; + *wbp <<= 8; + *wbp |= input[k]; + k += 7; + } + doTheRounds(workBuff, state); + i = newI; + } + free(input); + + j = 0; + for (i = 0; i < 4; i++) + { + k = state[i]; + sum[j++] = (k & 0xff); + k >>= 8; + sum[j++] = (k & 0xff); + k >>= 8; + sum[j++] = (k & 0xff); + k >>= 8; + sum[j++] = (k & 0xff); + } + return 1; +} + +static void +bytesToHex(uint8 b[16], char *s) +{ + static const char *hex = "0123456789abcdef"; + int q, + w; + + for (q = 0, w = 0; q < 16; q++) + { + s[w++] = hex[(b[q] >> 4) & 0x0F]; + s[w++] = hex[b[q] & 0x0F]; + } + s[w] = '\0'; +} + +/* + * PUBLIC FUNCTIONS + */ + +/* + * pg_md5_hash + * + * Calculates the MD5 sum of the bytes in a buffer. + * + * SYNOPSIS #include "md5.h" + * int pg_md5_hash(const void *buff, size_t len, char *hexsum) + * + * INPUT buff the buffer containing the bytes that you want + * the MD5 sum of. + * len number of bytes in the buffer. + * + * OUTPUT hexsum the MD5 sum as a '\0'-terminated string of + * hexadecimal digits. an MD5 sum is 16 bytes long. + * each byte is represented by two heaxadecimal + * characters. you thus need to provide an array + * of 33 characters, including the trailing '\0'. + * + * RETURNS false on failure (out of memory for internal buffers) or + * true on success. + * + * STANDARDS MD5 is described in RFC 1321. + * + * AUTHOR Sverre H. Huseby <sverrehu@online.no> + * + */ +bool +pg_md5_hash(const void *buff, size_t len, char *hexsum) +{ + uint8 sum[16]; + + if (!calculateDigestFromBuffer(buff, len, sum)) + return false; + + bytesToHex(sum, hexsum); + return true; +} + +bool +pg_md5_binary(const void *buff, size_t len, void *outbuf) +{ + if (!calculateDigestFromBuffer(buff, len, outbuf)) + return false; + return true; +} + + +/* + * Computes MD5 checksum of "passwd" (a null-terminated string) followed + * by "salt" (which need not be null-terminated). + * + * Output format is "md5" followed by a 32-hex-digit MD5 checksum. + * Hence, the output buffer "buf" must be at least 36 bytes long. + * + * Returns TRUE if okay, FALSE on error (out of memory). + */ +bool +pg_md5_encrypt(const char *passwd, const char *salt, size_t salt_len, + char *buf) +{ + size_t passwd_len = strlen(passwd); + + /* +1 here is just to avoid risk of unportable malloc(0) */ + char *crypt_buf = malloc(passwd_len + salt_len + 1); + bool ret; + + if (!crypt_buf) + return false; + + /* + * Place salt at the end because it may be known by users trying to crack + * the MD5 output. + */ + memcpy(crypt_buf, passwd, passwd_len); + memcpy(crypt_buf + passwd_len, salt, salt_len); + + strcpy(buf, "md5"); + ret = pg_md5_hash(crypt_buf, passwd_len + salt_len, buf + 3); + + free(crypt_buf); + + return ret; +} diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c index 985841de29..5ec93ec7a6 100644 --- a/src/common/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -166,7 +166,7 @@ * * Jan Wieck * - * Copyright (c) 1999-2016, PostgreSQL Global Development Group + * Copyright (c) 1999-2017, PostgreSQL Global Development Group * * src/common/pg_lzcompress.c * ---------- diff --git a/src/common/pgfnames.c b/src/common/pgfnames.c index ab3ab800c1..e161d7dc04 100644 --- a/src/common/pgfnames.c +++ b/src/common/pgfnames.c @@ -3,7 +3,7 @@ * pgfnames.c * directory handling functions * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/common/psprintf.c b/src/common/psprintf.c index 8ca6f9cea8..8561e9aed6 100644 --- a/src/common/psprintf.c +++ b/src/common/psprintf.c @@ -4,7 +4,7 @@ * sprintf into an allocated-on-demand buffer * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/common/relpath.c b/src/common/relpath.c index c3ca85726e..66c0ad489a 100644 --- a/src/common/relpath.c +++ b/src/common/relpath.c @@ -4,7 +4,7 @@ * * This module also contains some logic associated with fork names. * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/common/restricted_token.c b/src/common/restricted_token.c index d155b5ed0e..1a00293695 100644 --- a/src/common/restricted_token.c +++ b/src/common/restricted_token.c @@ -4,7 +4,7 @@ * helper routine to ensure restricted token on Windows * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/common/rmtree.c b/src/common/rmtree.c index 8a61213e02..09824b5463 100644 --- a/src/common/rmtree.c +++ b/src/common/rmtree.c @@ -2,7 +2,7 @@ * * rmtree.c * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION diff --git a/src/common/saslprep.c b/src/common/saslprep.c new file mode 100644 index 0000000000..0a3585850b --- /dev/null +++ b/src/common/saslprep.c @@ -0,0 +1,1279 @@ +/*------------------------------------------------------------------------- + * saslprep.c + * SASLprep normalization, for SCRAM authentication + * + * The SASLprep algorithm is used to process a user-supplied password into + * canonical form. For more details, see: + * + * [RFC3454] Preparation of Internationalized Strings ("stringprep"), + * http://www.ietf.org/rfc/rfc3454.txt + * + * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords + * http://www.ietf.org/rfc/rfc4013.txt + * + * + * Portions Copyright (c) 2017, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/saslprep.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/saslprep.h" +#include "common/unicode_norm.h" + +/* + * Note: The functions in this file depend on functions from + * src/backend/utils/mb/wchar.c, so in order to use this in frontend + * code, you will need to link that in, too. + */ +#include "mb/pg_wchar.h" + +/* + * Limit on how large password's we will try to process. A password + * larger than this will be treated the same as out-of-memory. + */ +#define MAX_PASSWORD_LENGTH 1024 + +/* + * In backend, we will use palloc/pfree. In frontend, use malloc, and + * return SASLPREP_OOM on out-of-memory. + */ +#ifndef FRONTEND +#define STRDUP(s) pstrdup(s) +#define ALLOC(size) palloc(size) +#define FREE(size) pfree(size) +#else +#define STRDUP(s) strdup(s) +#define ALLOC(size) malloc(size) +#define FREE(size) free(size) +#endif + +/* Prototypes for local functions */ +static int codepoint_range_cmp(const void *a, const void *b); +static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize); +static int pg_utf8_string_len(const char *source); +static bool pg_is_ascii_string(const char *p); + +/* + * Stringprep Mapping Tables. + * + * The stringprep specification includes a number of tables of Unicode + * codepoints, used in different parts of the algorithm. They are below, + * as arrays of codepoint ranges. Each range is a pair of codepoints, + * for the first and last codepoint included the range (inclusive!). + */ + +/* + * C.1.2 Non-ASCII space characters + * + * These are all mapped to the ASCII space character (U+00A0). + */ +static const pg_wchar non_ascii_space_ranges[] = +{ + 0x00A0, 0x00A0, + 0x1680, 0x1680, + 0x2000, 0x200B, + 0x202F, 0x202F, + 0x205F, 0x205F, + 0x3000, 0x3000 +}; + +/* + * B.1 Commonly mapped to nothing + * + * If any of these appear in the input, they are removed. + */ +static const pg_wchar commonly_mapped_to_nothing_ranges[] = +{ + 0x00AD, 0x00AD, + 0x034F, 0x034F, + 0x1806, 0x1806, + 0x180B, 0x180D, + 0x200B, 0x200D, + 0x2060, 0x2060, + 0xFE00, 0xFE0F, + 0xFEFF, 0xFEFF +}; + +/* + * prohibited_output_ranges is a union of all the characters from + * the following tables: + * + * C.1.2 Non-ASCII space characters + * C.2.1 ASCII control characters + * C.2.2 Non-ASCII control characters + * C.3 Private Use characters + * C.4 Non-character code points + * C.5 Surrogate code points + * C.6 Inappropriate for plain text characters + * C.7 Inappropriate for canonical representation characters + * C.7 Change display properties or deprecated characters + * C.8 Tagging characters + * + * These are the tables that are listed as "prohibited output" + * characters in the SASLprep profile. + * + * The comment after each code range indicates which source table + * the code came from. Note that there is some overlap in the source + * tables, so one code might originate from multiple source tables. + * Adjacent ranges have also been merged together, to save space. + */ +static const pg_wchar prohibited_output_ranges[] = +{ + 0x0000, 0x001F, /* C.2.1 */ + 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */ + 0x0340, 0x0341, /* C.8 */ + 0x06DD, 0x06DD, /* C.2.2 */ + 0x070F, 0x070F, /* C.2.2 */ + 0x1680, 0x1680, /* C.1.2 */ + 0x180E, 0x180E, /* C.2.2 */ + 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */ + 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */ + 0x205F, 0x2063, /* C.1.2, C.2.2 */ + 0x206A, 0x206F, /* C.2.2, C.8 */ + 0x2FF0, 0x2FFB, /* C.7 */ + 0x3000, 0x3000, /* C.1.2 */ + 0xD800, 0xF8FF, /* C.3, C.5 */ + 0xFDD0, 0xFDEF, /* C.4 */ + 0xFEFF, 0xFEFF, /* C.2.2 */ + 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */ + 0x1D173, 0x1D17A, /* C.2.2 */ + 0x1FFFE, 0x1FFFF, /* C.4 */ + 0x2FFFE, 0x2FFFF, /* C.4 */ + 0x3FFFE, 0x3FFFF, /* C.4 */ + 0x4FFFE, 0x4FFFF, /* C.4 */ + 0x5FFFE, 0x5FFFF, /* C.4 */ + 0x6FFFE, 0x6FFFF, /* C.4 */ + 0x7FFFE, 0x7FFFF, /* C.4 */ + 0x8FFFE, 0x8FFFF, /* C.4 */ + 0x9FFFE, 0x9FFFF, /* C.4 */ + 0xAFFFE, 0xAFFFF, /* C.4 */ + 0xBFFFE, 0xBFFFF, /* C.4 */ + 0xCFFFE, 0xCFFFF, /* C.4 */ + 0xDFFFE, 0xDFFFF, /* C.4 */ + 0xE0001, 0xE0001, /* C.9 */ + 0xE0020, 0xE007F, /* C.9 */ + 0xEFFFE, 0xEFFFF, /* C.4 */ + 0xF0000, 0xFFFFF, /* C.3, C.4 */ + 0x100000, 0x10FFFF /* C.3, C.4 */ +}; + +/* A.1 Unassigned code points in Unicode 3.2 */ +static const pg_wchar unassigned_codepoint_ranges[] = +{ + 0x0221, 0x0221, + 0x0234, 0x024F, + 0x02AE, 0x02AF, + 0x02EF, 0x02FF, + 0x0350, 0x035F, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037B, 0x037D, + 0x037F, 0x0383, + 0x038B, 0x038B, + 0x038D, 0x038D, + 0x03A2, 0x03A2, + 0x03CF, 0x03CF, + 0x03F7, 0x03FF, + 0x0487, 0x0487, + 0x04CF, 0x04CF, + 0x04F6, 0x04F7, + 0x04FA, 0x04FF, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058B, 0x0590, + 0x05A2, 0x05A2, + 0x05BA, 0x05BA, + 0x05C5, 0x05CF, + 0x05EB, 0x05EF, + 0x05F5, 0x060B, + 0x060D, 0x061A, + 0x061C, 0x061E, + 0x0620, 0x0620, + 0x063B, 0x063F, + 0x0656, 0x065F, + 0x06EE, 0x06EF, + 0x06FF, 0x06FF, + 0x070E, 0x070E, + 0x072D, 0x072F, + 0x074B, 0x077F, + 0x07B2, 0x0900, + 0x0904, 0x0904, + 0x093A, 0x093B, + 0x094E, 0x094F, + 0x0955, 0x0957, + 0x0971, 0x0980, + 0x0984, 0x0984, + 0x098D, 0x098E, + 0x0991, 0x0992, + 0x09A9, 0x09A9, + 0x09B1, 0x09B1, + 0x09B3, 0x09B5, + 0x09BA, 0x09BB, + 0x09BD, 0x09BD, + 0x09C5, 0x09C6, + 0x09C9, 0x09CA, + 0x09CE, 0x09D6, + 0x09D8, 0x09DB, + 0x09DE, 0x09DE, + 0x09E4, 0x09E5, + 0x09FB, 0x0A01, + 0x0A03, 0x0A04, + 0x0A0B, 0x0A0E, + 0x0A11, 0x0A12, + 0x0A29, 0x0A29, + 0x0A31, 0x0A31, + 0x0A34, 0x0A34, + 0x0A37, 0x0A37, + 0x0A3A, 0x0A3B, + 0x0A3D, 0x0A3D, + 0x0A43, 0x0A46, + 0x0A49, 0x0A4A, + 0x0A4E, 0x0A58, + 0x0A5D, 0x0A5D, + 0x0A5F, 0x0A65, + 0x0A75, 0x0A80, + 0x0A84, 0x0A84, + 0x0A8C, 0x0A8C, + 0x0A8E, 0x0A8E, + 0x0A92, 0x0A92, + 0x0AA9, 0x0AA9, + 0x0AB1, 0x0AB1, + 0x0AB4, 0x0AB4, + 0x0ABA, 0x0ABB, + 0x0AC6, 0x0AC6, + 0x0ACA, 0x0ACA, + 0x0ACE, 0x0ACF, + 0x0AD1, 0x0ADF, + 0x0AE1, 0x0AE5, + 0x0AF0, 0x0B00, + 0x0B04, 0x0B04, + 0x0B0D, 0x0B0E, + 0x0B11, 0x0B12, + 0x0B29, 0x0B29, + 0x0B31, 0x0B31, + 0x0B34, 0x0B35, + 0x0B3A, 0x0B3B, + 0x0B44, 0x0B46, + 0x0B49, 0x0B4A, + 0x0B4E, 0x0B55, + 0x0B58, 0x0B5B, + 0x0B5E, 0x0B5E, + 0x0B62, 0x0B65, + 0x0B71, 0x0B81, + 0x0B84, 0x0B84, + 0x0B8B, 0x0B8D, + 0x0B91, 0x0B91, + 0x0B96, 0x0B98, + 0x0B9B, 0x0B9B, + 0x0B9D, 0x0B9D, + 0x0BA0, 0x0BA2, + 0x0BA5, 0x0BA7, + 0x0BAB, 0x0BAD, + 0x0BB6, 0x0BB6, + 0x0BBA, 0x0BBD, + 0x0BC3, 0x0BC5, + 0x0BC9, 0x0BC9, + 0x0BCE, 0x0BD6, + 0x0BD8, 0x0BE6, + 0x0BF3, 0x0C00, + 0x0C04, 0x0C04, + 0x0C0D, 0x0C0D, + 0x0C11, 0x0C11, + 0x0C29, 0x0C29, + 0x0C34, 0x0C34, + 0x0C3A, 0x0C3D, + 0x0C45, 0x0C45, + 0x0C49, 0x0C49, + 0x0C4E, 0x0C54, + 0x0C57, 0x0C5F, + 0x0C62, 0x0C65, + 0x0C70, 0x0C81, + 0x0C84, 0x0C84, + 0x0C8D, 0x0C8D, + 0x0C91, 0x0C91, + 0x0CA9, 0x0CA9, + 0x0CB4, 0x0CB4, + 0x0CBA, 0x0CBD, + 0x0CC5, 0x0CC5, + 0x0CC9, 0x0CC9, + 0x0CCE, 0x0CD4, + 0x0CD7, 0x0CDD, + 0x0CDF, 0x0CDF, + 0x0CE2, 0x0CE5, + 0x0CF0, 0x0D01, + 0x0D04, 0x0D04, + 0x0D0D, 0x0D0D, + 0x0D11, 0x0D11, + 0x0D29, 0x0D29, + 0x0D3A, 0x0D3D, + 0x0D44, 0x0D45, + 0x0D49, 0x0D49, + 0x0D4E, 0x0D56, + 0x0D58, 0x0D5F, + 0x0D62, 0x0D65, + 0x0D70, 0x0D81, + 0x0D84, 0x0D84, + 0x0D97, 0x0D99, + 0x0DB2, 0x0DB2, + 0x0DBC, 0x0DBC, + 0x0DBE, 0x0DBF, + 0x0DC7, 0x0DC9, + 0x0DCB, 0x0DCE, + 0x0DD5, 0x0DD5, + 0x0DD7, 0x0DD7, + 0x0DE0, 0x0DF1, + 0x0DF5, 0x0E00, + 0x0E3B, 0x0E3E, + 0x0E5C, 0x0E80, + 0x0E83, 0x0E83, + 0x0E85, 0x0E86, + 0x0E89, 0x0E89, + 0x0E8B, 0x0E8C, + 0x0E8E, 0x0E93, + 0x0E98, 0x0E98, + 0x0EA0, 0x0EA0, + 0x0EA4, 0x0EA4, + 0x0EA6, 0x0EA6, + 0x0EA8, 0x0EA9, + 0x0EAC, 0x0EAC, + 0x0EBA, 0x0EBA, + 0x0EBE, 0x0EBF, + 0x0EC5, 0x0EC5, + 0x0EC7, 0x0EC7, + 0x0ECE, 0x0ECF, + 0x0EDA, 0x0EDB, + 0x0EDE, 0x0EFF, + 0x0F48, 0x0F48, + 0x0F6B, 0x0F70, + 0x0F8C, 0x0F8F, + 0x0F98, 0x0F98, + 0x0FBD, 0x0FBD, + 0x0FCD, 0x0FCE, + 0x0FD0, 0x0FFF, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102B, 0x102B, + 0x1033, 0x1035, + 0x103A, 0x103F, + 0x105A, 0x109F, + 0x10C6, 0x10CF, + 0x10F9, 0x10FA, + 0x10FC, 0x10FF, + 0x115A, 0x115E, + 0x11A3, 0x11A7, + 0x11FA, 0x11FF, + 0x1207, 0x1207, + 0x1247, 0x1247, + 0x1249, 0x1249, + 0x124E, 0x124F, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125E, 0x125F, + 0x1287, 0x1287, + 0x1289, 0x1289, + 0x128E, 0x128F, + 0x12AF, 0x12AF, + 0x12B1, 0x12B1, + 0x12B6, 0x12B7, + 0x12BF, 0x12BF, + 0x12C1, 0x12C1, + 0x12C6, 0x12C7, + 0x12CF, 0x12CF, + 0x12D7, 0x12D7, + 0x12EF, 0x12EF, + 0x130F, 0x130F, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x131F, 0x131F, + 0x1347, 0x1347, + 0x135B, 0x1360, + 0x137D, 0x139F, + 0x13F5, 0x1400, + 0x1677, 0x167F, + 0x169D, 0x169F, + 0x16F1, 0x16FF, + 0x170D, 0x170D, + 0x1715, 0x171F, + 0x1737, 0x173F, + 0x1754, 0x175F, + 0x176D, 0x176D, + 0x1771, 0x1771, + 0x1774, 0x177F, + 0x17DD, 0x17DF, + 0x17EA, 0x17FF, + 0x180F, 0x180F, + 0x181A, 0x181F, + 0x1878, 0x187F, + 0x18AA, 0x1DFF, + 0x1E9C, 0x1E9F, + 0x1EFA, 0x1EFF, + 0x1F16, 0x1F17, + 0x1F1E, 0x1F1F, + 0x1F46, 0x1F47, + 0x1F4E, 0x1F4F, + 0x1F58, 0x1F58, + 0x1F5A, 0x1F5A, + 0x1F5C, 0x1F5C, + 0x1F5E, 0x1F5E, + 0x1F7E, 0x1F7F, + 0x1FB5, 0x1FB5, + 0x1FC5, 0x1FC5, + 0x1FD4, 0x1FD5, + 0x1FDC, 0x1FDC, + 0x1FF0, 0x1FF1, + 0x1FF5, 0x1FF5, + 0x1FFF, 0x1FFF, + 0x2053, 0x2056, + 0x2058, 0x205E, + 0x2064, 0x2069, + 0x2072, 0x2073, + 0x208F, 0x209F, + 0x20B2, 0x20CF, + 0x20EB, 0x20FF, + 0x213B, 0x213C, + 0x214C, 0x2152, + 0x2184, 0x218F, + 0x23CF, 0x23FF, + 0x2427, 0x243F, + 0x244B, 0x245F, + 0x24FF, 0x24FF, + 0x2614, 0x2615, + 0x2618, 0x2618, + 0x267E, 0x267F, + 0x268A, 0x2700, + 0x2705, 0x2705, + 0x270A, 0x270B, + 0x2728, 0x2728, + 0x274C, 0x274C, + 0x274E, 0x274E, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275F, 0x2760, + 0x2795, 0x2797, + 0x27B0, 0x27B0, + 0x27BF, 0x27CF, + 0x27EC, 0x27EF, + 0x2B00, 0x2E7F, + 0x2E9A, 0x2E9A, + 0x2EF4, 0x2EFF, + 0x2FD6, 0x2FEF, + 0x2FFC, 0x2FFF, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312D, 0x3130, + 0x318F, 0x318F, + 0x31B8, 0x31EF, + 0x321D, 0x321F, + 0x3244, 0x3250, + 0x327C, 0x327E, + 0x32CC, 0x32CF, + 0x32FF, 0x32FF, + 0x3377, 0x337A, + 0x33DE, 0x33DF, + 0x33FF, 0x33FF, + 0x4DB6, 0x4DFF, + 0x9FA6, 0x9FFF, + 0xA48D, 0xA48F, + 0xA4C7, 0xABFF, + 0xD7A4, 0xD7FF, + 0xFA2E, 0xFA2F, + 0xFA6B, 0xFAFF, + 0xFB07, 0xFB12, + 0xFB18, 0xFB1C, + 0xFB37, 0xFB37, + 0xFB3D, 0xFB3D, + 0xFB3F, 0xFB3F, + 0xFB42, 0xFB42, + 0xFB45, 0xFB45, + 0xFBB2, 0xFBD2, + 0xFD40, 0xFD4F, + 0xFD90, 0xFD91, + 0xFDC8, 0xFDCF, + 0xFDFD, 0xFDFF, + 0xFE10, 0xFE1F, + 0xFE24, 0xFE2F, + 0xFE47, 0xFE48, + 0xFE53, 0xFE53, + 0xFE67, 0xFE67, + 0xFE6C, 0xFE6F, + 0xFE75, 0xFE75, + 0xFEFD, 0xFEFE, + 0xFF00, 0xFF00, + 0xFFBF, 0xFFC1, + 0xFFC8, 0xFFC9, + 0xFFD0, 0xFFD1, + 0xFFD8, 0xFFD9, + 0xFFDD, 0xFFDF, + 0xFFE7, 0xFFE7, + 0xFFEF, 0xFFF8, + 0x10000, 0x102FF, + 0x1031F, 0x1031F, + 0x10324, 0x1032F, + 0x1034B, 0x103FF, + 0x10426, 0x10427, + 0x1044E, 0x1CFFF, + 0x1D0F6, 0x1D0FF, + 0x1D127, 0x1D129, + 0x1D1DE, 0x1D3FF, + 0x1D455, 0x1D455, + 0x1D49D, 0x1D49D, + 0x1D4A0, 0x1D4A1, + 0x1D4A3, 0x1D4A4, + 0x1D4A7, 0x1D4A8, + 0x1D4AD, 0x1D4AD, + 0x1D4BA, 0x1D4BA, + 0x1D4BC, 0x1D4BC, + 0x1D4C1, 0x1D4C1, + 0x1D4C4, 0x1D4C4, + 0x1D506, 0x1D506, + 0x1D50B, 0x1D50C, + 0x1D515, 0x1D515, + 0x1D51D, 0x1D51D, + 0x1D53A, 0x1D53A, + 0x1D53F, 0x1D53F, + 0x1D545, 0x1D545, + 0x1D547, 0x1D549, + 0x1D551, 0x1D551, + 0x1D6A4, 0x1D6A7, + 0x1D7CA, 0x1D7CD, + 0x1D800, 0x1FFFD, + 0x2A6D7, 0x2F7FF, + 0x2FA1E, 0x2FFFD, + 0x30000, 0x3FFFD, + 0x40000, 0x4FFFD, + 0x50000, 0x5FFFD, + 0x60000, 0x6FFFD, + 0x70000, 0x7FFFD, + 0x80000, 0x8FFFD, + 0x90000, 0x9FFFD, + 0xA0000, 0xAFFFD, + 0xB0000, 0xBFFFD, + 0xC0000, 0xCFFFD, + 0xD0000, 0xDFFFD, + 0xE0000, 0xE0000, + 0xE0002, 0xE001F, + 0xE0080, 0xEFFFD +}; + +/* D.1 Characters with bidirectional property "R" or "AL" */ +static const pg_wchar RandALCat_codepoint_ranges[] = +{ + 0x05BE, 0x05BE, + 0x05C0, 0x05C0, + 0x05C3, 0x05C3, + 0x05D0, 0x05EA, + 0x05F0, 0x05F4, + 0x061B, 0x061B, + 0x061F, 0x061F, + 0x0621, 0x063A, + 0x0640, 0x064A, + 0x066D, 0x066F, + 0x0671, 0x06D5, + 0x06DD, 0x06DD, + 0x06E5, 0x06E6, + 0x06FA, 0x06FE, + 0x0700, 0x070D, + 0x0710, 0x0710, + 0x0712, 0x072C, + 0x0780, 0x07A5, + 0x07B1, 0x07B1, + 0x200F, 0x200F, + 0xFB1D, 0xFB1D, + 0xFB1F, 0xFB28, + 0xFB2A, 0xFB36, + 0xFB38, 0xFB3C, + 0xFB3E, 0xFB3E, + 0xFB40, 0xFB41, + 0xFB43, 0xFB44, + 0xFB46, 0xFBB1, + 0xFBD3, 0xFD3D, + 0xFD50, 0xFD8F, + 0xFD92, 0xFDC7, + 0xFDF0, 0xFDFC, + 0xFE70, 0xFE74, + 0xFE76, 0xFEFC +}; + +/* D.2 Characters with bidirectional property "L" */ +static const pg_wchar LCat_codepoint_ranges[] = +{ + 0x0041, 0x005A, + 0x0061, 0x007A, + 0x00AA, 0x00AA, + 0x00B5, 0x00B5, + 0x00BA, 0x00BA, + 0x00C0, 0x00D6, + 0x00D8, 0x00F6, + 0x00F8, 0x0220, + 0x0222, 0x0233, + 0x0250, 0x02AD, + 0x02B0, 0x02B8, + 0x02BB, 0x02C1, + 0x02D0, 0x02D1, + 0x02E0, 0x02E4, + 0x02EE, 0x02EE, + 0x037A, 0x037A, + 0x0386, 0x0386, + 0x0388, 0x038A, + 0x038C, 0x038C, + 0x038E, 0x03A1, + 0x03A3, 0x03CE, + 0x03D0, 0x03F5, + 0x0400, 0x0482, + 0x048A, 0x04CE, + 0x04D0, 0x04F5, + 0x04F8, 0x04F9, + 0x0500, 0x050F, + 0x0531, 0x0556, + 0x0559, 0x055F, + 0x0561, 0x0587, + 0x0589, 0x0589, + 0x0903, 0x0903, + 0x0905, 0x0939, + 0x093D, 0x0940, + 0x0949, 0x094C, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x0964, 0x0970, + 0x0982, 0x0983, + 0x0985, 0x098C, + 0x098F, 0x0990, + 0x0993, 0x09A8, + 0x09AA, 0x09B0, + 0x09B2, 0x09B2, + 0x09B6, 0x09B9, + 0x09BE, 0x09C0, + 0x09C7, 0x09C8, + 0x09CB, 0x09CC, + 0x09D7, 0x09D7, + 0x09DC, 0x09DD, + 0x09DF, 0x09E1, + 0x09E6, 0x09F1, + 0x09F4, 0x09FA, + 0x0A05, 0x0A0A, + 0x0A0F, 0x0A10, + 0x0A13, 0x0A28, + 0x0A2A, 0x0A30, + 0x0A32, 0x0A33, + 0x0A35, 0x0A36, + 0x0A38, 0x0A39, + 0x0A3E, 0x0A40, + 0x0A59, 0x0A5C, + 0x0A5E, 0x0A5E, + 0x0A66, 0x0A6F, + 0x0A72, 0x0A74, + 0x0A83, 0x0A83, + 0x0A85, 0x0A8B, + 0x0A8D, 0x0A8D, + 0x0A8F, 0x0A91, + 0x0A93, 0x0AA8, + 0x0AAA, 0x0AB0, + 0x0AB2, 0x0AB3, + 0x0AB5, 0x0AB9, + 0x0ABD, 0x0AC0, + 0x0AC9, 0x0AC9, + 0x0ACB, 0x0ACC, + 0x0AD0, 0x0AD0, + 0x0AE0, 0x0AE0, + 0x0AE6, 0x0AEF, + 0x0B02, 0x0B03, + 0x0B05, 0x0B0C, + 0x0B0F, 0x0B10, + 0x0B13, 0x0B28, + 0x0B2A, 0x0B30, + 0x0B32, 0x0B33, + 0x0B36, 0x0B39, + 0x0B3D, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B48, + 0x0B4B, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B5C, 0x0B5D, + 0x0B5F, 0x0B61, + 0x0B66, 0x0B70, + 0x0B83, 0x0B83, + 0x0B85, 0x0B8A, + 0x0B8E, 0x0B90, + 0x0B92, 0x0B95, + 0x0B99, 0x0B9A, + 0x0B9C, 0x0B9C, + 0x0B9E, 0x0B9F, + 0x0BA3, 0x0BA4, + 0x0BA8, 0x0BAA, + 0x0BAE, 0x0BB5, + 0x0BB7, 0x0BB9, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BC2, + 0x0BC6, 0x0BC8, + 0x0BCA, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0BE7, 0x0BF2, + 0x0C01, 0x0C03, + 0x0C05, 0x0C0C, + 0x0C0E, 0x0C10, + 0x0C12, 0x0C28, + 0x0C2A, 0x0C33, + 0x0C35, 0x0C39, + 0x0C41, 0x0C44, + 0x0C60, 0x0C61, + 0x0C66, 0x0C6F, + 0x0C82, 0x0C83, + 0x0C85, 0x0C8C, + 0x0C8E, 0x0C90, + 0x0C92, 0x0CA8, + 0x0CAA, 0x0CB3, + 0x0CB5, 0x0CB9, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CC8, + 0x0CCA, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0CDE, 0x0CDE, + 0x0CE0, 0x0CE1, + 0x0CE6, 0x0CEF, + 0x0D02, 0x0D03, + 0x0D05, 0x0D0C, + 0x0D0E, 0x0D10, + 0x0D12, 0x0D28, + 0x0D2A, 0x0D39, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D48, + 0x0D4A, 0x0D4C, + 0x0D57, 0x0D57, + 0x0D60, 0x0D61, + 0x0D66, 0x0D6F, + 0x0D82, 0x0D83, + 0x0D85, 0x0D96, + 0x0D9A, 0x0DB1, + 0x0DB3, 0x0DBB, + 0x0DBD, 0x0DBD, + 0x0DC0, 0x0DC6, + 0x0DCF, 0x0DD1, + 0x0DD8, 0x0DDF, + 0x0DF2, 0x0DF4, + 0x0E01, 0x0E30, + 0x0E32, 0x0E33, + 0x0E40, 0x0E46, + 0x0E4F, 0x0E5B, + 0x0E81, 0x0E82, + 0x0E84, 0x0E84, + 0x0E87, 0x0E88, + 0x0E8A, 0x0E8A, + 0x0E8D, 0x0E8D, + 0x0E94, 0x0E97, + 0x0E99, 0x0E9F, + 0x0EA1, 0x0EA3, + 0x0EA5, 0x0EA5, + 0x0EA7, 0x0EA7, + 0x0EAA, 0x0EAB, + 0x0EAD, 0x0EB0, + 0x0EB2, 0x0EB3, + 0x0EBD, 0x0EBD, + 0x0EC0, 0x0EC4, + 0x0EC6, 0x0EC6, + 0x0ED0, 0x0ED9, + 0x0EDC, 0x0EDD, + 0x0F00, 0x0F17, + 0x0F1A, 0x0F34, + 0x0F36, 0x0F36, + 0x0F38, 0x0F38, + 0x0F3E, 0x0F47, + 0x0F49, 0x0F6A, + 0x0F7F, 0x0F7F, + 0x0F85, 0x0F85, + 0x0F88, 0x0F8B, + 0x0FBE, 0x0FC5, + 0x0FC7, 0x0FCC, + 0x0FCF, 0x0FCF, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102A, + 0x102C, 0x102C, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x1040, 0x1057, + 0x10A0, 0x10C5, + 0x10D0, 0x10F8, + 0x10FB, 0x10FB, + 0x1100, 0x1159, + 0x115F, 0x11A2, + 0x11A8, 0x11F9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124A, 0x124D, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125A, 0x125D, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128A, 0x128D, + 0x1290, 0x12AE, + 0x12B0, 0x12B0, + 0x12B2, 0x12B5, + 0x12B8, 0x12BE, + 0x12C0, 0x12C0, + 0x12C2, 0x12C5, + 0x12C8, 0x12CE, + 0x12D0, 0x12D6, + 0x12D8, 0x12EE, + 0x12F0, 0x130E, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131E, + 0x1320, 0x1346, + 0x1348, 0x135A, + 0x1361, 0x137C, + 0x13A0, 0x13F4, + 0x1401, 0x1676, + 0x1681, 0x169A, + 0x16A0, 0x16F0, + 0x1700, 0x170C, + 0x170E, 0x1711, + 0x1720, 0x1731, + 0x1735, 0x1736, + 0x1740, 0x1751, + 0x1760, 0x176C, + 0x176E, 0x1770, + 0x1780, 0x17B6, + 0x17BE, 0x17C5, + 0x17C7, 0x17C8, + 0x17D4, 0x17DA, + 0x17DC, 0x17DC, + 0x17E0, 0x17E9, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18A8, + 0x1E00, 0x1E9B, + 0x1EA0, 0x1EF9, + 0x1F00, 0x1F15, + 0x1F18, 0x1F1D, + 0x1F20, 0x1F45, + 0x1F48, 0x1F4D, + 0x1F50, 0x1F57, + 0x1F59, 0x1F59, + 0x1F5B, 0x1F5B, + 0x1F5D, 0x1F5D, + 0x1F5F, 0x1F7D, + 0x1F80, 0x1FB4, + 0x1FB6, 0x1FBC, + 0x1FBE, 0x1FBE, + 0x1FC2, 0x1FC4, + 0x1FC6, 0x1FCC, + 0x1FD0, 0x1FD3, + 0x1FD6, 0x1FDB, + 0x1FE0, 0x1FEC, + 0x1FF2, 0x1FF4, + 0x1FF6, 0x1FFC, + 0x200E, 0x200E, + 0x2071, 0x2071, + 0x207F, 0x207F, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210A, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211D, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212A, 0x212D, + 0x212F, 0x2131, + 0x2133, 0x2139, + 0x213D, 0x213F, + 0x2145, 0x2149, + 0x2160, 0x2183, + 0x2336, 0x237A, + 0x2395, 0x2395, + 0x249C, 0x24E9, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303C, + 0x3041, 0x3096, + 0x309D, 0x309F, + 0x30A1, 0x30FA, + 0x30FC, 0x30FF, + 0x3105, 0x312C, + 0x3131, 0x318E, + 0x3190, 0x31B7, + 0x31F0, 0x321C, + 0x3220, 0x3243, + 0x3260, 0x327B, + 0x327F, 0x32B0, + 0x32C0, 0x32CB, + 0x32D0, 0x32FE, + 0x3300, 0x3376, + 0x337B, 0x33DD, + 0x33E0, 0x33FE, + 0x3400, 0x4DB5, + 0x4E00, 0x9FA5, + 0xA000, 0xA48C, + 0xAC00, 0xD7A3, + 0xD800, 0xFA2D, + 0xFA30, 0xFA6A, + 0xFB00, 0xFB06, + 0xFB13, 0xFB17, + 0xFF21, 0xFF3A, + 0xFF41, 0xFF5A, + 0xFF66, 0xFFBE, + 0xFFC2, 0xFFC7, + 0xFFCA, 0xFFCF, + 0xFFD2, 0xFFD7, + 0xFFDA, 0xFFDC, + 0x10300, 0x1031E, + 0x10320, 0x10323, + 0x10330, 0x1034A, + 0x10400, 0x10425, + 0x10428, 0x1044D, + 0x1D000, 0x1D0F5, + 0x1D100, 0x1D126, + 0x1D12A, 0x1D166, + 0x1D16A, 0x1D172, + 0x1D183, 0x1D184, + 0x1D18C, 0x1D1A9, + 0x1D1AE, 0x1D1DD, + 0x1D400, 0x1D454, + 0x1D456, 0x1D49C, + 0x1D49E, 0x1D49F, + 0x1D4A2, 0x1D4A2, + 0x1D4A5, 0x1D4A6, + 0x1D4A9, 0x1D4AC, + 0x1D4AE, 0x1D4B9, + 0x1D4BB, 0x1D4BB, + 0x1D4BD, 0x1D4C0, + 0x1D4C2, 0x1D4C3, + 0x1D4C5, 0x1D505, + 0x1D507, 0x1D50A, + 0x1D50D, 0x1D514, + 0x1D516, 0x1D51C, + 0x1D51E, 0x1D539, + 0x1D53B, 0x1D53E, + 0x1D540, 0x1D544, + 0x1D546, 0x1D546, + 0x1D54A, 0x1D550, + 0x1D552, 0x1D6A3, + 0x1D6A8, 0x1D7C9, + 0x20000, 0x2A6D6, + 0x2F800, 0x2FA1D, + 0xF0000, 0xFFFFD, + 0x100000, 0x10FFFD +}; + +/* End of stringprep tables */ + + +/* Is the given Unicode codepoint in the given table of ranges? */ +#define IS_CODE_IN_TABLE(code, map) is_code_in_table(code, map, lengthof(map)) + +static int +codepoint_range_cmp(const void *a, const void *b) +{ + const pg_wchar *key = (const pg_wchar *) a; + const pg_wchar *range = (const pg_wchar *) b; + + if (*key < range[0]) + return -1; /* less than lower bound */ + if (*key > range[1]) + return 1; /* greater than upper bound */ + + return 0; /* within range */ +} + +static bool +is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize) +{ + Assert(mapsize % 2 == 0); + + if (code < map[0] || code > map[mapsize - 1]) + return false; + + if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2, + codepoint_range_cmp)) + return true; + else + return false; +} + +/* + * Calculate the length in characters of a null-terminated UTF-8 string. + * + * Returns -1 if the input is not valid UTF-8. + */ +static int +pg_utf8_string_len(const char *source) +{ + const unsigned char *p = (const unsigned char *) source; + int l; + int num_chars = 0; + + while (*p) + { + l = pg_utf_mblen(p); + + if (!pg_utf8_islegal(p, l)) + return -1; + + p += l; + num_chars++; + } + + return num_chars; +} + +/* + * Returns true if the input string is pure ASCII. + */ +static bool +pg_is_ascii_string(const char *p) +{ + while (*p) + { + if (IS_HIGHBIT_SET(*p)) + return false; + p++; + } + return true; +} + + +/* + * pg_saslprep - Normalize a password with SASLprep. + * + * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL + * supports many encodings, so we don't blindly assume that. pg_saslprep + * will check if the input looks like valid UTF-8, and returns + * SASLPREP_INVALID_UTF8 if not. + * + * If the string contains prohibited characters (or more precisely, if the + * output string would contain prohibited characters after normalization), + * returns SASLPREP_PROHIBITED. + * + * On success, returns SASLPREP_SUCCESS, and the normalized string in + * *output. + * + * In frontend, the normalized string is malloc'd, and the caller is + * responsible for freeing it. If an allocation fails, returns + * SASLPREP_OOM. In backend, the normalized string is palloc'd instead, + * and a failed allocation leads to ereport(ERROR). + */ +pg_saslprep_rc +pg_saslprep(const char *input, char **output) +{ + pg_wchar *input_chars = NULL; + pg_wchar *output_chars = NULL; + int input_size; + char *result; + int result_size; + int count; + int i; + bool contains_RandALCat; + unsigned char *p; + pg_wchar *wp; + + /* Check that the password isn't stupendously long */ + if (strlen(input) > MAX_PASSWORD_LENGTH) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("password too long"))); +#else + return SASLPREP_OOM; +#endif + } + + /* + * Quick check if the input is pure ASCII. An ASCII string requires no + * further processing. + */ + if (pg_is_ascii_string(input)) + { + *output = STRDUP(input); + if (!(*output)) + goto oom; + return SASLPREP_SUCCESS; + } + + /* + * Convert the input from UTF-8 to an array of Unicode codepoints. + * + * This also checks that the input is a legal UTF-8 string. + */ + input_size = pg_utf8_string_len(input); + if (input_size < 0) + { + *output = NULL; + return SASLPREP_INVALID_UTF8; + } + + input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar)); + if (!input_chars) + goto oom; + + p = (unsigned char *) input; + for (i = 0; i < input_size; i++) + { + input_chars[i] = utf8_to_unicode(p); + p += pg_utf_mblen(p); + } + input_chars[i] = (pg_wchar) '\0'; + + /* + * The steps below correspond to the steps listed in [RFC3454], Section + * "2. Preparation Overview" + */ + + /* + * 1) Map -- For each character in the input, check if it has a mapping + * and, if so, replace it with its mapping. + */ + count = 0; + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges)) + input_chars[count++] = 0x0020; + else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges)) + { + /* map to nothing */ + } + else + input_chars[count++] = code; + } + input_chars[count] = (pg_wchar) '\0'; + input_size = count; + + if (input_size == 0) + goto prohibited; /* don't allow empty password */ + + /* + * 2) Normalize -- Normalize the result of step 1 using Unicode + * normalization. + */ + output_chars = unicode_normalize_kc(input_chars); + if (!output_chars) + goto oom; + + /* + * 3) Prohibit -- Check for any characters that are not allowed in the + * output. If any are found, return an error. + */ + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, prohibited_output_ranges)) + goto prohibited; + if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges)) + goto prohibited; + } + + /* + * 4) Check bidi -- Possibly check for right-to-left characters, and if + * any are found, make sure that the whole string satisfies the + * requirements for bidirectional strings. If the string does not satisfy + * the requirements for bidirectional strings, return an error. + * + * [RFC3454], Section "6. Bidirectional Characters" explains in more + * detail what that means: + * + * "In any profile that specifies bidirectional character handling, all + * three of the following requirements MUST be met: + * + * 1) The characters in section 5.8 MUST be prohibited. + * + * 2) If a string contains any RandALCat character, the string MUST NOT + * contain any LCat character. + * + * 3) If a string contains any RandALCat character, a RandALCat character + * MUST be the first character of the string, and a RandALCat character + * MUST be the last character of the string." + */ + contains_RandALCat = false; + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges)) + { + contains_RandALCat = true; + break; + } + } + + if (contains_RandALCat) + { + pg_wchar first = input_chars[0]; + pg_wchar last = input_chars[input_size - 1]; + + for (i = 0; i < input_size; i++) + { + pg_wchar code = input_chars[i]; + + if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges)) + goto prohibited; + } + + if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) || + !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges)) + goto prohibited; + } + + /* + * Finally, convert the result back to UTF-8. + */ + result_size = 0; + for (wp = output_chars; *wp; wp++) + { + unsigned char buf[4]; + + unicode_to_utf8(*wp, buf); + result_size += pg_utf_mblen(buf); + } + + result = ALLOC(result_size + 1); + if (!result) + goto oom; + p = (unsigned char *) result; + for (wp = output_chars; *wp; wp++) + { + unicode_to_utf8(*wp, p); + p += pg_utf_mblen(p); + } + Assert((char *) p == result + result_size); + *p = '\0'; + + FREE(input_chars); + FREE(output_chars); + + *output = result; + return SASLPREP_SUCCESS; + +prohibited: + if (input_chars) + FREE(input_chars); + if (output_chars) + FREE(output_chars); + + return SASLPREP_PROHIBITED; + +oom: + if (input_chars) + FREE(input_chars); + if (output_chars) + FREE(output_chars); + + return SASLPREP_OOM; +} diff --git a/src/common/scram-common.c b/src/common/scram-common.c new file mode 100644 index 0000000000..461d75db12 --- /dev/null +++ b/src/common/scram-common.c @@ -0,0 +1,247 @@ +/*------------------------------------------------------------------------- + * scram-common.c + * Shared frontend/backend code for SCRAM authentication + * + * This contains the common low-level functions needed in both frontend and + * backend, for implement the Salted Challenge Response Authentication + * Mechanism (SCRAM), per IETF's RFC 5802. + * + * Portions Copyright (c) 2017, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/scram-common.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +/* for htonl */ +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "common/base64.h" +#include "common/scram-common.h" + +#define HMAC_IPAD 0x36 +#define HMAC_OPAD 0x5C + +/* + * Calculate HMAC per RFC2104. + * + * The hash function used is SHA-256. + */ +void +scram_HMAC_init(scram_HMAC_ctx *ctx, const uint8 *key, int keylen) +{ + uint8 k_ipad[SHA256_HMAC_B]; + int i; + uint8 keybuf[SCRAM_KEY_LEN]; + + /* + * If the key is longer than the block size (64 bytes for SHA-256), pass + * it through SHA-256 once to shrink it down. + */ + if (keylen > SHA256_HMAC_B) + { + pg_sha256_ctx sha256_ctx; + + pg_sha256_init(&sha256_ctx); + pg_sha256_update(&sha256_ctx, key, keylen); + pg_sha256_final(&sha256_ctx, keybuf); + key = keybuf; + keylen = SCRAM_KEY_LEN; + } + + memset(k_ipad, HMAC_IPAD, SHA256_HMAC_B); + memset(ctx->k_opad, HMAC_OPAD, SHA256_HMAC_B); + + for (i = 0; i < keylen; i++) + { + k_ipad[i] ^= key[i]; + ctx->k_opad[i] ^= key[i]; + } + + /* tmp = H(K XOR ipad, text) */ + pg_sha256_init(&ctx->sha256ctx); + pg_sha256_update(&ctx->sha256ctx, k_ipad, SHA256_HMAC_B); +} + +/* + * Update HMAC calculation + * The hash function used is SHA-256. + */ +void +scram_HMAC_update(scram_HMAC_ctx *ctx, const char *str, int slen) +{ + pg_sha256_update(&ctx->sha256ctx, (const uint8 *) str, slen); +} + +/* + * Finalize HMAC calculation. + * The hash function used is SHA-256. + */ +void +scram_HMAC_final(uint8 *result, scram_HMAC_ctx *ctx) +{ + uint8 h[SCRAM_KEY_LEN]; + + pg_sha256_final(&ctx->sha256ctx, h); + + /* H(K XOR opad, tmp) */ + pg_sha256_init(&ctx->sha256ctx); + pg_sha256_update(&ctx->sha256ctx, ctx->k_opad, SHA256_HMAC_B); + pg_sha256_update(&ctx->sha256ctx, h, SCRAM_KEY_LEN); + pg_sha256_final(&ctx->sha256ctx, result); +} + +/* + * Calculate SaltedPassword. + * + * The password should already be normalized by SASLprep. + */ +void +scram_SaltedPassword(const char *password, + const char *salt, int saltlen, int iterations, + uint8 *result) +{ + int password_len = strlen(password); + uint32 one = htonl(1); + int i, + j; + uint8 Ui[SCRAM_KEY_LEN]; + uint8 Ui_prev[SCRAM_KEY_LEN]; + scram_HMAC_ctx hmac_ctx; + + /* + * Iterate hash calculation of HMAC entry using given salt. This is + * essentially PBKDF2 (see RFC2898) with HMAC() as the pseudorandom + * function. + */ + + /* First iteration */ + scram_HMAC_init(&hmac_ctx, (uint8 *) password, password_len); + scram_HMAC_update(&hmac_ctx, salt, saltlen); + scram_HMAC_update(&hmac_ctx, (char *) &one, sizeof(uint32)); + scram_HMAC_final(Ui_prev, &hmac_ctx); + memcpy(result, Ui_prev, SCRAM_KEY_LEN); + + /* Subsequent iterations */ + for (i = 2; i <= iterations; i++) + { + scram_HMAC_init(&hmac_ctx, (uint8 *) password, password_len); + scram_HMAC_update(&hmac_ctx, (const char *) Ui_prev, SCRAM_KEY_LEN); + scram_HMAC_final(Ui, &hmac_ctx); + for (j = 0; j < SCRAM_KEY_LEN; j++) + result[j] ^= Ui[j]; + memcpy(Ui_prev, Ui, SCRAM_KEY_LEN); + } +} + + +/* + * Calculate SHA-256 hash for a NULL-terminated string. (The NULL terminator is + * not included in the hash). + */ +void +scram_H(const uint8 *input, int len, uint8 *result) +{ + pg_sha256_ctx ctx; + + pg_sha256_init(&ctx); + pg_sha256_update(&ctx, input, len); + pg_sha256_final(&ctx, result); +} + +/* + * Calculate ClientKey. + */ +void +scram_ClientKey(const uint8 *salted_password, uint8 *result) +{ + scram_HMAC_ctx ctx; + + scram_HMAC_init(&ctx, salted_password, SCRAM_KEY_LEN); + scram_HMAC_update(&ctx, "Client Key", strlen("Client Key")); + scram_HMAC_final(result, &ctx); +} + +/* + * Calculate ServerKey. + */ +void +scram_ServerKey(const uint8 *salted_password, uint8 *result) +{ + scram_HMAC_ctx ctx; + + scram_HMAC_init(&ctx, salted_password, SCRAM_KEY_LEN); + scram_HMAC_update(&ctx, "Server Key", strlen("Server Key")); + scram_HMAC_final(result, &ctx); +} + + +/* + * Construct a verifier string for SCRAM, stored in pg_authid.rolpassword. + * + * The password should already have been processed with SASLprep, if necessary! + * + * If iterations is 0, default number of iterations is used. The result is + * palloc'd or malloc'd, so caller is responsible for freeing it. + */ +char * +scram_build_verifier(const char *salt, int saltlen, int iterations, + const char *password) +{ + uint8 salted_password[SCRAM_KEY_LEN]; + uint8 stored_key[SCRAM_KEY_LEN]; + uint8 server_key[SCRAM_KEY_LEN]; + char *result; + char *p; + int maxlen; + + if (iterations <= 0) + iterations = SCRAM_DEFAULT_ITERATIONS; + + /* Calculate StoredKey and ServerKey */ + scram_SaltedPassword(password, salt, saltlen, iterations, + salted_password); + scram_ClientKey(salted_password, stored_key); + scram_H(stored_key, SCRAM_KEY_LEN, stored_key); + + scram_ServerKey(salted_password, server_key); + + /*---------- + * The format is: + * SCRAM-SHA-256$<iteration count>:<salt>$<StoredKey>:<ServerKey> + *---------- + */ + maxlen = strlen("SCRAM-SHA-256") + 1 + + 10 + 1 /* iteration count */ + + pg_b64_enc_len(saltlen) + 1 /* Base64-encoded salt */ + + pg_b64_enc_len(SCRAM_KEY_LEN) + 1 /* Base64-encoded StoredKey */ + + pg_b64_enc_len(SCRAM_KEY_LEN) + 1; /* Base64-encoded ServerKey */ + +#ifdef FRONTEND + result = malloc(maxlen); + if (!result) + return NULL; +#else + result = palloc(maxlen); +#endif + + p = result + sprintf(result, "SCRAM-SHA-256$%d:", iterations); + + p += pg_b64_encode(salt, saltlen, p); + *(p++) = '$'; + p += pg_b64_encode((char *) stored_key, SCRAM_KEY_LEN, p); + *(p++) = ':'; + p += pg_b64_encode((char *) server_key, SCRAM_KEY_LEN, p); + *(p++) = '\0'; + + Assert(p - result <= maxlen); + + return result; +} diff --git a/src/common/sha2.c b/src/common/sha2.c new file mode 100644 index 0000000000..496467507d --- /dev/null +++ b/src/common/sha2.c @@ -0,0 +1,1006 @@ +/*------------------------------------------------------------------------- + * + * sha2.c + * Set of SHA functions for SHA-224, SHA-256, SHA-384 and SHA-512. + * + * This is the set of in-core functions used when there are no other + * alternative options like OpenSSL. + * + * Portions Copyright (c) 2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/sha2.c + * + *------------------------------------------------------------------------- + */ + +/* $OpenBSD: sha2.c,v 1.6 2004/05/03 02:57:36 millert Exp $ */ +/* + * FILE: sha2.c + * AUTHOR: Aaron D. Gifford <me@aarongifford.com> + * + * Copyright (c) 2000-2001, Aaron D. Gifford + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $From: sha2.c,v 1.1 2001/11/08 00:01:51 adg Exp adg $ + */ + + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <sys/param.h> + +#include "common/sha2.h" + +/* + * UNROLLED TRANSFORM LOOP NOTE: + * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform + * loop version for the hash transform rounds (defined using macros + * later in this file). Either define on the command line, for example: + * + * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c + * + * or define below: + * + * #define SHA2_UNROLL_TRANSFORM + * + */ + +/*** SHA-256/384/512 Various Length Definitions ***********************/ +#define PG_SHA256_SHORT_BLOCK_LENGTH (PG_SHA256_BLOCK_LENGTH - 8) +#define PG_SHA384_SHORT_BLOCK_LENGTH (PG_SHA384_BLOCK_LENGTH - 16) +#define PG_SHA512_SHORT_BLOCK_LENGTH (PG_SHA512_BLOCK_LENGTH - 16) + +/*** ENDIAN REVERSAL MACROS *******************************************/ +#ifndef WORDS_BIGENDIAN +#define REVERSE32(w,x) { \ + uint32 tmp = (w); \ + tmp = (tmp >> 16) | (tmp << 16); \ + (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \ +} +#define REVERSE64(w,x) { \ + uint64 tmp = (w); \ + tmp = (tmp >> 32) | (tmp << 32); \ + tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \ + ((tmp & 0x00ff00ff00ff00ffULL) << 8); \ + (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \ + ((tmp & 0x0000ffff0000ffffULL) << 16); \ +} +#endif /* not bigendian */ + +/* + * Macro for incrementally adding the unsigned 64-bit integer n to the + * unsigned 128-bit integer (represented using a two-element array of + * 64-bit words): + */ +#define ADDINC128(w,n) { \ + (w)[0] += (uint64)(n); \ + if ((w)[0] < (n)) { \ + (w)[1]++; \ + } \ +} + +/*** THE SIX LOGICAL FUNCTIONS ****************************************/ +/* + * Bit shifting and rotation (used by the six SHA-XYZ logical functions: + * + * NOTE: The naming of R and S appears backwards here (R is a SHIFT and + * S is a ROTATION) because the SHA-256/384/512 description document + * (see http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf) + * uses this same "backwards" definition. + */ +/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ +#define R(b,x) ((x) >> (b)) +/* 32-bit Rotate-right (used in SHA-256): */ +#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) +/* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ +#define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) + +/* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */ +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +/* Four of six logical functions used in SHA-256: */ +#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) +#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) +#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) +#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) + +/* Four of six logical functions used in SHA-384 and SHA-512: */ +#define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) +#define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) +#define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x))) +#define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x))) + +/*** INTERNAL FUNCTION PROTOTYPES *************************************/ +/* NOTE: These should not be accessed directly from outside this + * library -- they are intended for private internal visibility/use + * only. + */ +static void SHA512_Last(pg_sha512_ctx *context); +static void SHA256_Transform(pg_sha256_ctx *context, const uint8 *data); +static void SHA512_Transform(pg_sha512_ctx *context, const uint8 *data); + +/*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ +/* Hash constant words K for SHA-256: */ +static const uint32 K256[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + +/* Initial hash value H for SHA-224: */ +static const uint32 sha224_initial_hash_value[8] = { + 0xc1059ed8UL, + 0x367cd507UL, + 0x3070dd17UL, + 0xf70e5939UL, + 0xffc00b31UL, + 0x68581511UL, + 0x64f98fa7UL, + 0xbefa4fa4UL +}; + +/* Initial hash value H for SHA-256: */ +static const uint32 sha256_initial_hash_value[8] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL +}; + +/* Hash constant words K for SHA-384 and SHA-512: */ +static const uint64 K512[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +/* Initial hash value H for SHA-384 */ +static const uint64 sha384_initial_hash_value[8] = { + 0xcbbb9d5dc1059ed8ULL, + 0x629a292a367cd507ULL, + 0x9159015a3070dd17ULL, + 0x152fecd8f70e5939ULL, + 0x67332667ffc00b31ULL, + 0x8eb44a8768581511ULL, + 0xdb0c2e0d64f98fa7ULL, + 0x47b5481dbefa4fa4ULL +}; + +/* Initial hash value H for SHA-512 */ +static const uint64 sha512_initial_hash_value[8] = { + 0x6a09e667f3bcc908ULL, + 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, + 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, + 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, + 0x5be0cd19137e2179ULL +}; + + +/*** SHA-256: *********************************************************/ +void +pg_sha256_init(pg_sha256_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha256_initial_hash_value, PG_SHA256_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH); + context->bitcount = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-256 round macros: */ + +#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) do { \ + W256[j] = (uint32)data[3] | ((uint32)data[2] << 8) | \ + ((uint32)data[1] << 16) | ((uint32)data[0] << 24); \ + data += 4; \ + T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + W256[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +#define ROUND256(a,b,c,d,e,f,g,h) do { \ + s0 = W256[(j+1)&0x0f]; \ + s0 = sigma0_256(s0); \ + s1 = W256[(j+14)&0x0f]; \ + s1 = sigma1_256(s1); \ + T1 = (h) + Sigma1_256((e)) + Ch((e), (f), (g)) + K256[j] + \ + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_256((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +static void +SHA256_Transform(pg_sha256_ctx *context, const uint8 *data) +{ + uint32 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint32 T1, + *W256; + int j; + + W256 = (uint32 *) context->buffer; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + /* Rounds 0 to 15 (unrolled): */ + ROUND256_0_TO_15(a, b, c, d, e, f, g, h); + ROUND256_0_TO_15(h, a, b, c, d, e, f, g); + ROUND256_0_TO_15(g, h, a, b, c, d, e, f); + ROUND256_0_TO_15(f, g, h, a, b, c, d, e); + ROUND256_0_TO_15(e, f, g, h, a, b, c, d); + ROUND256_0_TO_15(d, e, f, g, h, a, b, c); + ROUND256_0_TO_15(c, d, e, f, g, h, a, b); + ROUND256_0_TO_15(b, c, d, e, f, g, h, a); + } while (j < 16); + + /* Now for the remaining rounds to 64: */ + do + { + ROUND256(a, b, c, d, e, f, g, h); + ROUND256(h, a, b, c, d, e, f, g); + ROUND256(g, h, a, b, c, d, e, f); + ROUND256(f, g, h, a, b, c, d, e); + ROUND256(e, f, g, h, a, b, c, d); + ROUND256(d, e, f, g, h, a, b, c); + ROUND256(c, d, e, f, g, h, a, b); + ROUND256(b, c, d, e, f, g, h, a); + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} +#else /* SHA2_UNROLL_TRANSFORM */ + +static void +SHA256_Transform(pg_sha256_ctx *context, const uint8 *data) +{ + uint32 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint32 T1, + T2, + *W256; + int j; + + W256 = (uint32 *) context->buffer; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + W256[j] = (uint32) data[3] | ((uint32) data[2] << 8) | + ((uint32) data[1] << 16) | ((uint32) data[0] << 24); + data += 4; + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do + { + /* Part of the message block expansion: */ + s0 = W256[(j + 1) & 0x0f]; + s0 = sigma0_256(s0); + s1 = W256[(j + 14) & 0x0f]; + s1 = sigma1_256(s1); + + /* Apply the SHA-256 compression function to update a..h */ + T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + + (W256[j & 0x0f] += s1 + W256[(j + 9) & 0x0f] + s0); + T2 = Sigma0_256(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 64); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} +#endif /* SHA2_UNROLL_TRANSFORM */ + +void +pg_sha256_update(pg_sha256_ctx *context, const uint8 *data, size_t len) +{ + size_t freespace, + usedspace; + + /* Calling with no data is valid (we do nothing) */ + if (len == 0) + return; + + usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH; + if (usedspace > 0) + { + /* Calculate how much free space is available in the buffer */ + freespace = PG_SHA256_BLOCK_LENGTH - usedspace; + + if (len >= freespace) + { + /* Fill the buffer completely and process it */ + memcpy(&context->buffer[usedspace], data, freespace); + context->bitcount += freespace << 3; + len -= freespace; + data += freespace; + SHA256_Transform(context, context->buffer); + } + else + { + /* The buffer is not yet full */ + memcpy(&context->buffer[usedspace], data, len); + context->bitcount += len << 3; + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= PG_SHA256_BLOCK_LENGTH) + { + /* Process as many complete blocks as we can */ + SHA256_Transform(context, data); + context->bitcount += PG_SHA256_BLOCK_LENGTH << 3; + len -= PG_SHA256_BLOCK_LENGTH; + data += PG_SHA256_BLOCK_LENGTH; + } + if (len > 0) + { + /* There's left-overs, so save 'em */ + memcpy(context->buffer, data, len); + context->bitcount += len << 3; + } + /* Clean up: */ + usedspace = freespace = 0; +} + +static void +SHA256_Last(pg_sha256_ctx *context) +{ + unsigned int usedspace; + + usedspace = (context->bitcount >> 3) % PG_SHA256_BLOCK_LENGTH; +#ifndef WORDS_BIGENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount, context->bitcount); +#endif + if (usedspace > 0) + { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= PG_SHA256_SHORT_BLOCK_LENGTH) + { + /* Set-up for the last transform: */ + memset(&context->buffer[usedspace], 0, PG_SHA256_SHORT_BLOCK_LENGTH - usedspace); + } + else + { + if (usedspace < PG_SHA256_BLOCK_LENGTH) + { + memset(&context->buffer[usedspace], 0, PG_SHA256_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA256_Transform(context, context->buffer); + + /* And set-up for the last transform: */ + memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH); + } + } + else + { + /* Set-up for the last transform: */ + memset(context->buffer, 0, PG_SHA256_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Set the bit count: */ + *(uint64 *) &context->buffer[PG_SHA256_SHORT_BLOCK_LENGTH] = context->bitcount; + + /* Final transform: */ + SHA256_Transform(context, context->buffer); +} + +void +pg_sha256_final(pg_sha256_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA256_Last(context); + +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 8; j++) + { + REVERSE32(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA256_DIGEST_LENGTH); + } + + /* Clean up state data: */ + memset(context, 0, sizeof(pg_sha256_ctx)); +} + + +/*** SHA-512: *********************************************************/ +void +pg_sha512_init(pg_sha512_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha512_initial_hash_value, PG_SHA512_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +#ifdef SHA2_UNROLL_TRANSFORM + +/* Unrolled SHA-512 round macros: */ + +#define ROUND512_0_TO_15(a,b,c,d,e,f,g,h) do { \ + W512[j] = (uint64)data[7] | ((uint64)data[6] << 8) | \ + ((uint64)data[5] << 16) | ((uint64)data[4] << 24) | \ + ((uint64)data[3] << 32) | ((uint64)data[2] << 40) | \ + ((uint64)data[1] << 48) | ((uint64)data[0] << 56); \ + data += 8; \ + T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + W512[j]; \ + (d) += T1; \ + (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + + +#define ROUND512(a,b,c,d,e,f,g,h) do { \ + s0 = W512[(j+1)&0x0f]; \ + s0 = sigma0_512(s0); \ + s1 = W512[(j+14)&0x0f]; \ + s1 = sigma1_512(s1); \ + T1 = (h) + Sigma1_512((e)) + Ch((e), (f), (g)) + K512[j] + \ + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \ + (d) += T1; \ + (h) = T1 + Sigma0_512((a)) + Maj((a), (b), (c)); \ + j++; \ +} while(0) + +static void +SHA512_Transform(pg_sha512_ctx *context, const uint8 *data) +{ + uint64 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint64 T1, + *W512 = (uint64 *) context->buffer; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + ROUND512_0_TO_15(a, b, c, d, e, f, g, h); + ROUND512_0_TO_15(h, a, b, c, d, e, f, g); + ROUND512_0_TO_15(g, h, a, b, c, d, e, f); + ROUND512_0_TO_15(f, g, h, a, b, c, d, e); + ROUND512_0_TO_15(e, f, g, h, a, b, c, d); + ROUND512_0_TO_15(d, e, f, g, h, a, b, c); + ROUND512_0_TO_15(c, d, e, f, g, h, a, b); + ROUND512_0_TO_15(b, c, d, e, f, g, h, a); + } while (j < 16); + + /* Now for the remaining rounds up to 79: */ + do + { + ROUND512(a, b, c, d, e, f, g, h); + ROUND512(h, a, b, c, d, e, f, g); + ROUND512(g, h, a, b, c, d, e, f); + ROUND512(f, g, h, a, b, c, d, e); + ROUND512(e, f, g, h, a, b, c, d); + ROUND512(d, e, f, g, h, a, b, c); + ROUND512(c, d, e, f, g, h, a, b); + ROUND512(b, c, d, e, f, g, h, a); + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = 0; +} +#else /* SHA2_UNROLL_TRANSFORM */ + +static void +SHA512_Transform(pg_sha512_ctx *context, const uint8 *data) +{ + uint64 a, + b, + c, + d, + e, + f, + g, + h, + s0, + s1; + uint64 T1, + T2, + *W512 = (uint64 *) context->buffer; + int j; + + /* Initialize registers with the prev. intermediate value */ + a = context->state[0]; + b = context->state[1]; + c = context->state[2]; + d = context->state[3]; + e = context->state[4]; + f = context->state[5]; + g = context->state[6]; + h = context->state[7]; + + j = 0; + do + { + W512[j] = (uint64) data[7] | ((uint64) data[6] << 8) | + ((uint64) data[5] << 16) | ((uint64) data[4] << 24) | + ((uint64) data[3] << 32) | ((uint64) data[2] << 40) | + ((uint64) data[1] << 48) | ((uint64) data[0] << 56); + data += 8; + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j]; + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 16); + + do + { + /* Part of the message block expansion: */ + s0 = W512[(j + 1) & 0x0f]; + s0 = sigma0_512(s0); + s1 = W512[(j + 14) & 0x0f]; + s1 = sigma1_512(s1); + + /* Apply the SHA-512 compression function to update a..h */ + T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + + (W512[j & 0x0f] += s1 + W512[(j + 9) & 0x0f] + s0); + T2 = Sigma0_512(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + j++; + } while (j < 80); + + /* Compute the current intermediate hash value */ + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + context->state[5] += f; + context->state[6] += g; + context->state[7] += h; + + /* Clean up */ + a = b = c = d = e = f = g = h = T1 = T2 = 0; +} +#endif /* SHA2_UNROLL_TRANSFORM */ + +void +pg_sha512_update(pg_sha512_ctx *context, const uint8 *data, size_t len) +{ + size_t freespace, + usedspace; + + /* Calling with no data is valid (we do nothing) */ + if (len == 0) + return; + + usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH; + if (usedspace > 0) + { + /* Calculate how much free space is available in the buffer */ + freespace = PG_SHA512_BLOCK_LENGTH - usedspace; + + if (len >= freespace) + { + /* Fill the buffer completely and process it */ + memcpy(&context->buffer[usedspace], data, freespace); + ADDINC128(context->bitcount, freespace << 3); + len -= freespace; + data += freespace; + SHA512_Transform(context, context->buffer); + } + else + { + /* The buffer is not yet full */ + memcpy(&context->buffer[usedspace], data, len); + ADDINC128(context->bitcount, len << 3); + /* Clean up: */ + usedspace = freespace = 0; + return; + } + } + while (len >= PG_SHA512_BLOCK_LENGTH) + { + /* Process as many complete blocks as we can */ + SHA512_Transform(context, data); + ADDINC128(context->bitcount, PG_SHA512_BLOCK_LENGTH << 3); + len -= PG_SHA512_BLOCK_LENGTH; + data += PG_SHA512_BLOCK_LENGTH; + } + if (len > 0) + { + /* There's left-overs, so save 'em */ + memcpy(context->buffer, data, len); + ADDINC128(context->bitcount, len << 3); + } + /* Clean up: */ + usedspace = freespace = 0; +} + +static void +SHA512_Last(pg_sha512_ctx *context) +{ + unsigned int usedspace; + + usedspace = (context->bitcount[0] >> 3) % PG_SHA512_BLOCK_LENGTH; +#ifndef WORDS_BIGENDIAN + /* Convert FROM host byte order */ + REVERSE64(context->bitcount[0], context->bitcount[0]); + REVERSE64(context->bitcount[1], context->bitcount[1]); +#endif + if (usedspace > 0) + { + /* Begin padding with a 1 bit: */ + context->buffer[usedspace++] = 0x80; + + if (usedspace <= PG_SHA512_SHORT_BLOCK_LENGTH) + { + /* Set-up for the last transform: */ + memset(&context->buffer[usedspace], 0, PG_SHA512_SHORT_BLOCK_LENGTH - usedspace); + } + else + { + if (usedspace < PG_SHA512_BLOCK_LENGTH) + { + memset(&context->buffer[usedspace], 0, PG_SHA512_BLOCK_LENGTH - usedspace); + } + /* Do second-to-last transform: */ + SHA512_Transform(context, context->buffer); + + /* And set-up for the last transform: */ + memset(context->buffer, 0, PG_SHA512_BLOCK_LENGTH - 2); + } + } + else + { + /* Prepare for final transform: */ + memset(context->buffer, 0, PG_SHA512_SHORT_BLOCK_LENGTH); + + /* Begin padding with a 1 bit: */ + *context->buffer = 0x80; + } + /* Store the length of input data (in bits): */ + *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH] = context->bitcount[1]; + *(uint64 *) &context->buffer[PG_SHA512_SHORT_BLOCK_LENGTH + 8] = context->bitcount[0]; + + /* Final transform: */ + SHA512_Transform(context, context->buffer); +} + +void +pg_sha512_final(pg_sha512_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA512_Last(context); + + /* Save the hash data for output: */ +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 8; j++) + { + REVERSE64(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA512_DIGEST_LENGTH); + } + + /* Zero out state data */ + memset(context, 0, sizeof(pg_sha512_ctx)); +} + + +/*** SHA-384: *********************************************************/ +void +pg_sha384_init(pg_sha384_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha384_initial_hash_value, PG_SHA512_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA384_BLOCK_LENGTH); + context->bitcount[0] = context->bitcount[1] = 0; +} + +void +pg_sha384_update(pg_sha384_ctx *context, const uint8 *data, size_t len) +{ + pg_sha512_update((pg_sha512_ctx *) context, data, len); +} + +void +pg_sha384_final(pg_sha384_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA512_Last((pg_sha512_ctx *) context); + + /* Save the hash data for output: */ +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 6; j++) + { + REVERSE64(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA384_DIGEST_LENGTH); + } + + /* Zero out state data */ + memset(context, 0, sizeof(pg_sha384_ctx)); +} + +/*** SHA-224: *********************************************************/ +void +pg_sha224_init(pg_sha224_ctx *context) +{ + if (context == NULL) + return; + memcpy(context->state, sha224_initial_hash_value, PG_SHA256_DIGEST_LENGTH); + memset(context->buffer, 0, PG_SHA256_BLOCK_LENGTH); + context->bitcount = 0; +} + +void +pg_sha224_update(pg_sha224_ctx *context, const uint8 *data, size_t len) +{ + pg_sha256_update((pg_sha256_ctx *) context, data, len); +} + +void +pg_sha224_final(pg_sha224_ctx *context, uint8 *digest) +{ + /* If no digest buffer is passed, we don't bother doing this: */ + if (digest != NULL) + { + SHA256_Last(context); + +#ifndef WORDS_BIGENDIAN + { + /* Convert TO host byte order */ + int j; + + for (j = 0; j < 8; j++) + { + REVERSE32(context->state[j], context->state[j]); + } + } +#endif + memcpy(digest, context->state, PG_SHA224_DIGEST_LENGTH); + } + + /* Clean up state data: */ + memset(context, 0, sizeof(pg_sha224_ctx)); +} diff --git a/src/common/sha2_openssl.c b/src/common/sha2_openssl.c new file mode 100644 index 0000000000..bcc3442633 --- /dev/null +++ b/src/common/sha2_openssl.c @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------- + * + * sha2_openssl.c + * Set of wrapper routines on top of OpenSSL to support SHA-224 + * SHA-256, SHA-384 and SHA-512 functions. + * + * This should only be used if code is compiled with OpenSSL support. + * + * Portions Copyright (c) 2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/sha2_openssl.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include <openssl/sha.h> + +#include "common/sha2.h" + + +/* Interface routines for SHA-256 */ +void +pg_sha256_init(pg_sha256_ctx *ctx) +{ + SHA256_Init((SHA256_CTX *) ctx); +} + +void +pg_sha256_update(pg_sha256_ctx *ctx, const uint8 *data, size_t len) +{ + SHA256_Update((SHA256_CTX *) ctx, data, len); +} + +void +pg_sha256_final(pg_sha256_ctx *ctx, uint8 *dest) +{ + SHA256_Final(dest, (SHA256_CTX *) ctx); +} + +/* Interface routines for SHA-512 */ +void +pg_sha512_init(pg_sha512_ctx *ctx) +{ + SHA512_Init((SHA512_CTX *) ctx); +} + +void +pg_sha512_update(pg_sha512_ctx *ctx, const uint8 *data, size_t len) +{ + SHA512_Update((SHA512_CTX *) ctx, data, len); +} + +void +pg_sha512_final(pg_sha512_ctx *ctx, uint8 *dest) +{ + SHA512_Final(dest, (SHA512_CTX *) ctx); +} + +/* Interface routines for SHA-384 */ +void +pg_sha384_init(pg_sha384_ctx *ctx) +{ + SHA384_Init((SHA512_CTX *) ctx); +} + +void +pg_sha384_update(pg_sha384_ctx *ctx, const uint8 *data, size_t len) +{ + SHA384_Update((SHA512_CTX *) ctx, data, len); +} + +void +pg_sha384_final(pg_sha384_ctx *ctx, uint8 *dest) +{ + SHA384_Final(dest, (SHA512_CTX *) ctx); +} + +/* Interface routines for SHA-224 */ +void +pg_sha224_init(pg_sha224_ctx *ctx) +{ + SHA224_Init((SHA256_CTX *) ctx); +} + +void +pg_sha224_update(pg_sha224_ctx *ctx, const uint8 *data, size_t len) +{ + SHA224_Update((SHA256_CTX *) ctx, data, len); +} + +void +pg_sha224_final(pg_sha224_ctx *ctx, uint8 *dest) +{ + SHA224_Final(dest, (SHA256_CTX *) ctx); +} diff --git a/src/common/string.c b/src/common/string.c index 69b26e7dbd..159d9ea7b6 100644 --- a/src/common/string.c +++ b/src/common/string.c @@ -4,7 +4,7 @@ * string handling helpers * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/common/unicode/.gitignore b/src/common/unicode/.gitignore new file mode 100644 index 0000000000..5e583e2ccc --- /dev/null +++ b/src/common/unicode/.gitignore @@ -0,0 +1,7 @@ +/norm_test +/norm_test_table.h + +# Files downloaded from the Unicode Character Database +/CompositionExclusions.txt +/NormalizationTest.txt +/UnicodeData.txt diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile new file mode 100644 index 0000000000..e20ef778f3 --- /dev/null +++ b/src/common/unicode/Makefile @@ -0,0 +1,53 @@ +#------------------------------------------------------------------------- +# +# Makefile +# Makefile for src/common/unicode +# +# IDENTIFICATION +# src/common/unicode/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/common/unicode +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -DFRONTEND $(CPPFLAGS) +LIBS += $(PTHREAD_LIBS) + +# By default, do nothing. +all: + +DOWNLOAD = wget -O $@ --no-use-server-timestamps + +# These files are part of the Unicode Character Database. Download +# them on demand. +UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt: + $(DOWNLOAD) http://unicode.org/Public/UNIDATA/$(@F) + +# Generation of conversion tables used for string normalization with +# UTF-8 strings. +unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt + $(PERL) generate-unicode_norm_table.pl + +# Test suite +normalization-check: norm_test + ./norm_test + +norm_test: norm_test.o ../unicode_norm.o + +norm_test.o: norm_test_table.h + +norm_test_table.h: generate-norm_test_table.pl NormalizationTest.txt + perl generate-norm_test_table.pl NormalizationTest.txt $@ + +.PHONY: normalization-check + + +clean: + rm -f $(OBJS) norm_test norm_test.o + +distclean: clean + rm -f UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt norm_test_table.h unicode_norm_table.h + +maintainer-clean: distclean diff --git a/src/common/unicode/README b/src/common/unicode/README new file mode 100644 index 0000000000..7c1c433b61 --- /dev/null +++ b/src/common/unicode/README @@ -0,0 +1,35 @@ +This directory contains tools to generate the tables in +src/include/common/unicode_norm.h, used for Unicode normalization. The +generated .h file is included in the source tree, so these are normally not +needed to build PostgreSQL, only if you need to re-generate the .h file +from the Unicode data files for some reason, e.g. to update to a new version +of Unicode. + +Generating unicode_norm_table.h +------------------------------- + +1. Download the Unicode data file, UnicodeData.txt, from the Unicode +consortium and place it to the current directory. Run the perl script +"norm_test_generate.pl", to process it, and to generate the +"unicode_norm_table.h" file. The Makefile contains a rule to download the +data files if they don't exist. + + make unicode_norm_table.h + +2. Inspect the resulting header file. Once you're happy with it, copy it to +the right location. + + cp unicode_norm_table.h ../../../src/include/common/ + + + +Tests +----- + +The Unicode consortium publishes a comprehensive test suite for the +normalization algorithm, in a file called NormalizationTest.txt. This +directory also contains a perl script and some C code, to run our +normalization code with all the test strings in NormalizationTest.txt. +To download NormalizationTest.txt and run the tests: + + make normalization-check diff --git a/src/common/unicode/generate-norm_test_table.pl b/src/common/unicode/generate-norm_test_table.pl new file mode 100644 index 0000000000..310d32fd29 --- /dev/null +++ b/src/common/unicode/generate-norm_test_table.pl @@ -0,0 +1,102 @@ +#!/usr/bin/perl +# +# Read Unicode consortium's normalization test suite, NormalizationTest.txt, +# and generate a C array from it, for norm_test.c. +# +# NormalizationTest.txt is part of the Unicode Character Database. +# +# Copyright (c) 2000-2017, PostgreSQL Global Development Group + +use strict; +use warnings; + +use File::Basename; + +die "Usage: $0 INPUT_FILE OUTPUT_FILE\n" if @ARGV != 2; +my $input_file = $ARGV[0]; +my $output_file = $ARGV[1]; +my $output_base = basename($output_file); + +# Open the input and output files +open my $INPUT, '<', $input_file + or die "Could not open input file $input_file: $!"; +open my $OUTPUT, '>', $output_file + or die "Could not open output file $output_file: $!\n"; + +# Print header of output file. +print $OUTPUT <<HEADER; +/*------------------------------------------------------------------------- + * + * norm_test_table.h + * Test strings for Unicode normalization. + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/unicode/norm_test_table.h + * + *------------------------------------------------------------------------- + */ + +/* + * File auto-generated by src/common/unicode/generate-norm_test_table.pl, do + * not edit. There is deliberately not an #ifndef PG_NORM_TEST_TABLE_H + * here. + */ + +typedef struct +{ + int linenum; + pg_wchar input[50]; + pg_wchar output[50]; +} pg_unicode_test; + +/* test table */ +HEADER +print $OUTPUT + "static const pg_unicode_test UnicodeNormalizationTests[] =\n{\n"; + +# Helper routine to conver a space-separated list of Unicode characters to +# hexadecimal list format, suitable for outputting in a C array. +sub codepoint_string_to_hex +{ + my $codepoint_string = shift; + + my $result; + + foreach (split(' ', $codepoint_string)) + { + my $cp = $_; + my $utf8 = "0x$cp, "; + $result .= $utf8; + } + $result .= '0'; # null-terminated the array + return $result; +} + +# Process the input file line by line +my $linenum = 0; +while (my $line = <$INPUT>) +{ + $linenum = $linenum + 1; + if ($line =~ /^\s*#/) { next; } # ignore comments + + if ($line =~ /^@/) { next; } # ignore @Part0 like headers + + # Split the line wanted and get the fields needed: + # + # source; NFC; NFD; NFKC; NFKD + my ($source, $nfc, $nfd, $nfkc, $nfkd) = split(';', $line); + + my $source_utf8 = codepoint_string_to_hex($source); + my $nfkc_utf8 = codepoint_string_to_hex($nfkc); + + print $OUTPUT "\t{ $linenum, { $source_utf8 }, { $nfkc_utf8 } },\n"; +} + +# Output terminator entry +print $OUTPUT "\t{ 0, { 0 }, { 0 } }"; +print $OUTPUT "\n};\n"; + +close $OUTPUT; +close $INPUT; diff --git a/src/common/unicode/generate-unicode_norm_table.pl b/src/common/unicode/generate-unicode_norm_table.pl new file mode 100644 index 0000000000..1d77bb6380 --- /dev/null +++ b/src/common/unicode/generate-unicode_norm_table.pl @@ -0,0 +1,231 @@ +#!/usr/bin/perl +# +# Generate a composition table, using Unicode data files as input +# +# Input: UnicodeData.txt and CompositionExclusions.txt +# Output: unicode_norm_table.h +# +# Copyright (c) 2000-2017, PostgreSQL Global Development Group + +use strict; +use warnings; + +my $output_file = "unicode_norm_table.h"; + +my $FH; + +# Read list of codes that should be excluded from re-composition. +my @composition_exclusion_codes = (); +open($FH, '<', "CompositionExclusions.txt") + or die "Could not open CompositionExclusions.txt: $!."; +while (my $line = <$FH>) +{ + if ($line =~ /^([[:xdigit:]]+)/) + { + push @composition_exclusion_codes, $1; + } +} +close $FH; + +# Read entries from UnicodeData.txt into a list, and a hash table. We need +# three fields from each row: the codepoint, canonical combining class, +# and character decomposition mapping +my @characters = (); +my %character_hash = (); +open($FH, '<', "UnicodeData.txt") + or die "Could not open UnicodeData.txt: $!."; +while (my $line = <$FH>) +{ + + # Split the line wanted and get the fields needed: + # - Unicode code value + # - Canonical Combining Class + # - Character Decomposition Mapping + my @elts = split(';', $line); + my $code = $elts[0]; + my $class = $elts[3]; + my $decomp = $elts[5]; + + # Skip codepoints above U+10FFFF. They cannot be represented in 4 bytes + # in UTF-8, and PostgreSQL doesn't support UTF-8 characters longer than + # 4 bytes. (This is just pro forma, as there aren't any such entries in + # the data file, currently.) + next if hex($code) > 0x10FFFF; + + # Skip characters with no decompositions and a class of 0, to reduce the + # table size. + next if $class eq '0' && $decomp eq ''; + + my %char_entry = (code => $code, class => $class, decomp => $decomp); + push(@characters, \%char_entry); + $character_hash{$code} = \%char_entry; +} +close $FH; + +my $num_characters = scalar @characters; + +# Start writing out the output file +open my $OUTPUT, '>', $output_file + or die "Could not open output file $output_file: $!\n"; + +print $OUTPUT <<HEADER; +/*------------------------------------------------------------------------- + * + * unicode_norm_table.h + * Composition table used for Unicode normalization + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/common/unicode_norm_table.h + * + *------------------------------------------------------------------------- + */ + +/* + * File auto-generated by src/common/unicode/generate-unicode_norm_table.pl, + * do not edit. There is deliberately not an #ifndef PG_UNICODE_NORM_TABLE_H + * here. + */ +typedef struct +{ + uint32 codepoint; /* Unicode codepoint */ + uint8 comb_class; /* combining class of character */ + uint8 dec_size_flags; /* size and flags of decomposition code list */ + uint16 dec_index; /* index into UnicodeDecomp_codepoints, or the + * decomposition itself if DECOMP_INLINE */ +} pg_unicode_decomposition; + +#define DECOMP_NO_COMPOSE 0x80 /* don't use for re-composition */ +#define DECOMP_INLINE 0x40 /* decomposition is stored inline in dec_index */ + +#define DECOMPOSITION_SIZE(x) ((x)->dec_size_flags & 0x3F) +#define DECOMPOSITION_NO_COMPOSE(x) (((x)->dec_size_flags & DECOMP_NO_COMPOSE) != 0) +#define DECOMPOSITION_IS_INLINE(x) (((x)->dec_size_flags & DECOMP_INLINE) != 0) + +/* Table of Unicode codepoints and their decompositions */ +static const pg_unicode_decomposition UnicodeDecompMain[$num_characters] = +{ +HEADER + +my $decomp_index = 0; +my $decomp_string = ""; + +my $last_code = $characters[-1]->{code}; +foreach my $char (@characters) +{ + my $code = $char->{code}; + my $class = $char->{class}; + my $decomp = $char->{decomp}; + + # The character decomposition mapping field in UnicodeData.txt is a list + # of unicode codepoints, separated by space. But it can be prefixed with + # so-called compatibility formatting tag, like "<compat>", or "<font>". + # The entries with compatibility formatting tags should not be used for + # re-composing characters during normalization, so flag them in the table. + # (The tag doesn't matter, only whether there is a tag or not) + my $compat = 0; + if ($decomp =~ /\<.*\>/) + { + $compat = 1; + $decomp =~ s/\<[^][]*\>//g; + } + my @decomp_elts = split(" ", $decomp); + + # Decomposition size + # Print size of decomposition + my $decomp_size = scalar(@decomp_elts); + + my $first_decomp = shift @decomp_elts; + + my $flags = ""; + my $comment = ""; + + if ($decomp_size == 2) + { + + # Should this be used for recomposition? + if ($compat) + { + $flags .= " | DECOMP_NO_COMPOSE"; + $comment = "compatibility mapping"; + } + elsif ($character_hash{$first_decomp} + && $character_hash{$first_decomp}->{class} != 0) + { + $flags .= " | DECOMP_NO_COMPOSE"; + $comment = "non-starter decomposition"; + } + else + { + foreach my $lcode (@composition_exclusion_codes) + { + if ($lcode eq $char->{code}) + { + $flags .= " | DECOMP_NO_COMPOSE"; + $comment = "in exclusion list"; + last; + } + } + } + } + + if ($decomp_size == 0) + { + print $OUTPUT "\t{0x$code, $class, 0$flags, 0}"; + } + elsif ($decomp_size == 1 && length($first_decomp) <= 4) + { + + # The decomposition consists of a single codepoint, and it fits + # in a uint16, so we can store it "inline" in the main table. + $flags .= " | DECOMP_INLINE"; + print $OUTPUT "\t{0x$code, $class, 1$flags, 0x$first_decomp}"; + } + else + { + print $OUTPUT + "\t{0x$code, $class, $decomp_size$flags, $decomp_index}"; + + # Now save the decompositions into a dedicated area that will + # be written afterwards. First build the entry dedicated to + # a sub-table with the code and decomposition. + $decomp_string .= ",\n" if ($decomp_string ne ""); + + $decomp_string .= "\t /* $decomp_index */ 0x$first_decomp"; + foreach (@decomp_elts) + { + $decomp_string .= ", 0x$_"; + } + + $decomp_index = $decomp_index + $decomp_size; + } + + # Print a comma after all items except the last one. + print $OUTPUT "," unless ($code eq $last_code); + if ($comment ne "") + { + + # If the line is wide already, indent the comment with one tab, + # otherwise with two. This is to make the output match the way + # pgindent would mangle it. (This is quite hacky. To do this + # properly, we should actually track how long the line is so far, + # but this works for now.) + print $OUTPUT "\t" if ($decomp_index < 10); + + print $OUTPUT "\t/* $comment */" if ($comment ne ""); + } + print $OUTPUT "\n"; +} +print $OUTPUT "\n};\n\n"; + +# Print the array of decomposed codes. +print $OUTPUT <<HEADER; +/* codepoints array */ +static const uint32 UnicodeDecomp_codepoints[$decomp_index] = +{ +$decomp_string +}; +HEADER + +close $OUTPUT; diff --git a/src/common/unicode/norm_test.c b/src/common/unicode/norm_test.c new file mode 100644 index 0000000000..10a370cffa --- /dev/null +++ b/src/common/unicode/norm_test.c @@ -0,0 +1,80 @@ +/*------------------------------------------------------------------------- + * norm_test.c + * Program to test Unicode normalization functions. + * + * Portions Copyright (c) 2017, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/unicode_norm.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "common/unicode_norm.h" + +#include "norm_test_table.h" + +static char * +print_wchar_str(const pg_wchar *s) +{ +#define BUF_DIGITS 50 + static char buf[BUF_DIGITS * 2 + 1]; + int i; + + i = 0; + while (*s && i < BUF_DIGITS) + { + snprintf(&buf[i * 2], 3, "%04X", *s); + i++; + s++; + } + buf[i * 2] = '\0'; + return buf; +} + +static int +pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2) +{ + for (;;) + { + if (*s1 < *s2) + return -1; + if (*s1 > *s2) + return 1; + if (*s1 == 0) + return 0; + s1++; + s2++; + } +} + +int +main(int argc, char **argv) +{ + const pg_unicode_test *test; + + for (test = UnicodeNormalizationTests; test->input[0] != 0; test++) + { + pg_wchar *result; + + result = unicode_normalize_kc(test->input); + + if (pg_wcscmp(test->output, result) != 0) + { + printf("FAILURE (Normalizationdata.txt line %d):\n", test->linenum); + printf("input:\t%s\n", print_wchar_str(test->input)); + printf("expected:\t%s\n", print_wchar_str(test->output)); + printf("got\t%s\n", print_wchar_str(result)); + printf("\n"); + exit(1); + } + } + + printf("All tests successful!\n"); + exit(0); +} diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c new file mode 100644 index 0000000000..5361f5f111 --- /dev/null +++ b/src/common/unicode_norm.c @@ -0,0 +1,437 @@ +/*------------------------------------------------------------------------- + * unicode_norm.c + * Normalize a Unicode string to NFKC form + * + * This implements Unicode normalization, per the documentation at + * http://www.unicode.org/reports/tr15/. + * + * Portions Copyright (c) 2017, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/unicode_norm.c + * + *------------------------------------------------------------------------- + */ +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/unicode_norm.h" +#include "common/unicode_norm_table.h" + +#ifndef FRONTEND +#define ALLOC(size) palloc(size) +#define FREE(size) pfree(size) +#else +#define ALLOC(size) malloc(size) +#define FREE(size) free(size) +#endif + +/* Constants for calculations with Hangul characters */ +#define SBASE 0xAC00 /* U+AC00 */ +#define LBASE 0x1100 /* U+1100 */ +#define VBASE 0x1161 /* U+1161 */ +#define TBASE 0x11A7 /* U+11A7 */ +#define LCOUNT 19 +#define VCOUNT 21 +#define TCOUNT 28 +#define NCOUNT VCOUNT * TCOUNT +#define SCOUNT LCOUNT * NCOUNT + +/* comparison routine for bsearch() of decomposition lookup table. */ +static int +conv_compare(const void *p1, const void *p2) +{ + uint32 v1, + v2; + + v1 = *(const uint32 *) p1; + v2 = ((const pg_unicode_decomposition *) p2)->codepoint; + return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1); +} + +/* + * Get the entry corresponding to code in the decomposition lookup table. + */ +static pg_unicode_decomposition * +get_code_entry(pg_wchar code) +{ + return bsearch(&(code), + (void *) UnicodeDecompMain, + lengthof(UnicodeDecompMain), + sizeof(pg_unicode_decomposition), + conv_compare); +} + +/* + * Given a decomposition entry looked up earlier, get the decomposed + * characters. + * + * Note: the returned pointer can point to statically allocated buffer, and + * is only valid until next call to this function! + */ +static const pg_wchar * +get_code_decomposition(pg_unicode_decomposition *entry, int *dec_size) +{ + static pg_wchar x; + + if (DECOMPOSITION_IS_INLINE(entry)) + { + Assert(DECOMPOSITION_SIZE(entry) == 1); + x = (pg_wchar) entry->dec_index; + *dec_size = 1; + return &x; + } + else + { + *dec_size = DECOMPOSITION_SIZE(entry); + return &UnicodeDecomp_codepoints[entry->dec_index]; + } +} + +/* + * Calculate how many characters a given character will decompose to. + * + * This needs to recurse, if the character decomposes into characters that + * are, in turn, decomposable. + */ +static int +get_decomposed_size(pg_wchar code) +{ + pg_unicode_decomposition *entry; + int size = 0; + int i; + const uint32 *decomp; + int dec_size; + + /* + * Fast path for Hangul characters not stored in tables to save memory as + * decomposition is algorithmic. See + * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on + * the matter. + */ + if (code >= SBASE && code < SBASE + SCOUNT) + { + uint32 tindex, + sindex; + + sindex = code - SBASE; + tindex = sindex % TCOUNT; + + if (tindex != 0) + return 3; + return 2; + } + + entry = get_code_entry(code); + + /* + * Just count current code if no other decompositions. A NULL entry is + * equivalent to a character with class 0 and no decompositions. + */ + if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0) + return 1; + + /* + * If this entry has other decomposition codes look at them as well. First + * get its decomposition in the list of tables available. + */ + decomp = get_code_decomposition(entry, &dec_size); + for (i = 0; i < dec_size; i++) + { + uint32 lcode = decomp[i]; + + size += get_decomposed_size(lcode); + } + + return size; +} + +/* + * Recompose a set of characters. For hangul characters, the calculation + * is algorithmic. For others, an inverse lookup at the decomposition + * table is necessary. Returns true if a recomposition can be done, and + * false otherwise. + */ +static bool +recompose_code(uint32 start, uint32 code, uint32 *result) +{ + /* + * Handle Hangul characters algorithmically, per the Unicode spec. + * + * Check if two current characters are L and V. + */ + if (start >= LBASE && start < LBASE + LCOUNT && + code >= VBASE && code < VBASE + VCOUNT) + { + /* make syllable of form LV */ + uint32 lindex = start - LBASE; + uint32 vindex = code - VBASE; + + *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; + return true; + } + /* Check if two current characters are LV and T */ + else if (start >= SBASE && start < (SBASE + SCOUNT) && + ((start - SBASE) % TCOUNT) == 0 && + code >= TBASE && code < (TBASE + TCOUNT)) + { + /* make syllable of from LVT */ + uint32 tindex = code - TBASE; + + *result = start + tindex; + return true; + } + else + { + int i; + + /* + * Do an inverse lookup of the decomposition tables to see if anything + * matches. The comparison just needs to be a perfect match on the + * sub-table of size two, because the start character has already been + * recomposed partially. + */ + for (i = 0; i < lengthof(UnicodeDecompMain); i++) + { + const pg_unicode_decomposition *entry = &UnicodeDecompMain[i]; + + if (DECOMPOSITION_SIZE(entry) != 2) + continue; + + if (DECOMPOSITION_NO_COMPOSE(entry)) + continue; + + if (start == UnicodeDecomp_codepoints[entry->dec_index] && + code == UnicodeDecomp_codepoints[entry->dec_index + 1]) + { + *result = entry->codepoint; + return true; + } + } + } + + return false; +} + +/* + * Decompose the given code into the array given by caller. The + * decomposition begins at the position given by caller, saving one + * lookup on the decomposition table. The current position needs to be + * updated here to let the caller know from where to continue filling + * in the array result. + */ +static void +decompose_code(pg_wchar code, pg_wchar **result, int *current) +{ + pg_unicode_decomposition *entry; + int i; + const uint32 *decomp; + int dec_size; + + /* + * Fast path for Hangul characters not stored in tables to save memory as + * decomposition is algorithmic. See + * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on + * the matter. + */ + if (code >= SBASE && code < SBASE + SCOUNT) + { + uint32 l, + v, + tindex, + sindex; + pg_wchar *res = *result; + + sindex = code - SBASE; + l = LBASE + sindex / (VCOUNT * TCOUNT); + v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT; + tindex = sindex % TCOUNT; + + res[*current] = l; + (*current)++; + res[*current] = v; + (*current)++; + + if (tindex != 0) + { + res[*current] = TBASE + tindex; + (*current)++; + } + + return; + } + + entry = get_code_entry(code); + + /* + * Just fill in with the current decomposition if there are no + * decomposition codes to recurse to. A NULL entry is equivalent to a + * character with class 0 and no decompositions, so just leave also in + * this case. + */ + if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0) + { + pg_wchar *res = *result; + + res[*current] = code; + (*current)++; + return; + } + + /* + * If this entry has other decomposition codes look at them as well. + */ + decomp = get_code_decomposition(entry, &dec_size); + for (i = 0; i < dec_size; i++) + { + pg_wchar lcode = (pg_wchar) decomp[i]; + + /* Leave if no more decompositions */ + decompose_code(lcode, result, current); + } +} + +/* + * unicode_normalize_kc - Normalize a Unicode string to NFKC form. + * + * The input is a 0-terminated array of codepoints. + * + * In frontend, returns a 0-terminated array of codepoints, allocated with + * malloc. Or NULL if we run out of memory. In frontend, the returned + * string is palloc'd instead, and OOM is reported with ereport(). + */ +pg_wchar * +unicode_normalize_kc(const pg_wchar *input) +{ + pg_wchar *decomp_chars; + pg_wchar *recomp_chars; + int decomp_size, + current_size; + int count; + const pg_wchar *p; + + /* variables for recomposition */ + int last_class; + int starter_pos; + int target_pos; + uint32 starter_ch; + + /* First, do character decomposition */ + + /* + * Calculate how many characters long the decomposed version will be. + */ + decomp_size = 0; + for (p = input; *p; p++) + decomp_size += get_decomposed_size(*p); + + decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); + if (decomp_chars == NULL) + return NULL; + + /* + * Now fill in each entry recursively. This needs a second pass on the + * decomposition table. + */ + current_size = 0; + for (p = input; *p; p++) + decompose_code(*p, &decomp_chars, ¤t_size); + decomp_chars[decomp_size] = '\0'; + Assert(decomp_size == current_size); + + /* + * Now apply canonical ordering. + */ + for (count = 1; count < decomp_size; count++) + { + pg_wchar prev = decomp_chars[count - 1]; + pg_wchar next = decomp_chars[count]; + pg_wchar tmp; + pg_unicode_decomposition *prevEntry = get_code_entry(prev); + pg_unicode_decomposition *nextEntry = get_code_entry(next); + + /* + * If no entries are found, the character used is either an Hangul + * character or a character with a class of 0 and no decompositions, + * so move to next result. + */ + if (prevEntry == NULL || nextEntry == NULL) + continue; + + /* + * Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4, + * a sequence of two adjacent characters in a string is an + * exchangeable pair if the combining class (from the Unicode + * Character Database) for the first character is greater than the + * combining class for the second, and the second is not a starter. A + * character is a starter if its combining class is 0. + */ + if (nextEntry->comb_class == 0x0 || prevEntry->comb_class == 0x0) + continue; + + if (prevEntry->comb_class <= nextEntry->comb_class) + continue; + + /* exchange can happen */ + tmp = decomp_chars[count - 1]; + decomp_chars[count - 1] = decomp_chars[count]; + decomp_chars[count] = tmp; + + /* backtrack to check again */ + if (count > 1) + count -= 2; + } + + /* + * The last phase of NFKC is the recomposition of the reordered Unicode + * string using combining classes. The recomposed string cannot be longer + * than the decomposed one, so make the allocation of the output string + * based on that assumption. + */ + recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); + if (!recomp_chars) + { + FREE(decomp_chars); + return NULL; + } + + last_class = -1; /* this eliminates a special check */ + starter_pos = 0; + target_pos = 1; + starter_ch = recomp_chars[0] = decomp_chars[0]; + + for (count = 1; count < decomp_size; count++) + { + pg_wchar ch = decomp_chars[count]; + pg_unicode_decomposition *ch_entry = get_code_entry(ch); + int ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class; + pg_wchar composite; + + if (last_class < ch_class && + recompose_code(starter_ch, ch, &composite)) + { + recomp_chars[starter_pos] = composite; + starter_ch = composite; + } + else if (ch_class == 0) + { + starter_pos = target_pos; + starter_ch = ch; + last_class = -1; + recomp_chars[target_pos++] = ch; + } + else + { + last_class = ch_class; + recomp_chars[target_pos++] = ch; + } + } + recomp_chars[target_pos] = (pg_wchar) '\0'; + + FREE(decomp_chars); + + return recomp_chars; +} diff --git a/src/common/username.c b/src/common/username.c index 5abf4a56b0..487eacfe73 100644 --- a/src/common/username.c +++ b/src/common/username.c @@ -3,7 +3,7 @@ * username.c * get user name * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -18,10 +18,8 @@ #include "postgres_fe.h" #endif -#include <errno.h> #include <pwd.h> #include <unistd.h> -#include <sys/types.h> #include "common/username.h" diff --git a/src/common/wait_error.c b/src/common/wait_error.c index 4aa827d066..f824a5f2af 100644 --- a/src/common/wait_error.c +++ b/src/common/wait_error.c @@ -4,7 +4,7 @@ * Convert a wait/waitpid(2) result code to a human-readable string * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -21,8 +21,6 @@ #endif #include <signal.h> -#include <stdio.h> -#include <string.h> #include <sys/wait.h> /* |