Postgres95 1.01 Distribution - Virgin Sources

author Marc G. Fournier <scrappy@hub.org>

Tue, 9 Jul 1996 06:22:35 +0000 (06:22 +0000)

committer Marc G. Fournier <scrappy@hub.org>

Tue, 9 Jul 1996 06:22:35 +0000 (06:22 +0000)
author Marc G. Fournier <scrappy@hub.org>
Tue, 9 Jul 1996 06:22:35 +0000 (06:22 +0000)
committer Marc G. Fournier <scrappy@hub.org>
Tue, 9 Jul 1996 06:22:35 +0000 (06:22 +0000)
diff --git a/src/Makefile b/src/Makefile

new file mode 100644 (file)

index 0000000..7e047c0
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,48 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Build and install postgres.
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/Makefile,v 1.1.1.1 1996/07/09 06:21:07 scrappy Exp $
+#
+# NOTES
+#  objdir  - location of the objects and generated files (eg. obj)
+#
+#-------------------------------------------------------------------------
+
+SUBDIR= backend libpq bin
+
+FIND = find
+# assuming gnu tar and split here
+TAR  = tar
+SPLIT = split
+
+ETAGS = etags
+XARGS = xargs
+
+ifeq ($(USE_TCL), true)
+SUBDIR += libpgtcl
+endif
+
+include mk/postgres.subdir.mk
+
+TAGS:
+   rm -f TAGS; \
+   for i in backend libpq bin; do \
+     $(FIND) $$i -name '*.[chyl]' -print | $(XARGS) $(ETAGS) -a ; \
+   done
+
+# target to generate a backup tar file and split files that can be
+# saved to 1.44M floppy; each step runs only if the previous one succeeded
+BACKUP:
+   rm -f BACKUP.filelist BACKUP.tgz pgBACKUP.* && \
+   $(FIND) . -not -path '*obj/*' -not -path '*data/*' -type f -print > BACKUP.filelist && \
+   $(TAR) --files-from BACKUP.filelist -c -z -v -f BACKUP.tgz
+   $(SPLIT) --bytes=1400k BACKUP.tgz pgBACKUP.
+
+.PHONY: TAGS
+.PHONY: BACKUP
diff --git a/src/Makefile.global b/src/Makefile.global

new file mode 100644 (file)

index 0000000..1ecd62a
--- /dev/null
+++ b/src/Makefile.global
@@ -0,0 +1,306 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.global--
+#    global configuration for the Makefiles
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/Attic/Makefile.global,v 1.1.1.1 1996/07/09 06:21:07 scrappy Exp $
+#
+# NOTES
+#    This is seen by any Makefiles that include mk/postgres.mk. To
+#    override the default setting, create a Makefile.custom in this
+#    directory and put your defines there. (Makefile.custom is included
+#    at the end of this file.)
+#
+#    If you change any of these defines you probably have to 
+#  gmake clean; gmake
+#    since no dependencies are created for these. (of course you can
+#    be crafty and check what files really depend on them and just remake
+#    those).
+#
+#-------------------------------------------------------------------------
+
+
+##############################################################################
+#
+# CONFIGURATION SECTION
+#
+# Following are settings pertaining to the postgres build and 
+# installation.  The most important one is obviously the name 
+# of the port.
+
+#  The name of the port.  Valid choices are:
+#  alpha       -   DEC Alpha AXP on OSF/1 2.0
+#  hpux        -   HP PA-RISC on HP-UX 9.0
+#  sparc_solaris   -   SUN SPARC on Solaris 2.4
+#  sparc       -   SUN SPARC on SunOS 4.1.3
+#  ultrix4     -   DEC MIPS on Ultrix 4.4
+#  linux       -   Intel x86 on Linux 1.2 and Linux ELF
+#              (For non-ELF Linux, you need to comment out 
+#              "LINUX_ELF=1" in src/mk/port/postgres.mk.linux)
+#  BSD44_derived   -   OSs derived from 4.4-lite BSD (NetBSD, FreeBSD)
+#       bsdi            -       BSD/OS 2.0 and 2.01
+#  aix     -   IBM on AIX 3.2.5
+#  irix5       -   SGI MIPS on IRIX 5.3
+#  Some hooks are provided for
+#  svr4        -   Intel x86 on Intel SVR4
+#  next        -   Motorola MC68K or Intel x86 on NeXTSTEP 3.2
+#  but these are guaranteed not to work as of yet.
+#
+#  XXX Note that you MUST set PORTNAME here (or on the command line) so 
+#  that port-dependent variables are correctly set within this file.
+#  Makefile.custom does not take effect (for ifeq purposes) 
+#  until after this file is processed!
+#  make sure that you have no whitespaces after the PORTNAME setting
+#  or the makefiles can get confused
+PORTNAME=  alpha
+
+# POSTGRESLOGIN is the login name of the user who gets special
+# privileges within the database.  By default it is "postgres", but
+# you can change it to any existing login name (such as your own 
+# login if you are compiling a private version or don't have root
+# access).
+POSTGRESLOGIN= postgres
+
+# For convenience, POSTGRESDIR is where DATADIR, BINDIR, and LIBDIR 
+# and other target destinations are rooted.  Of course, each of these is 
+# changeable separately.
+POSTGRESDIR=   /private/postgres95
+
+# SRCDIR specifies where the source files are.
+SRCDIR=        $(POSTGRESDIR)/src
+
+# DATADIR specifies where the postmaster expects to find its database.
+# This may be overridden by command line options or the PGDATA environment
+# variable.
+DATADIR=   $(POSTGRESDIR)/data
+
+# Where the postgres executables live (changeable by just putting them
+# somewhere else and putting that directory in your shell PATH)
+BINDIR=        $(POSTGRESDIR)/bin
+
+# Where libpq.a gets installed.  You must put it where your loader will
+# look for it if you wish to use the -lpq convention.  Otherwise you
+# can just put the absolute pathname to the library at the end of your
+# command line.
+LIBDIR=        $(POSTGRESDIR)/lib
+
+# This is the directory where IPC utilities ipcs and ipcrm are located
+#
+IPCSDIR=   /usr/bin
+
+# Where the man pages (suitable for use with "man") get installed.
+POSTMANDIR=    $(POSTGRESDIR)/man
+
+# Where the formatted documents (e.g., the reference manual) get installed.
+POSTDOCDIR=    $(POSTGRESDIR)/doc
+
+# Where the header files necessary to build frontend programs get installed.
+HEADERDIR= $(POSTGRESDIR)/include
+
+# NAMEDATALEN is the max length for system identifiers (e.g. table names, 
+# attribute names, function names, etc.)  
+#
+# These MUST be set here.  DO NOT COMMENT THESE OUT
+# Setting these too high will result in excess space usage for system catalogs
+# Setting them too low will make the system unusable.
+# values between 16 and 64 that are multiples of four are recommended.
+#
+# NOTE also that databases with different NAMEDATALEN's cannot interoperate!
+#
+NAMEDATALEN = 32
+# OIDNAMELEN should be set to NAMEDATALEN + sizeof(Oid)
+OIDNAMELEN = 36
+
+CFLAGS+= -DNAMEDATALEN=$(NAMEDATALEN) -DOIDNAMELEN=$(OIDNAMELEN)
+
+##############################################################################
+#
+# FEATURES 
+#
+# To disable a feature, comment out the entire definition
+# (that is, prepend '#', don't set it to "0" or "no").
+
+# Comment out ENFORCE_ALIGNMENT if you do NOT want unaligned access to
+# multi-byte types to generate a bus error.
+ENFORCE_ALIGNMENT= true
+
+# Comment out CDEBUG to turn off debugging and sanity-checking.
+#
+#  XXX on MIPS, use -g3 if you want to compile with -O
+CDEBUG= -g
+
+# turn this on if you prefer European style dates instead of American
+# style dates
+# EUROPEAN_DATES = 1
+
+# Comment out PROFILE to disable profiling.
+#
+#  XXX define on MIPS if you want to be able to use pixie.
+#      note that this disables dynamic loading!
+#PROFILE= -p -non_shared
+
+# About the use of readline in psql:
+#    psql does not require the GNU readline and history libraries. Hence, we
+#    do not compile with them by default. However, there are hooks in the
+#    program which support the use of GNU readline and history. Should you
+#    decide to use them, uncomment USE_READLINE below and change READLINE_INCDIR
+#    and READLINE_LIBDIR to reflect the location of the readline and history
+#    headers and libraries.
+#
+#USE_READLINE= true
+
+# directories for the readline and history libraries.
+READLINE_INCDIR=  /usr/local/include
+HISTORY_INCDIR=   /usr/local/include
+READLINE_LIBDIR=  /usr/local/lib
+HISTORY_LIBDIR=   /usr/local/lib
+
+# If you do not plan to use Host based authentication,
+# comment out the following line
+HBA = 1
+ 
+ifdef HBA
+HBAFLAGS= -DHBA
+endif
+
+
+
+# If you plan to use Kerberos for authentication...
+#
+# Comment out KRBVERS if you do not use Kerberos.
+#  Set KRBVERS to "4" for Kerberos v4, "5" for Kerberos v5.
+#  XXX Edit the default Kerberos variables below!
+#
+#KRBVERS=  5
+
+
+# Globally pass Kerberos file locations.
+#  these are used in the postmaster and all libpq applications.
+#
+#  Adjust KRBINCS and KRBLIBS to reflect where you have Kerberos
+#      include files and libraries installed.
+#  PG_KRB_SRVNAM is the name under which POSTGRES is registered in
+#      the Kerberos database (KDC).
+#  PG_KRB_SRVTAB is the location of the server's keytab file.
+#
+ifdef KRBVERS
+KRBINCS= -I/usr/athena/include
+KRBLIBS= -L/usr/athena/lib
+KRBFLAGS+= $(KRBINCS) -DPG_KRB_SRVNAM='"postgres_dbms"'
+   ifeq ($(KRBVERS), 4)
+KRBFLAGS+= -DKRB4
+KRBFLAGS+= -DPG_KRB_SRVTAB='"/etc/srvtab"'
+KRBLIBS+= -lkrb -ldes
+   else
+   ifeq ($(KRBVERS), 5)
+KRBFLAGS+= -DKRB5
+KRBFLAGS+= -DPG_KRB_SRVTAB='"FILE:/krb5/srvtab.postgres"'
+KRBLIBS+= -lkrb5 -lcrypto -lcom_err -lisode
+   endif
+   endif
+endif
+
+#
+# location of Tcl/Tk headers and libraries
+#
+# Comment this out if you do not want to build the tcl utilities.
+USE_TCL= true
+# customize these to your site's needs
+#
+TCL_INCDIR= /usr/local/devel/tcl7.4/include
+TCL_LIBDIR= /usr/local/devel/tcl7.4/lib
+TCL_LIB = -ltcl7.4
+TK_INCDIR=  /usr/local/devel/tk4.0/include
+TK_LIBDIR=  /usr/local/devel/tk4.0/lib
+TK_LIB = -ltk4.0
+
+#
+# include port specific rules and variables. For instance:
+#
+# signal(2) handling - this is here because it affects some of 
+# the frontend commands as well as the backend server.
+#
+# Ultrix and SunOS provide BSD signal(2) semantics by default.
+#
+# SVID2 and POSIX signal(2) semantics differ from BSD signal(2) 
+# semantics.  We can use the POSIX sigaction(2) on systems that
+# allow us to request restartable signals (SA_RESTART).
+#
+# Some systems don't allow restartable signals at all unless we 
+# link to a special BSD library.
+#
+# We devoutly hope that there aren't any systems that provide
+# neither POSIX signals nor BSD signals.  The alternative 
+# is to do signal-handler reinstallation, which doesn't work well 
+# at all.
+#
+-include $(MKDIR)/port/postgres.mk.$(PORTNAME)
+
+##############################################################################
+#
+# Flags for CC and LD. (depend on CDEBUG and PROFILE)
+#
+
+# Globally pass debugging/optimization/profiling flags based
+# on the options selected above.
+ifdef CDEBUG
+   CFLAGS+= $(CDEBUG)
+   LDFLAGS+= $(CDEBUG)
+else
+   ifndef CFLAGS_OPT
+      CFLAGS_OPT= -O
+   endif
+   CFLAGS+= $(CFLAGS_OPT)
+#
+# Skipping assertion checks makes things go a LOT faster, but you will
+# also lose a lot of useful error-checking.  Comment this out to keep them.
+#
+   CFLAGS+= -DNO_ASSERT_CHECKING
+endif
+
+ifdef PROFILE
+CFLAGS+= $(PROFILE)
+LDFLAGS+= $(PROFILE)
+endif
+
+# Globally pass PORTNAME
+CFLAGS+= -DPORTNAME_$(PORTNAME)
+
+# Globally pass the default TCP port for postmaster(1).
+CFLAGS+= -DPOSTPORT='"5432"'
+
+# include flags from mk/port/postgres.mk.$(PORTNAME)
+CFLAGS+= $(CFLAGS_BE)
+LDADD+= $(LDADD_BE)
+LDFLAGS+= $(LDFLAGS_BE)
+
+
+##############################################################################
+#
+# Miscellaneous configuration
+#
+
+# This is the time, in seconds, that a given backend server
+# will wait on a lock before deciding to abort the transaction
+# (this is what we do in lieu of deadlock detection).
+#
+# Low numbers are not recommended as they will tend to cause
+# false aborts if many transactions are long-lived.
+CFLAGS+= -DDEADLOCK_TIMEOUT=60
+
+srcdir=        $(SRCDIR)
+includedir=    $(HEADERDIR)
+objdir=        obj
+
+
+##############################################################################
+#
+# Customization.
+#
+-include $(MKDIR)/../Makefile.custom
+
+
diff --git a/src/backend/Makefile b/src/backend/Makefile

new file mode 100644 (file)

index 0000000..4cdc7ad
--- /dev/null
+++ b/src/backend/Makefile
@@ -0,0 +1,289 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for the postgres backend (and the postmaster)
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+#
+# The following turns on intermediate linking of partial objects to speed
+# the link cycle during development. (To turn this off, put "BIGOBJS=false"
+# in your custom makefile, ../Makefile.custom.)
+BIGOBJS= true
+
+
+PROG=  postgres
+
+MKDIR= ../mk
+include $(MKDIR)/postgres.mk
+
+
+include $(CURDIR)/access/Makefile.inc
+include $(CURDIR)/bootstrap/Makefile.inc
+include $(CURDIR)/catalog/Makefile.inc
+include $(CURDIR)/commands/Makefile.inc
+include $(CURDIR)/executor/Makefile.inc
+include $(CURDIR)/include/Makefile.inc
+include $(CURDIR)/lib/Makefile.inc
+include $(CURDIR)/libpq/Makefile.inc
+include $(CURDIR)/main/Makefile.inc
+include $(CURDIR)/nodes/Makefile.inc
+include $(CURDIR)/optimizer/Makefile.inc
+include $(CURDIR)/parser/Makefile.inc
+include $(CURDIR)/port/Makefile.inc
+include $(CURDIR)/postmaster/Makefile.inc
+include $(CURDIR)/regex/Makefile.inc
+include $(CURDIR)/rewrite/Makefile.inc
+include $(CURDIR)/storage/Makefile.inc
+include $(CURDIR)/tcop/Makefile.inc
+include $(CURDIR)/tioga/Makefile.inc
+include $(CURDIR)/utils/Makefile.inc
+
+SRCS:= ${SRCS_ACCESS} ${SRCS_BOOTSTRAP} $(SRCS_CATALOG) ${SRCS_COMMANDS} \
+   ${SRCS_EXECUTOR} $(SRCS_LIB) $(SRCS_LIBPQ) ${SRCS_MAIN} \
+   ${SRCS_NODES} ${SRCS_OPTIMIZER} ${SRCS_PARSER} ${SRCS_PORT} \
+   $(SRCS_POSTMASTER) ${SRCS_REGEX} ${SRCS_REWRITE} ${SRCS_STORAGE} \
+   ${SRCS_TCOP} ${SRCS_UTILS} 
+# Objects to link: partial objects when BIGOBJS, else one .o per .c/.s source.
+ifeq ($(BIGOBJS), true)
+OBJS= ACCESS.o BOOTSTRAP.o COMMANDS.o EXECUTOR.o MAIN.o MISC.o NODES.o \
+   PARSER.o OPTIMIZER.o REGEX.o REWRITE.o STORAGE.o TCOP.o UTILS.o
+CLEANFILES+= $(patsubst %.s,%.o,$(SRCS:.c=.o)) $(OBJS)
+else
+OBJS:= $(patsubst %.s,%.o,$(SRCS:%.c=$(objdir)/%.o))
+CLEANFILES+= $(notdir $(OBJS))
+endif
+
+#############################################################################
+#
+# TIOGA stuff
+#
+ifdef TIOGA
+SRCS+= $(SRCS_TIOGA) 
+   ifeq ($(BIGOBJS), true)
+TIOGA.o:   $(SRCS_TIOGA:%.c=$(objdir)/%.o)
+   $(make_partial)
+OBJS+= TIOGA.o
+CLEANFILES+= $(SRCS_TIOGA:%.c=%.o) TIOGA.o
+   else
+OBJS+= $(SRCS_TIOGA:%.c=$(objdir)/%.o)
+   endif
+endif
+
+
+#############################################################################
+#
+# Compiling the postgres backend.
+#
+CFLAGS+=  -DPOSTGRESDIR='"$(POSTGRESDIR)"' \
+   -DPGDATADIR='"$(DATADIR)"' \
+   -I$(CURDIR)/. -I$(CURDIR)/$(objdir) \
+   -I$(CURDIR)/include \
+   -I$(CURDIR)/port/$(PORTNAME)
+
+# turn this on if you prefer European style dates instead of American
+# style dates
+ifdef EUROPEAN_DATES
+CFLAGS += -DEUROPEAN_STYLE
+endif
+
+# kerberos flags
+ifdef KRBVERS
+CFLAGS+= $(KRBFLAGS)
+LDADD+= $(KRBLIBS)
+endif
+
+# host based access flags
+ifdef HBA
+CFLAGS+= $(HBAFLAGS)
+endif
+ 
+
+
+#
+# All systems except NEXTSTEP require the math library.
+# Loader flags for system-dependent libraries are appended in
+#  src/backend/port/$(PORTNAME)/Makefile.inc
+#
+ifneq ($(PORTNAME), next)
+LDADD+=    -lm
+endif
+
+# explicitly link in libc for linux
+ifeq ($(PORTNAME), linux)
+LDADD+= -lc
+endif
+
+postgres: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS)
+   $(CC) $(LDFLAGS) -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD)
+
+# Make this target first if you are doing a parallel make.
+# The targets in 'first' need to be made sequentially because of dependencies.
+# Then, you can make 'all' with parallelism turned on.
+first: $(POSTGRES_DEPEND)
+
+
+#############################################################################
+#
+# Partial objects for platforms with slow linkers.
+#
+ifeq ($(BIGOBJS), true)
+# Per-module object lists; OBJS_PORT also maps .s sources to .o (suffix only).
+OBJS_ACCESS:=     $(SRCS_ACCESS:%.c=$(objdir)/%.o)
+OBJS_BOOTSTRAP:=   $(SRCS_BOOTSTRAP:%.c=$(objdir)/%.o)
+OBJS_CATALOG:=    $(SRCS_CATALOG:%.c=$(objdir)/%.o)
+OBJS_COMMANDS:=       $(SRCS_COMMANDS:%.c=$(objdir)/%.o)
+OBJS_EXECUTOR:=       $(SRCS_EXECUTOR:%.c=$(objdir)/%.o)
+OBJS_MAIN:=       $(SRCS_MAIN:%.c=$(objdir)/%.o)
+OBJS_POSTMASTER:=  $(SRCS_POSTMASTER:%.c=$(objdir)/%.o)
+OBJS_LIB:=    $(SRCS_LIB:%.c=$(objdir)/%.o)
+OBJS_LIBPQ:=      $(SRCS_LIBPQ:%.c=$(objdir)/%.o)
+OBJS_PORT:=       $(addprefix $(objdir)/,$(patsubst %.s,%.o,$(SRCS_PORT:.c=.o)))
+OBJS_NODES:=      $(SRCS_NODES:%.c=$(objdir)/%.o)
+OBJS_PARSER:=     $(SRCS_PARSER:%.c=$(objdir)/%.o)
+OBJS_OPTIMIZER:=   $(SRCS_OPTIMIZER:%.c=$(objdir)/%.o)
+OBJS_REGEX:=      $(SRCS_REGEX:%.c=$(objdir)/%.o)
+OBJS_REWRITE:=    $(SRCS_REWRITE:%.c=$(objdir)/%.o)
+OBJS_STORAGE:=    $(SRCS_STORAGE:%.c=$(objdir)/%.o)
+OBJS_TCOP:=       $(SRCS_TCOP:%.c=$(objdir)/%.o)
+OBJS_UTILS:=      $(SRCS_UTILS:%.c=$(objdir)/%.o)
+
+ACCESS.o:  $(OBJS_ACCESS)
+   $(make_partial)
+BOOTSTRAP.o:   $(OBJS_BOOTSTRAP)
+   $(make_partial)
+COMMANDS.o:    $(OBJS_COMMANDS)
+   $(make_partial)
+EXECUTOR.o:    $(OBJS_EXECUTOR)
+   $(make_partial)
+MAIN.o:        $(OBJS_MAIN) $(OBJS_POSTMASTER)
+   $(make_partial)
+MISC.o:        $(OBJS_CATALOG) $(OBJS_LIB) $(OBJS_LIBPQ) $(OBJS_PORT)
+   $(make_partial)
+NODES.o:   $(OBJS_NODES)
+   $(make_partial)
+PARSER.o:  $(OBJS_PARSER)
+   $(make_partial)
+OPTIMIZER.o:   $(OBJS_OPTIMIZER)
+   $(make_partial)
+REGEX.o:   $(OBJS_REGEX)
+   $(make_partial)
+REWRITE.o: $(OBJS_REWRITE)
+   $(make_partial)
+STORAGE.o: $(OBJS_STORAGE)
+   $(make_partial)
+TCOP.o:        $(OBJS_TCOP)
+   $(make_partial)
+UTILS.o:   $(OBJS_UTILS)
+   $(make_partial)
+endif
+
+#############################################################################
+#
+# Installation.
+#
+# Install the bki files to the data directory.  We also copy a version
+# of them that has "PGUID" intact, so one can change the value of the
+# postgres userid before running initdb in the case of customizing the
+# binary release (i.e., fixing up PGUID w/o recompiling the system).
+# Those files are copied out as foo.source.  The program newbki(1) can
+# be run later to reset the postgres login id (but it must be run before
+# initdb is run, or after clearing the data directory with
+# cleardbdir(1)). [newbki distributed with v4r2 but not with Postgres95.]
+#
+
+# NAMEDATALEN and OIDNAMELEN come from Makefile.global and are substituted
+# into the bki files.  Every installed path is prefixed with $(DESTDIR).
+
+install: beforeinstall pg_id $(BKIFILES) postgres
+   $(INSTALL) $(INSTL_EXE_OPTS) $(objdir)/postgres $(DESTDIR)$(BINDIR)/postgres
+   @rm -f $(DESTDIR)$(BINDIR)/postmaster
+   cd $(DESTDIR)$(BINDIR) && ln -s postgres postmaster
+   @cd $(objdir); \
+   PG_UID=`./pg_id $(POSTGRESLOGIN)`; \
+   POSTGRESLOGIN=$(POSTGRESLOGIN);\
+   echo "NAMEDATALEN = $(NAMEDATALEN)"; \
+   echo "OIDNAMELEN = $(OIDNAMELEN)"; \
+   case $$PG_UID in "NOUSER") \
+       echo "Warning: no account named $(POSTGRESLOGIN), using yours";\
+       POSTGRESLOGIN=`whoami`; \
+       PG_UID=`./pg_id`;; \
+   esac ;\
+   for bki in $(BKIFILES); do \
+       sed \
+           -e "s/postgres PGUID/$$POSTGRESLOGIN $$PG_UID/" \
+           -e "s/NAMEDATALEN/$(NAMEDATALEN)/g" \
+           -e "s/OIDNAMELEN/$(OIDNAMELEN)/g" \
+           -e "s/PGUID/$$PG_UID/" \
+           < $$bki > $$bki.sed ; \
+       echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki."; \
+       $(INSTALL) $(INSTLOPTS) \
+           $$bki.sed $(DESTDIR)$(DATADIR)/files/$$bki; \
+       rm -f $$bki.sed; \
+       echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki.source."; \
+       $(INSTALL) $(INSTLOPTS) \
+           $$bki $(DESTDIR)$(DATADIR)/files/$$bki.source; \
+   done;
+   @echo "Installing $(DESTDIR)$(DATADIR)/pg_hba";
+   @cp $(srcdir)/libpq/pg_hba $(DESTDIR)$(DATADIR)
+   @chmod 644 $(DESTDIR)$(DATADIR)/pg_hba
+
+
+# so we can get the UID of the postgres owner (w/o moving pg_id to
+# src/tools). We just want the vanilla LDFLAGS for pg_id
+IDLDFLAGS:= $(LDFLAGS)
+ifeq ($(PORTNAME), hpux)
+ifeq ($(CC), cc)
+IDLDFLAGS+= -Aa -D_HPUX_SOURCE
+endif
+endif
+pg_id: $(srcdir)/bin/pg_id/pg_id.c
+   $(CC) $(IDLDFLAGS) -o $(objdir)/$(@F) $<
+
+CLEANFILES+= pg_id postgres
+
+
+#############################################################################
+#
+# Support for code development.
+#
+
+#
+# Build the file, "./ID", used by the "gid" (grep-for-identifier) tool
+#
+IDFILE=    ID
+.PHONY: $(IDFILE)
+$(IDFILE):
+   $(CURDIR)/makeID $(PORTNAME)
+
+#
+# Special rule to generate cpp'd version of a .c file.  This is
+# especially useful given all the hellish macro processing going on.
+# The cpp'd version has a .cpp suffix and is written into $(objdir).
+# To create foo.cpp from foo.c, just type
+#  gmake foo.cpp
+#
+%.cpp: %.c
+   $(CC) -E $(CFLAGS) $< | cat -s | cb | tr -s '\012*' '\012' > $(objdir)/$(@F)
+
+cppall: $(SRCS:.c=.cpp)
+
+#
+# To use Purify (SunOS only), define PURIFY to be the path (and
+# options) with which to invoke the Purify loader.  Only the executable
+# needs to be loaded with Purify.
+#
+# PURIFY = /usr/sww/bin/purify -cache-dir=/usr/local/postgres/src/backend/purify-cache
+#.if defined(PURIFY)
+#${PROG}: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS)
+#  ${PURIFY} ${CC} ${LDFLAGS} -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD)
+#
+#CLEANFILES+= .purify* .pure .lock.*.o *_pure_*.o *.pure_*link*
+#.endif
+
diff --git a/src/backend/access/Makefile.inc b/src/backend/access/Makefile.inc

new file mode 100644 (file)

index 0000000..6adc2c6
--- /dev/null
+++ b/src/backend/access/Makefile.inc
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for the access methods module
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+accdir=$(CURDIR)/access
+VPATH:=$(VPATH):$(accdir):\
+   $(accdir)/common:$(accdir)/hash:$(accdir)/heap:$(accdir)/index:\
+   $(accdir)/rtree:$(accdir)/nbtree:$(accdir)/transam
+
+
+SUBSRCS=
+include $(accdir)/common/Makefile.inc
+include $(accdir)/hash/Makefile.inc
+include $(accdir)/heap/Makefile.inc
+include $(accdir)/index/Makefile.inc
+include $(accdir)/rtree/Makefile.inc
+include $(accdir)/nbtree/Makefile.inc
+include $(accdir)/transam/Makefile.inc
+SRCS_ACCESS:= $(SUBSRCS)
+
+HEADERS+= attnum.h funcindex.h genam.h hash.h \
+   heapam.h hio.h htup.h ibit.h iqual.h istrat.h \
+   itup.h nbtree.h printtup.h relscan.h rtree.h \
+   sdir.h skey.h strat.h transam.h tupdesc.h tupmacs.h \
+   valid.h xact.h
+
diff --git a/src/backend/access/attnum.h b/src/backend/access/attnum.h

new file mode 100644 (file)

index 0000000..7c999e5
--- /dev/null
+++ b/src/backend/access/attnum.h
@@ -0,0 +1,61 @@
+/*-------------------------------------------------------------------------
+ *
+ * attnum.h--
+ *    POSTGRES attribute number definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: attnum.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    ATTNUM_H
+#define ATTNUM_H
+
+#include "c.h"
+
+/*
+ * user defined attribute numbers start at 1.  -ay 2/95
+ */
+typedef int16      AttrNumber;
+
+#define InvalidAttrNumber  0
+
+/* ----------------
+ * support macros
+ * ----------------
+ */
+/*
+ * AttributeNumberIsValid --
+ * True iff the attribute number is valid.
+ */
+#define AttributeNumberIsValid(attributeNumber) \
+    ((bool) ((attributeNumber) != InvalidAttrNumber))
+
+/*
+ * AttrNumberIsForUserDefinedAttr --
+ * True iff the attribute number corresponds to a user defined attribute.
+ */
+#define AttrNumberIsForUserDefinedAttr(attributeNumber) \
+    ((bool) ((attributeNumber) > 0))
+
+/*
+ * AttrNumberGetAttrOffset --
+ * Returns the zero-based attribute offset for an attribute number.
+ *
+ * Note:
+ * Assumes the attribute number is for a user defined attribute (> 0).
+ */
+#define AttrNumberGetAttrOffset(attNum) \
+     (AssertMacro(AttrNumberIsForUserDefinedAttr(attNum)) ? \
+      ((attNum) - 1) : 0)
+
+/*
+ * AttrOffsetGetAttrNumber --
+ * Returns the attribute number (offset + 1) for an attribute offset.
+ */
+#define AttrOffsetGetAttrNumber(attributeOffset) \
+     ((AttrNumber) (1 + (attributeOffset)))
+
+#endif /* ATTNUM_H */
diff --git a/src/backend/access/common/Makefile.inc b/src/backend/access/common/Makefile.inc

new file mode 100644 (file)

index 0000000..5d5dd47
--- /dev/null
+++ b/src/backend/access/common/Makefile.inc
@@ -0,0 +1,16 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/common
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/common/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+SUBSRCS+= heaptuple.c heapvalid.c indextuple.c indexvalid.c printtup.c \
+   scankey.c tupdesc.c
+
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c

new file mode 100644 (file)

index 0000000..c3e72fb
--- /dev/null
+++ b/src/backend/access/common/heaptuple.c
@@ -0,0 +1,1011 @@
+/*-------------------------------------------------------------------------
+ *
+ * heaptuple.c--
+ *    This file contains heap tuple accessor and mutator routines, as well
+ *    as a few various tuple utilities.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/heaptuple.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ * NOTES
+ *    The old interface functions have been converted to macros
+ *    and moved to heapam.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/htup.h"
+#include "access/itup.h"
+#include "access/tupmacs.h"
+#include "access/skey.h"
+#include "storage/ipc.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "access/transam.h"
+#include "storage/bufpage.h"       /* for MAXTUPLEN */
+#include "storage/itemptr.h"
+#include "utils/memutils.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/nabstime.h"
+
+/* this is so the sparcstation debugger works */
+
+#ifndef NO_ASSERT_CHECKING
+#ifdef sparc
+#define register
+#endif /* sparc */
+#endif /* NO_ASSERT_CHECKING */
+
+/* ----------------------------------------------------------------
+ *         misc support routines
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * ComputeDataSize - number of bytes the non-null values occupy, including
+ * alignment padding; nulls[i] == ' ' marks attribute i as present.
+ */
+Size
+ComputeDataSize(TupleDesc tupleDesc,
+       Datum value[],
+       char nulls[])
+{
+    uint32 length;
+    int i;
+    int numberOfAttributes = tupleDesc->natts;
+    AttributeTupleForm *att = tupleDesc->attrs;
+    
+    for (length = 0, i = 0; i < numberOfAttributes; i++) {
+   if (nulls[i] != ' ') continue;
+       
+   switch (att[i]->attlen) {
+   case -1:
+       /*
+        * This is the size of the disk representation and so
+        * must include the additional sizeof long.
+        */
+       if (att[i]->attalign == 'd') {
+       length = DOUBLEALIGN(length)
+           + VARSIZE(DatumGetPointer(value[i]));
+       } else {
+       length = INTALIGN(length)
+           + VARSIZE(DatumGetPointer(value[i]));
+       }
+       break;
+   case sizeof(char):
+       length++;
+       break;
+   case sizeof(short):
+       length = SHORTALIGN(length + sizeof(short));
+       break;
+   case sizeof(int32):
+       length = INTALIGN(length + sizeof(int32));
+       break;
+   default:
+       /* (int) cast: a negative attlen must not wrap to a huge unsigned */
+       if (att[i]->attlen < (int) sizeof(int32))
+       elog(WARN, "ComputeDataSize: attribute %d has len %d", i, att[i]->attlen);
+       if (att[i]->attalign == 'd')
+       length = DOUBLEALIGN(length) + att[i]->attlen;
+       else
+       length = LONGALIGN(length) + att[i]->attlen;
+       break;
+   }
+    }
+    
+    return length;
+}
+
+/* ----------------
+ * DataFill - copy the values into data with alignment padding, set *infomask
+ * flags, and (if bit != NULL) set one bitmap bit per non-null attribute
+ */
+void
+DataFill(char *data,
+    TupleDesc tupleDesc,
+    Datum value[],
+    char nulls[],
+    char *infomask,
+    bits8 bit[])
+{
+    bits8  *bitP = NULL;
+    int        bitmask = 0;
+    uint32 length;
+    int        i;
+    int         numberOfAttributes = tupleDesc->natts;
+    AttributeTupleForm* att = tupleDesc->attrs;
+    
+    if (bit != NULL) {
+   bitP = &bit[-1];
+   bitmask = CSIGNBIT;
+    }
+    
+    *infomask = 0;
+    
+    for (i = 0; i < numberOfAttributes; i++) {
+   if (bit != NULL) {
+       if (bitmask != CSIGNBIT) {
+       bitmask <<= 1;
+       } else {
+       bitP += 1;
+       *bitP = 0x0;
+       bitmask = 1;
+       }
+       
+       if (nulls[i] == 'n') {
+       *infomask |= HEAP_HASNULL;
+       continue;
+       }
+       
+       *bitP |= bitmask;
+   }
+       
+   switch (att[i]->attlen) {
+   case -1:
+       *infomask |= HEAP_HASVARLENA;
+       if (att[i]->attalign=='d') {
+       data = (char *) DOUBLEALIGN(data);
+       } else {
+       data = (char *) INTALIGN(data);
+       }
+       length = VARSIZE(DatumGetPointer(value[i]));
+       memmove(data, DatumGetPointer(value[i]),length);
+       data += length;
+       break;
+   case sizeof(char):
+       *data = att[i]->attbyval ?
+       DatumGetChar(value[i]) : *((char *) value[i]);
+       data += sizeof(char);
+       break;
+   case sizeof(int16):
+       data = (char *) SHORTALIGN(data);
+       * (short *) data = (att[i]->attbyval ?
+               DatumGetInt16(value[i]) :
+               *((short *) value[i]));
+       data += sizeof(short);
+       break;
+   case sizeof(int32):
+       data = (char *) INTALIGN(data);
+       * (int32 *) data = (att[i]->attbyval ?
+               DatumGetInt32(value[i]) :
+               *((int32 *) value[i]));
+       data += sizeof(int32);
+       break;
+   default:
+       /* (int) cast: a negative attlen must not wrap to a huge unsigned */
+       if (att[i]->attlen < (int) sizeof(int32))
+       elog(WARN, "DataFill: attribute %d has len %d", i, att[i]->attlen);
+       if (att[i]->attalign == 'd') {
+       data = (char *) DOUBLEALIGN(data);
+       memmove(data, DatumGetPointer(value[i]),
+           att[i]->attlen);
+       data += att[i]->attlen;
+       } else {
+       data = (char *) LONGALIGN(data);
+       memmove(data, DatumGetPointer(value[i]),
+           att[i]->attlen);
+       data += att[i]->attlen;
+       }
+       break;
+   }
+    }
+}
+
+/* ----------------------------------------------------------------
+ *         heap tuple interface
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * heap_attisnull  - nonzero iff user attribute attnum of tup is null or lies
+ * beyond the tuple's t_natts; the known system attnums always yield 0
+ */
+int
+heap_attisnull(HeapTuple tup, int attnum)
+{
+    if (attnum > (int)tup->t_natts)
+   return (1);
+    
+    if (HeapTupleNoNulls(tup)) return(0);
+    
+    if (attnum > 0) {
+   return(att_isnull(attnum - 1, tup->t_bits));
+    } else
+   switch (attnum) {
+   case SelfItemPointerAttributeNumber:
+   case ObjectIdAttributeNumber:
+   case MinTransactionIdAttributeNumber:
+   case MinCommandIdAttributeNumber:
+   case MaxTransactionIdAttributeNumber:
+   case MaxCommandIdAttributeNumber:
+   case ChainItemPointerAttributeNumber:
+   case AnchorItemPointerAttributeNumber:
+   case MinAbsoluteTimeAttributeNumber:
+   case MaxAbsoluteTimeAttributeNumber:
+   case VersionTypeAttributeNumber:
+       break;
+       
+   case 0:
+       elog(WARN, "heap_attisnull: zero attnum disallowed");
+       break;      /* do not fall through into the negative-attnum report */
+   default:
+       elog(WARN, "heap_attisnull: undefined negative attnum");
+   }
+    
+    return (0);
+}
+
+/* ----------------------------------------------------------------
+ *      system attribute heap tuple support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * heap_sysattrlen
+ *
+ * Returns the size in bytes of the HeapTupleData field that backs the
+ * system attribute "attno".  Every size is taken with sizeof on the
+ * field itself.
+ *
+ * AnchorItemPointerAttributeNumber has no backing field and an
+ * unrecognized attno has none either; both are reported through
+ * elog(WARN).  Should elog return, the result is 0 in both cases.
+ * ----------------
+ */
+int
+heap_sysattrlen(AttrNumber attno)
+{
+    HeapTupleData  *f = NULL;   /* used only as a sizeof operand */
+    int             len = 0;    /* keeps the result defined on error paths */
+
+    switch (attno) {
+    case SelfItemPointerAttributeNumber:
+        len = sizeof f->t_ctid;
+        break;
+    case ObjectIdAttributeNumber:
+        len = sizeof f->t_oid;
+        break;
+    case MinTransactionIdAttributeNumber:
+        len = sizeof f->t_xmin;
+        break;
+    case MinCommandIdAttributeNumber:
+        len = sizeof f->t_cmin;
+        break;
+    case MaxTransactionIdAttributeNumber:
+        len = sizeof f->t_xmax;
+        break;
+    case MaxCommandIdAttributeNumber:
+        len = sizeof f->t_cmax;
+        break;
+    case ChainItemPointerAttributeNumber:
+        len = sizeof f->t_chain;
+        break;
+    case AnchorItemPointerAttributeNumber:
+        /* len stays 0: there is nothing to measure */
+        elog(WARN, "heap_sysattrlen: field t_anchor does not exist!");
+        break;
+    case MinAbsoluteTimeAttributeNumber:
+        len = sizeof f->t_tmin;
+        break;
+    case MaxAbsoluteTimeAttributeNumber:
+        len = sizeof f->t_tmax;
+        break;
+    case VersionTypeAttributeNumber:
+        len = sizeof f->t_vtype;
+        break;
+    default:
+        elog(WARN, "sysattrlen: System attribute number %d unknown.",
+             attno);
+        len = 0;
+        break;
+    }
+    return (len);
+}
+
+/* ----------------
+ * heap_sysattrbyval
+ *
+ * Tells whether the system attribute "attno" is passed by value.
+ *
+ * The three item-pointer attributes (self, chain, anchor) yield false;
+ * every other recognized system attribute yields true.  An
+ * unrecognized attno is reported through elog(WARN) and, should elog
+ * return, also yields true.
+ * ----------------
+ */
+bool
+heap_sysattrbyval(AttrNumber attno)
+{
+    bool    passedByValue = true;
+
+    switch (attno) {
+    /* item pointers are handed around by reference */
+    case SelfItemPointerAttributeNumber:
+    case ChainItemPointerAttributeNumber:
+    case AnchorItemPointerAttributeNumber:
+        passedByValue = false;
+        break;
+
+    /* the remaining known attributes keep the by-value setting */
+    case ObjectIdAttributeNumber:
+    case MinTransactionIdAttributeNumber:
+    case MinCommandIdAttributeNumber:
+    case MaxTransactionIdAttributeNumber:
+    case MaxCommandIdAttributeNumber:
+    case MinAbsoluteTimeAttributeNumber:
+    case MaxAbsoluteTimeAttributeNumber:
+    case VersionTypeAttributeNumber:
+        break;
+
+    default:
+        elog(WARN, "sysattrbyval: System attribute number %d unknown.",
+             attno);
+        break;
+    }
+
+    return passedByValue;
+}
+
+/* ----------------
+ * heap_getsysattr
+ *
+ * Fetches system attribute "attnum" of "tup".
+ *
+ * For t_ctid and t_chain the address of the field inside the tuple is
+ * returned.  Every other attribute is returned as its value, converted
+ * through long and carried in the char * result.  NULL is returned
+ * after the elog(WARN) issued for the anchor attribute and for an
+ * unrecognized attnum.
+ *
+ * "b" is not referenced by this routine.
+ *
+ * Side effect: a request for tmin or tmax may write a time value back
+ * into the tuple, see the comment inside.
+ * ----------------
+ */
+char *
+heap_getsysattr(HeapTuple tup, Buffer b, int attnum)
+{
+    char   *result = NULL;
+
+    switch (attnum) {
+    case SelfItemPointerAttributeNumber:
+        result = (char *) &tup->t_ctid;
+        break;
+    case ObjectIdAttributeNumber:
+        result = (char *) (long) tup->t_oid;
+        break;
+    case MinTransactionIdAttributeNumber:
+        result = (char *) (long) tup->t_xmin;
+        break;
+    case MinCommandIdAttributeNumber:
+        result = (char *) (long) tup->t_cmin;
+        break;
+    case MaxTransactionIdAttributeNumber:
+        result = (char *) (long) tup->t_xmax;
+        break;
+    case MaxCommandIdAttributeNumber:
+        result = (char *) (long) tup->t_cmax;
+        break;
+    case ChainItemPointerAttributeNumber:
+        result = (char *) &tup->t_chain;
+        break;
+    case AnchorItemPointerAttributeNumber:
+        elog(WARN, "heap_getsysattr: t_anchor does not exist!");
+        break;
+
+    /*
+     * tmin and tmax take extra work.  They are not filled in until the
+     * vacuum cleaner runs (or a page gets flushed after the value has
+     * been set correctly below).  Until then the times stored in the
+     * tuple are wrong and the commit time of the transaction has to be
+     * looked up.  The looked-up value is stored in the tuple so that
+     * the work is not repeated.
+     */
+    case MinAbsoluteTimeAttributeNumber:
+        if (!AbsoluteTimeIsBackwardCompatiblyValid(tup->t_tmin) &&
+            TransactionIdDidCommit(tup->t_xmin)) {
+            tup->t_tmin = TransactionIdGetCommitTime(tup->t_xmin);
+        }
+        result = (char *) (long) tup->t_tmin;
+        break;
+    case MaxAbsoluteTimeAttributeNumber:
+        if (!AbsoluteTimeIsBackwardCompatiblyReal(tup->t_tmax)) {
+            if (TransactionIdDidCommit(tup->t_xmax)) {
+                tup->t_tmax = TransactionIdGetCommitTime(tup->t_xmax);
+            } else {
+                tup->t_tmax = CURRENT_ABSTIME;
+            }
+        }
+        result = (char *) (long) tup->t_tmax;
+        break;
+
+    case VersionTypeAttributeNumber:
+        result = (char *) (long) tup->t_vtype;
+        break;
+    default:
+        elog(WARN, "heap_getsysattr: undefined attnum %d", attnum);
+        break;
+    }
+
+    return result;
+}
+
+/* ----------------
+ * fastgetattr
+ *
+ * Fetches user attribute "attnum" (1-based, must be > 0) from "tup",
+ * trying to be fast by caching attribute offsets in the attribute
+ * descriptor (attcacheoff of tupleDesc->attrs[]).
+ *
+ * *isnull is set to false on entry; if the attribute turns out to be
+ * null it is set to true and NULL is returned.  Otherwise the result
+ * of fetchatt() on the attribute's location is returned.
+ *
+ * Side effect: attcacheoff entries of tupleDesc may be filled in.
+ *
+ * an alternate way to speed things up would be to cache offsets
+ * with the tuple, but that seems more difficult unless you take
+ * the storage hit of actually putting those offsets into the
+ * tuple you send to disk.  Yuck.
+ *
+ * This scheme will be slightly slower than that, but should
+ * perform well for queries which hit large #'s of tuples.  After
+ * you cache the offsets once, examining all the other tuples using
+ * the same attribute descriptor will go much quicker. -cim 5/4/91
+ * ----------------
+ */
+char *
+fastgetattr(HeapTuple tup,
+       int attnum,
+       TupleDesc tupleDesc,
+       bool *isnull)
+{
+    char *tp;      /* start of the tuple's data area (tup + t_hoff) */
+    bits8  *bp;            /* null bitmap (tup->t_bits); set only if tuple has nulls */
+    int slow;      /* nonzero: cached offsets unusable, walk the tuple */
+    AttributeTupleForm *att = tupleDesc->attrs;
+    
+    /* ----------------
+     * sanity checks
+     * ----------------
+     */
+    
+    Assert(PointerIsValid(isnull));
+    Assert(attnum > 0);
+    
+    /* ----------------
+     *   Three cases:
+     * 
+     *   1: No nulls and no variable length attributes.
+     *   2: Has a null or a varlena AFTER att.
+     *   3: Has nulls or varlenas BEFORE att.
+     * ----------------
+     */
+    
+    *isnull =  false;
+    
+    if (HeapTupleNoNulls(tup)) {
+   /* from here on attnum is a 0-based index into att[] */
+   attnum--;
+   if (att[attnum]->attcacheoff > 0) {
+       /* offset already cached: fetch directly */
+       return (char *)
+       fetchatt( &(att[attnum]),
+            (char *)tup + tup->t_hoff + att[attnum]->attcacheoff);
+   } else if (attnum == 0) {
+       /*
+        * first attribute is always at position zero
+        */
+       return((char *) fetchatt(&(att[0]), (char *) tup + tup->t_hoff));
+   }
+       
+   tp = (char *) tup + tup->t_hoff;
+       
+   slow = 0;
+    } else {
+   /*
+    * there's a null somewhere in the tuple
+    */
+
+   bp = tup->t_bits;
+   tp = (char *) tup + tup->t_hoff;
+   slow = 0;
+   /* from here on attnum is a 0-based index into att[] */
+   attnum--;
+       
+   /* ----------------
+    *  check to see if desired att is null
+    * ----------------
+    */
+   
+   if (att_isnull(attnum, bp)) {
+       *isnull = true;
+       return NULL;
+   }
+
+   /* ----------------
+    *      Now check to see if any preceding bits are null...
+    *      (a null before our attribute forces the slow path)
+    * ----------------
+    */
+   
+   {
+       register int  i = 0; /* attribute index being tested in bp */
+       
+       for (i = 0; i < attnum && !slow; i++) {
+       if (att_isnull(i, bp)) slow = 1;
+       }
+   }
+    }
+    
+    /*
+     * now check for any non-fixed length attrs before our attribute
+     * (attlen < 1 is what marks such an attribute here)
+     */
+    if (!slow) {
+   if (att[attnum]->attcacheoff > 0) {
+       return (char *)
+       fetchatt(&(att[attnum]),
+            tp + att[attnum]->attcacheoff);
+   } else if (attnum == 0) {
+       return (char *)
+       fetchatt(&(att[0]), (char *) tup + tup->t_hoff);
+   } else if (!HeapTupleAllFixed(tup)) {
+       register int j = 0;
+           
+       for (j = 0; j < attnum && !slow; j++)
+       if (att[j]->attlen < 1) slow = 1;
+   }
+    }
+    
+    /*
+     * if slow is zero, and we got here, we know that no attribute before
+     * the requested one is null or has attlen < 1.  We also have to
+     * initialize the remainder of the attribute cached offset values.
+     */
+    if (!slow) {
+   register int j = 1;
+   register long off;
+       
+   /*
+    * need to set cache for some atts
+    */
+       
+   att[0]->attcacheoff = 0;
+   
+   /* skip the attributes whose offset is already cached */
+   while (att[j]->attcacheoff > 0) j++;
+   
+   /* resume right behind the last attribute with a known offset */
+   off = att[j-1]->attcacheoff + att[j-1]->attlen;
+   
+   for (; j < attnum + 1; j++) {
+       /* align off as required for attribute j */
+       switch(att[j]->attlen) {
+       case -1:
+       off = (att[j]->attalign=='d') ?
+           DOUBLEALIGN(off) : INTALIGN(off);
+       break;
+       case sizeof(char):
+       break;
+       case sizeof(short):
+       off = SHORTALIGN(off);
+       break;
+       case sizeof(int32):
+       off = INTALIGN(off);
+       break;
+       default:
+       if (att[j]->attlen < sizeof(int32)) {
+           elog(WARN,
+            "fastgetattr: attribute %d has len %d",
+            j, att[j]->attlen);
+       }
+       if (att[j]->attalign == 'd')
+           off = DOUBLEALIGN(off);
+       else
+           off = LONGALIGN(off);
+       break;
+       }
+           
+       /* remember the offset, then step over attribute j */
+       att[j]->attcacheoff = off;
+       off += att[j]->attlen;
+   }
+   
+   return
+       (char *)fetchatt(&(att[attnum]), tp + att[attnum]->attcacheoff);
+    } else {
+   register bool usecache = true; /* cached offsets still valid so far? */
+   register int off = 0;          /* running offset from tp */
+   register int i;
+   
+   /*
+    * Now we know that we have to walk the tuple CAREFULLY.
+    *
+    * Note - This loop is a little tricky.  On iteration i we
+    * first set the offset for attribute i and figure out how much
+    * the offset should be incremented.  Finally, we need to align the
+    * offset based on the size of attribute i+1 (for which the offset
+    * has been computed). -mer 12 Dec 1991
+    */
+   
+   for (i = 0; i < attnum; i++) {
+       /* bp is consulted only when the tuple has nulls, i.e. when it was set */
+       if (!HeapTupleNoNulls(tup)) {
+       if (att_isnull(i, bp)) {
+           /* a null attribute is skipped; offsets after it are not cached */
+           usecache = false;
+           continue;
+       }
+       }
+       /* align off as required for attribute i */
+       switch (att[i]->attlen) {
+       case -1:
+       off = (att[i]->attalign=='d') ?
+           DOUBLEALIGN(off) : INTALIGN(off);
+       break;
+       case sizeof(char):
+       break;
+       case sizeof(short):
+       off = SHORTALIGN(off);
+       break;
+       case sizeof(int32):
+       off = INTALIGN(off);
+       break;
+       default:
+       if (att[i]->attlen < sizeof(int32))
+           elog(WARN,
+            "fastgetattr2: attribute %d has len %d",
+            i, att[i]->attlen);
+       if (att[i]->attalign == 'd')
+           off = DOUBLEALIGN(off);
+       else
+           off = LONGALIGN(off);
+       break;
+       }
+       /* take a cached offset while the cache is usable, else record ours */
+       if (usecache && att[i]->attcacheoff > 0) {
+       off = att[i]->attcacheoff;
+       if (att[i]->attlen == -1) {
+           usecache = false;
+       }
+       } else {
+       if (usecache) att[i]->attcacheoff = off;
+       }
+           
+       /* step over attribute i */
+       switch(att[i]->attlen) {
+       case sizeof(char):
+       off++;
+       break;
+       case sizeof(int16):
+       off += sizeof(int16);
+       break;
+       case sizeof(int32):
+       off += sizeof(int32);
+       break;
+       case -1:
+       /* length comes from the stored value itself (VARSIZE) */
+       usecache = false;
+       off += VARSIZE(tp + off);
+       break;
+       default:
+       off += att[i]->attlen;
+       break;
+       }
+   }
+   /* finally align off for the requested attribute itself */
+   switch (att[attnum]->attlen) {
+   case -1:
+       off = (att[attnum]->attalign=='d')?
+       DOUBLEALIGN(off) : INTALIGN(off);
+       break;
+   case sizeof(char):
+       break;
+   case sizeof(short):
+       off = SHORTALIGN(off);
+       break;
+   case sizeof(int32):
+       off = INTALIGN(off);
+       break;
+   default:
+       if (att[attnum]->attlen < sizeof(int32))
+       elog(WARN, "fastgetattr3: attribute %d has len %d",
+            attnum, att[attnum]->attlen);
+       if (att[attnum]->attalign == 'd')
+       off = DOUBLEALIGN(off);
+       else
+       off = LONGALIGN(off);
+       break;
+   }
+   return((char *) fetchatt(&(att[attnum]), tp + off));
+    }
+}
+
+/* ----------------
+ * heap_getattr
+ *
+ * Returns an attribute from a heap tuple.  Positive attribute numbers
+ * are served by fastgetattr, all others by heap_getsysattr.
+ *
+ * "isnull" may be an invalid pointer, in which case the null
+ * indication is discarded.  An attnum beyond tup->t_natts yields
+ * NULL with *isnull set to true; for system attributes *isnull is
+ * always set to false.
+ * ----------------
+ */
+char *
+heap_getattr(HeapTuple tup,
+             Buffer b,
+             int attnum,
+             TupleDesc tupleDesc,
+             bool *isnull)
+{
+    bool    ignoredIsNull;  /* target for *isnull when caller gave none */
+
+    Assert(tup != NULL);
+
+    if (!PointerIsValid(isnull)) {
+        isnull = &ignoredIsNull;
+    }
+
+    /* attribute not stored in this tuple: report it as null */
+    if ((int) tup->t_natts < attnum) {
+        *isnull = true;
+        return (char *) NULL;
+    }
+
+    /* system attributes */
+    if (!(attnum > 0)) {
+        *isnull = false;
+        return heap_getsysattr(tup, b, attnum);
+    }
+
+    /* user defined attributes */
+    return fastgetattr(tup, attnum, tupleDesc, isnull);
+}
+
+/* ----------------
+ * heap_copytuple
+ *
+ * Returns a palloc'd, byte-for-byte copy of an entire tuple (t_len
+ * bytes), or NULL when the given tuple is not valid.  A tuple longer
+ * than MAXTUPLEN is reported through elog(WARN).
+ * ----------------
+ */
+HeapTuple
+heap_copytuple(HeapTuple tuple)
+{
+    HeapTuple   duplicate = NULL;
+
+    if (HeapTupleIsValid(tuple)) {
+        /* XXX For now, just prevent an undetectable executor related error */
+        if (tuple->t_len > MAXTUPLEN)
+            elog(WARN, "palloctup: cannot handle length %d tuples",
+                 tuple->t_len);
+
+        duplicate = (HeapTuple) palloc(tuple->t_len);
+        memmove((char *) duplicate, (char *) tuple, (int) tuple->t_len);
+    }
+
+    return duplicate;
+}
+
+/* ----------------
+ * heap_deformtuple
+ *
+ * The inverse of heap_formtuple: for each of the tuple's t_natts
+ * attributes, values[] receives what heap_getattr returns and nulls[]
+ * receives 'n' for a null attribute or ' ' (space) otherwise.  Both
+ * arrays are supplied by the caller and are indexed from zero.
+ * ----------------
+ */
+void
+heap_deformtuple(HeapTuple tuple,
+                 TupleDesc tdesc,
+                 Datum values[],
+                 char nulls[])
+{
+    int     attno;      /* 1-based attribute number */
+    int     natts;
+
+    Assert(HeapTupleIsValid(tuple));
+
+    natts = tuple->t_natts;
+    for (attno = 1; attno <= natts; attno++) {
+        bool    attIsNull;
+
+        values[attno - 1] = (Datum) heap_getattr(tuple,
+                                                 InvalidBuffer,
+                                                 attno,
+                                                 tdesc,
+                                                 &attIsNull);
+        nulls[attno - 1] = attIsNull ? 'n' : ' ';
+    }
+}
+
+/* ----------------
+ * heap_formtuple
+ *
+ * Constructs a palloc'd tuple from the given value[] and nulls[]
+ * arrays, one entry per attribute of tupleDescriptor.
+ *
+ * Null attributes are indicated by a 'n' in the appropriate byte
+ * of nulls[].  Non-null attributes are indicated by a ' ' (space).
+ * A null bitmap is only made room for when at least one entry of
+ * nulls[] is not a space.
+ *
+ * The new tuple is zero-filled, then t_len, t_natts, t_hoff, t_tmin
+ * (INVALID_ABSTIME) and t_tmax (CURRENT_ABSTIME) are set and DataFill
+ * stores the attribute data behind the header.
+ *
+ * old comments
+ * Handles alignment by aligning 2 byte attributes on short boundaries
+ * and 3 or 4 byte attributes on long word boundaries on a vax; and
+ * aligning non-byte attributes on short boundaries on a sun.  Does
+ * not properly align fixed length arrays of 1 or 2 byte types (yet).
+ *
+ * Fix me.  (Figure that must keep context if debug--allow give oid.)
+ * Assumes in order.
+ * ----------------
+ */
+HeapTuple
+heap_formtuple(TupleDesc tupleDescriptor,
+               Datum value[],
+               char nulls[])
+{
+    HeapTuple   tuple;          /* return tuple */
+    long        len;            /* running total size of the tuple */
+    int         hoff;           /* offset of the data area */
+    int         bitmaplen;
+    int         i;
+    int         numberOfAttributes = tupleDescriptor->natts;
+    bool        hasnull = false;
+
+    /* header size without the bitmap member */
+    len = sizeof *tuple - sizeof tuple->t_bits;
+
+    /* stop at the first entry that is not marked ' ' */
+    for (i = 0; i < numberOfAttributes; i++) {
+        if (nulls[i] != ' ') {
+            hasnull = true;
+            break;
+        }
+    }
+
+    if (numberOfAttributes > MaxHeapAttributeNumber)
+        elog(WARN, "heap_formtuple: numberOfAttributes of %d > %d",
+             numberOfAttributes, MaxHeapAttributeNumber);
+
+    if (hasnull) {
+        bitmaplen = BITMAPLEN(numberOfAttributes);
+        len += bitmaplen;
+    }
+
+    /* be conservative here */
+    len = DOUBLEALIGN(len);
+    hoff = len;
+
+    len += ComputeDataSize(tupleDescriptor, value, nulls);
+
+    tuple = (HeapTuple) palloc(len);
+    memset((char *) tuple, 0, (int) len);
+
+    tuple->t_len = len;
+    tuple->t_natts = numberOfAttributes;
+    tuple->t_hoff = hoff;
+    tuple->t_tmin = INVALID_ABSTIME;
+    tuple->t_tmax = CURRENT_ABSTIME;
+
+    DataFill((char *) tuple + tuple->t_hoff,
+             tupleDescriptor,
+             value,
+             nulls,
+             &tuple->t_infomask,
+             (hasnull ? tuple->t_bits : NULL));
+
+    return tuple;
+}
+
+/* ----------------
+ * heap_modifytuple
+ *
+ * Forms a new tuple from an old tuple and a set of replacement values.
+ *
+ * For every attribute offset i of the relation, repl[i] selects where
+ * the attribute of the new tuple comes from:
+ *     ' '   the old tuple (fetched with heap_getattr)
+ *     'r'   replValue[i] / replNull[i]
+ * Any other code is reported through elog(WARN).
+ *
+ * If "buffer" is valid, the relation is taken from the buffer and the
+ * old tuple is copied first; that copy is freed before returning.
+ * Otherwise "relation" must be valid.
+ *
+ * The new tuple is built by heap_formtuple and then receives the old
+ * tuple's header fields from t_ctid up to (not including) t_hoff,
+ * while its own t_infomask and attribute count are kept.
+ * ----------------
+ */
+HeapTuple
+heap_modifytuple(HeapTuple tuple,
+                 Buffer buffer,
+                 Relation relation,
+                 Datum replValue[],
+                 char replNull[],
+                 char repl[])
+{
+    int         attoff;
+    int         numberOfAttributes;
+    Datum      *value;      /* scratch: per-attribute values for formtuple */
+    char       *nulls;      /* scratch: per-attribute null flags */
+    bool        isNull;
+    HeapTuple   newTuple;
+    int         madecopy;
+    uint8       infomask;
+
+    /* ----------------
+     * sanity checks
+     * ----------------
+     */
+    Assert(HeapTupleIsValid(tuple));
+    Assert(BufferIsValid(buffer) || RelationIsValid(relation));
+    Assert(PointerIsValid(replValue));
+    Assert(PointerIsValid(replNull));
+    Assert(PointerIsValid(repl));
+
+    /* ----------------
+     * if we're pointing to a disk page, then first
+     *  make a copy of our tuple so that all the attributes
+     *  are available.  XXX this is inefficient -cim
+     * ----------------
+     */
+    madecopy = 0;
+    if (BufferIsValid(buffer) == true) {
+        relation = (Relation) BufferGetRelation(buffer);
+        tuple = heap_copytuple(tuple);
+        madecopy = 1;
+    }
+
+    numberOfAttributes = RelationGetRelationTupleForm(relation)->relnatts;
+
+    /* ----------------
+     * allocate and fill value[] and nulls[] arrays from either
+     *  the tuple or the repl information, as appropriate.
+     * ----------------
+     */
+    value = (Datum *) palloc(numberOfAttributes * sizeof *value);
+    nulls = (char *) palloc(numberOfAttributes * sizeof *nulls);
+
+    for (attoff = 0; attoff < numberOfAttributes; attoff += 1) {
+
+        if (repl[attoff] == ' ') {
+            char   *attr;
+
+            attr = heap_getattr(tuple,
+                                InvalidBuffer,
+                                AttrOffsetGetAttrNumber(attoff),
+                                RelationGetTupleDescriptor(relation),
+                                &isNull);
+            value[attoff] = PointerGetDatum(attr);
+            nulls[attoff] = (isNull) ? 'n' : ' ';
+
+        } else if (repl[attoff] != 'r') {
+            elog(WARN, "heap_modifytuple: repl is \\%3d", repl[attoff]);
+
+        } else { /* == 'r' */
+            value[attoff] = replValue[attoff];
+            nulls[attoff] = replNull[attoff];
+        }
+    }
+
+    /* ----------------
+     * create a new tuple from the values[] and nulls[] arrays
+     * ----------------
+     */
+    newTuple = heap_formtuple(RelationGetTupleDescriptor(relation),
+                              value,
+                              nulls);
+
+    /* ----------------
+     * the scratch arrays are not referenced after this point;
+     *  release them instead of leaving them allocated.
+     * ----------------
+     */
+    pfree((char *) value);
+    pfree((char *) nulls);
+
+    /* ----------------
+     * copy the header except for t_len, t_natts, t_hoff, t_bits, t_infomask
+     *  (t_infomask is saved and restored around the copy, and
+     *  t_natts is reassigned afterwards)
+     * ----------------
+     */
+    infomask = newTuple->t_infomask;
+    memmove((char *) &newTuple->t_ctid,    /*XXX*/
+            (char *) &tuple->t_ctid,
+            ((char *) &tuple->t_hoff - (char *) &tuple->t_ctid)); /*XXX*/
+    newTuple->t_infomask = infomask;
+    newTuple->t_natts = numberOfAttributes;    /* fix t_natts just in case */
+
+    /* ----------------
+     * if we made a copy of the tuple, then free it.
+     * ----------------
+     */
+    if (madecopy)
+        pfree(tuple);
+
+    return newTuple;
+}
+
+/* ----------------------------------------------------------------
+ *         other misc functions
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * heap_addheader
+ *
+ * Builds a palloc'd tuple made of a heap tuple header (sized without
+ * the t_bits member and rounded up with DOUBLEALIGN) followed by a
+ * verbatim copy of "structure".
+ *
+ *     natts      - stored in t_natts; must be > 0
+ *     structlen  - number of bytes copied from "structure"
+ *     structure  - the bytes placed right behind the header
+ *
+ * The whole allocation is zero-filled first, so every header field
+ * other than t_len, t_hoff and t_natts is zero.
+ * ----------------
+ */
+HeapTuple
+heap_addheader(uint32 natts,   /* max domain index */
+               int structlen,  /* its length */
+               char *structure) /* pointer to the struct */
+{
+    register char  *tp;    /* tuple data pointer */
+    HeapTuple       tup;
+    long            len;
+    int             hoff;
+
+    AssertArg(natts > 0);
+
+    len = sizeof (HeapTupleData) - sizeof (tup->t_bits);
+
+    hoff = len = DOUBLEALIGN(len); /* be conservative */
+    len += structlen;
+    tp = (char *) palloc(len);
+    tup = (HeapTuple) tp;
+    memset((char *) tup, 0, len);
+
+    /*
+     * Store the length the same way heap_formtuple does.  Narrowing it
+     * through short first would corrupt any length above 32767.
+     */
+    tup->t_len = len;
+    tp += tup->t_hoff = hoff;
+    tup->t_natts = natts;
+    tup->t_infomask = 0;
+
+    memmove(tp, structure, structlen);
+
+    return (tup);
+}
diff --git a/src/backend/access/common/heapvalid.c b/src/backend/access/common/heapvalid.c

new file mode 100644 (file)

index 0000000..b80c5dd
--- /dev/null
+++ b/src/backend/access/common/heapvalid.c
@@ -0,0 +1,134 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapvalid.c--
+ *    heap tuple qualification validity checking code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "access/htup.h"
+#include "access/skey.h"
+#include "access/heapam.h"
+#include "utils/tqual.h"
+#include "access/valid.h"  /* where the declarations go */
+#include "access/xact.h"
+
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+/* ----------------
+ * heap_keytest
+ *
+ * Tests heap tuple "t" against the first "nkeys" entries of the scan
+ * key array "keys".  Returns true only if every key is satisfied.
+ *
+ * For each key the attribute sk_attno is fetched and handed, together
+ * with sk_argument, to the key's function (argument order swapped when
+ * SK_COMMUTE is set).  SK_NEGATE inverts the meaning of the function's
+ * result.  A null attribute fails the test.
+ * ----------------
+ */
+bool
+heap_keytest(HeapTuple t,
+             TupleDesc tupdesc,
+             int nkeys,
+             ScanKey keys)
+{
+    bool    attIsNull;
+    Datum   attValue;
+    Datum   leftArg;
+    Datum   rightArg;
+    int     outcome;
+
+    while (nkeys-- != 0) {
+        attValue = (Datum) heap_getattr(t, InvalidBuffer,
+                                        keys->sk_attno,
+                                        tupdesc,
+                                        &attIsNull);
+
+        /* XXX eventually should check if SK_ISNULL */
+        if (attIsNull)
+            return false;
+
+        /* pick the argument order requested by the key */
+        if (keys->sk_flags & SK_COMMUTE) {
+            leftArg = keys->sk_argument;
+            rightArg = attValue;
+        } else {
+            leftArg = attValue;
+            rightArg = keys->sk_argument;
+        }
+        outcome = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure,
+                                   leftArg, rightArg);
+
+        /* fail when "function said yes" and "negate" agree */
+        if ((outcome != 0) == ((keys->sk_flags & SK_NEGATE) != 0))
+            return false;
+
+        keys++;
+    }
+
+    return true;
+}
+
+/* ----------------
+ * heap_tuple_satisfies
+ *
+ *  Returns the tuple stored under "itemId" on "disk_page" if it passes
+ *  both the key test and the time qualification, NULL otherwise.  An
+ *  unused item id yields NULL right away.  The key test is skipped
+ *  when "key" is NULL, and the time qualification is skipped for
+ *  relations of kind RELKIND_UNCATALOGED.
+ *
+ *  Used to be heap_satisifies (sic) which returned a boolean.  It now
+ *  returns a tuple so that two PageGetItem's per tuple can be avoided.
+ *
+ *  This should perhaps be combined with valid somehow in the
+ *  future.  (Also, additional rule tests/time range tests.)
+ *
+ *  on 8/21/92 mao says:  i rearranged the tests here to do keytest before
+ *  SatisfiesTimeQual.  profiling indicated that even for vacuumed relations,
+ *  time qual checking was more expensive than key testing.  time qual is
+ *  least likely to fail, too.  we should really add the time qual test to
+ *  the restriction and optimize it in the normal way.  this has interactions
+ *  with joey's expensive function work.
+ * ----------------
+ */
+HeapTuple
+heap_tuple_satisfies(ItemId itemId,
+                     Relation relation,
+                     PageHeader disk_page,
+                     TimeQual qual,
+                     int nKeys,
+                     ScanKey key)
+{
+    HeapTuple   candidate;
+
+    if (!ItemIdIsUsed(itemId))
+        return NULL;
+
+    candidate = (HeapTuple) PageGetItem((Page) disk_page, itemId);
+
+    /* key test first; with no key there is nothing to fail */
+    if (key != NULL &&
+        !heap_keytest(candidate, RelationGetTupleDescriptor(relation),
+                      nKeys, key))
+        return (HeapTuple) NULL;
+
+    /* no time qualification for uncataloged relations */
+    if (relation->rd_rel->relkind == RELKIND_UNCATALOGED)
+        return candidate;
+
+    if (HeapTupleSatisfiesTimeQual(candidate, qual))
+        return candidate;
+
+    return (HeapTuple) NULL;
+}
+
+/*
+ *  TupleUpdatedByCurXactAndCmd() -- Returns true if the tuple's t_xmax
+ * equals the current transaction id and its t_cmax equals the
+ * current command id, i.e. it has already been updated once by
+ * the current transaction/command pair.
+ */
+bool
+TupleUpdatedByCurXactAndCmd(HeapTuple t)
+{
+    /* different transaction: the command id is not even looked at */
+    if (!TransactionIdEquals(t->t_xmax, GetCurrentTransactionId()))
+        return false;
+
+    if (t->t_cmax != GetCurrentCommandId())
+        return false;
+
+    return true;
+}
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c

new file mode 100644 (file)

index 0000000..be5d2cc
--- /dev/null
+++ b/src/backend/access/common/indextuple.c
@@ -0,0 +1,427 @@
+/*-------------------------------------------------------------------------
+ *
+ * indextuple.c--
+ *     This file contains index tuple accessor and mutator routines,
+ *     as well as a few various tuple utilities.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+
+#include "c.h"
+#include "access/ibit.h"
+#include "access/itup.h"   /* where the declarations go */
+#include "access/heapam.h"
+#include "access/genam.h"  
+#include "access/tupdesc.h"
+#include "access/tupmacs.h"
+
+#include "storage/itemptr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+
+static Size IndexInfoFindDataOffset(unsigned short t_info);
+
+/* ----------------------------------------------------------------
+ *       index_ tuple interface routines
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * index_formtuple
+ *
+ * Builds a palloc'd index tuple from value[] and null[], one entry
+ * per attribute of tupleDescriptor.  A null[] entry other than ' '
+ * (space) marks the attribute as null.
+ *
+ * The tuple's t_info receives the total (DOUBLEALIGNed) size combined
+ * with INDEX_NULL_MASK when any attribute is null and INDEX_VAR_MASK
+ * when DataFill reports variable-length attributes.
+ *
+ * Too many attributes, or a size that does not fit into the size
+ * part of t_info, is reported through elog(WARN).
+ * ----------------
+ */
+IndexTuple
+index_formtuple(TupleDesc tupleDescriptor,
+                Datum value[],
+                char null[])
+{
+    register char  *tp;    /* tuple pointer */
+    IndexTuple      tuple; /* return tuple */
+    Size            size, hoff;
+    int             i;
+    unsigned short  infomask = 0;
+    bool            hasnull = false;
+    char            tupmask = 0;
+    int             numberOfAttributes = tupleDescriptor->natts;
+
+    if (numberOfAttributes > MaxIndexAttributeNumber)
+        elog(WARN, "index_formtuple: numberOfAttributes of %d > %d",
+             numberOfAttributes, MaxIndexAttributeNumber);
+
+    for (i = 0; i < numberOfAttributes && !hasnull; i++) {
+        if (null[i] != ' ') hasnull = true;
+    }
+
+    if (hasnull) infomask |= INDEX_NULL_MASK;
+
+    hoff = IndexInfoFindDataOffset(infomask);
+    size = hoff
+        + ComputeDataSize(tupleDescriptor,
+                          value, null);
+    size = DOUBLEALIGN(size);  /* be conservative */
+
+    tp = (char *) palloc(size);
+    tuple = (IndexTuple) tp;
+    memset(tp, 0, (int) size);
+
+    /* when present, the null bitmap sits right behind the fixed header */
+    DataFill((char *) tp + hoff,
+             tupleDescriptor,
+             value,
+             null,
+             &tupmask,
+             (hasnull ? (bits8 *) tp + sizeof(*tuple) : NULL));
+
+    /*
+     * We do this because DataFill wants to initialize a "tupmask" which
+     * is used for HeapTuples, but we want an indextuple infomask.  The only
+     * "relevant" info is the "has variable attributes" field, which is in
+     * mask position 0x02.  We have already set the null mask above.
+     */
+
+    if (tupmask & 0x02) infomask |= INDEX_VAR_MASK;
+
+    /*
+     * Here we make sure that we can actually hold the size.  infomask is
+     * 16 bits wide and its bits 0xE000 are not available for the size
+     * (presumably they are the flag bits -- confirm in access/itup.h), so
+     * the size has to fit entirely into the low 13 bits.  Testing only
+     * "size & 0xE000" would let a size of 0x10000 or more slip through
+     * and be truncated by the assignment below.
+     */
+
+    if ((size & 0x1FFF) != size)
+        elog(WARN, "index_formtuple: data takes %d bytes: too big", size);
+
+    infomask |= size;
+
+    /* ----------------
+     * initialize metadata
+     * ----------------
+     */
+    tuple->t_info = infomask;
+    return (tuple);
+}
+
+/* ----------------
+ * fastgetiattr
+ *
+ * This is a newer version of fastgetiattr which attempts to be
+ * faster by caching attribute offsets in the attribute descriptor.
+ *
+ * an alternate way to speed things up would be to cache offsets
+ * with the tuple, but that seems more difficult unless you take
+ * the storage hit of actually putting those offsets into the
+ * tuple you send to disk.  Yuck.
+ *
+ * This scheme will be slightly slower than that, but should
+ * perform well for queries which hit large #'s of tuples.  After
+ * you cache the offsets once, examining all the other tuples using
+ * the same attribute descriptor will go much quicker. -cim 5/4/91
+ * ----------------
+ */
+char *
+fastgetiattr(IndexTuple tup,
+        int attnum,
+        TupleDesc tupleDesc,
+        bool *isnull)
+{
+    register char      *tp;        /* ptr to att in tuple */
+    register char      *bp;        /* ptr to att in tuple */
+    int            slow;       /* do we have to walk nulls? */
+    register int       data_off;   /* tuple data offset */
+    
+    /* ----------------
+     * sanity checks
+     * ----------------
+     */
+    
+    Assert(PointerIsValid(isnull));
+    Assert(attnum > 0);
+    
+    /* ----------------
+     *   Three cases:
+     * 
+     *   1: No nulls and no variable length attributes.
+     *   2: Has a null or a varlena AFTER att.
+     *   3: Has nulls or varlenas BEFORE att.
+     * ----------------
+     */
+    
+    *isnull =  false;
+    data_off = IndexTupleHasMinHeader(tup) ? sizeof *tup : 
+   IndexInfoFindDataOffset(tup->t_info);
+    
+    if (IndexTupleNoNulls(tup)) {
+   
+   /* first attribute is always at position zero */
+   
+   if (attnum == 1) {
+       return(fetchatt(&(tupleDesc->attrs[0]), (char *) tup + data_off));
+   }
+   attnum--;
+   
+   if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
+       return(fetchatt(&(tupleDesc->attrs[attnum]),
+               (char *) tup + data_off + 
+               tupleDesc->attrs[attnum]->attcacheoff));
+   }
+   
+   tp = (char *) tup + data_off;
+   
+   slow = 0;
+    }else { /* there's a null somewhere in the tuple */
+   
+   bp = (char *) tup + sizeof(*tup); /* "knows" t_bits are here! */
+   slow = 0;
+   /* ----------------
+    *  check to see if desired att is null
+    * ----------------
+    */
+   
+   attnum--;
+   {
+       if (att_isnull(attnum, bp)) {
+       *isnull = true;
+       return NULL;
+       }
+   }
+   /* ----------------
+    *      Now check to see if any preceeding bits are null...
+    * ----------------
+    */
+   {
+       register int  i = 0; /* current offset in bp */
+       register int  mask;  /* bit in byte we're looking at */
+       register char n;     /* current byte in bp */
+       register int byte, finalbit;
+       
+       byte = attnum >> 3;
+       finalbit = attnum & 0x07;
+       
+       for (; i <= byte; i++) {
+       n = bp[i];
+       if (i < byte) {
+           /* check for nulls in any "earlier" bytes */
+           if ((~n) != 0) {
+           slow++;
+           break;
+           }
+       } else {
+           /* check for nulls "before" final bit of last byte*/
+           mask = (finalbit << 1) - 1;
+           if ((~n) & mask)
+           slow++;
+       }
+       }
+   }
+   tp = (char *) tup + data_off;
+    }
+    
+    /* now check for any non-fixed length attrs before our attribute */
+    
+    if (!slow) {
+   if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
+       return(fetchatt(&(tupleDesc->attrs[attnum]), 
+               tp + tupleDesc->attrs[attnum]->attcacheoff));
+   }else if (!IndexTupleAllFixed(tup)) {
+       register int j = 0;
+       
+       for (j = 0; j < attnum && !slow; j++)
+       if (tupleDesc->attrs[j]->attlen < 1) slow = 1;
+   }
+    }
+    
+    /*
+     * if slow is zero, and we got here, we know that we have a tuple with
+     * no nulls.  We also know that we have to initialize the remainder of
+     * the attribute cached offset values.
+     */
+    
+    if (!slow) {
+   register int j = 1;
+   register long off;
+   
+   /*
+    * need to set cache for some atts
+    */
+   
+   tupleDesc->attrs[0]->attcacheoff = 0;
+   
+   while (tupleDesc->attrs[j]->attcacheoff > 0) j++;
+   
+   off = tupleDesc->attrs[j-1]->attcacheoff + 
+         tupleDesc->attrs[j-1]->attlen;
+   
+   for (; j < attnum + 1; j++) {
+       /*
+        * Fix me when going to a machine with more than a four-byte
+        * word!
+        */
+       
+       switch(tupleDesc->attrs[j]->attlen)
+       {
+       case -1:
+           off = (tupleDesc->attrs[j]->attalign=='d')?
+           DOUBLEALIGN(off):INTALIGN(off);
+           break;
+       case sizeof(char):
+           break;
+       case sizeof(short):
+           off = SHORTALIGN(off);
+           break;
+       case sizeof(int32):
+           off = INTALIGN(off);
+           break;
+       default:
+           if (tupleDesc->attrs[j]->attlen > sizeof(int32))
+           off = (tupleDesc->attrs[j]->attalign=='d')?
+               DOUBLEALIGN(off) : LONGALIGN(off);
+           else
+           elog(WARN, "fastgetiattr: attribute %d has len %d",
+                j, tupleDesc->attrs[j]->attlen);
+           break;
+           
+       }
+       
+       tupleDesc->attrs[j]->attcacheoff = off;
+       off += tupleDesc->attrs[j]->attlen;
+   }
+   
+   return(fetchatt( &(tupleDesc->attrs[attnum]), 
+           tp + tupleDesc->attrs[attnum]->attcacheoff));
+    }else {
+   register bool usecache = true;
+   register int off = 0;
+   register int i;
+   
+   /*
+    * Now we know that we have to walk the tuple CAREFULLY.
+    */
+   
+   for (i = 0; i < attnum; i++) {
+       if (!IndexTupleNoNulls(tup)) {
+       if (att_isnull(i, bp)) {
+           usecache = false;
+           continue;
+       }
+       }
+       
+       if (usecache && tupleDesc->attrs[i]->attcacheoff > 0) {
+       off = tupleDesc->attrs[i]->attcacheoff;
+       if (tupleDesc->attrs[i]->attlen == -1) 
+           usecache = false;
+       else
+           continue;
+       }
+           
+       if (usecache) tupleDesc->attrs[i]->attcacheoff = off;
+       switch(tupleDesc->attrs[i]->attlen)
+       {
+       case sizeof(char):
+           off++;
+           break;
+       case sizeof(short):
+           off = SHORTALIGN(off) + sizeof(short);
+           break;
+       case -1:
+           usecache = false;
+           off = (tupleDesc->attrs[i]->attalign=='d')?
+           DOUBLEALIGN(off):INTALIGN(off);
+           off += VARSIZE(tp + off);
+           break;
+       default:
+           if (tupleDesc->attrs[i]->attlen > sizeof(int32))
+           off = (tupleDesc->attrs[i]->attalign=='d') ?
+               DOUBLEALIGN(off) + tupleDesc->attrs[i]->attlen :
+               LONGALIGN(off) + tupleDesc->attrs[i]->attlen;
+           else
+           elog(WARN, "fastgetiattr2: attribute %d has len %d",
+                i, tupleDesc->attrs[i]->attlen);
+           
+           break;
+       }
+   }
+   
+   return(fetchatt(&tupleDesc->attrs[attnum], tp + off));
+    }
+}
+
+/* ----------------
+ * index_getattr
+ *
+ * Fetch attribute attNum of an index tuple as a Datum.  attNum is
+ * asserted to be positive; the tuple, attribute number, descriptor
+ * and null-flag pointer are handed unchanged to fastgetiattr, whose
+ * result is cast to Datum.
+ * ----------------
+ */
+Datum
+index_getattr(IndexTuple tuple,
+              AttrNumber attNum,
+              TupleDesc tupDesc,
+              bool *isNullOutP)
+{
+    Datum   value;
+
+    Assert(attNum > 0);
+
+    value = (Datum) fastgetiattr(tuple, attNum, tupDesc, isNullOutP);
+    return value;
+}
+
+/*
+ * FormRetrieveIndexResult
+ *    palloc a RetrieveIndexResult and fill it with copies of the given
+ *    index and heap item pointers.  Both pointers are asserted valid.
+ */
+RetrieveIndexResult
+FormRetrieveIndexResult(ItemPointer indexItemPointer,
+                        ItemPointer heapItemPointer)
+{
+    RetrieveIndexResult    res;
+
+    Assert(ItemPointerIsValid(indexItemPointer));
+    Assert(ItemPointerIsValid(heapItemPointer));
+
+    res = (RetrieveIndexResult) palloc(sizeof(*res));
+
+    /* the pointed-to item pointers are copied by value */
+    res->heap_iptr = *heapItemPointer;
+    res->index_iptr = *indexItemPointer;
+
+    return res;
+}
+
+/*
+ * IndexInfoFindDataOffset
+ *    Size of the index tuple header implied by an infomask: just
+ *    sizeof(IndexTupleData) when INDEX_NULL_MASK is clear, otherwise that
+ *    plus the null bitmap, rounded up with DOUBLEALIGN.
+ *
+ * Takes an infomask as argument (primarily because this needs to be usable
+ * at index_formtuple time so enough space is allocated).
+ *
+ * Change me if adding an attribute to IndexTuples!!!!!!!!!!!
+ */
+static Size
+IndexInfoFindDataOffset(unsigned short t_info)
+{
+    Size    hdrsize = sizeof(IndexTupleData);
+
+    /* no null bitmap: nothing to add, and no alignment is applied */
+    if ((t_info & INDEX_NULL_MASK) == 0)
+        return hdrsize;
+
+    /* null bitmap present: account for it, then align */
+    hdrsize += sizeof(IndexAttributeBitMapData);
+
+    return DOUBLEALIGN(hdrsize);    /* be conservative */
+}
+
+/*
+ * CopyIndexTuple
+ *    Copy IndexTupleSize(source) bytes from source into *target.
+ *
+ * When *target is NULL, a buffer of that size is palloc'ed and stored in
+ * *target first; otherwise the existing buffer is used as-is and is
+ * assumed to be large enough.
+ */
+void
+CopyIndexTuple(IndexTuple source, IndexTuple *target)
+{
+    Size    nbytes = IndexTupleSize(source);
+
+    if (*target == NULL)
+        *target = (IndexTuple) palloc(nbytes);
+
+    memmove((char *) *target, (char *) source, nbytes);
+}
+
diff --git a/src/backend/access/common/indexvalid.c b/src/backend/access/common/indexvalid.c

new file mode 100644 (file)

index 0000000..b437718
--- /dev/null
+++ b/src/backend/access/common/indexvalid.c
@@ -0,0 +1,84 @@
+/*-------------------------------------------------------------------------
+ *
+ * indexvalid.c--
+ *    index tuple qualification validity checking code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "executor/execdebug.h"
+#include "access/genam.h"
+#include "access/iqual.h"  /* where the declarations go */
+#include "access/itup.h"
+#include "access/skey.h"
+
+#include "storage/buf.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "utils/rel.h"
+
+/* ----------------------------------------------------------------
+ *       index scan key qualification code
+ * ----------------------------------------------------------------
+ */
+/* file-scope counter; presumably bumped by IncrIndexProcessed() -- TODO confirm */
+int    NIndexTupleProcessed;
+
+/* ----------------
+ * index_keytest
+ *
+ * Evaluate scanKeySize scan keys against an index tuple.  Returns
+ * false as soon as the fetched attribute is null or a key's
+ * (possibly negated) test fails; returns true when every key passes.
+ *
+ * NOTE(review): the attribute fetched is always number 1, whatever
+ * key is being evaluated -- confirm this is intended.
+ *
+ * old comments
+ * May eventually combine with other tests (like timeranges)?
+ * Should have Buffer buffer; as an argument and pass it to amgetattr.
+ * ----------------
+ */
+bool
+index_keytest(IndexTuple tuple,
+              TupleDesc tupdesc,
+              int scanKeySize,
+              ScanKey key)
+{
+    int     remaining;
+
+    IncrIndexProcessed();
+
+    for (remaining = scanKeySize; remaining > 0; remaining--, key++) {
+        bool    attIsNull;
+        Datum   attValue;
+        int     passed;
+
+        attValue = index_getattr(tuple, 1, tupdesc, &attIsNull);
+
+        /* XXX eventually should check if SK_ISNULL */
+        if (attIsNull)
+            return (false);
+
+        /* SK_COMMUTE swaps the order of the two operands */
+        if (key->sk_flags & SK_COMMUTE)
+            passed = (int) (*(key->sk_func))
+                (DatumGetPointer(key->sk_argument),
+                 attValue);
+        else
+            passed = (int) (*(key->sk_func))
+                (attValue,
+                 DatumGetPointer(key->sk_argument));
+
+        /* reject when the result disagrees with the SK_NEGATE setting */
+        if (!passed == !(key->sk_flags & SK_NEGATE))
+            return (false);
+    }
+
+    return (true);
+}
+
diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c

new file mode 100644 (file)

index 0000000..556b73b
--- /dev/null
+++ b/src/backend/access/common/printtup.c
@@ -0,0 +1,306 @@
+/*-------------------------------------------------------------------------
+ *
+ * printtup.c--
+ *    Routines to print out tuples to the destination (binary or non-binary
+ *    portals, frontend/interactive backend, etc.).
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup.h"
+#include "access/skey.h"
+#include "access/printtup.h"
+#include "access/tupdesc.h"
+#include "storage/buf.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+#include "fmgr.h"
+#include "utils/elog.h"
+
+#include "utils/syscache.h"
+#include "catalog/pg_type.h"
+
+#include "libpq/libpq.h"
+
+/* ----------------------------------------------------------------
+ * printtup / debugtup support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * typtoout - used by printtup and debugtup
+ *
+ * Look the type OID up in the system cache (TYPOID) and return the
+ * typoutput field of its pg_type entry.  On a failed lookup an
+ * elog(WARN) is raised and InvalidOid is the fallback return value.
+ * ----------------
+ */
+Oid
+typtoout(Oid type)
+{
+    HeapTuple   tup;
+
+    tup = SearchSysCacheTuple(TYPOID,
+                              ObjectIdGetDatum(type),
+                              0, 0, 0);
+
+    if (!HeapTupleIsValid(tup)) {
+        elog(WARN, "typtoout: Cache lookup of type %d failed", type);
+        return(InvalidOid);
+    }
+
+    return((Oid) ((TypeTupleForm) GETSTRUCT(tup))->typoutput);
+}
+
+/* ----------------
+ * gettypelem
+ *
+ * Look the type OID up in the system cache (TYPOID) and return the
+ * typelem field of its pg_type entry.  On a failed lookup an
+ * elog(WARN) is raised and InvalidOid is the fallback return value.
+ * ----------------
+ */
+Oid
+gettypelem(Oid type)
+{
+    HeapTuple  typeTuple;
+    
+    typeTuple = SearchSysCacheTuple(TYPOID,
+                   ObjectIdGetDatum(type),
+                   0,0,0);
+    
+    if (HeapTupleIsValid(typeTuple))
+        return((Oid)
+               ((TypeTupleForm) GETSTRUCT(typeTuple))->typelem);
+    
+    /* report under this function's own name, not typtoout's */
+    elog(WARN, "gettypelem: Cache lookup of type %d failed", type);
+    return(InvalidOid);
+}
+
+/* ----------------
+ * printtup
+ *
+ * Send one tuple to the frontend: a 'D' marker, a bitmap with one bit
+ * per attribute (set when the attribute is NOT null, most significant
+ * bit first), then for every non-null attribute with a valid output
+ * function a 4-byte count (string length + 4) followed by the text
+ * produced by that output function.
+ * ----------------
+ */
+void
+printtup(HeapTuple tuple, TupleDesc typeinfo)
+{
+    int     i;
+    int     bits;       /* bitmap byte being assembled */
+    int     mask;       /* bit for the current attribute */
+    bool    isnull;
+
+    /* tell the frontend to expect new tuple data */
+    pq_putnchar("D", 1);
+
+    /* send a bitmap of which attributes are null */
+    bits = 0;
+    mask = 1 << 7;
+    for (i = 0; i < tuple->t_natts; i++) {
+        (void) heap_getattr(tuple, InvalidBuffer, i + 1, typeinfo, &isnull);
+        if (!isnull)
+            bits |= mask;
+        mask >>= 1;
+
+        /* a full byte has been collected: ship it and start over */
+        if (((i + 1) & 7) == 0) {
+            pq_putint(bits, 1);
+            bits = 0;
+            mask = 1 << 7;
+        }
+    }
+    /* ship the trailing, partially filled byte if there is one */
+    if (i & 7)
+        pq_putint(bits, 1);
+
+    /* send the attributes of this tuple */
+    for (i = 0; i < tuple->t_natts; ++i) {
+        char   *attr;
+        char   *text;
+        Oid     outfunc;
+        size_t  textlen;
+
+        attr = heap_getattr(tuple, InvalidBuffer, i + 1, typeinfo, &isnull);
+        outfunc = typtoout((Oid) typeinfo->attrs[i]->atttypid);
+
+        if (isnull || !OidIsValid(outfunc))
+            continue;
+
+        text = fmgr(outfunc, attr,
+                    gettypelem(typeinfo->attrs[i]->atttypid));
+        textlen = strlen(text);
+        pq_putint(textlen + 4, 4);
+        pq_putnchar(text, textlen);
+        pfree(text);
+    }
+}
+
+/* ----------------
+ * printatt
+ *
+ * Print a one-line description of an attribute on stdout: its number,
+ * its name, the value (quoted) when value is not NULL, and the type
+ * id, length and by-value flag taken from attributeP.
+ * ----------------
+ */
+static void
+printatt(unsigned attributeId,
+         AttributeTupleForm attributeP,
+         char *value)
+{
+    /* attributeId is unsigned, so it is printed with %u rather than %d */
+    printf("\t%2u: %.*s%s%s%s\t(typeid = %u, len = %d, byval = %c)\n",
+           attributeId,
+           NAMEDATALEN,     /* attname is a char16 */
+           attributeP->attname.data,
+           value != NULL ? " = \"" : "",
+           value != NULL ? value : "",
+           value != NULL ? "\"" : "",
+           (unsigned int) (attributeP->atttypid),
+           attributeP->attlen,
+           attributeP->attbyval ? 't' : 'f');
+}
+
+/* ----------------
+ * showatts
+ *
+ * Print name on its own line, then one printatt line (without a
+ * value) for every attribute of tupleDesc, then a separator line.
+ * ----------------
+ */
+void
+showatts(char *name, TupleDesc tupleDesc)
+{
+    int     attno;
+
+    puts(name);
+
+    /* attributes are numbered from 1 in the output */
+    for (attno = 1; attno <= tupleDesc->natts; attno++)
+        printatt((unsigned) attno, tupleDesc->attrs[attno - 1], (char *) NULL);
+
+    printf("\t----\n");
+}
+
+/* ----------------
+ * debugtup
+ *
+ * Print every non-null attribute of a tuple on stdout through
+ * printatt, using the type's output function to build the text, and
+ * finish with a separator line.  Attributes that are null or whose
+ * type has no valid output function are skipped.
+ * ----------------
+ */
+void
+debugtup(HeapTuple tuple, TupleDesc typeinfo)
+{
+    int     attidx;
+
+    for (attidx = 0; attidx < tuple->t_natts; attidx++) {
+        bool    isnull;
+        char   *attr;
+        char   *text;
+        Oid     outfunc;
+
+        attr = heap_getattr(tuple, InvalidBuffer, attidx + 1, typeinfo, &isnull);
+        outfunc = typtoout((Oid) typeinfo->attrs[attidx]->atttypid);
+
+        if (isnull || !OidIsValid(outfunc))
+            continue;
+
+        text = fmgr(outfunc, attr,
+                    gettypelem(typeinfo->attrs[attidx]->atttypid));
+        printatt((unsigned) attidx + 1, typeinfo->attrs[attidx], text);
+        pfree(text);
+    }
+
+    printf("\t----\n");
+}
+
+/*#define IPORTAL_DEBUG*/
+
+/* ----------------
+ * printtup_internal
+ *      Protocol expects either T, D, C, E, or N.
+ *      We use a different data prefix, e.g. 'B' instead of 'D' to
+ *      indicate a tuple in internal (binary) form.
+ *
+ *      This is the same as printtup, except we don't use the typout func.
+ *
+ *      What is sent: the 'B' marker, a bitmap with one bit per attribute
+ *      (set when the attribute is NOT null, most significant bit first),
+ *      then for each non-null attribute a length written with
+ *      pq_putint(len, sizeof(int32)) followed by the raw bytes.
+ * ----------------
+ */
+void
+printtup_internal(HeapTuple tuple, TupleDesc typeinfo)
+{
+    int        i, j, k;   /* i: attribute counter, j: bitmap byte, k: current bit */
+    char   *attr;
+    bool   isnull;
+    
+    /* ----------------
+     * tell the frontend to expect new tuple data
+     * ----------------
+     */
+    pq_putnchar("B", 1);
+    
+    /* ----------------
+     * send a bitmap of which attributes are null
+     * (a bit is SET for each attribute that is not null)
+     * ----------------
+     */
+    j = 0;
+    k = 1 << 7;
+    for (i = 0; i < tuple->t_natts; ) {
+   attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull);
+   if (!isnull)
+       j |= k;
+   k >>= 1;
+   /* i was already incremented: flush after every 8th attribute */
+   if (!(i & 7)) {
+       pq_putint(j, 1);
+       j = 0;
+       k = 1 << 7;
+   }
+    }
+    /* flush the last, partially filled bitmap byte */
+    if (i & 7)
+   pq_putint(j, 1);
+    
+    /* ----------------
+     * send the attributes of this tuple
+     * ----------------
+     */
+#ifdef IPORTAL_DEBUG
+    fprintf(stderr, "sending tuple with %d atts\n", tuple->t_natts);
+#endif
+    for (i = 0; i < tuple->t_natts; ++i) {
+   int32 len = typeinfo->attrs[i]->attlen;
+   
+   attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
+   if (!isnull) {
+       /* # of bytes, and opaque data */
+       if (len == -1) {
+       /* variable length, assume a varlena structure */
+       /* the length sent excludes the varlena header */
+       len = VARSIZE(attr) - VARHDRSZ;
+       
+       pq_putint(len, sizeof(int32));
+       pq_putnchar(VARDATA(attr), len);
+#ifdef IPORTAL_DEBUG
+       {
+           char *d = VARDATA(attr);
+           
+           fprintf(stderr, "length %d data %x%x%x%x\n",
+               len, *d, *(d+1), *(d+2), *(d+3));
+       }
+#endif
+       } else {
+       /* fixed size */
+       if (typeinfo->attrs[i]->attbyval) {
+           /*
+            * by-value: attr holds the value itself, so copy it into a
+            * local of the matching width and send that local's bytes.
+            * NOTE(review): there is no default case -- for any other
+            * len only the length word is sent; confirm that cannot occur.
+            */
+           int8 i8;
+           int16 i16;
+           int32 i32;
+           
+           pq_putint(len, sizeof(int32));
+           switch (len) {
+           case sizeof(int8):
+           i8 = DatumGetChar(attr);
+           pq_putnchar((char *) &i8, len);
+           break;
+           case sizeof(int16):
+           i16 = DatumGetInt16(attr);
+           pq_putnchar((char *) &i16, len);
+           break;
+           case sizeof(int32):
+           i32 = DatumGetInt32(attr);
+           pq_putnchar((char *) &i32, len);
+           break;
+           }
+#ifdef IPORTAL_DEBUG
+           fprintf(stderr, "byval length %d data %d\n", len, attr);
+#endif
+       } else {
+           /* by-reference: attr points at len bytes of data */
+           pq_putint(len, sizeof(int32));
+           pq_putnchar(attr, len);
+#ifdef IPORTAL_DEBUG
+           fprintf(stderr, "byref length %d data %x\n", len, attr);
+#endif
+       }
+       }
+   }
+    }
+}
diff --git a/src/backend/access/common/scankey.c b/src/backend/access/common/scankey.c

new file mode 100644 (file)

index 0000000..7a47219
--- /dev/null
+++ b/src/backend/access/common/scankey.c
@@ -0,0 +1,68 @@
+/*-------------------------------------------------------------------------
+ *
+ * scankey.c--
+ *    scan direction and key code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/scankey.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "access/sdir.h"
+#include "access/attnum.h"
+#include "access/skey.h"
+
+#include "fmgr.h"
+
+/*
+ * ScanKeyEntryIsLegal --
+ * True iff the scan key entry is legal, i.e. the entry pointer is valid
+ * (checked with AssertMacro) and its sk_attno is a valid attribute number.
+ *
+ * The argument is parenthesized so that any pointer expression can be
+ * passed safely.
+ */
+#define ScanKeyEntryIsLegal(entry) \
+    ((bool) (AssertMacro(PointerIsValid(entry)) && \
+        AttributeNumberIsValid((entry)->sk_attno)))
+
+/*
+ * ScanKeyEntrySetIllegal --
+ * Marks a scan key entry as illegal by giving it an invalid attribute
+ * number; flags and procedure are cleared as well.
+ */
+void
+ScanKeyEntrySetIllegal(ScanKey entry)
+{
+    Assert(PointerIsValid(entry));
+
+    entry->sk_attno = InvalidAttrNumber;
+    entry->sk_procedure = 0;    /* should be InvalidRegProcedure */
+    entry->sk_flags = 0;        /* just in case... */
+}
+
+/*
+ * ScanKeyEntryInitialize --
+ * Initializes a scan key entry from the given flags, attribute number,
+ * procedure and argument, and has fmgr_info fill in sk_func and sk_nargs
+ * for the procedure.
+ *
+ * Note:
+ * Assumes the scan key entry is valid.
+ * Assumes the initialized scan key entry will be legal.
+ */
+void
+ScanKeyEntryInitialize(ScanKey entry,
+                       bits16 flags,
+                       AttrNumber attributeNumber,
+                       RegProcedure procedure,
+                       Datum argument)
+{
+    Assert(PointerIsValid(entry));
+
+    /* plain field copies first ... */
+    entry->sk_attno = attributeNumber;
+    entry->sk_flags = flags;
+    entry->sk_argument = argument;
+    entry->sk_procedure = procedure;
+
+    /* ... then the function lookup for the procedure */
+    fmgr_info(procedure, &entry->sk_func, &entry->sk_nargs);
+
+    Assert(ScanKeyEntryIsLegal(entry));
+}
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c

new file mode 100644 (file)

index 0000000..527eb51
--- /dev/null
+++ b/src/backend/access/common/tupdesc.c
@@ -0,0 +1,398 @@
+/*-------------------------------------------------------------------------
+ *
+ * tupdesc.c--
+ *    POSTGRES tuple descriptor support code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ * NOTES
+ *    some of the executor utility code such as "ExecTypeFromTL" should be
+ *    moved here.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <stdio.h> /* for sprintf() */
+#include <ctype.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "nodes/pg_list.h"
+#include "nodes/parsenodes.h"
+
+#include "access/attnum.h"
+#include "access/htup.h"
+#include "access/tupdesc.h"
+
+#include "utils/builtins.h"
+#include "utils/elog.h"        /* XXX generate exceptions instead */
+#include "utils/palloc.h"
+
+#include "utils/syscache.h"
+#include "catalog/pg_type.h"
+
+#include "nodes/primnodes.h"
+
+#include "parser/catalog_utils.h"
+
+/* ----------------------------------------------------------------
+ * CreateTemplateTupleDesc
+ *
+ * Allocate a tuple descriptor for natts attributes (natts >= 1 is
+ * asserted).  The attrs array is zeroed because TupleDescInitEntry
+ * assumes that the descriptor is filled with NULL pointers.
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+CreateTemplateTupleDesc(int natts)
+{
+    TupleDesc   desc;
+    uint32      nbytes;
+
+    /* sanity check */
+    AssertArg(natts >= 1);
+
+    nbytes = natts * sizeof (AttributeTupleForm);
+
+    desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
+    desc->natts = natts;
+
+    /* array of attribute pointers, all initially NULL */
+    desc->attrs = (AttributeTupleForm*) palloc(nbytes);
+    memset(desc->attrs, 0, nbytes);
+
+    return desc;
+}
+
+/* ----------------------------------------------------------------
+ * CreateTupleDesc
+ *
+ * Allocate a new TupleDesc around an existing AttributeTupleForm
+ * array.  The array pointer is stored as given, not copied; natts >= 1
+ * is asserted.
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+CreateTupleDesc(int natts, AttributeTupleForm* attrs)
+{
+    TupleDesc   result;
+
+    /* sanity check */
+    AssertArg(natts >= 1);
+
+    result = (TupleDesc) palloc(sizeof(struct tupleDesc));
+    result->natts = natts;
+    result->attrs = attrs;
+
+    return result;
+}
+
+/* ----------------------------------------------------------------
+ * CreateTupleDescCopy
+ *
+ * Build a new TupleDesc from an existing one: a fresh descriptor, a
+ * fresh attrs array, and for every attribute a freshly palloc'ed
+ * struct holding a copy of ATTRIBUTE_TUPLE_SIZE bytes of the original.
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+CreateTupleDescCopy(TupleDesc tupdesc)
+{
+    TupleDesc   copy;
+    int         attidx;
+    int         nbytes;
+
+    copy = (TupleDesc) palloc(sizeof(struct tupleDesc));
+    copy->natts = tupdesc->natts;
+
+    nbytes = copy->natts * sizeof (AttributeTupleForm);
+    copy->attrs = (AttributeTupleForm*) palloc(nbytes);
+
+    for (attidx = 0; attidx < copy->natts; attidx++) {
+        AttributeTupleForm  att;
+
+        att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE);
+        copy->attrs[attidx] = att;
+        memmove(att, tupdesc->attrs[attidx], ATTRIBUTE_TUPLE_SIZE);
+    }
+
+    return copy;
+}
+
+/* ----------------------------------------------------------------
+ * TupleDescInitEntry
+ *
+ * This function initializes a single attribute structure in
+ * a preallocated tuple descriptor.
+ *
+ * The slot desc->attrs[attributeNumber - 1] must still be empty
+ * (asserted); a new attribute struct is palloc'ed and stored there.
+ * typeName is looked up in the system cache (TYPNAME).
+ *
+ * Returns true when the type was found and atttypid, attalign,
+ * attlen and attbyval were filled in from it.  Returns false when
+ * the lookup failed; those fields then hold dummy values and the
+ * caller is expected to fix them up.
+ * ----------------------------------------------------------------
+ */
+bool
+TupleDescInitEntry(TupleDesc desc,
+          AttrNumber attributeNumber,
+          char *attributeName,
+          char *typeName,
+          int attdim,
+          bool attisset)
+{
+    HeapTuple      tuple;
+    TypeTupleForm  typeForm;
+    AttributeTupleForm att;
+    
+    /* ----------------
+     * sanity checks
+     * ----------------
+     */
+    AssertArg(PointerIsValid(desc));
+    AssertArg(attributeNumber >= 1);
+    /* attributeName's are sometimes NULL, 
+       from resdom's.  I don't know why that is, though -- Jolly */
+/*    AssertArg(NameIsValid(attributeName));*/
+/*    AssertArg(NameIsValid(typeName));*/
+    
+    /* the slot for this attribute must not have been initialized yet */
+    AssertArg(!PointerIsValid(desc->attrs[attributeNumber - 1]));
+    
+
+    /* ----------------
+     * allocate storage for this attribute
+     * ----------------
+     */
+
+    att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE);
+    desc->attrs[attributeNumber - 1] = att;
+
+    /* ----------------
+     * initialize some of the attribute fields
+     * ----------------
+     */
+    att->attrelid  = 0;                /* dummy value */
+    
+    /* a NULL attribute name yields an all-zero name */
+    if (attributeName != NULL)
+   namestrcpy(&(att->attname), attributeName);
+    else
+   memset(att->attname.data,0,NAMEDATALEN);
+
+    
+    att->attdefrel =   0;          /* dummy value */
+    att->attnvals  =   0;          /* dummy value */
+    att->atttyparg =   0;          /* dummy value */
+    att->attbound =    0;          /* dummy value */
+    att->attcanindex =     0;          /* dummy value */
+    att->attproc =     0;          /* dummy value */
+    att->attcacheoff =     -1;     /* no cached offset yet */
+    
+    att->attnum = attributeNumber;
+    att->attnelems = attdim;
+    att->attisset = attisset;
+    
+    /* ----------------
+     * search the system cache for the type tuple of the attribute
+     *  we are creating so that we can get the typeid and some other
+     *  stuff.
+     *
+     *  Note: in the special case of 
+     *
+     *     create EMP (name = char16, manager = EMP)
+     *
+     *  RelationNameCreateHeapRelation() calls BuildDesc() which
+     *  calls this routine and since EMP does not exist yet, the
+     *  system cache lookup below fails.  That's fine, but rather
+     *  than doing an elog(WARN) we just leave that information
+     *  uninitialized, return false, then fix things up later.
+     *  -cim 6/14/90
+     * ----------------
+     */
+    tuple = SearchSysCacheTuple(TYPNAME, PointerGetDatum(typeName),
+               0,0,0);
+    if (! HeapTupleIsValid(tuple)) {
+   /* ----------------
+    *   here type info does not exist yet so we just fill
+    *   the attribute with dummy information and return false.
+    * ----------------
+    */
+   att->atttypid = InvalidOid;
+   att->attlen   = (int16) 0;
+   att->attbyval = (bool) 0;
+   att->attalign = 'i';
+   return false;
+    }
+    
+    /* ----------------
+     * type info exists so we initialize our attribute
+     *  information from the type tuple we found..
+     * ----------------
+     */
+    typeForm = (TypeTupleForm) GETSTRUCT(tuple);
+    
+    att->atttypid = tuple->t_oid;
+    att->attalign = typeForm->typalign;
+    
+    /* ------------------------
+       If this attribute is a set, what is really stored in the
+       attribute is the OID of a tuple in the pg_proc catalog.
+       The pg_proc tuple contains the query string which defines
+       this set - i.e., the query to run to get the set.
+       So the atttypid (just assigned above) refers to the type returned
+       by this query, but the actual length of this attribute is the
+       length (size) of an OID.
+       
+       Why not just make the atttypid point to the OID type, instead
+       of the type the query returns?  Because the executor uses the atttypid
+       to tell the front end what type will be returned (in BeginCommand),
+       and in the end the type returned will be the result of the query, not
+       an OID.
+       
+       Why not wait until the return type of the set is known (i.e., the
+       recursive call to the executor to execute the set has returned) 
+       before telling the front end what the return type will be?  Because
+       the executor is a delicate thing, and making sure that the correct
+       order of front-end commands is maintained is messy, especially 
+       considering that target lists may change as inherited attributes
+       are considered, etc.  Ugh.
+       -----------------------------------------
+       */
+    if (attisset) {
+   /* sets take their length and by-value flag from the "oid" type */
+   Type t = type("oid");
+   att->attlen = tlen(t);
+   att->attbyval = tbyval(t);
+    } else {
+   att->attlen   = typeForm->typlen;
+   att->attbyval = typeForm->typbyval;
+    }
+    
+    
+    return true;
+}
+
+
+/* ----------------------------------------------------------------
+ * TupleDescMakeSelfReference
+ *
+ * Set up a "self-referential" attribute like manager in
+ * "create EMP (name=text, manager = EMP)".
+ *
+ * The attribute's type id comes from TypeShellMake(relname), which
+ * inserts a "shell" type tuple into pg_type.  A self-reference is one
+ * kind of set, so length and byval are taken from the "oid" type, the
+ * same as for a set (see the comments in TupleDescInitEntry), and the
+ * element count is reset to 0.
+ * ----------------------------------------------------------------
+ */
+static void
+TupleDescMakeSelfReference(TupleDesc desc,
+                           AttrNumber attnum,
+                           char *relname)
+{
+    Type                oidType = type("oid");
+    AttributeTupleForm  selfAtt = desc->attrs[attnum - 1];
+
+    selfAtt->atttypid = TypeShellMake(relname);
+    selfAtt->attlen = tlen(oidType);
+    selfAtt->attbyval = tbyval(oidType);
+    selfAtt->attnelems = 0;
+}
+
+/* ----------------------------------------------------------------
+ * BuildDescForRelation
+ *
+ * This is a general purpose function identical to BuildDesc
+ * but is used by the DefineRelation() code to catch the
+ * special case where you
+ *
+ *     create FOO ( ..., x = FOO )
+ *
+ * here, the initial type lookup for "x = FOO" will fail
+ * because FOO isn't in the catalogs yet.  But since we
+ * are creating FOO, instead of doing an elog() we add
+ * a shell type tuple to pg_type and fix things later
+ * in amcreate().
+ *
+ * schema is a list of ColumnDef nodes; relname is the name of the
+ * relation being defined.  Returns a new tuple descriptor with one
+ * attribute per schema entry.  A type name that is neither known nor
+ * equal to relname is reported with elog(WARN).
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+BuildDescForRelation(List *schema, char *relname)
+{
+    int            natts;
+    AttrNumber     attnum;
+    List       *p;
+    TupleDesc      desc;
+    char               *attname;
+    char               *typename;
+    int            attdim;
+    bool                attisset;
+    
+    /* ----------------
+     * allocate a new tuple descriptor
+     * ----------------
+     */
+    natts =    length(schema);
+    desc =     CreateTemplateTupleDesc(natts);
+    
+    attnum = 0;
+    
+    /*
+     * Scratch buffer for the type name to look up.  It must hold an
+     * optional leading '_' (array types), up to NAMEDATALEN name
+     * characters and the terminating NUL.
+     */
+    typename = palloc(NAMEDATALEN+2);
+
+    foreach(p, schema) {
+        ColumnDef *entry;
+        List    *arry;
+
+        /* ----------------
+         *  for each entry in the list, get the name and type
+         *  information from the list and have TupleDescInitEntry
+         *  fill in the attribute information we need.
+         * ----------------
+         */
+        attnum++;
+
+        entry =     lfirst(p);
+        attname =   entry->colname;
+        arry = entry->typename->arrayBounds;
+        attisset = entry->typename->setof;
+
+        /*
+         * Format straight into the scratch buffer; the precision caps
+         * the number of name characters copied so the buffer cannot
+         * be overrun.
+         */
+        if (arry != NIL) {
+            attdim = length(arry);
+
+            /* array of XXX is _XXX (inherited from release 3) */
+            sprintf(typename, "_%.*s", NAMEDATALEN, entry->typename->name);
+        } else {
+            sprintf(typename, "%.*s", NAMEDATALEN, entry->typename->name);
+            attdim = 0;
+        }
+
+        if (! TupleDescInitEntry(desc, attnum, attname,
+                                 typename, attdim, attisset)) {
+            /* ----------------
+             *  if TupleDescInitEntry() fails, it means there is
+             *  no type in the system catalogs.  So now we check if
+             *  the type name equals the relation name.  If so we
+             *  have a self reference, otherwise it's an error.
+             * ----------------
+             */
+            if (!strcmp(typename, relname)) {
+                TupleDescMakeSelfReference(desc, attnum, relname);
+            } else
+                elog(WARN, "DefineRelation: no such type %.*s",
+                     NAMEDATALEN, typename);
+        }
+
+        /*
+         * this is for char() and varchar(). When an entry is of type
+         * char() or varchar(), typlen is set to the appropriate length,
+         * which we'll use here instead. (The catalog lookup only returns
+         * the length of bpchar and varchar which is not what we want!)
+         *                      - ay 6/95
+         */
+        if (entry->typename->typlen > 0) {
+            desc->attrs[attnum - 1]->attlen = entry->typename->typlen;
+        }
+    }
+    return desc;
+}
+
diff --git a/src/backend/access/funcindex.h b/src/backend/access/funcindex.h

new file mode 100644 (file)

index 0000000..4689df1
--- /dev/null
+++ b/src/backend/access/funcindex.h
@@ -0,0 +1,43 @@
+/*-------------------------------------------------------------------------
+ *
+ * funcindex.h--
+ *    
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: funcindex.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _FUNC_INDEX_INCLUDED_
+#define _FUNC_INDEX_INCLUDED_
+
+#include "postgres.h"
+
+/*
+ * FuncIndexInfo
+ *    NOTE(review): presumably describes the function behind a functional
+ *    index; the meaning is inferred from the field and macro names only.
+ */
+typedef struct {
+   int nargs;          /* read/written via FIgetnArgs / FIsetnArgs */
+   Oid arglist[8];     /* read/written via FIgetArg / FIsetArg; room for 8 entries */
+   Oid procOid;        /* FIisFunctionalIndex tests this against InvalidOid */
+   NameData funcName;  /* FIgetname yields its data member */
+} FuncIndexInfo;
+
+/* pointer-to-FuncIndexInfo convenience type */
+typedef FuncIndexInfo  *FuncIndexInfoPtr;
+
+/*
+ * some marginally useful macro definitions
+ *
+ * FINFO is a pointer to a FuncIndexInfo.  Every macro argument and every
+ * expansion is parenthesized so the macros behave like ordinary
+ * expressions whatever is passed in or wrapped around them.
+ */
+/* #define FIgetname(FINFO) (&((FINFO)->funcName.data[0]))*/
+#define FIgetname(FINFO) ((FINFO)->funcName.data)
+#define FIgetnArgs(FINFO) ((FINFO)->nargs)
+#define FIgetProcOid(FINFO) ((FINFO)->procOid)
+#define FIgetArg(FINFO, argnum) ((FINFO)->arglist[(argnum)])
+#define FIgetArglist(FINFO) ((FINFO)->arglist)
+
+#define FIsetnArgs(FINFO, numargs) ((FINFO)->nargs = (numargs))
+#define FIsetProcOid(FINFO, id) ((FINFO)->procOid = (id))
+#define FIsetArg(FINFO, argnum, argtype) ((FINFO)->arglist[(argnum)] = (argtype))
+
+/* true when a function OID has been recorded in procOid */
+#define FIisFunctionalIndex(FINFO) ((FINFO)->procOid != InvalidOid)
+
+#endif /* _FUNC_INDEX_INCLUDED_ */
diff --git a/src/backend/access/genam.h b/src/backend/access/genam.h

new file mode 100644 (file)

index 0000000..b254465
--- /dev/null
+++ b/src/backend/access/genam.h
@@ -0,0 +1,60 @@
+/*-------------------------------------------------------------------------
+ *
+ * genam.h--
+ *    POSTGRES general access method definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: genam.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    GENAM_H
+#define GENAM_H
+
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/htup.h"
+#include "access/istrat.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "access/sdir.h"
+#include "access/funcindex.h"
+
+/* ----------------
+ * generalized index_ interface routines
+ *
+ * These are the access-method independent entry points: open/close an
+ * index relation, insert and delete entries, and drive an index scan
+ * (begin, rescan, mark/restore position, getnext, end).
+ * ----------------
+ */
+extern Relation index_open(Oid relationId);
+extern Relation index_openr(char *relationName);
+extern void index_close(Relation relation);
+extern InsertIndexResult index_insert(Relation relation,
+                     IndexTuple indexTuple);
+extern void index_delete(Relation relation, ItemPointer indexItem);
+extern IndexScanDesc index_beginscan(Relation relation, bool scanFromEnd,
+     uint16 numberOfKeys, ScanKey key);
+extern void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key);
+extern void index_endscan(IndexScanDesc scan);
+extern void index_markpos(IndexScanDesc scan);
+extern void index_restrpos(IndexScanDesc scan);
+extern RetrieveIndexResult index_getnext(IndexScanDesc scan,
+                    ScanDirection direction);
+extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
+                   uint16 procnum);
+/*
+ * GetIndexValue reports through *attNull whether the value is null.
+ * fInfo carries the functional-index description (see access/funcindex.h).
+ */
+extern Datum GetIndexValue(HeapTuple tuple, TupleDesc hTupDesc,
+     int attOff, AttrNumber attrNums[], FuncIndexInfo *fInfo,
+     bool *attNull, Buffer buffer);
+
+/* in genam.c */
+extern IndexScanDesc RelationGetIndexScan(Relation relation, bool scanFromEnd,
+                     uint16 numberOfKeys, ScanKey key);
+extern void IndexScanRestart(IndexScanDesc scan, bool scanFromEnd,
+                ScanKey key);
+extern void IndexScanEnd(IndexScanDesc scan);
+extern void IndexScanMarkPosition(IndexScanDesc scan);
+extern void IndexScanRestorePosition(IndexScanDesc scan);
+
+#endif /* GENAM_H */
diff --git a/src/backend/access/hash.h b/src/backend/access/hash.h

new file mode 100644 (file)

index 0000000..2140769
--- /dev/null
+++ b/src/backend/access/hash.h
@@ -0,0 +1,336 @@
+/*-------------------------------------------------------------------------
+ *
+ * hash.h--
+ *    header file for postgres hash access method implementation 
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: hash.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ * NOTES
+ * modeled after Margo Seltzer's hash implementation for unix. 
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HASH_H
+#define HASH_H
+
+#include "access/itup.h"
+
+/*
+ * An overflow page is a spare page allocated for storing data whose
+ * bucket doesn't have room to store it. We use overflow pages rather
+ * than just splitting the bucket because there is a linear order in
+ * the way we split buckets. In other words, if there isn't enough space
+ * in the bucket itself, put it in an overflow page.
+ *
+ * Overflow page addresses are stored in form: (Splitnumber, Page offset).
+ *
+ * A splitnumber is the number of the generation where the table doubles
+ * in size. The page offset is the ovflpage's offset within the
+ * splitnumber; offsets start at 1.
+ *
+ * An overflow page address is assembled from its two parts with the
+ * macro OADDR_OF(S, O), where S is the splitnumber and O is the page
+ * offset. OADDR_TO_BLKNO() converts such an address into a block number.
+ */
+/* Basic types used to address buckets and overflow pages */
+typedef uint32 Bucket;
+typedef bits16 OverflowPageAddress;
+typedef uint32 SplitNumber;
+typedef uint32 PageOffset;
+
+/* A valid overflow address will always have a page offset >= 1 */
+#define InvalidOvflAddress 0
+
+/*
+ * Layout of an OverflowPageAddress: the low SPLITSHIFT (11) bits hold the
+ * page offset, the bits above them hold the split number.
+ *
+ *    SPLITNUM(N)   - split number stored in address N
+ *    OPAGENUM(N)   - page offset stored in address N
+ *    OADDR_OF(S,O) - build an address from split number S and offset O
+ */
+#define SPLITSHIFT 11
+#define SPLITMASK  0x7FF
+#define SPLITNUM(N) ((SplitNumber)(((uint32)(N)) >> SPLITSHIFT))
+#define OPAGENUM(N) ((PageOffset)((N) & SPLITMASK))
+#define OADDR_OF(S,O) \
+    ((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O)))
+
+/*
+ * BUCKET_TO_BLKNO(B)  - block number of bucket B
+ * OADDR_TO_BLKNO(B)   - block number of the overflow page with address B
+ *
+ * Both macros read the spares array through a variable named "metap",
+ * which must be in scope at the point of use, and both evaluate their
+ * argument more than once, so the argument must be free of side effects.
+ *
+ * OADDR_TO_BLKNO deliberately has no trailing semicolon: it expands to a
+ * plain expression and can be used anywhere an expression is allowed.
+ */
+#define BUCKET_TO_BLKNO(B) \
+    ((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1)
+#define OADDR_TO_BLKNO(B) \
+    ((BlockNumber) \
+     (BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B))))
+
+/* 
+ * hasho_flag tells us which type of page we're looking at.  For
+ * example, knowing overflow pages from bucket pages is necessary
+ * information when you're deleting tuples from a page. If all the
+ * tuples are deleted from an overflow page, the overflow is made
+ * available to other buckets by calling _hash_freeovflpage(). If all
+ * the tuples are deleted from a bucket page, no additional action is
+ * necessary.
+ */
+
+/* Values for hasho_flag; every page type other than "unused" is one bit. */
+#define    LH_UNUSED_PAGE      (0)
+#define LH_OVERFLOW_PAGE   (1 << 0)
+#define LH_BUCKET_PAGE     (1 << 1)
+#define    LH_BITMAP_PAGE      (1 << 2)
+#define    LH_META_PAGE        (1 << 3)
+
+/*
+ * Per-page bookkeeping of the hash access method: page type, owning
+ * bucket, overflow address and the links to neighbouring blocks.
+ */
+typedef struct HashPageOpaqueData {
+    bits16 hasho_flag;         /* is this page a bucket or ovfl */
+    Bucket hasho_bucket;       /* bucket number this pg belongs to */
+    OverflowPageAddress hasho_oaddr;   /* ovfl address of this ovfl pg */
+    BlockNumber hasho_nextblkno;   /* next ovfl blkno */
+    BlockNumber    hasho_prevblkno;    /* previous ovfl (or bucket) blkno */
+} HashPageOpaqueData;
+
+/* pointer to a HashPageOpaqueData */
+typedef HashPageOpaqueData        *HashPageOpaque;
+
+/*
+ *  ScanOpaqueData is used to remember which buffers we're currently
+ *  examining in the scan.  We keep these buffers locked and pinned and
+ *  recorded in the opaque entry of the scan in order to avoid doing a
+ *  ReadBuffer() for every tuple in the index.  This avoids semop() calls,
+ *  which are expensive.
+ */
+
+typedef struct HashScanOpaqueData {
+    Buffer      hashso_curbuf;     /* buffer paired with the scan's
+                    * currentItemData */
+    Buffer      hashso_mrkbuf;     /* buffer paired with the scan's
+                    * currentMarkData */
+} HashScanOpaqueData;
+
+/* pointer to a HashScanOpaqueData; stored in the scan's opaque field */
+typedef HashScanOpaqueData        *HashScanOpaque;
+
+/* 
+ * Definitions for metapage.
+ */
+
+#define HASH_METAPAGE  0       /* metapage is always block 0 */
+
+/* presumably the values kept in hashm_magic / hashm_version -- TODO confirm */
+#define HASH_MAGIC 0x6440640
+#define HASH_VERSION   0
+
+/*
+ * NCACHED is used to set the array size of spares[] & bitmaps[].
+ *
+ * Spares[] is used to hold the number of overflow pages currently
+ * allocated at a certain splitpoint. For example, if spares[3] = 7
+ * then there are a maximum of 7 ovflpages available at splitpoint 3.
+ * The value in spares[] will change as ovflpages are added within
+ * a splitpoint.
+ *
+ * Within a splitpoint, one can find which ovflpages are available and
+ * which are used by looking at the bitmaps that are stored on the ovfl
+ * pages themselves. There is at least one bitmap for every splitpoint's
+ * ovflpages. Bitmaps[] contains the ovflpage addresses of the ovflpages
+ * that hold the ovflpage bitmaps.
+ *
+ * The reason that the size is restricted to NCACHED (32) is because
+ * the bitmaps are 16 bits: upper 5 represent the splitpoint, lower 11
+ * indicate the page number within the splitpoint. Since there are
+ * only 5 bits to store the splitpoint, there can only be 32 splitpoints.
+ * Both spares[] and bitmaps[] use splitpoints as their indices, so there
+ * can only be 32 of them.
+ */
+
+/* number of entries in hashm_spares[] and hashm_mapp[] */
+#define    NCACHED     32  
+
+
+/*
+ * Contents of the hash index metapage (block HASH_METAPAGE).  The
+ * structure starts with room for the ordinary page header, so it can be
+ * laid directly over the page.
+ */
+typedef struct HashMetaPageData {
+    PageHeaderData hashm_phdr;     /* pad for page header
+                          (do not use) */
+    uint32     hashm_magic;        /* magic no. for hash tables */
+    uint32     hashm_version;      /* version ID */
+    uint32     hashm_nkeys;        /* number of keys stored in
+                          the table */
+    uint16     hashm_ffactor;      /* fill factor */
+    uint16     hashm_bsize;        /* bucket size (bytes) -
+                          must be a power of 2 */
+    uint16     hashm_bshift;       /* bucket shift */
+    uint16     hashm_bmsize;       /* bitmap array size (bytes) -
+                          must be a power of 2 */
+    uint32         hashm_maxbucket;    /* ID of maximum bucket
+                          in use */
+    uint32     hashm_highmask;     /* mask to modulo into
+                          entire table */
+    uint32     hashm_lowmask;      /* mask to modulo into lower
+                          half of table */
+    uint32     hashm_ovflpoint;    /* pageno. from which ovflpgs
+                          being allocated */
+    uint32     hashm_lastfreed;    /* last ovflpage freed */
+    uint32     hashm_nmaps;        /* Initial number of bitmaps */
+    uint32     hashm_spares[NCACHED];  /* spare pages available at
+                          splitpoints */
+    BlockNumber        hashm_mapp[NCACHED];    /* blknumbers of ovfl page
+                          maps */
+    RegProcedure   hashm_procid;       /* hash procedure id from
+                          pg_proc */
+} HashMetaPageData;
+
+/* pointer to a HashMetaPageData */
+typedef HashMetaPageData *HashMetaPage;
+
+/*
+ * Short hands for accessing structure
+ *
+ * Each name expands to the bare HashMetaPageData field name, so it is
+ * written after a metapage pointer, e.g. metap->SPARES (as done by
+ * BUCKET_TO_BLKNO).  NOTE(review): these are very generic identifiers and
+ * are replaced everywhere this header is included.
+ */
+#define BSHIFT     hashm_bshift
+#define OVFL_POINT hashm_ovflpoint
+#define    LAST_FREED  hashm_lastfreed
+#define MAX_BUCKET hashm_maxbucket
+#define FFACTOR        hashm_ffactor
+#define HIGH_MASK  hashm_highmask
+#define LOW_MASK   hashm_lowmask
+#define NKEYS      hashm_nkeys
+#define SPARES     hashm_spares
+
+/* defined in hash.c; true only while hashbuild() is running */
+extern bool    BuildingHash;
+
+/* an item stored in a hash index: a wrapper around one index tuple */
+typedef struct HashItemData {
+    IndexTupleData          hash_itup;
+} HashItemData;
+
+/* pointer to a HashItemData */
+typedef HashItemData      *HashItem;
+
+/*
+ * Constants
+ */
+#define DEFAULT_FFACTOR        300
+#define SPLITMAX       8
+#define BYTE_TO_BIT        3   /* 2^3 bits/byte */
+#define INT_TO_BYTE        2   /* 2^2 bytes/int */
+#define INT_TO_BIT     5   /* 2^5 bits/int */
+#define ALL_SET            ((uint32) ~0)  /* a uint32 with every bit set */
+
+/*
+ * bitmap pages do not contain tuples.  they do contain the standard
+ * page headers and trailers; however, everything in between is a
+ * giant bit array.  the number of bits that fit on a page obviously
+ * depends on the page size and the header/trailer overhead.
+ *
+ *    BMPGSZ_BYTE(metap)    - bitmap size in bytes (hashm_bmsize)
+ *    BMPGSZ_BIT(metap)     - the same size expressed in bits
+ *    HashPageGetBitmap(pg) - address of the bit array on page pg: the
+ *                first double-aligned byte after the page
+ *                header, viewed as an array of uint32
+ */
+#define    BMPGSZ_BYTE(metap)  ((metap)->hashm_bmsize)
+#define    BMPGSZ_BIT(metap)   ((metap)->hashm_bmsize << BYTE_TO_BIT)
+#define    HashPageGetBitmap(pg) \
+    ((uint32 *) (((char *) (pg)) + DOUBLEALIGN(sizeof(PageHeaderData))))
+
+/*
+ * The number of bits in an ovflpage bitmap which
+ * tells which ovflpages are empty versus in use (NOT the number of
+ * bits in an overflow page *address* bitmap). 
+ */
+#define BITS_PER_MAP   32  /* Number of bits in ovflpage bitmap */
+
+/*
+ * Given the address of the beginning of a big map, clear/set/test the
+ * nth bit.  A is an array of uint32 words (see HashPageGetBitmap) and N
+ * is a zero-based bit number; word N / BITS_PER_MAP is touched.
+ *
+ * The mask is built from a uint32 one rather than a plain (signed) int
+ * one: shifting a signed 1 into bit 31 is undefined behaviour in C, and
+ * bit 31 of every word is a legitimate bit number here.
+ *
+ * ISSET yields zero when the bit is clear and a non-zero value (the mask
+ * itself, not necessarily 1) when it is set.  N is evaluated twice.
+ */
+#define CLRBIT(A, N) \
+    ((A)[(N)/BITS_PER_MAP] &= ~(((uint32) 1) << ((N)%BITS_PER_MAP)))
+#define SETBIT(A, N) \
+    ((A)[(N)/BITS_PER_MAP] |= (((uint32) 1) << ((N)%BITS_PER_MAP)))
+#define ISSET(A, N) \
+    ((A)[(N)/BITS_PER_MAP] & (((uint32) 1) << ((N)%BITS_PER_MAP)))
+
+/*
+ * page locking modes
+ *
+ * Passed as the "access" argument of _hash_getbuf() / _hash_relbuf().
+ */
+#define    HASH_READ   0
+#define    HASH_WRITE  1
+
+/*  
+ *  In general, the hash code tries to localize its knowledge about page
+ *  layout to a couple of routines.  However, we need a special value to
+ *  indicate "no page number" in those places where we expect page numbers.
+ */
+
+#define P_NONE     0
+
+/*
+ *  Strategy number. There's only one valid strategy for hashing: equality.
+ */
+
+#define HTEqualStrategyNumber      1
+#define HTMaxStrategyNumber        1
+
+/*
+ *  When a new operator class is declared, we require that the user supply
+ *  us with an amproc procedure for hashing a key of the new type.
+ *  Since we only have one such proc in amproc, it's number 1.
+ */
+
+#define HASHPROC   1
+
+/* public routines */
+
+/* hash.c: the access method entry points (build, insert, scan, delete) */
+extern void hashbuild(Relation heap, Relation index, int natts,
+   AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
+   Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
+extern InsertIndexResult hashinsert(Relation rel, IndexTuple itup);
+extern char *hashgettuple(IndexScanDesc scan, ScanDirection dir);
+extern char *hashbeginscan(Relation rel, bool fromEnd, uint16 keysz,
+              ScanKey scankey);
+extern void hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
+extern void hashendscan(IndexScanDesc scan);
+extern void hashmarkpos(IndexScanDesc scan);
+extern void hashrestrpos(IndexScanDesc scan);
+extern void hashdelete(Relation rel, ItemPointer tid);
+
+/* hashfunc.c: one hash function per supported key type */
+extern uint32 hashint2(int16 key);
+extern uint32 hashint4(uint32 key);
+extern uint32 hashfloat4(float32 keyp);
+extern uint32 hashfloat8(float64 keyp);
+extern uint32 hashoid(Oid key);
+extern uint32 hashchar(char key);
+extern uint32 hashchar2(uint16 intkey);
+extern uint32 hashchar4(uint32 intkey);
+extern uint32 hashchar8(char *key);
+extern uint32 hashchar16(char *key);
+extern uint32 hashtext(struct varlena *key);
+
+/*
+ * private routines
+ *
+ * Everything below carries the _hash_ prefix and is grouped by the
+ * source file that defines it.
+ */
+
+/* hashinsert.c */
+extern InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem);
+
+
+/* hashovfl.c */
+extern Buffer _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf);
+extern Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf);
+extern int32 _hash_initbitmap(Relation rel, HashMetaPage metap, int32 pnum,
+                 int32 nbits, int32 ndx);
+extern void _hash_squeezebucket(Relation rel, HashMetaPage metap,
+               Bucket bucket);
+
+
+/* hashpage.c */
+extern void _hash_metapinit(Relation rel);
+/* "access" / "from_access" / "to_access" take HASH_READ or HASH_WRITE */
+extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
+extern void _hash_relbuf(Relation rel, Buffer buf, int access);
+extern void _hash_wrtbuf(Relation rel, Buffer buf);
+extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
+extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access,
+                  int to_access);
+extern void _hash_pageinit(Page page, Size size);
+extern void _hash_pagedel(Relation rel, ItemPointer tid);
+extern void _hash_expandtable(Relation rel, Buffer metabuf);
+
+
+/* hashscan.c */
+extern void _hash_regscan(IndexScanDesc scan);
+extern void _hash_dropscan(IndexScanDesc scan);
+extern void _hash_adjscans(Relation rel, ItemPointer tid);
+
+
+/* hashsearch.c */
+extern void _hash_search(Relation rel, int keysz, ScanKey scankey,
+            Buffer *bufP, HashMetaPage metap);
+extern RetrieveIndexResult _hash_next(IndexScanDesc scan, ScanDirection dir);
+extern RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir);
+extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir,
+              Buffer metabuf);
+
+
+/* hashstrat.c */
+extern StrategyNumber _hash_getstrat(Relation rel, AttrNumber attno,
+                    RegProcedure proc);
+extern bool _hash_invokestrat(Relation rel, AttrNumber attno,
+                 StrategyNumber strat, Datum left, Datum right);
+
+
+/* hashutil.c */
+extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup,
+                  HashMetaPage metap);
+extern void _hash_freeskey(ScanKey skey);
+extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
+extern HashItem _hash_formitem(IndexTuple itup);
+extern Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key);
+extern uint32 _hash_log2(uint32 num);
+extern void _hash_checkpage(Page page, int flags);
+
+#endif /* HASH_H */
diff --git a/src/backend/access/hash/Makefile.inc b/src/backend/access/hash/Makefile.inc

new file mode 100644 (file)

index 0000000..8ea221b
--- /dev/null
+++ b/src/backend/access/hash/Makefile.inc
@@ -0,0 +1,18 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/hash (hash access method)
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+# Sources of the hash access method, appended to SUBSRCS (one per line).
+SUBSRCS += \
+   hash.c \
+   hashfunc.c \
+   hashinsert.c \
+   hashovfl.c \
+   hashpage.c \
+   hashscan.c \
+   hashsearch.c \
+   hashstrat.c \
+   hashutil.c
+
+
+
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

new file mode 100644 (file)

index 0000000..a4a4e16
--- /dev/null
+++ b/src/backend/access/hash/hash.c
@@ -0,0 +1,467 @@
+/*-------------------------------------------------------------------------
+ *
+ * hash.c--
+ *    Implementation of Margo Seltzer's Hashing package for postgres. 
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ * NOTES
+ *    This file contains only the public interface routines.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/hash.h"
+#include "access/funcindex.h"
+#include "nodes/execnodes.h"
+#include "nodes/plannodes.h"
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+#include "catalog/index.h"
+
+
+bool   BuildingHash = false;
+
+/*
+ *  hashbuild() -- build a new hash index.
+ *
+ * We use a global variable to record the fact that we're creating
+ * a new index.  This is used to avoid high-concurrency locking,
+ * since the index won't be visible until this transaction commits
+ * and since building is guaranteed to be single-threaded.
+ */
+void
+hashbuild(Relation heap,
+     Relation index,
+     int natts,
+     AttrNumber *attnum,
+     IndexStrategy istrat,
+     uint16 pcount,
+     Datum *params,
+     FuncIndexInfo *finfo,
+     PredInfo *predInfo)
+{
+    HeapScanDesc hscan;
+    Buffer buffer;
+    HeapTuple htup;
+    IndexTuple itup;
+    TupleDesc htupdesc, itupdesc;
+    Datum *attdata;
+    bool *nulls;
+    InsertIndexResult res;
+    int nhtups, nitups;
+    int i;
+    HashItem hitem;
+    ExprContext *econtext;
+    TupleTable tupleTable;
+    TupleTableSlot *slot;
+    Oid hrelid, irelid;
+    Node *pred, *oldPred;
+    
+    /* note that this is a new btree */
+    BuildingHash = true;
+    
+    pred = predInfo->pred;
+    oldPred = predInfo->oldPred;
+    
+    /*  initialize the hash index metadata page (if this is a new index) */
+    if (oldPred == NULL)
+   _hash_metapinit(index);
+    
+    /* get tuple descriptors for heap and index relations */
+    htupdesc = RelationGetTupleDescriptor(heap);
+    itupdesc = RelationGetTupleDescriptor(index);
+    
+    /* get space for data items that'll appear in the index tuple */
+    attdata = (Datum *) palloc(natts * sizeof(Datum));
+    nulls = (bool *) palloc(natts * sizeof(bool));
+    
+    /*
+     * If this is a predicate (partial) index, we will need to evaluate the
+     * predicate using ExecQual, which requires the current tuple to be in a
+     * slot of a TupleTable.  In addition, ExecQual must have an ExprContext
+     * referring to that slot.  Here, we initialize dummy TupleTable and
+     * ExprContext objects for this purpose. --Nels, Feb '92
+     */
+#ifndef OMIT_PARTIAL_INDEX
+    if (pred != NULL || oldPred != NULL) {
+   tupleTable = ExecCreateTupleTable(1);
+   slot = ExecAllocTableSlot(tupleTable);
+   econtext = makeNode(ExprContext);
+   FillDummyExprContext(econtext, slot, htupdesc, buffer);
+    }
+#endif /* OMIT_PARTIAL_INDEX */
+    
+    /* start a heap scan */
+    hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
+    htup = heap_getnext(hscan, 0, &buffer);
+    
+    /* build the index */
+    nhtups = nitups = 0;
+    
+    for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
+   
+   nhtups++;
+   
+   /*
+    * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+    * this tuple if it was already in the existing partial index
+    */
+   if (oldPred != NULL) {
+       /*SetSlotContents(slot, htup); */
+#ifndef OMIT_PARTIAL_INDEX
+       slot->val = htup;
+       if (ExecQual((List*)oldPred, econtext) == true) {
+       nitups++;
+       continue;
+       }
+#endif /* OMIT_PARTIAL_INDEX */    
+   }
+   
+   /* Skip this tuple if it doesn't satisfy the partial-index predicate */
+   if (pred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+       /*SetSlotContents(slot, htup); */
+       slot->val = htup;
+       if (ExecQual((List*)pred, econtext) == false)
+       continue;
+#endif /* OMIT_PARTIAL_INDEX */        
+}
+   
+   nitups++;
+   
+   /*
+    *  For the current heap tuple, extract all the attributes
+    *  we use in this index, and note which are null.
+    */
+   for (i = 1; i <= natts; i++) {
+       int attoff;
+       bool attnull;
+       
+       /*
+        *  Offsets are from the start of the tuple, and are
+        *  zero-based; indices are one-based.  The next call
+        *  returns i - 1.  That's data hiding for you.
+        */
+       
+       /* attoff = i - 1 */
+       attoff = AttrNumberGetAttrOffset(i);
+       
+       /* below, attdata[attoff] set to equal some datum &
+        * attnull is changed to indicate whether or not the attribute 
+        * is null for this tuple
+        */
+       attdata[attoff] = GetIndexValue(htup, 
+                       htupdesc,
+                       attoff, 
+                       attnum, 
+                       finfo, 
+                       &attnull,
+                       buffer);
+       nulls[attoff] = (attnull ? 'n' : ' ');
+   }
+   
+   /* form an index tuple and point it at the heap tuple */
+   itup = index_formtuple(itupdesc, attdata, nulls);
+   
+   /*
+    *  If the single index key is null, we don't insert it into
+    *  the index.  Hash tables support scans on '='.
+    *  Relational algebra says that A = B
+    *  returns null if either A or B is null.  This
+    *  means that no qualification used in an index scan could ever
+    *  return true on a null attribute.  It also means that indices
+    *  can't be used by ISNULL or NOTNULL scans, but that's an
+    *  artifact of the strategy map architecture chosen in 1986, not
+    *  of the way nulls are handled here.
+    */
+   
+   if (itup->t_info & INDEX_NULL_MASK) {
+       pfree(itup);
+       continue;
+   }
+   
+   itup->t_tid = htup->t_ctid;
+   hitem = _hash_formitem(itup);
+   res = _hash_doinsert(index, hitem);
+   pfree(hitem);
+   pfree(itup);
+   pfree(res);
+    }
+    
+    /* okay, all heap tuples are indexed */
+    heap_endscan(hscan);
+    
+    if (pred != NULL || oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+   ExecDestroyTupleTable(tupleTable, true);
+   pfree(econtext);
+#endif /* OMIT_PARTIAL_INDEX */        
+    }
+    
+    /*
+     *  Since we just counted the tuples in the heap, we update its
+     *  stats in pg_class to guarantee that the planner takes advantage
+     *  of the index we just created. Finally, only update statistics
+     *  during normal index definitions, not for indices on system catalogs
+     *  created during bootstrap processing.  We must close the relations
+     *  before updatings statistics to guarantee that the relcache entries
+     *  are flushed when we increment the command counter in UpdateStats().
+     */
+    if (IsNormalProcessingMode())
+   {
+       hrelid = heap->rd_id;
+       irelid = index->rd_id;
+       heap_close(heap);
+       index_close(index);
+       UpdateStats(hrelid, nhtups, true);
+       UpdateStats(irelid, nitups, false);
+       if (oldPred != NULL) {
+       if (nitups == nhtups) pred = NULL;
+       UpdateIndexPredicate(irelid, oldPred, pred);
+       }  
+   }
+    
+    /* be tidy */
+    pfree(nulls);
+    pfree(attdata);
+    
+    /* all done */
+    BuildingHash = false;
+}
+
+/*
+ *  hashinsert() -- add one index tuple to a hash index.
+ *
+ *  The tuple is wrapped in a hash item and handed to _hash_doinsert();
+ *  whatever that returns is passed back to the caller.  A tuple whose
+ *  t_info has INDEX_NULL_MASK set is not inserted, and NULL is returned
+ *  for it.
+ */
+InsertIndexResult
+hashinsert(Relation rel, IndexTuple itup)
+{
+    InsertIndexResult outcome = (InsertIndexResult) NULL;
+
+    if (!(itup->t_info & INDEX_NULL_MASK)) {
+        HashItem item = _hash_formitem(itup);
+
+        outcome = _hash_doinsert(rel, item);
+
+        /* the item was only needed for the insertion itself */
+        pfree(item);
+    }
+
+    return (outcome);
+}
+
+
+/*
+ *  hashgettuple() -- fetch the next tuple of a hash index scan.
+ *
+ *  A valid currentItemData means the scan is already positioned, so it
+ *  is advanced with _hash_next(); otherwise _hash_first() positions it.
+ *  The RetrieveIndexResult is returned cast to char *.
+ */
+char *
+hashgettuple(IndexScanDesc scan, ScanDirection dir)
+{
+    RetrieveIndexResult found;
+
+    found = ItemPointerIsValid(&(scan->currentItemData))
+        ? _hash_next(scan, dir)
+        : _hash_first(scan, dir);
+
+    return ((char *) found);
+}
+
+
+/*
+ *  hashbeginscan() -- set up a scan of a hash index.
+ *
+ *  Creates the generic scan descriptor, attaches a fresh
+ *  HashScanOpaqueData with both buffers invalid, clears the scan flags
+ *  and registers the scan.  The descriptor is returned cast to char *.
+ */
+char *
+hashbeginscan(Relation rel,
+         bool fromEnd,
+         uint16 keysz,
+         ScanKey scankey)
+{
+    IndexScanDesc desc = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
+    HashScanOpaque state =
+        (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
+
+    /* no page is held yet, neither for the position nor for the mark */
+    state->hashso_mrkbuf = InvalidBuffer;
+    state->hashso_curbuf = InvalidBuffer;
+
+    desc->opaque = state;
+    desc->flags = 0x0;
+
+    /* register scan in case we change pages it's using */
+    _hash_regscan(desc);
+
+    return ((char *) desc);
+}
+
+/*
+ *  hashrescan() -- restart a scan of a hash index.
+ *
+ *  Releases the buffers behind the current and the marked position (when
+ *  those positions are valid), invalidates both positions, and copies
+ *  the new scan key over the old one.  fromEnd is not used here.
+ */
+void
+hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
+{
+    HashScanOpaque state = (HashScanOpaque) scan->opaque;
+    ItemPointer current = &(scan->currentItemData);
+    ItemPointer marked = &(scan->currentMarkData);
+
+    /* we hold a read lock on the current page in the scan */
+    if (ItemPointerIsValid(current)) {
+        _hash_relbuf(scan->relation, state->hashso_curbuf, HASH_READ);
+        state->hashso_curbuf = InvalidBuffer;
+        ItemPointerSetInvalid(current);
+    }
+
+    /* same for the marked position */
+    if (ItemPointerIsValid(marked)) {
+        _hash_relbuf(scan->relation, state->hashso_mrkbuf, HASH_READ);
+        state->hashso_mrkbuf = InvalidBuffer;
+        ItemPointerSetInvalid(marked);
+    }
+
+    /* reset the scan key */
+    if (scan->numberOfKeys > 0)
+        memmove(scan->keyData,
+                scankey,
+                scan->numberOfKeys * sizeof(ScanKeyData));
+}
+
+/*
+ *  hashendscan() -- close down a scan
+ */
+void
+hashendscan(IndexScanDesc scan)
+{
+    
+    ItemPointer iptr;
+    HashScanOpaque so;
+    
+    so = (HashScanOpaque) scan->opaque;
+    
+    /* release any locks we still hold */
+    if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
+   _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ);
+   so->hashso_curbuf = InvalidBuffer;
+   ItemPointerSetInvalid(iptr);
+    }
+    
+    if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
+   if (BufferIsValid(so->hashso_mrkbuf))
+       _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ);
+   so->hashso_mrkbuf = InvalidBuffer;
+   ItemPointerSetInvalid(iptr);
+    }
+    
+    /* don't need scan registered anymore */
+    _hash_dropscan(scan);
+    
+    /* be tidy */
+#ifdef PERFECT_MMGR
+    pfree (scan->opaque);
+#endif /* PERFECT_MMGR */
+}
+
+/*
+ *  hashmarkpos() -- remember the current scan position.
+ *
+ *  Any previously marked position is dropped first (its buffer is
+ *  released).  If the scan has a valid current position, the block
+ *  behind hashso_curbuf is fetched once more for the mark and the
+ *  current position is copied into currentMarkData.
+ */
+void
+hashmarkpos(IndexScanDesc scan)
+{
+    HashScanOpaque state;
+    ItemPointer marked;
+
+    /*
+     *  This notice exists to find out whether the routine is ever called.
+     *  If it is, hashso_mrkbuf is a useful member of the opaque scan
+     *  state; if it never is, that member should be removed.
+     */
+    elog(NOTICE, "Hashmarkpos() called.");
+
+    state = (HashScanOpaque) scan->opaque;
+    marked = &(scan->currentMarkData);
+
+    /* release lock on old marked data, if any */
+    if (ItemPointerIsValid(marked)) {
+        _hash_relbuf(scan->relation, state->hashso_mrkbuf, HASH_READ);
+        state->hashso_mrkbuf = InvalidBuffer;
+        ItemPointerSetInvalid(marked);
+    }
+
+    /* bump lock on currentItemData and copy to currentMarkData */
+    if (ItemPointerIsValid(&(scan->currentItemData))) {
+        BlockNumber curblk = BufferGetBlockNumber(state->hashso_curbuf);
+
+        state->hashso_mrkbuf = _hash_getbuf(scan->relation, curblk,
+                                            HASH_READ);
+        scan->currentMarkData = scan->currentItemData;
+    }
+}
+
+/*
+ *  hashrestrpos() -- go back to the last marked scan position.
+ *
+ *  The current position is dropped first (its buffer is released).  If
+ *  a marked position exists, the block behind hashso_mrkbuf is fetched
+ *  once more for the current position and currentMarkData is copied
+ *  into currentItemData.
+ */
+void
+hashrestrpos(IndexScanDesc scan)
+{
+    HashScanOpaque state;
+    ItemPointer current;
+
+    /*
+     *  This notice exists to find out whether the routine is ever called.
+     *  If it is, hashso_mrkbuf is a useful member of the opaque scan
+     *  state; if it never is, that member should be removed.
+     */
+    elog(NOTICE, "Hashrestrpos() called.");
+
+    state = (HashScanOpaque) scan->opaque;
+    current = &(scan->currentItemData);
+
+    /* release lock on current data, if any */
+    if (ItemPointerIsValid(current)) {
+        _hash_relbuf(scan->relation, state->hashso_curbuf, HASH_READ);
+        state->hashso_curbuf = InvalidBuffer;
+        ItemPointerSetInvalid(current);
+    }
+
+    /* bump lock on currentMarkData and copy to currentItemData */
+    if (ItemPointerIsValid(&(scan->currentMarkData))) {
+        BlockNumber markblk = BufferGetBlockNumber(state->hashso_mrkbuf);
+
+        state->hashso_curbuf = _hash_getbuf(scan->relation, markblk,
+                                            HASH_READ);
+        scan->currentItemData = scan->currentMarkData;
+    }
+}
+
+/*
+ *  hashdelete() -- delete the index entry identified by tid.
+ *
+ *  Active scans are adjusted first, then the entry is removed from its
+ *  page.
+ */
+void
+hashdelete(Relation rel, ItemPointer tid)
+{
+    /* adjust any active scans that will be affected by this deletion */
+    _hash_adjscans(rel, tid);
+    
+    /* delete the data from the page */
+    _hash_pagedel(rel, tid);
+}
+
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c

new file mode 100644 (file)

index 0000000..6b37de2
--- /dev/null
+++ b/src/backend/access/hash/hashfunc.c
@@ -0,0 +1,276 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfunc.c--
+ *    Comparison functions for hash access method.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ * NOTES
+ *    These functions are stored in pg_amproc.  For each operator class
+ *    defined on hash tables, they compute the hash value of the argument.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "utils/nabstime.h"
+
+/*
+ * hashint2() -- hash an int16 key: the bitwise complement of the key,
+ * converted to uint32.
+ */
+uint32 hashint2(int16 key)
+{
+    uint32 hash = (uint32) ~key;
+
+    return (hash);
+}
+
+/*
+ * hashint4() -- hash a 32-bit key: the bitwise complement of the key.
+ */
+uint32 hashint4(uint32 key)
+{
+    uint32 hash = ~key;
+
+    return (hash);
+}
+
+/*
+ * hashfloat4() -- hash the bytes of the float32data that 'keyp' points to.
+ *
+ * Hash function from Chris Torek: for each byte, in address order,
+ * h = h * 33 + byte (written as (h << 5) + h + byte).
+ */
+uint32 hashfloat4(float32 keyp)
+{
+    char *kp = (char *) keyp;
+    char *end = kp + sizeof(float32data);
+    uint32 h = 0;
+
+    /* one mixing step per byte of the value */
+    while (kp < end)
+        h = (h << 5) + h + *kp++;
+
+    return (h);
+}
+
+
+/*
+ * hashfloat8() -- hash the bytes of the float64data that 'keyp' points to.
+ *
+ * Same mixing step as hashfloat4(): for each byte, in address order,
+ * h = h * 33 + byte (written as (h << 5) + h + byte).
+ */
+uint32 hashfloat8(float64 keyp)
+{
+    char *kp = (char *) keyp;
+    char *end = kp + sizeof(float64data);
+    uint32 h = 0;
+
+    /* one mixing step per byte of the value */
+    while (kp < end)
+        h = (h << 5) + h + *kp++;
+
+    return (h);
+}
+
+
+/*
+ * hashoid() -- hash an Oid key: the bitwise complement of the key,
+ * converted to uint32.
+ */
+uint32 hashoid(Oid key)
+{
+    uint32 hash = (uint32) ~key;
+
+    return (hash);
+}
+
+
+/*
+ * hashchar() -- hash a single char key.
+ *
+ * Applies one step of the character hash used by hashchar2() and the
+ * other string hash functions in this file, h = (h * PRIME1) ^ (c - ' '),
+ * starting from h = 0, and reduces the result modulo PRIME2.
+ */
+uint32 hashchar(char key)
+{
+    uint32 h;
+
+/* multiplier and modulus; also used by the string hash functions below */
+#define PRIME1     37
+#define PRIME2     1048583
+
+    /*
+     * h starts at zero, so the multiplication contributes nothing here;
+     * the expression is kept in the same form as the multi-byte variants.
+     */
+    h = 0;
+    /* Convert char to integer */
+    h = h * PRIME1 ^ (key - ' ');
+    h %= PRIME2;
+
+    return (h);
+}
+
+/*
+ * hashchar2() -- hash the bytes of a uint16 key.
+ *
+ * The bytes of 'intkey' are folded in address order with
+ * h = (h * PRIME1) ^ (byte - ' '); the result is reduced modulo PRIME2.
+ */
+uint32 hashchar2(uint16 intkey)
+{
+    char *bytes = (char *) &intkey;
+    uint32 hash = 0;
+    int remaining;
+
+    /* Convert string to integer */
+    for (remaining = sizeof(uint16); remaining > 0; remaining--)
+        hash = (hash * PRIME1) ^ (*bytes++ - ' ');
+
+    return (hash % PRIME2);
+}
+
+/*
+ * hashchar4() -- hash the bytes of a uint32 key.
+ *
+ * The bytes of 'intkey' are folded in address order with
+ * h = (h * PRIME1) ^ (byte - ' '); the result is reduced modulo PRIME2.
+ */
+uint32 hashchar4(uint32 intkey)
+{
+    char *bytes = (char *) &intkey;
+    uint32 hash = 0;
+    int remaining;
+
+    /* Convert string to integer */
+    for (remaining = sizeof(uint32); remaining > 0; remaining--)
+        hash = (hash * PRIME1) ^ (*bytes++ - ' ');
+
+    return (hash % PRIME2);
+}
+
+/*
+ * hashchar8() -- hash the first sizeof(char8) bytes found at 'key'.
+ *
+ * Bytes are folded in address order with
+ * h = (h * PRIME1) ^ (byte - ' '); the result is reduced modulo PRIME2.
+ */
+uint32 hashchar8(char *key)
+{
+    uint32 hash = 0;
+    int remaining;
+
+    /* Convert string to integer */
+    for (remaining = sizeof(char8); remaining > 0; remaining--)
+        hash = (hash * PRIME1) ^ (*key++ - ' ');
+
+    return (hash % PRIME2);
+}
+
+/*
+ * hashname() -- hash all NAMEDATALEN bytes of n->data.
+ *
+ * Bytes are folded in address order with
+ * h = (h * PRIME1) ^ (byte - ' '); the result is reduced modulo PRIME2.
+ */
+uint32 hashname(NameData *n)
+{
+    char *cp = n->data;
+    uint32 hash = 0;
+    int remaining;
+
+    /* Convert string to integer */
+    for (remaining = NAMEDATALEN; remaining > 0; remaining--)
+        hash = (hash * PRIME1) ^ (*cp++ - ' ');
+
+    return (hash % PRIME2);
+}
+
+
+/*
+ * hashchar16() -- hash the first sizeof(char16) bytes found at 'key'.
+ *
+ * Bytes are folded in address order with
+ * h = (h * PRIME1) ^ (byte - ' '); the result is reduced modulo PRIME2.
+ */
+uint32 hashchar16(char *key)
+{
+    uint32 hash = 0;
+    int remaining;
+
+    /* Convert string to integer */
+    for (remaining = sizeof(char16); remaining > 0; remaining--)
+        hash = (hash * PRIME1) ^ (*key++ - ' ');
+
+    return (hash % PRIME2);
+}
+
+
+/*
+ * hashtext() -- hash the data bytes of a varlena key.
+ *
+ * "OZ's original sdbm hash": for every data byte, in order,
+ * hash = byte + 65599 * hash.
+ *
+ * (Paraphrasing the comment from the original db3 hashing code:)
+ * the loop is INCREDIBLY ugly, but fast.  The string is processed in
+ * 8 byte units.  The switch jumps into the middle of the unrolled loop
+ * body so that the first pass handles the leftover bytes (length % 8);
+ * every later pass performs 8 HASHC's.  This saves 7 cmp & branch
+ * instructions per pass, which is worth the ugly coding if the routine
+ * is used heavily enough.
+ */
+uint32 hashtext(struct varlena *key)
+{
+    char *cp = VARDATA(key);
+    int remaining = VARSIZE(key);
+    uint32 hash = 0;
+    int passes;
+
+    /* VARSIZE includes the bytes in which the length itself is stored */
+    remaining -= sizeof(VARSIZE(key));
+
+#define HASHC   hash = *cp++ + 65599 * hash
+
+    /* nothing to hash: the result is zero */
+    if (remaining <= 0)
+        return (hash);
+
+    passes = (remaining + 8 - 1) >> 3;
+
+    switch (remaining & (8 - 1)) {
+    case 0:
+        do {    /* All fall throughs */
+            HASHC;
+    case 7:
+            HASHC;
+    case 6:
+            HASHC;
+    case 5:
+            HASHC;
+    case 4:
+            HASHC;
+    case 3:
+            HASHC;
+    case 2:
+            HASHC;
+    case 1:
+            HASHC;
+        } while (--passes);
+    }
+
+    return (hash);
+}
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c

new file mode 100644 (file)

index 0000000..c514cc6
--- /dev/null
+++ b/src/backend/access/hash/hashinsert.c
@@ -0,0 +1,239 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashinsert.c--
+ *    Item insertion in hash tables for Postgres.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/hash.h"
+
+static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf);
+static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem);
+
+/*
+ *  _hash_doinsert() -- Handle insertion of a single HashItem in the table.
+ *
+ * This routine is called by the public interface routines, hashbuild
+ * and hashinsert.  By here, hashitem is filled in, and has a unique
+ * (xid, seqno) pair. The datum to be used as a "key" is in the
+ *     hashitem. 
+ *
+ * The metapage is fetched with a read lock here and handed to
+ * _hash_insertonpg(), which releases it.  The InsertIndexResult built
+ * by _hash_insertonpg() is returned to the caller.
+ */
+InsertIndexResult
+_hash_doinsert(Relation rel, HashItem hitem)
+{
+    Buffer buf;
+    Buffer metabuf;
+    BlockNumber blkno;
+    HashMetaPage metap;
+    IndexTuple itup;
+    InsertIndexResult res;
+    ScanKey itup_scankey;
+    int natts;
+    Page page;
+    
+    /* fetch the metapage with a read lock and sanity-check its page type */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    
+    /* we need a scan key to do our search, so build one */
+    itup = &(hitem->hash_itup);
+    /* only single-key hash indices are accepted */
+    if ((natts = rel->rd_rel->relnatts) != 1)
+   elog(WARN, "Hash indices valid for only one index key.");
+    itup_scankey = _hash_mkscankey(rel, itup, metap);
+    
+    /* 
+     * find the first page in the bucket chain containing this key and
+     * place it in buf.  _hash_search obtains a read lock for us.
+     */
+    _hash_search(rel, natts, itup_scankey, &buf, metap);
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE);
+
+    /*
+     * trade in our read lock for a write lock so that we can do the
+     * insertion.
+     */
+    blkno = BufferGetBlockNumber(buf);
+    _hash_relbuf(rel, buf, HASH_READ);
+    buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+    
+    
+    /*
+     * XXX btree comment (haven't decided what to do in hash): don't
+     * think the bucket can be split while we're reading the metapage.
+     *
+     * If the page was split between the time that we surrendered our
+     * read lock and acquired our write lock, then this page may no
+     * longer be the right place for the key we want to insert.
+     */
+    
+    /* do the insertion */
+    res = _hash_insertonpg(rel, buf, natts, itup_scankey,
+              hitem, metabuf);
+    
+    /* be tidy */
+    _hash_freeskey(itup_scankey);
+    
+    return (res);
+}
+
+/*
+ *  _hash_insertonpg() -- Insert a tuple on a particular page in the table.
+ *
+ * This procedure does the following things:
+ *
+ *     +  walks the bucket chain starting at 'buf' until it finds a page
+ *        with room for the item, adding a new overflow page at the end
+ *        of the chain if necessary.
+ *     +  inserts the tuple.
+ *     +  increments the key count in the metapage, and calls
+ *        _hash_expandtable() if an overflow page was added or the
+ *        keys-per-bucket ratio exceeds the fill factor.
+ *
+ * On entry, we must have the right buffer on which to do the
+ * insertion, and the buffer must be pinned and write-locked; 'metabuf'
+ * must hold the metapage with a read lock.  On return, we will have
+ * dropped both the pin and the write lock on the buffer, and released
+ * the metapage buffer.
+ *
+ * Returns a palloc'd InsertIndexResult holding the location of the
+ * newly inserted item.
+ */
+static InsertIndexResult
+_hash_insertonpg(Relation rel,
+        Buffer buf,
+        int keysz,
+        ScanKey scankey,
+        HashItem hitem,
+        Buffer metabuf)
+{
+    InsertIndexResult res; 
+    Page page;
+    BlockNumber itup_blkno;
+    OffsetNumber itup_off;
+    int itemsz;
+    HashPageOpaque pageopaque;
+    bool do_expand = false;     
+    Buffer ovflbuf;
+    HashMetaPage metap;
+    Bucket bucket;
+    
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+    bucket = pageopaque->hasho_bucket;
+
+    /* on-page size of the item: index tuple plus the HashItem overhead */
+    itemsz = IndexTupleDSize(hitem->hash_itup)
+   + (sizeof(HashItemData) - sizeof(IndexTupleData));
+    itemsz = DOUBLEALIGN(itemsz);
+    
+    while (PageGetFreeSpace(page) < itemsz) {
+   /* 
+    * no space on this page; check for an overflow page 
+    */
+   if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) {
+       /* 
+        * ovfl page exists; go get it.  if it doesn't have room,
+        * we'll find out next pass through the loop test above.
+        */
+       ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
+                  HASH_WRITE);
+       _hash_relbuf(rel, buf, HASH_WRITE);
+       buf = ovflbuf;
+       page = BufferGetPage(buf);
+   } else {
+       /* 
+        * we're at the end of the bucket chain and we haven't
+        * found a page with enough room.  allocate a new overflow
+        * page.
+        */
+       do_expand = true;
+       ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
+       _hash_relbuf(rel, buf, HASH_WRITE);
+       buf = ovflbuf;
+       page = BufferGetPage(buf);
+
+       if (PageGetFreeSpace(page) < itemsz) {
+       /* it doesn't fit on an empty page -- give up */
+       elog(WARN, "hash item too large");
+       }
+   }
+   _hash_checkpage(page, LH_OVERFLOW_PAGE);
+   pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+   Assert(pageopaque->hasho_bucket == bucket);
+    }
+
+    itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
+    itup_blkno = BufferGetBlockNumber(buf);
+    
+    /* by here, the new tuple is inserted */
+    res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+    
+    ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
+    
+    /* 
+     * Increment the number of keys in the table.
+     * We switch lock access type just for a moment
+     * to allow greater accessibility to the metapage. 
+     *
+     * (This used to be guarded by "res != NULL", but res has already
+     * been written through by ItemPointerSet above, so that test could
+     * never usefully be false and has been dropped.)
+     */
+    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
+                          HASH_READ, HASH_WRITE);
+    metap->hashm_nkeys += 1;
+    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
+                          HASH_WRITE, HASH_READ);
+    
+    /* write out the data page and let go of it */
+    _hash_wrtbuf(rel, buf);
+    
+    /* grow the table if we chained a new page or exceeded the fill factor */
+    if (do_expand || 
+   (metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
+   > metap->hashm_ffactor) {
+   _hash_expandtable(rel, metabuf);
+    }
+    _hash_relbuf(rel, metabuf, HASH_READ);
+    return (res);
+}  
+
+/*
+ *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
+ *
+ * The item is appended at the offset just past the page's current
+ * maximum offset number, and that offset is returned.  The buffer is
+ * written but the caller's write lock and reference on it are kept.
+ * It is an error to call pgaddtup() without a write lock and reference.
+ */
+static OffsetNumber
+_hash_pgaddtup(Relation rel,
+          Buffer buf,
+          int keysz,
+          ScanKey itup_scankey,
+          Size itemsize,
+          HashItem hitem)
+{
+    Page target = BufferGetPage(buf);
+    OffsetNumber newoff;
+
+    _hash_checkpage(target, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+
+    /* the new item goes right after the last one already on the page */
+    newoff = OffsetNumberNext(PageGetMaxOffsetNumber(target));
+    (void) PageAddItem(target, (Item) hitem, itemsize, newoff, LP_USED);
+
+    /* write the buffer, but hold our lock */
+    _hash_wrtnorelbuf(rel, buf);
+
+    return (newoff);
+}
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c

new file mode 100644 (file)

index 0000000..55ee9e9
--- /dev/null
+++ b/src/backend/access/hash/hashovfl.c
@@ -0,0 +1,614 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashovfl.c--
+ *    Overflow page management code for the Postgres hash access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ * NOTES
+ *    Overflow pages look like ordinary relation pages.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+
+static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp);
+static uint32 _hash_firstfreebit(uint32 map);
+
+/*
+ *  _hash_addovflpage
+ *
+ *  Add an overflow page to the page currently pointed to by the buffer 
+ *  argument 'buf'. 
+ *
+ *  *Metabufp has a read lock upon entering the function; buf has a 
+ *  write lock. 
+ *
+ *  Returns the buffer of the new overflow page, obtained with
+ *  HASH_WRITE.  Both that buffer and 'buf' are written with
+ *  _hash_wrtnorelbuf, i.e. neither is released here.
+ *  
+ */
+Buffer
+_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf)
+{
+    
+    OverflowPageAddress oaddr;
+    BlockNumber ovflblkno;
+    Buffer ovflbuf;
+    HashMetaPage metap;
+    HashPageOpaque ovflopaque;
+    HashPageOpaque pageopaque;
+    Page page;
+    Page ovflpage;
+    
+    /* this had better be the last page in a bucket chain */
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+    Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno));
+    
+    /* metap is used here only for the page-type sanity check */
+    metap = (HashMetaPage) BufferGetPage(*metabufp);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+
+    /* allocate an empty overflow page */
+    oaddr = _hash_getovfladdr(rel, metabufp);
+    if (oaddr == InvalidOvflAddress) {
+   elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr.");
+    }
+    /* translate the overflow address into a block number and fetch it */
+    ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr)));
+    Assert(BlockNumberIsValid(ovflblkno));
+    ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+    Assert(BufferIsValid(ovflbuf));
+    ovflpage = BufferGetPage(ovflbuf);
+
+    /* initialize the new overflow page */
+    _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
+    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
+    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
+    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
+    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
+    ovflopaque->hasho_oaddr = oaddr;
+    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
+    _hash_wrtnorelbuf(rel, ovflbuf);
+    
+    /* logically chain overflow page to previous page */
+    pageopaque->hasho_nextblkno = ovflblkno;
+    _hash_wrtnorelbuf(rel, buf);
+    return (ovflbuf);
+}
+
+/*
+ *  _hash_getovfladdr()
+ *
+ *  Find an available overflow page and return its address. 
+ *
+ *  When we enter this function, we have a read lock on *metabufp which
+ *  we change to a write lock immediately. Before exiting, the write lock
+ *  is exchanged for a read lock. 
+ *
+ */
+static OverflowPageAddress
+_hash_getovfladdr(Relation rel, Buffer *metabufp)
+{
+    HashMetaPage metap;
+    Buffer mapbuf;
+    BlockNumber blkno;
+    PageOffset offset;
+    OverflowPageAddress oaddr;
+    SplitNumber splitnum;
+    uint32 *freep;
+    uint32 max_free; 
+    uint32 bit;
+    uint32 first_page; 
+    uint32 free_bit; 
+    uint32 free_page; 
+    uint32 in_use_bits;
+    uint32 i, j;
+    
+    metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
+    
+    splitnum = metap->OVFL_POINT;
+    max_free = metap->SPARES[splitnum];
+    
+    free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT);
+    free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
+    
+    /* Look through all the free maps to find the first free block */
+    first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT);
+    for ( i = first_page; i <= free_page; i++ ) {
+   Page mappage;
+
+   blkno = metap->hashm_mapp[i];
+   mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
+   mappage = BufferGetPage(mapbuf);
+   _hash_checkpage(mappage, LH_BITMAP_PAGE);
+   freep = HashPageGetBitmap(mappage);
+   Assert(freep);
+   
+   if (i == free_page)
+       in_use_bits = free_bit;
+   else
+       in_use_bits = BMPGSZ_BIT(metap) - 1;
+   
+   if (i == first_page) {
+       bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
+       j = bit / BITS_PER_MAP;
+       bit = bit & ~(BITS_PER_MAP - 1);
+   } else {
+       bit = 0;
+       j = 0;
+   }
+   for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
+       if (freep[j] != ALL_SET)
+       goto found;
+    }
+    
+    /* No Free Page Found - have to allocate a new page */
+    metap->LAST_FREED = metap->SPARES[splitnum];
+    metap->SPARES[splitnum]++;
+    offset = metap->SPARES[splitnum] -
+   (splitnum ? metap->SPARES[splitnum - 1] : 0);
+    
+#define    OVMSG   "HASH: Out of overflow pages.  Out of luck.\n"
+    
+    if (offset > SPLITMASK) {
+   if (++splitnum >= NCACHED) {
+       elog(WARN, OVMSG);
+   }
+   metap->OVFL_POINT = splitnum;
+   metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
+   metap->SPARES[splitnum-1]--;
+   offset = 0;
+    }
+    
+    /* Check if we need to allocate a new bitmap page */
+    if (free_bit == BMPGSZ_BIT(metap) - 1) {
+   /* won't be needing old map page */
+
+   _hash_relbuf(rel, mapbuf, HASH_WRITE);
+
+   free_page++;
+   if (free_page >= NCACHED) {
+       elog(WARN, OVMSG);
+   }
+   
+   /*
+    * This is tricky.  The 1 indicates that you want the new page
+    * allocated with 1 clear bit.  Actually, you are going to
+    * allocate 2 pages from this map.  The first is going to be
+    * the map page, the second is the overflow page we were
+    * looking for.  The init_bitmap routine automatically, sets
+    * the first bit of itself to indicate that the bitmap itself
+    * is in use.  We would explicitly set the second bit, but
+    * don't have to if we tell init_bitmap not to leave it clear
+    * in the first place.
+    */
+   if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
+                1, free_page)) {
+       elog(WARN, "overflow_page: problem with _hash_initbitmap.");
+   }
+   metap->SPARES[splitnum]++;
+   offset++;
+   if (offset > SPLITMASK) {
+       if (++splitnum >= NCACHED) {
+       elog(WARN, OVMSG);
+       }
+       metap->OVFL_POINT = splitnum;
+       metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
+       metap->SPARES[splitnum-1]--;
+       offset = 0;
+   }
+    } else {
+   
+   /*
+    * Free_bit addresses the last used bit.  Bump it to address
+    * the first available bit.
+    */
+   free_bit++;
+   SETBIT(freep, free_bit);
+   _hash_wrtbuf(rel, mapbuf);
+    }
+    
+    /* Calculate address of the new overflow page */
+    oaddr = OADDR_OF(splitnum, offset);
+    _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
+    return (oaddr);
+    
+ found:
+    bit = bit + _hash_firstfreebit(freep[j]);
+    SETBIT(freep, bit);
+    _hash_wrtbuf(rel, mapbuf);
+    
+    /*
+     * Bits are addressed starting with 0, but overflow pages are addressed
+     * beginning at 1. Bit is a bit addressnumber, so we need to increment
+     * it to convert it to a page number.
+     */
+    
+    bit = 1 + bit + (i * BMPGSZ_BIT(metap));
+    if (bit >= metap->LAST_FREED) {
+   metap->LAST_FREED = bit - 1;
+    }
+    
+    /* Calculate the split number for this page */
+    for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
+   ;
+    offset = (i ? bit - metap->SPARES[i - 1] : bit);
+    if (offset >= SPLITMASK) {
+   elog(WARN, OVMSG);
+    }
+    
+    /* initialize this page */
+    oaddr = OADDR_OF(i, offset);
+    _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
+    return (oaddr);
+}
+
+/*
+ *  _hash_firstfreebit()
+ *
+ *  Return the position (counting from 0 at the least significant bit)
+ *  of the first bit that is not set in the argument 'map'.  If the
+ *  first BITS_PER_MAP bits are all set, BITS_PER_MAP is returned.
+ *  This function is used to find an available overflow page within a
+ *  splitnumber.
+ *
+ */
+static uint32
+_hash_firstfreebit(uint32 map)
+{
+    uint32 pos;
+    uint32 probe = 0x1;
+
+    /* slide a one-bit probe upward until it lands on a clear bit */
+    for (pos = 0; pos < BITS_PER_MAP && (map & probe) != 0; pos++)
+        probe <<= 1;
+
+    return (pos);
+}
+
+/*
+ *  _hash_freeovflpage() - 
+ *
+ *  Mark this overflow page as free and return a buffer with 
+ *  the page that follows it (which may be defined as
+ *  InvalidBuffer). 
+ *
+ *  The overflow page is zeroed, written and released; its neighbours
+ *  in the bucket chain are relinked around it; and its bit in the
+ *  overflow bitmap is cleared.  The metapage is fetched with a write
+ *  lock for the duration and released before returning.  The returned
+ *  buffer, if valid, is obtained with HASH_WRITE.
+ *
+ */
+Buffer
+_hash_freeovflpage(Relation rel, Buffer ovflbuf)
+{
+    HashMetaPage metap;
+    Buffer metabuf;
+    Buffer mapbuf;
+    BlockNumber prevblkno;
+    BlockNumber blkno;
+    BlockNumber nextblkno;
+    HashPageOpaque ovflopaque;
+    Page ovflpage;
+    Page mappage;
+    OverflowPageAddress addr;
+    SplitNumber splitnum;
+    uint32 *freep;
+    uint32 ovflpgno;
+    int32 bitmappage, bitmapbit;
+    Bucket bucket;
+    
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    
+    /* save what we need from the page's opaque data before wiping it */
+    ovflpage = BufferGetPage(ovflbuf);
+    _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE);
+    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
+    addr = ovflopaque->hasho_oaddr;
+    nextblkno = ovflopaque->hasho_nextblkno;
+    prevblkno = ovflopaque->hasho_prevblkno;
+    bucket = ovflopaque->hasho_bucket;
+    (void) memset(ovflpage, 0, BufferGetPageSize(ovflbuf));
+    _hash_wrtbuf(rel, ovflbuf);
+    
+    /* 
+     * fix up the bucket chain.  this is a doubly-linked list, so we
+     * must fix up the bucket chain members behind and ahead of the
+     * overflow page being deleted.
+     *
+     * XXX this should look like:
+     * - lock prev/next
+     * - modify/write prev/next (how to do write ordering with a
+     * doubly-linked list???)
+     * - unlock prev/next
+     */
+    if (BlockNumberIsValid(prevblkno)) {
+   Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
+   Page prevpage = BufferGetPage(prevbuf);
+   HashPageOpaque prevopaque =
+       (HashPageOpaque) PageGetSpecialPointer(prevpage);
+
+   _hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+   Assert(prevopaque->hasho_bucket == bucket);
+   prevopaque->hasho_nextblkno = nextblkno;
+   _hash_wrtbuf(rel, prevbuf);
+    }
+    if (BlockNumberIsValid(nextblkno)) {
+   Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
+   Page nextpage = BufferGetPage(nextbuf);
+   HashPageOpaque nextopaque =
+       (HashPageOpaque) PageGetSpecialPointer(nextpage);
+   
+   _hash_checkpage(nextpage, LH_OVERFLOW_PAGE);
+   Assert(nextopaque->hasho_bucket == bucket);
+   nextopaque->hasho_prevblkno = prevblkno;
+   _hash_wrtbuf(rel, nextbuf);
+    }
+    
+    /* 
+     * Fix up the overflow page bitmap that tracks this particular
+     * overflow page. The bitmap can be found in the MetaPageData
+     * array element hashm_mapp[bitmappage].
+     */
+    /* convert the overflow address into a 0-based overflow page number */
+    splitnum = (addr >> SPLITSHIFT);
+    ovflpgno =
+   (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;
+    
+    /* remember the lowest freed page number in LAST_FREED */
+    if (ovflpgno < metap->LAST_FREED) {
+   metap->LAST_FREED = ovflpgno;
+    }
+    
+    /* which bitmap page, and which bit on it, tracks this page */
+    bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT));
+    bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);
+    
+    blkno = metap->hashm_mapp[bitmappage];
+    mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
+    mappage = BufferGetPage(mapbuf);
+    _hash_checkpage(mappage, LH_BITMAP_PAGE);
+    freep = HashPageGetBitmap(mappage);
+    CLRBIT(freep, bitmapbit);
+    _hash_wrtbuf(rel, mapbuf);
+    
+    _hash_relbuf(rel, metabuf, HASH_WRITE);
+    
+    /* 
+     * now instantiate the page that replaced this one, 
+     * if it exists, and return that buffer with a write lock.
+     */
+    if (BlockNumberIsValid(nextblkno)) {
+   return (_hash_getbuf(rel, nextblkno, HASH_WRITE));
+    } else {
+   return (InvalidBuffer);
+    }
+}
+
+
+/*
+ *  _hash_initbitmap()
+ *  
+ *   Initialize a new bitmap page.  The metapage has a write-lock upon
+ *   entering the function.
+ *
+ * 'pnum' is the OverflowPageAddress of the new bitmap page.
+ * 'nbits' is how many bits to clear (i.e., make available) in the new
+ * bitmap page.  the remainder of the bits (as well as the first bit,
+ * representing the bitmap page itself) will be set.
+ * 'ndx' is the 0-based offset of the new bitmap page within the
+ * metapage's array of bitmap page OverflowPageAddresses.
+ *
+ * The metapage's hashm_nmaps count is incremented and hashm_mapp[ndx]
+ * is set to the block number of the new page.  Always returns 0.
+ */
+
+/* mask selecting the bit position within one bitmap word */
+#define INT_MASK   ((1 << INT_TO_BIT) -1)
+
+int32
+_hash_initbitmap(Relation rel,
+        HashMetaPage metap,
+        int32 pnum,
+        int32 nbits,
+        int32 ndx)
+{
+    Buffer buf;
+    BlockNumber blkno;
+    Page pg;
+    HashPageOpaque op;
+    uint32 *freep;
+    int clearbytes, clearints;
+    
+    /* fetch the page and set it up as a bitmap page belonging to no bucket */
+    blkno = OADDR_TO_BLKNO(pnum);
+    buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+    pg = BufferGetPage(buf);
+    _hash_pageinit(pg, BufferGetPageSize(buf));
+    op = (HashPageOpaque) PageGetSpecialPointer(pg);
+    op->hasho_oaddr = InvalidOvflAddress;
+    op->hasho_prevblkno = InvalidBlockNumber;
+    op->hasho_nextblkno = InvalidBlockNumber;
+    op->hasho_flag = LH_BITMAP_PAGE;
+    op->hasho_bucket = -1;
+
+    freep = HashPageGetBitmap(pg);
+
+    /* set all of the bits above 'nbits' to 1 */
+    /* whole words needed to hold 'nbits' bits, and their size in bytes */
+    clearints = ((nbits - 1) >> INT_TO_BIT) + 1;
+    clearbytes = clearints << INT_TO_BYTE;
+    (void) memset((char *) freep, 0, clearbytes);
+    (void) memset(((char *) freep) + clearbytes, 0xFF,
+         BMPGSZ_BYTE(metap) - clearbytes);
+    /* in the last cleared word, set the bits from position 'nbits' upward */
+    freep[clearints - 1] = ALL_SET << (nbits & INT_MASK);
+
+    /* bit 0 represents the new bitmap page */
+    SETBIT(freep, 0);
+        
+    /* metapage already has a write lock */
+    metap->hashm_nmaps++;
+    metap->hashm_mapp[ndx] = blkno;
+    
+    /* write out the new bitmap page (releasing its locks) */
+    _hash_wrtbuf(rel, buf);
+
+    return (0);
+}
+
+
+/*
+ *  _hash_squeezebucket(rel, metap, bucket)
+ *
+ *  Try to squeeze the tuples onto pages occurring earlier in the
+ *  bucket chain in an attempt to free overflow pages. When we start
+ *  the "squeezing", the page from which we start taking tuples (the
+ *  "read" page) is the last page in the bucket chain and the page
+ *  onto which we start squeezing tuples (the "write" page) is the
+ *  first page in the bucket chain.  The read page works backward and
+ *  the write page works forward; the procedure terminates when the
+ *  read page and write page are the same page.
+ *
+ *  'metap' is not referenced in the body of this function.
+ *  All buffers acquired here are released before returning.
+ */
+void
+_hash_squeezebucket(Relation rel,
+           HashMetaPage metap, 
+           Bucket bucket)
+{
+    Buffer wbuf;
+    Buffer rbuf;
+    BlockNumber wblkno;        
+    BlockNumber rblkno;        
+    Page wpage;
+    Page rpage;
+    HashPageOpaque wopaque;
+    HashPageOpaque ropaque;
+    OffsetNumber woffnum;
+    OffsetNumber roffnum;
+    HashItem hitem;
+    int itemsz;
+    
+/*    elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */
+
+    /*
+     * start squeezing into the base bucket page.
+     */
+    wblkno = BUCKET_TO_BLKNO(bucket);
+    wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
+    wpage = BufferGetPage(wbuf);
+    _hash_checkpage(wpage, LH_BUCKET_PAGE);
+    wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
+    
+    /*
+     * if there aren't any overflow pages, there's nothing to squeeze.
+     */
+    if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) {
+   _hash_relbuf(rel, wbuf, HASH_WRITE);
+   return;
+    }
+    
+    /*
+     * find the last page in the bucket chain by starting at the base
+     * bucket page and working forward.
+     *
+     * XXX if chains tend to be long, we should probably move forward
+     * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when
+     * we reach the end.  if they are short we probably don't care
+     * very much.  if the hash function is working at all, they had
+     * better be short..
+     */
+    ropaque = wopaque;
+    do {
+   rblkno = ropaque->hasho_nextblkno;
+   /* on the first pass ropaque still refers to the write page; keep it */
+   if (ropaque != wopaque) {
+       _hash_relbuf(rel, rbuf, HASH_WRITE);
+   }
+   rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
+   rpage = BufferGetPage(rbuf);
+   _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
+   Assert(!PageIsEmpty(rpage));
+   ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
+   Assert(ropaque->hasho_bucket == bucket);
+    } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
+
+    /*
+     * squeeze the tuples.
+     */
+    roffnum = FirstOffsetNumber;
+    for(;;) {
+   /* next item to move, and the space it needs on the write page */
+   hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
+   itemsz = IndexTupleDSize(hitem->hash_itup) 
+       + (sizeof(HashItemData) - sizeof(IndexTupleData));
+   itemsz = DOUBLEALIGN(itemsz);
+   
+   /*
+    * walk up the bucket chain, looking for a page big enough for
+    * this item.
+    */
+   while (PageGetFreeSpace(wpage) < itemsz) {
+       wblkno = wopaque->hasho_nextblkno;
+
+       _hash_wrtbuf(rel, wbuf);
+
+       /* the write cursor has caught up with the read page: done */
+       if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) {
+       _hash_wrtbuf(rel, rbuf);
+       /* wbuf is already released */
+       return;
+       }
+       
+       wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
+       wpage = BufferGetPage(wbuf);
+       _hash_checkpage(wpage, LH_OVERFLOW_PAGE);
+       Assert(!PageIsEmpty(wpage));
+       wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
+       Assert(wopaque->hasho_bucket == bucket);
+   }
+   
+   /* 
+    * if we're here, we have found room so insert on the "write"
+    * page.
+    */
+   woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
+   (void) PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED);
+   
+   /* 
+    * delete the tuple from the "read" page.
+    * PageIndexTupleDelete repacks the ItemId array, so 'roffnum'
+    * will be "advanced" to the "next" ItemId.
+    */
+   PageIndexTupleDelete(rpage, roffnum);
+   _hash_wrtnorelbuf(rel, rbuf);
+   
+   /*
+    * if the "read" page is now empty because of the deletion,
+    * free it.
+    */
+   if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) {
+       /* remember the predecessor; it becomes the next read page */
+       rblkno = ropaque->hasho_prevblkno;
+       Assert(BlockNumberIsValid(rblkno));
+
+       /*
+        * free this overflow page.  the extra _hash_relbuf is
+        * because _hash_freeovflpage gratuitously returns the
+        * next page (we want the previous page and will get it
+        * ourselves later).
+        */
+       rbuf = _hash_freeovflpage(rel, rbuf);
+       if (BufferIsValid(rbuf)) {
+       _hash_relbuf(rel, rbuf, HASH_WRITE);
+       }
+       
+       /* the read cursor has backed up onto the write page: done */
+       if (rblkno == wblkno) {
+       /* rbuf is already released */
+       _hash_wrtbuf(rel, wbuf);
+       return;
+       }
+       
+       rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
+       rpage = BufferGetPage(rbuf);
+       _hash_checkpage(rpage, LH_OVERFLOW_PAGE);
+       Assert(!PageIsEmpty(rpage));
+       ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
+       Assert(ropaque->hasho_bucket == bucket);
+
+       roffnum = FirstOffsetNumber;
+   }
+    }
+}
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c

new file mode 100644 (file)

index 0000000..2c6ebed
--- /dev/null
+++ b/src/backend/access/hash/hashpage.c
@@ -0,0 +1,669 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashpage.c--
+ *    Hash table page management code for the Postgres hash access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ * NOTES
+ *    Postgres hash pages look like ordinary relation pages.  The opaque
+ *    data at high addresses includes information about the page including
+ *    whether a page is an overflow page or a true bucket, the block 
+ *    numbers of the preceding and following pages, and the overflow
+ *    address of the page if it is an overflow page.
+ *
+ *    The first page in a hash relation, page zero, is special -- it stores
+ *    information describing the hash table; it is referred to as the
+ *    "meta page." Pages one and higher store the actual data. 
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+
+static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
+static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
+static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);
+
+/*  
+ *  We use high-concurrency locking on hash indices.  There are two cases in
+ *  which we don't do locking.  One is when we're building the index.
+ *  Since the creating transaction has not committed, no one can see
+ *  the index, and there's no reason to share locks.  The second case
+ *  is when we're just starting up the database system.  We use some
+ *  special-purpose initialization code in the relation cache manager
+ *  (see utils/cache/relcache.c) to allow us to do indexed scans on
+ *  the system catalogs before we'd normally be able to.  This happens
+ *  before the lock table is fully initialized, so we can't use it.
+ *  Strictly speaking, this violates 2pl, but we don't do 2pl on the
+ *  system catalogs anyway.
+ */
+
+
+/* true unless we are building a hash index or are in init processing mode */
+#define USELOCKING (!BuildingHash && !IsInitProcessingMode())
+
+
+/*
+ *  _hash_metapinit() -- Initialize the metadata page of a hash index,
+ *     the two buckets that we begin with and the initial
+ *     bitmap page.
+ *
+ * 'rel' is the hash index relation.  It must not contain any blocks
+ * yet; otherwise elog(WARN) is raised.  When USELOCKING is true the
+ * relation is write-locked at the start and unlocked at the end.
+ */
+void
+_hash_metapinit(Relation rel)
+{
+    HashMetaPage metap;
+    HashPageOpaque pageopaque;
+    Buffer metabuf;
+    Buffer buf;
+    Page pg;
+    int nbuckets;
+    uint32 nelem;          /* number elements */
+    uint32 lg2nelem;           /* _hash_log2(nelem)   */
+    uint32 nblocks;
+    uint16 i;
+    
+    /* can't be sharing this with anyone, now... */
+    if (USELOCKING)
+   RelationSetLockForWrite(rel);
+    
+    /* refuse to re-initialize a relation that already has blocks */
+    if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) {
+   elog(WARN, "Cannot initialize non-empty hash table %s",
+        RelationGetRelationName(rel));
+    }
+    
+    /* the meta page struct is overlaid directly on the page itself */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+    pg = BufferGetPage(metabuf);
+    metap = (HashMetaPage) pg;
+    _hash_pageinit(pg, BufferGetPageSize(metabuf));
+    
+    metap->hashm_magic         = HASH_MAGIC;
+    metap->hashm_version   = HASH_VERSION;
+    metap->hashm_nkeys         = 0;
+    metap->hashm_nmaps         = 0;
+    metap->hashm_ffactor   = DEFAULT_FFACTOR;
+    metap->hashm_bsize         = BufferGetPageSize(metabuf);
+    metap->hashm_bshift        = _hash_log2(metap->hashm_bsize);
+    /*
+     * walk down from hashm_bshift to the largest i for which (1 << i)
+     * is smaller than the page size minus the aligned page header and
+     * aligned hash opaque area; hashm_bmsize becomes that power of two.
+     */
+    for (i = metap->hashm_bshift; i > 0; --i) {
+   if ((1 << i) < (metap->hashm_bsize -
+           (DOUBLEALIGN(sizeof(PageHeaderData)) +
+            DOUBLEALIGN(sizeof(HashPageOpaqueData))))) {
+       break;
+   }
+    }
+    Assert(i);
+    metap->hashm_bmsize        = 1 << i;
+    metap->hashm_procid        = index_getprocid(rel, 1, HASHPROC);
+    
+    /* 
+     * Make nelem = 2 rather than 0 so that we end up allocating space 
+     * for the next greater power of two number of buckets. 
+     *
+     * (nelem and nbuckets are assigned here but not read again in
+     * this function; the literal values below are spelled out with the
+     * formula they stand for in the trailing comments.)
+     */
+    nelem = 2;
+    lg2nelem = 1;      /*_hash_log2(MAX(nelem, 2)) */
+    nbuckets = 2;      /*1 << lg2nelem */
+    
+    memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
+    memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
+    
+    metap->hashm_spares[lg2nelem]     = 2; /* lg2nelem + 1 */
+    metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */
+    metap->hashm_ovflpoint            = 1; /* lg2nelem */
+    metap->hashm_lastfreed            = 2;
+    
+    metap->hashm_maxbucket = metap->hashm_lowmask = 1;     /* nbuckets - 1 */
+    metap->hashm_highmask  = 3;             /* (nbuckets << 1) - 1 */
+    
+    /* the meta page belongs to no bucket and has no neighbours */
+    pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
+    pageopaque->hasho_oaddr = InvalidOvflAddress;
+    pageopaque->hasho_prevblkno = InvalidBlockNumber;
+    pageopaque->hasho_nextblkno = InvalidBlockNumber;
+    pageopaque->hasho_flag = LH_META_PAGE;
+    pageopaque->hasho_bucket = -1;
+
+    /* 
+     * First bitmap page is at: splitpoint lg2nelem page offset 1 which
+     * turns out to be page 3. Couldn't initialize page 3  until we created
+     * the first two buckets above. 
+     *
+     * NOTE(review): in this function the two bucket pages are actually
+     * initialized *after* this call (see the loop below) -- confirm the
+     * wording "above" against the intended ordering.
+     */
+    if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
+   elog(WARN, "Problem with _hash_initbitmap.");
+
+    /* all done */
+    _hash_wrtnorelbuf(rel, metabuf);
+    
+    /* 
+     * initialize the first two buckets 
+     */
+    for (i = 0; i <= 1; i++) {
+   buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
+   pg = BufferGetPage(buf);
+   _hash_pageinit(pg, BufferGetPageSize(buf));
+   pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
+   pageopaque->hasho_oaddr = InvalidOvflAddress;
+   pageopaque->hasho_prevblkno = InvalidBlockNumber;
+   pageopaque->hasho_nextblkno = InvalidBlockNumber;
+   pageopaque->hasho_flag = LH_BUCKET_PAGE;
+   pageopaque->hasho_bucket = i;
+   _hash_wrtbuf(rel, buf);
+    }
+    
+    /* the meta page was already written above; now give it up */
+    _hash_relbuf(rel, metabuf, HASH_WRITE);
+    
+    if (USELOCKING)
+   RelationUnsetLockForWrite(rel);
+}
+
+/*
+ *  _hash_getbuf() -- Fetch the buffer for block 'blkno' of 'rel'.
+ *
+ * 'access' must be HASH_READ or HASH_WRITE.  The matching page lock
+ * is requested through _hash_setpagelock before the block is read, so
+ * when this routine returns the appropriate lock is set on the
+ * requested buffer and its reference count is correct.
+ *
+ * XXX P_NEW is rejected because, unlike the tree structures, we
+ * need the bucket blocks to be at certain block numbers.  We must
+ * depend on the caller to call _hash_pageinit on the block if it
+ * knows that this is a new block.
+ */
+Buffer
+_hash_getbuf(Relation rel, BlockNumber blkno, int access)
+{
+    if (blkno == P_NEW) {
+        elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
+    }
+
+    if (access == HASH_WRITE || access == HASH_READ) {
+        _hash_setpagelock(rel, blkno, access);
+    } else {
+        elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %.*s",
+             access, NAMEDATALEN, RelationGetRelationName(rel));
+    }
+
+    /* ref count and lock type are correct */
+    return ReadBuffer(rel, blkno);
+}
+
+/*
+ *  _hash_relbuf() -- release a locked buffer.
+ *
+ * The page lock of kind 'access' (HASH_READ or HASH_WRITE) on the
+ * buffer's block is dropped, then our reference to the buffer is
+ * released.  Any other 'access' value is reported through elog(WARN).
+ */
+void
+_hash_relbuf(Relation rel, Buffer buf, int access)
+{
+    BlockNumber held = BufferGetBlockNumber(buf);
+
+    if (access == HASH_WRITE || access == HASH_READ) {
+        _hash_unsetpagelock(rel, held, access);
+    } else {
+        elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %.*s",
+             access, held, NAMEDATALEN, RelationGetRelationName(rel));
+    }
+
+    ReleaseBuffer(buf);
+}
+
+/*
+ *  _hash_wrtbuf() -- write a hash page to disk.
+ *
+ * Besides writing the page, this routine gives up the write lock held
+ * on the buffer and our reference to it.  Calling it without holding a
+ * write lock or a reference to the buffer is an error.
+ */
+void
+_hash_wrtbuf(Relation rel, Buffer buf)
+{
+    /* look up the block number before the buffer is handed to WriteBuffer */
+    BlockNumber target = BufferGetBlockNumber(buf);
+
+    WriteBuffer(buf);
+    _hash_unsetpagelock(rel, target, HASH_WRITE);
+}
+
+/*
+ *  _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
+ *          our reference or lock.
+ *
+ * It is an error to call _hash_wrtnorelbuf() without a write lock
+ * or a reference to the buffer.
+ *
+ * 'rel' is not used by this routine; only 'buf' is passed on to
+ * WriteNoReleaseBuffer.
+ */
+void
+_hash_wrtnorelbuf(Relation rel, Buffer buf)
+{
+    WriteNoReleaseBuffer(buf);
+}
+
+/*
+ *  _hash_chgbufaccess() -- change the kind of access held on a buffer.
+ *
+ * The buffer in *bufp, currently held with 'from_access', is given up:
+ * through _hash_wrtbuf when it was held for HASH_WRITE, through
+ * _hash_relbuf when it was held for HASH_READ.  The same block is then
+ * fetched again with 'to_access'.  *bufp is overwritten with the
+ * buffer obtained by that second fetch and its page is returned.
+ *
+ * Note that the block is not held between the two steps.
+ */
+Page
+_hash_chgbufaccess(Relation rel,
+                   Buffer *bufp,
+                   int from_access,
+                   int to_access)
+{
+    BlockNumber blkno = BufferGetBlockNumber(*bufp);
+
+    if (from_access == HASH_WRITE) {
+        _hash_wrtbuf(rel, *bufp);
+    } else if (from_access == HASH_READ) {
+        _hash_relbuf(rel, *bufp, from_access);
+    } else {
+        elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %.*s",
+             from_access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+    }
+
+    *bufp = _hash_getbuf(rel, blkno, to_access);
+    return BufferGetPage(*bufp);
+}
+
+/*
+ *  _hash_pageinit() -- Initialize a new page.
+ *
+ * 'page' points at 'size' bytes.  The page is zero-filled and then set
+ * up by PageInit with a special area large enough for a
+ * HashPageOpaqueData.
+ */
+void
+_hash_pageinit(Page page, Size size)
+{
+    Size opaque_size = sizeof(HashPageOpaqueData);
+
+    /* the header fields of the incoming page must still be zero */
+    Assert(((PageHeader) page)->pd_lower == 0);
+    Assert(((PageHeader) page)->pd_upper == 0);
+    Assert(((PageHeader) page)->pd_special == 0);
+
+    /*
+     *  Zeroing the whole page is not strictly required, but new pages
+     *  are created rarely and it is reassuring to know they start out
+     *  empty.
+     */
+    memset(page, 0, size);
+
+    PageInit(page, size, opaque_size);
+}
+
+/*
+ *  _hash_setpagelock() -- take a single-page lock on block 'blkno'.
+ *
+ * Does nothing when USELOCKING is false.  Otherwise a write lock is
+ * set for HASH_WRITE and a read lock for HASH_READ; any other 'access'
+ * value is reported through elog(WARN).
+ */
+static void
+_hash_setpagelock(Relation rel,
+                  BlockNumber blkno,
+                  int access)
+{
+    ItemPointerData iptr;
+
+    if (!USELOCKING)
+        return;
+
+    /* the lock is keyed by an item pointer naming (blkno, offset 1) */
+    ItemPointerSet(&iptr, blkno, 1);
+
+    if (access == HASH_WRITE) {
+        RelationSetSingleWLockPage(rel, &iptr);
+    } else if (access == HASH_READ) {
+        RelationSetSingleRLockPage(rel, &iptr);
+    } else {
+        elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %.*s",
+             access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+    }
+}
+
+/*
+ *  _hash_unsetpagelock() -- drop a single-page lock on block 'blkno'.
+ *
+ * Does nothing when USELOCKING is false.  Otherwise the write lock is
+ * unset for HASH_WRITE and the read lock for HASH_READ; any other
+ * 'access' value is reported through elog(WARN).
+ */
+static void
+_hash_unsetpagelock(Relation rel,
+                    BlockNumber blkno,
+                    int access)
+{
+    ItemPointerData iptr;
+
+    if (!USELOCKING)
+        return;
+
+    /* same key as used when the lock was set: (blkno, offset 1) */
+    ItemPointerSet(&iptr, blkno, 1);
+
+    if (access == HASH_WRITE) {
+        RelationUnsetSingleWLockPage(rel, &iptr);
+    } else if (access == HASH_READ) {
+        RelationUnsetSingleRLockPage(rel, &iptr);
+    } else {
+        elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %.*s",
+             access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+    }
+}
+
+/*
+ *  _hash_pagedel() -- delete the index tuple identified by 'tid'.
+ *
+ * The page named by 'tid' is fetched for HASH_WRITE and the item at
+ * tid's offset is removed from it.  If that leaves an overflow page
+ * empty, the page is handed to _hash_freeovflpage; otherwise the
+ * buffer is simply released.  Finally the key count kept on the meta
+ * page is decremented to account for the tuple that was removed.
+ */
+void
+_hash_pagedel(Relation rel, ItemPointer tid)
+{
+    Buffer buf;
+    Buffer metabuf;
+    Page page;
+    BlockNumber blkno;
+    OffsetNumber offno;
+    HashMetaPage metap;
+    HashPageOpaque opaque;
+
+    blkno = ItemPointerGetBlockNumber(tid);
+    offno = ItemPointerGetOffsetNumber(tid);
+
+    buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+    PageIndexTupleDelete(page, offno);
+    _hash_wrtnorelbuf(rel, buf);
+
+    if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) {
+        /*
+         * _hash_freeovflpage hands back a buffer of its own; we have
+         * no use for it, so release it if it is valid.
+         */
+        buf = _hash_freeovflpage(rel, buf);
+        if (BufferIsValid(buf)) {
+            _hash_relbuf(rel, buf, HASH_WRITE);
+        }
+    } else {
+        _hash_relbuf(rel, buf, HASH_WRITE);
+    }
+
+    /* a tuple was removed, so the index now holds one key fewer */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    --metap->hashm_nkeys;
+    _hash_wrtbuf(rel, metabuf);
+}
+
+/*
+ *  _hash_expandtable() -- add one bucket to the hash table.
+ *
+ * 'metabuf' holds the meta page and is treated as held for HASH_READ
+ * on entry: every update below first switches it to HASH_WRITE with
+ * _hash_chgbufaccess and switches it back to HASH_READ afterwards.
+ * MAX_BUCKET is bumped to obtain the new bucket number, the spare and
+ * mask fields are adjusted when needed, and finally _hash_splitpage
+ * relocates tuples from the old bucket into the new one.
+ *
+ * NOTE(review): 'metabuf' is passed by value, so if
+ * _hash_chgbufaccess stores a different Buffer in it the caller will
+ * not see the change -- confirm callers do not depend on their copy.
+ */
+void
+_hash_expandtable(Relation rel, Buffer metabuf)
+{
+    HashMetaPage metap;
+    Bucket old_bucket;
+    Bucket new_bucket;
+    uint32 spare_ndx;
+    
+/*    elog(DEBUG, "_hash_expandtable: expanding..."); */
+
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    
+    /* claim the next bucket number while holding the meta page for write */
+    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);   
+    new_bucket = ++metap->MAX_BUCKET;
+    metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);   
+    /* computed with the low mask as it stands before any update below */
+    old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);
+    
+    /*
+     * If the split point is increasing (MAX_BUCKET's log base 2
+     * increases), we need to copy the current contents of the spare
+     * split bucket to the next bucket.
+     */
+    spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
+    if (spare_ndx > metap->OVFL_POINT) {
+   
+   metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);    
+   metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
+   metap->OVFL_POINT = spare_ndx;
+   metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);    
+    }
+    
+    if (new_bucket > metap->HIGH_MASK) {
+   
+   /* Starting a new doubling */
+   metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);    
+   metap->LOW_MASK = metap->HIGH_MASK;
+   metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
+   metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);    
+   
+    }
+    /* Relocate records to the new bucket */
+    _hash_splitpage(rel, metabuf, old_bucket, new_bucket);
+}
+
+
+/*
+ * _hash_splitpage -- split 'obucket' into 'obucket' and 'nbucket'
+ *
+ * this routine is actually misnamed -- we are splitting a bucket that
+ * consists of a base bucket page and zero or more overflow (bucket
+ * chain) pages.
+ *
+ * 'metabuf' is the buffer holding the meta page.  Every tuple in the
+ * old bucket chain is re-hashed with _hash_call; tuples that now hash
+ * to 'nbucket' are copied to the new bucket (growing its chain with
+ * _hash_addovflpage when a page fills up) and deleted from the old
+ * one.  Old overflow pages emptied this way are freed.  Unless the old
+ * bucket turns out to be completely empty, _hash_squeezebucket is
+ * called on 'obucket' before returning.
+ */
+static void
+_hash_splitpage(Relation rel,
+       Buffer metabuf,
+       Bucket obucket,
+       Bucket nbucket)
+{
+    Bucket bucket;
+    Buffer obuf;
+    Buffer nbuf;
+    Buffer ovflbuf;
+    BlockNumber oblkno;
+    BlockNumber nblkno;
+    bool null;
+    Datum datum;
+    HashItem hitem;
+    HashPageOpaque oopaque;
+    HashPageOpaque nopaque;
+    HashMetaPage metap;
+    IndexTuple itup;
+    int itemsz;
+    OffsetNumber ooffnum;
+    OffsetNumber noffnum;
+    OffsetNumber omaxoffnum;
+    Page opage;
+    Page npage;
+    TupleDesc itupdesc;
+    
+/*    elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
+    obucket, obucket, nbucket);
+*/
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    
+    /* get the buffers & pages */
+    oblkno = BUCKET_TO_BLKNO(obucket);
+    nblkno = BUCKET_TO_BLKNO(nbucket);
+    obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
+    nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
+    opage = BufferGetPage(obuf);
+    npage = BufferGetPage(nbuf);
+
+    /* initialize the new bucket */
+    _hash_pageinit(npage, BufferGetPageSize(nbuf));
+    nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
+    nopaque->hasho_prevblkno = InvalidBlockNumber;
+    nopaque->hasho_nextblkno = InvalidBlockNumber;
+    nopaque->hasho_flag = LH_BUCKET_PAGE;
+    nopaque->hasho_oaddr = InvalidOvflAddress;
+    nopaque->hasho_bucket = nbucket;
+    _hash_wrtnorelbuf(rel, nbuf);
+    
+    /*
+     * make sure the old bucket isn't empty.  advance 'opage' and
+     * friends through the overflow bucket chain until we find a
+     * non-empty page.
+     *
+     * XXX we should only need this once, if we are careful to
+     * preserve the invariant that overflow pages are never empty.
+     */
+    _hash_checkpage(opage, LH_BUCKET_PAGE);
+    oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+    if (PageIsEmpty(opage)) {
+   oblkno = oopaque->hasho_nextblkno;
+   _hash_relbuf(rel, obuf, HASH_WRITE);
+   if (!BlockNumberIsValid(oblkno)) {
+       /*
+        * the old bucket is completely empty; of course, the new
+        * bucket will be as well, but since it's a base bucket
+        * page we don't care.
+        */
+       _hash_relbuf(rel, nbuf, HASH_WRITE);
+       return;
+   }
+   /* step to the first overflow page, which must hold tuples */
+   obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
+   opage = BufferGetPage(obuf);
+   _hash_checkpage(opage, LH_OVERFLOW_PAGE);
+   if (PageIsEmpty(opage)) {
+       elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
+   }
+   oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+    }
+
+    /*
+     * we are now guaranteed that 'opage' is not empty.  partition the
+     * tuples in the old bucket between the old bucket and the new
+     * bucket, advancing along their respective overflow bucket chains
+     * and adding overflow pages as needed.
+     */
+    ooffnum = FirstOffsetNumber;
+    omaxoffnum = PageGetMaxOffsetNumber(opage); 
+    for (;;) {
+   /*
+    * at each iteration through this loop, each of these variables
+    * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
+    */
+
+   /* check if we're at the end of the page */
+   if (ooffnum > omaxoffnum) {
+       /* at end of page, but check for overflow page */
+       oblkno = oopaque->hasho_nextblkno;      
+       if (BlockNumberIsValid(oblkno)) {
+       /*
+        * we ran out of tuples on this particular page, but
+        * we have more overflow pages; re-init values.
+        */
+       _hash_wrtbuf(rel, obuf);
+       obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
+       opage = BufferGetPage(obuf);
+       _hash_checkpage(opage, LH_OVERFLOW_PAGE);
+       oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+       
+       /* we're guaranteed that an ovfl page has at least 1 tuple */
+       if (PageIsEmpty(opage)) {
+           elog(WARN, "_hash_splitpage: empty ovfl page %d!",
+            oblkno);
+       }
+       ooffnum = FirstOffsetNumber;
+       omaxoffnum = PageGetMaxOffsetNumber(opage);
+       } else {
+       /*
+        * we're at the end of the bucket chain, so now we're
+        * really done with everything.  before quitting, call
+        * _hash_squeezebucket to ensure the tuples in the
+        * bucket (including the overflow pages) are packed as
+        * tightly as possible.
+        */
+       _hash_wrtbuf(rel, obuf);
+       _hash_wrtbuf(rel, nbuf);
+       _hash_squeezebucket(rel, metap, obucket);
+       return;
+       }
+   }
+   
+   /* hash on the tuple */
+   hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
+   itup = &(hitem->hash_itup);
+   itupdesc = RelationGetTupleDescriptor(rel);
+   datum = index_getattr(itup, 1, itupdesc, &null);
+   bucket = _hash_call(rel, metap, datum);
+   
+   if (bucket == nbucket) {
+       /*
+        * insert the tuple into the new bucket.  if it doesn't
+        * fit on the current page in the new bucket, we must
+        * allocate a new overflow page and place the tuple on
+        * that page instead.
+        */
+       itemsz = IndexTupleDSize(hitem->hash_itup) 
+       + (sizeof(HashItemData) - sizeof(IndexTupleData));
+
+       itemsz = DOUBLEALIGN(itemsz);
+       
+       if (PageGetFreeSpace(npage) < itemsz) {
+       /*
+        * NOTE(review): _hash_addovflpage is given &metabuf and so
+        * may replace it, but 'metap' is not re-derived from
+        * metabuf afterwards -- confirm that is safe.
+        */
+       ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
+       _hash_wrtbuf(rel, nbuf);
+       nbuf = ovflbuf;
+       npage = BufferGetPage(nbuf);
+       _hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+       }
+       
+       /* append the tuple after the last item on the new page */
+       noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
+       (void) PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
+       _hash_wrtnorelbuf(rel, nbuf);
+       
+       /*
+        * now delete the tuple from the old bucket.  after this
+        * section of code, 'ooffnum' will actually point to the
+        * ItemId to which we would point if we had advanced it
+        * before the deletion (PageIndexTupleDelete repacks the
+        * ItemId array).  this also means that 'omaxoffnum' is
+        * exactly one less than it used to be, so we really can
+        * just decrement it instead of calling
+        * PageGetMaxOffsetNumber.
+        */
+       PageIndexTupleDelete(opage, ooffnum);
+       _hash_wrtnorelbuf(rel, obuf);
+       omaxoffnum = OffsetNumberPrev(omaxoffnum);
+       
+       /*
+        * tidy up.  if the old page was an overflow page and it
+        * is now empty, we must free it (we want to preserve the
+        * invariant that overflow pages cannot be empty).
+        */
+       if (PageIsEmpty(opage) &&
+       (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) {
+       /* continue with whatever buffer _hash_freeovflpage hands back */
+       obuf = _hash_freeovflpage(rel, obuf);
+       
+       /* check that we're not through the bucket chain */
+       if (BufferIsInvalid(obuf)) {
+           _hash_wrtbuf(rel, nbuf);
+           _hash_squeezebucket(rel, metap, obucket);
+           return;
+       }
+       
+       /* 
+        * re-init. again, we're guaranteed that an ovfl page
+        * has at least one tuple.
+        */
+       opage = BufferGetPage(obuf);
+       _hash_checkpage(opage, LH_OVERFLOW_PAGE);
+       oblkno = BufferGetBlockNumber(obuf);
+       oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+       if (PageIsEmpty(opage)) {
+           elog(WARN, "_hash_splitpage: empty overflow page %d",
+            oblkno);
+       }
+       ooffnum = FirstOffsetNumber;
+       omaxoffnum = PageGetMaxOffsetNumber(opage);
+       }
+   } else {
+       /*
+        * the tuple stays on this page.  we didn't move anything,
+        * so we didn't delete anything and therefore we don't
+        * have to change 'omaxoffnum'.
+        *
+        * XXX any hash value from [0, nbucket-1] will map to this
+        * bucket, which doesn't make sense to me.
+        */
+       ooffnum = OffsetNumberNext(ooffnum);
+   }
+    }
+    /*NOTREACHED*/
+}
diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c

new file mode 100644 (file)

index 0000000..c4cce0e
--- /dev/null
+++ b/src/backend/access/hash/hashscan.c
@@ -0,0 +1,172 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashscan.c--
+ *    manage scans on hash tables
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ * NOTES
+ *    Because we can be doing an index scan on a relation while we
+ *    update it, we need to avoid missing data that moves around in
+ *    the index.  The routines and global variables in this file
+ *    guarantee that all scans in the local address space stay
+ *    correctly positioned.  This is all we need to worry about, since
+ *    write locking guarantees that no one else will be on the same
+ *    page at the same time as we are.
+ *
+ *    The scheme is to manage a list of active scans in the current
+ *    backend.  Whenever we add or remove records from an index, we
+ *    check the list of active scans to see if any has been affected.
+ *    A scan is affected only if it is on the same relation, and the
+ *    same page, as the update.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/hash.h"
+
+static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
+static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
+
+/*
+ * One cell of the singly linked list of scans registered with
+ * _hash_regscan.
+ */
+typedef struct HashScanListData {
+    IndexScanDesc      hashsl_scan;    /* the registered scan */
+    struct HashScanListData    *hashsl_next;   /* next cell, or NULL */
+} HashScanListData;
+
+typedef HashScanListData   *HashScanList;
+
+/* head of the scan list; NULL while no scan is registered */
+static HashScanList    HashScans = (HashScanList) NULL;
+
+/*
+ *  _hash_regscan() -- register a new scan.
+ *
+ * A freshly palloc'd list cell for 'scan' is pushed onto the front of
+ * the HashScans list.
+ */
+void
+_hash_regscan(IndexScanDesc scan)
+{
+    HashScanList cell = (HashScanList) palloc(sizeof(HashScanListData));
+
+    cell->hashsl_next = HashScans;
+    cell->hashsl_scan = scan;
+    HashScans = cell;
+}
+
+/*
+ *  _hash_dropscan() -- drop a scan from the scan list
+ */
+void
+_hash_dropscan(IndexScanDesc scan)
+{
+    HashScanList chk, last;
+    
+    last = (HashScanList) NULL;
+    for (chk = HashScans;
+    chk != (HashScanList) NULL && chk->hashsl_scan != scan;
+    chk = chk->hashsl_next) {
+   last = chk;
+    }
+    
+    if (chk == (HashScanList) NULL)
+   elog(WARN, "hash scan list trashed; can't find 0x%lx", scan);
+    
+    if (last == (HashScanList) NULL)
+   HashScans = chk->hashsl_next;
+    else
+   last->hashsl_next = chk->hashsl_next;
+    
+#ifdef PERFECT_MEM
+    pfree (chk);
+#endif /* PERFECT_MEM */
+}
+
+/*
+ *  _hash_adjscans() -- adjust registered scans for an item at 'tid'.
+ *
+ * Every scan on the HashScans list whose relation has the same rd_id
+ * as 'rel' is passed to _hash_scandel together with the block and
+ * offset numbers taken from 'tid'.
+ */
+void
+_hash_adjscans(Relation rel, ItemPointer tid)
+{
+    Oid target = rel->rd_id;
+    HashScanList cell = HashScans;
+
+    while (cell != (HashScanList) NULL) {
+        IndexScanDesc candidate = cell->hashsl_scan;
+
+        if (target == candidate->relation->rd_id) {
+            _hash_scandel(candidate, ItemPointerGetBlockNumber(tid),
+                          ItemPointerGetOffsetNumber(tid));
+        }
+        cell = cell->hashsl_next;
+    }
+}
+
+/*
+ *  _hash_scandel() -- reposition one scan around item (blkno, offno).
+ *
+ * Nothing happens unless _hash_scantouched reports that the scan's
+ * current or marked position lies on block 'blkno' at an offset
+ * >= 'offno'.  Each affected position is moved one step backward with
+ * _hash_step.  _hash_step is always invoked on the scan's
+ * currentItemData, so the marked position is temporarily swapped into
+ * currentItemData for the call and swapped back afterwards.
+ *
+ * NOTE(review): the meta page buffer obtained here is not released in
+ * this function; presumably _hash_step takes care of it -- confirm.
+ */
+static void
+_hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
+{
+    ItemPointer current;
+    Buffer buf;
+    Buffer metabuf;
+    HashScanOpaque so;
+    
+    if (!_hash_scantouched(scan, blkno, offno))
+   return;
+    
+    metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ);
+    
+    so = (HashScanOpaque) scan->opaque;
+    buf = so->hashso_curbuf;
+    
+    /* back up the current position if it is affected */
+    current = &(scan->currentItemData);
+    if (ItemPointerIsValid(current)
+   && ItemPointerGetBlockNumber(current) == blkno
+   && ItemPointerGetOffsetNumber(current) >= offno) {
+   _hash_step(scan, &buf, BackwardScanDirection, metabuf);
+   so->hashso_curbuf = buf;
+    }
+    
+    /*
+     * back up the marked position if it is affected.
+     *
+     * NOTE(review): 'buf' still carries the current-position buffer
+     * here (it is never loaded from so->hashso_mrkbuf) -- confirm this
+     * is intended.
+     */
+    current = &(scan->currentMarkData);
+    if (ItemPointerIsValid(current)
+   && ItemPointerGetBlockNumber(current) == blkno
+   && ItemPointerGetOffsetNumber(current) >= offno) {
+   ItemPointerData tmp;
+   /* swap mark <-> current so _hash_step operates on the mark */
+   tmp = *current;
+   *current = scan->currentItemData;
+   scan->currentItemData = tmp;
+   _hash_step(scan, &buf, BackwardScanDirection, metabuf);
+   so->hashso_mrkbuf = buf;
+   /* swap back */
+   tmp = *current;
+   *current = scan->currentItemData;
+   scan->currentItemData = tmp;
+    }
+}
+
+/*
+ *  _hash_scantouched() -- is 'scan' positioned at or after an item?
+ *
+ * Returns true when either the scan's current position or its marked
+ * position is valid, lies on block 'blkno', and has an offset number
+ * >= 'offno'; returns false otherwise.
+ */
+static bool
+_hash_scantouched(IndexScanDesc scan,
+                  BlockNumber blkno,
+                  OffsetNumber offno)
+{
+    ItemPointer positions[2];
+    int which;
+
+    /* examine the current position first, then the marked one */
+    positions[0] = &(scan->currentItemData);
+    positions[1] = &(scan->currentMarkData);
+
+    for (which = 0; which < 2; which++) {
+        ItemPointer pos = positions[which];
+
+        if (!ItemPointerIsValid(pos))
+            continue;
+        if (ItemPointerGetBlockNumber(pos) != blkno)
+            continue;
+        if (ItemPointerGetOffsetNumber(pos) >= offno)
+            return (true);
+    }
+
+    return (false);
+}
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c

new file mode 100644 (file)

index 0000000..056235d
--- /dev/null
+++ b/src/backend/access/hash/hashsearch.c
@@ -0,0 +1,425 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashsearch.c--
+ *    search code for postgres hash tables
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "fmgr.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/skey.h"
+#include "access/sdir.h"
+#include "access/hash.h"
+
+/*
+ *  _hash_search() -- Find the bucket page that corresponds to the
+ *  scankey and load it into *bufP.  The buffer is obtained with
+ *  HASH_READ access.
+ *
+ *  'keysz' is part of the interface but is not examined here.
+ */
+void
+_hash_search(Relation rel,
+        int keysz,
+        ScanKey scankey,
+        Buffer *bufP,
+        HashMetaPage metap)
+{
+    Bucket bucket;
+    Datum keyDatum;
+    BlockNumber blkno;
+
+    /*
+     * With no scankey at all, or a scankey whose argument is NULL, all
+     * tuples satisfy the scan, so it starts at the first bucket
+     * (bucket 0).  Otherwise the key's hash selects the bucket.
+     */
+    bucket = 0;
+    if (scankey != (ScanKey) NULL) {
+        keyDatum = scankey[0].sk_argument;
+        if (keyDatum != (Datum) NULL)
+            bucket = _hash_call(rel, metap, keyDatum);
+    }
+
+    blkno = BUCKET_TO_BLKNO(bucket);
+    *bufP = _hash_getbuf(rel, blkno, HASH_READ);
+}
+
+/*
+ *  _hash_next() -- Get the next item in a scan.
+ *
+ * On entry the scan has a valid currentItemData.  The buffer cached
+ * in the scan's opaque data is reused when it is valid; otherwise the
+ * page named by currentItemData is fetched again with HASH_READ.
+ *
+ * Returns a RetrieveIndexResult for the next qualifying item in
+ * direction 'dir', or NULL when _hash_step finds none.
+ */
+RetrieveIndexResult
+_hash_next(IndexScanDesc scan, ScanDirection dir)
+{
+    Relation rel = scan->relation;
+    HashScanOpaque so = (HashScanOpaque) scan->opaque;
+    ItemPointer current = &(scan->currentItemData);
+    Buffer metabuf;
+    Buffer buf;
+    Page page;
+    OffsetNumber offnum;
+    HashItem hitem;
+    ItemPointer iptr;
+
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+
+    /*
+     *  XXX 10 may 91:  somewhere there's a bug in our management of the
+     *  cached buffer for this scan.  wei discovered it.  the following
+     *  is a workaround so he can work until i figure out what's going on.
+     */
+    if (!BufferIsValid(so->hashso_curbuf)) {
+        BlockNumber curblk = ItemPointerGetBlockNumber(current);
+
+        so->hashso_curbuf = _hash_getbuf(rel, curblk, HASH_READ);
+    }
+    buf = so->hashso_curbuf;
+
+    /*
+     * Advance to the next valid tuple.  _hash_step releases 'metabuf';
+     * if it moves to a different page, 'buf' comes back referring to
+     * that page.
+     */
+    if (!_hash_step(scan, &buf, dir, metabuf))
+        return ((RetrieveIndexResult) NULL);
+
+    /* _hash_step left the scan's current item on a valid tuple */
+    offnum = ItemPointerGetOffsetNumber(current);
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
+
+    /* hand back a palloc'd copy of the tid stored in the index tuple */
+    iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
+    memmove((char *) iptr, (char *) &(hitem->hash_itup.t_tid),
+            sizeof(ItemPointerData));
+
+    return (FormRetrieveIndexResult(current, iptr));
+}
+
+/*
+ *  _hash_readnext() -- move to the next page in a bucket chain.
+ *
+ * Releases *bufp, then follows hasho_nextblkno from *opaquep.  If
+ * there is a next page, *bufp, *pagep and *opaquep are updated to
+ * describe it; otherwise *bufp is set to InvalidBuffer and *pagep and
+ * *opaquep are left as they were.
+ */
+static void
+_hash_readnext(Relation rel,
+          Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
+{
+    BlockNumber nextblk = (*opaquep)->hasho_nextblkno;
+
+    _hash_relbuf(rel, *bufp, HASH_READ);
+    *bufp = InvalidBuffer;
+
+    if (!BlockNumberIsValid(nextblk))
+        return;             /* end of chain */
+
+    *bufp = _hash_getbuf(rel, nextblk, HASH_READ);
+    *pagep = BufferGetPage(*bufp);
+    _hash_checkpage(*pagep, LH_OVERFLOW_PAGE);
+    *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
+    Assert(!PageIsEmpty(*pagep));
+}
+
+/*
+ *  _hash_readprev() -- move to the previous page in a bucket chain.
+ *
+ * Releases *bufp, then follows hasho_prevblkno from *opaquep.  If
+ * there is a previous page, *pagep and *opaquep are updated to
+ * describe it.  *bufp ends up as InvalidBuffer when there is no
+ * previous page, or when the previous page is empty (in which case it
+ * is released again).
+ */
+static void
+_hash_readprev(Relation rel,
+          Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
+{
+    BlockNumber prevblk = (*opaquep)->hasho_prevblkno;
+
+    _hash_relbuf(rel, *bufp, HASH_READ);
+    *bufp = InvalidBuffer;
+
+    if (!BlockNumberIsValid(prevblk))
+        return;             /* start of chain */
+
+    *bufp = _hash_getbuf(rel, prevblk, HASH_READ);
+    *pagep = BufferGetPage(*bufp);
+    _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
+
+    if (!PageIsEmpty(*pagep))
+        return;
+
+    /* an empty page is asserted to be the bucket page; drop it */
+    Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE);
+    _hash_relbuf(rel, *bufp, HASH_READ);
+    *bufp = InvalidBuffer;
+}
+
+/*
+ *  _hash_first() -- Find the first item in a scan.
+ *
+ * Return the RetrieveIndexResult of the first item in the index that
+ * satisfies the qualification associated with the scan descriptor,
+ * or NULL if there is none.  On a successful exit, the page containing
+ * the current index tuple is held with HASH_READ access and the
+ * scan's opaque data entry is updated to include the buffer.
+ *
+ * On every NULL return, neither the metapage buffer nor a bucket
+ * page buffer is left held by this routine.
+ */
+RetrieveIndexResult
+_hash_first(IndexScanDesc scan, ScanDirection dir)
+{
+    Relation rel;
+    Buffer buf;
+    Buffer metabuf;
+    Page page;
+    HashPageOpaque opaque;
+    HashMetaPage metap;
+    HashItem hitem;
+    IndexTuple itup;
+    ItemPointer current;
+    ItemPointer iptr;
+    OffsetNumber offnum;
+    RetrieveIndexResult res;
+    HashScanOpaque so;
+
+    rel = scan->relation;
+    so = (HashScanOpaque) scan->opaque;
+    current = &(scan->currentItemData);
+
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+
+    /*
+     *  XXX -- The attribute number stored in the scan key is the attno
+     *         in the heap relation.  We need to transmogrify this into
+     *         the index relation attno here.  For the moment, we have
+     *         hardwired attno == 1.
+     */
+
+    /* find the correct bucket page and load it into buf */
+    _hash_search(rel, 1, scan->keyData, &buf, metap);
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE);
+    opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+    /*
+     * if we are scanning forward, we need to find the first non-empty
+     * page (if any) in the bucket chain.  since overflow pages are
+     * never empty, this had better be either the bucket page or the
+     * first overflow page.
+     *
+     * if we are scanning backward, we always go all the way to the
+     * end of the bucket chain.
+     */
+    if (PageIsEmpty(page)) {
+        if (BlockNumberIsValid(opaque->hasho_nextblkno)) {
+            _hash_readnext(rel, &buf, &page, &opaque);
+        } else {
+            ItemPointerSetInvalid(current);
+            so->hashso_curbuf = InvalidBuffer;
+
+            /*
+             * The bucket is completely empty.  For a keyed scan that
+             * means there is nothing to return, but the buffers taken
+             * above must be given back first; returning without doing
+             * so would leave both pages held.
+             *
+             * A scan without keys is satisfied by tuples in any
+             * bucket, so in that case fall through and let _hash_step
+             * move on to the other buckets (it releases 'buf' and
+             * 'metabuf' itself).
+             */
+            if (scan->numberOfKeys >= 1) {
+                _hash_relbuf(rel, buf, HASH_READ);
+                _hash_relbuf(rel, metabuf, HASH_READ);
+                return ((RetrieveIndexResult) NULL);
+            }
+        }
+    }
+    if (ScanDirectionIsBackward(dir)) {
+        while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
+            _hash_readnext(rel, &buf, &page, &opaque);
+        }
+    }
+
+    /* _hash_step releases metabuf whether or not it finds a tuple */
+    if (!_hash_step(scan, &buf, dir, metabuf)) {
+        return ((RetrieveIndexResult) NULL);
+    }
+
+    /* if we're here, _hash_step found a valid tuple */
+    current = &(scan->currentItemData);
+    offnum = ItemPointerGetOffsetNumber(current);
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
+    itup = &hitem->hash_itup;
+
+    /* the result carries a palloc'd copy of the tid in the index tuple */
+    iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
+    memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
+    res = FormRetrieveIndexResult(current, iptr);
+
+    return (res);
+}
+
+/*
+ *  _hash_step() -- step to the next valid item in a scan in the bucket.
+ *
+ * If no valid record exists in the requested direction, return
+ * false.  Else, return true and set the CurrentItemData for the
+ * scan to the right thing.
+ *
+ * 'bufP' points to the buffer which contains the current page
+ * that we'll step through.  On success *bufP and the scan's
+ * hashso_curbuf are set to the buffer holding the tuple found; on
+ * failure both are set to InvalidBuffer and the scan's current item
+ * is invalidated.
+ *
+ * A scan with no keys (numberOfKeys < 1) is not confined to one
+ * bucket: when a bucket chain is exhausted, the step continues into
+ * the next (forward) or previous (backward) bucket.
+ *
+ * 'metabuf' is released when this returns.
+ */
+bool
+_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
+{
+    Relation rel;
+    ItemPointer current;
+    HashScanOpaque so;
+    int allbuckets;
+    HashMetaPage metap;
+    Buffer buf;
+    Page page;
+    HashPageOpaque opaque;
+    OffsetNumber maxoff;
+    OffsetNumber offnum;
+    Bucket bucket;
+    BlockNumber blkno;
+    HashItem hitem;
+    IndexTuple itup;
+
+    rel = scan->relation;
+    current = &(scan->currentItemData);
+    so = (HashScanOpaque) scan->opaque;
+    /* with no scan keys, every bucket has to be visited */
+    allbuckets = (scan->numberOfKeys < 1);
+
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+
+    buf = *bufP;
+    page = BufferGetPage(buf);
+    _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+    opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+    /*
+     * If _hash_step is called from _hash_first, current will not be
+     * valid, so we can't dereference it.  However, in that case, we
+     * presumably want to start at the beginning/end of the page...
+     */
+    maxoff = PageGetMaxOffsetNumber(page);
+    if (ItemPointerIsValid(current)) {
+   offnum = ItemPointerGetOffsetNumber(current);
+    } else {
+   offnum = InvalidOffsetNumber;
+    }
+
+    /*
+     * 'offnum' now points to the last tuple we have seen (if any).
+     *
+     * continue to step through tuples until:
+     *       1) we get to the end of the bucket chain or
+     *       2) we find a valid tuple.
+     */
+    do {
+   /* remember which bucket this page belongs to before moving on */
+   bucket = opaque->hasho_bucket;
+
+   switch (dir) {
+   case ForwardScanDirection:
+       if (offnum != InvalidOffsetNumber) {
+       offnum = OffsetNumberNext(offnum);  /* move forward */
+       } else {
+       offnum = FirstOffsetNumber;     /* new page */
+       }
+       while (offnum > maxoff) {
+       /*
+        * either this page is empty (maxoff ==
+        * InvalidOffsetNumber) or we ran off the end.
+        */
+       _hash_readnext(rel, &buf, &page, &opaque);
+       if (BufferIsInvalid(buf)) { /* end of chain */
+           if (allbuckets && bucket < metap->hashm_maxbucket) {
+           /* keyless scan: continue with the next bucket's page */
+           ++bucket;
+           blkno = BUCKET_TO_BLKNO(bucket);
+           buf = _hash_getbuf(rel, blkno, HASH_READ);
+           page = BufferGetPage(buf);
+           _hash_checkpage(page, LH_BUCKET_PAGE);
+           opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+           Assert(opaque->hasho_bucket == bucket);
+           /* skip an empty bucket page if the chain continues */
+           while (PageIsEmpty(page) &&
+                  BlockNumberIsValid(opaque->hasho_nextblkno)) {
+               _hash_readnext(rel, &buf, &page, &opaque);
+           }
+           maxoff = PageGetMaxOffsetNumber(page);
+           offnum = FirstOffsetNumber;
+           } else {
+           maxoff = offnum = InvalidOffsetNumber;
+           break;  /* while */
+           }
+       } else {
+           /* _hash_readnext never returns an empty page */
+           maxoff = PageGetMaxOffsetNumber(page);
+           offnum = FirstOffsetNumber;
+       }
+       }
+       break;
+   case BackwardScanDirection:
+       if (offnum != InvalidOffsetNumber) {
+       offnum = OffsetNumberPrev(offnum);  /* move back */
+       } else {
+       offnum = maxoff;            /* new page */
+       }
+       while (offnum < FirstOffsetNumber) {
+       /*
+        * either this page is empty (offnum ==
+        * InvalidOffsetNumber) or we ran off the end.
+        */
+       _hash_readprev(rel, &buf, &page, &opaque);
+       if (BufferIsInvalid(buf)) { /* end of chain */
+           if (allbuckets && bucket > 0) {
+           /*
+            * keyless scan: continue with the previous bucket,
+            * starting from the last page of its chain
+            */
+           --bucket;
+           blkno = BUCKET_TO_BLKNO(bucket);
+           buf = _hash_getbuf(rel, blkno, HASH_READ);
+           page = BufferGetPage(buf);
+           _hash_checkpage(page, LH_BUCKET_PAGE);
+           opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+           Assert(opaque->hasho_bucket == bucket);
+           while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
+               _hash_readnext(rel, &buf, &page, &opaque);
+           }
+           maxoff = offnum = PageGetMaxOffsetNumber(page);
+           } else {
+           maxoff = offnum = InvalidOffsetNumber;
+           break;  /* while */
+           }
+       } else {
+           /* _hash_readprev never returns an empty page */
+           maxoff = offnum = PageGetMaxOffsetNumber(page);
+       }
+       }
+       break;
+   default:
+       /* NoMovementScanDirection */
+       /* this should not be reached */
+       /*
+        * NOTE(review): offnum is left unchanged here, so if this
+        * branch were reached with a valid offnum whose tuple fails
+        * _hash_checkqual, the enclosing loop would test the same
+        * tuple again -- confirm callers never pass this direction.
+        */
+       break;
+   }
+
+   /* we ran off the end of the world without finding a match */
+   if (offnum == InvalidOffsetNumber) {
+       /*
+        * In the forward/backward cases 'buf' was already released
+        * (and set invalid) by _hash_readnext/_hash_readprev, so only
+        * the metapage remains to be released.
+        */
+       _hash_relbuf(rel, metabuf, HASH_READ);
+       *bufP = so->hashso_curbuf = InvalidBuffer;
+       ItemPointerSetInvalid(current);
+       return(false);
+   }
+   
+   /* get ready to check this tuple */
+   hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
+   itup = &hitem->hash_itup;
+    } while (!_hash_checkqual(scan, itup));
+   
+    /* if we made it to here, we've found a valid tuple */
+    _hash_relbuf(rel, metabuf, HASH_READ);
+    blkno = BufferGetBlockNumber(buf);
+    *bufP = so->hashso_curbuf = buf;
+    ItemPointerSet(current, blkno, offnum);
+    return(true);
+}
diff --git a/src/backend/access/hash/hashstrat.c b/src/backend/access/hash/hashstrat.c

new file mode 100644 (file)

index 0000000..cac2a58
--- /dev/null
+++ b/src/backend/access/hash/hashstrat.c
@@ -0,0 +1,104 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashstrat.c--
+ *    Strategy map entries for the hash indexed access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+
+/*
+ *  only one valid strategy for hash tables: equality.
+ *
+ *  Each of the three transform tables below therefore has exactly one
+ *  entry.  Going by their names they give the negated, commuted and
+ *  negated-and-commuted form of that strategy; only the commuted form
+ *  maps to a valid strategy (equality itself) -- TODO confirm against
+ *  the StrategyTransformMap definition.
+ */
+
+static StrategyNumber  HTNegate[1] = {
+    InvalidStrategy
+};
+
+static StrategyNumber  HTCommute[1] = {
+    HTEqualStrategyNumber
+};
+
+static StrategyNumber  HTNegateCommute[1] = {
+    InvalidStrategy
+};
+
+/*
+ *  Strategy evaluation data handed to RelationGetStrategy() and
+ *  RelationInvokeStrategy() by the routines in this file.  The
+ *  initializer supplies, in order: the maximum strategy number, the
+ *  three transform tables above, and twelve NULL entries (presumably
+ *  unused expression slots -- verify against StrategyEvaluationData).
+ */
+static StrategyEvaluationData  HTEvaluationData = {
+    /* XXX static for simplicity */
+
+    HTMaxStrategyNumber,
+    (StrategyTransformMap)HTNegate,
+    (StrategyTransformMap)HTCommute,
+    (StrategyTransformMap)HTNegateCommute,
+    {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
+};
+
+/* ----------------------------------------------------------------
+ * _hash_getstrat
+ *
+ * Look up the strategy number for procedure 'proc' on attribute
+ * 'attno' of 'rel', using the hash strategy evaluation data.
+ * The result is asserted to be a valid strategy number.
+ * ----------------------------------------------------------------
+ */
+
+StrategyNumber
+_hash_getstrat(Relation rel,
+          AttrNumber attno,
+          RegProcedure proc)
+{
+    StrategyNumber found = RelationGetStrategy(rel, attno,
+                                               &HTEvaluationData, proc);
+
+    Assert(StrategyNumberIsValid(found));
+    return (found);
+}
+
+/*
+ * _hash_invokestrat -- apply strategy 'strat' for attribute 'attno' of
+ * 'rel' to the operands 'left' and 'right', using the hash strategy
+ * evaluation data, and return what RelationInvokeStrategy returns.
+ */
+bool
+_hash_invokestrat(Relation rel,
+         AttrNumber attno,
+         StrategyNumber strat,
+         Datum left,
+         Datum right)
+{
+    return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat, 
+                  left, right));
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c

new file mode 100644 (file)

index 0000000..f8f49fe
--- /dev/null
+++ b/src/backend/access/hash/hashutil.c
@@ -0,0 +1,147 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashutil.c--
+ *    Utility code for Postgres hash implementation.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/iqual.h"
+#include "access/hash.h"
+
+/*
+ * _hash_mkscankey -- build a scan key array from an index tuple.
+ *
+ * Allocates (with palloc) one ScanKeyData per attribute of 'rel' and
+ * initializes entry i with attribute number i+1, the hash procedure
+ * recorded in the metapage, and the attribute's value taken from
+ * 'itup'.  The null indication reported by index_getattr is not used.
+ */
+ScanKey
+_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
+{
+    int natts = rel->rd_rel->relnatts;
+    TupleDesc itupdesc = RelationGetTupleDescriptor(rel);
+    ScanKey skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
+    AttrNumber slot;
+
+    for (slot = 0; slot < natts; slot++) {
+        bool isNull;
+        Datum value;
+
+        value = index_getattr(itup, slot + 1, itupdesc, &isNull);
+        ScanKeyEntryInitialize(&skey[slot],
+                               0x0, (AttrNumber) (slot + 1),
+                               metap->hashm_procid, value);
+    }
+
+    return (skey);
+}
+
+/*
+ * _hash_freeskey -- release a scan key array with pfree (the
+ * counterpart of the palloc done in _hash_mkscankey).
+ */
+void
+_hash_freeskey(ScanKey skey)
+{
+    pfree(skey);
+}
+
+
+/*
+ * _hash_checkqual -- does 'itup' satisfy the scan's qualification?
+ *
+ * A scan without keys accepts every tuple; otherwise the answer is
+ * whatever index_keytest reports for the scan's keys.
+ */
+bool
+_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
+{
+    if (!(scan->numberOfKeys > 0))
+        return (true);
+
+    return (index_keytest(itup,
+                          RelationGetTupleDescriptor(scan->relation),
+                          scan->numberOfKeys, scan->keyData));
+}
+
+/*
+ * _hash_formitem -- wrap an index tuple in a freshly palloc'd hash item.
+ *
+ * An elog(WARN) is raised if the tuple's info word has the
+ * INDEX_NULL_MASK bit set.  The new item is sized as the index tuple
+ * plus whatever HashItemData adds on top of IndexTupleData, and the
+ * tuple is copied into its hash_itup member.
+ */
+HashItem
+_hash_formitem(IndexTuple itup)
+{
+    Size tuplen;
+    int nbytes_hitem;
+    HashItem hitem;
+
+    /* disallow nulls in hash keys */
+    if (itup->t_info & INDEX_NULL_MASK)
+        elog(WARN, "hash indices cannot include null keys");
+
+    /* room for the item header in addition to the tuple itself */
+    tuplen = IndexTupleSize(itup);
+    nbytes_hitem = (sizeof(HashItemData) - sizeof(IndexTupleData)) + tuplen;
+
+    hitem = (HashItem) palloc(nbytes_hitem);
+    memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);
+
+    return (hitem);
+}
+
+/*
+ * _hash_call -- map a key to its bucket number.
+ *
+ * Invokes the hash procedure recorded in the metapage on 'key', masks
+ * the result with hashm_highmask and, if that exceeds hashm_maxbucket,
+ * masks it again with hashm_lowmask.  'rel' is not used.
+ */
+Bucket
+_hash_call(Relation rel, HashMetaPage metap, Datum key)
+{
+    uint32 hashval = (uint32) fmgr(metap->hashm_procid, key);
+    Bucket bucket = hashval & metap->hashm_highmask;
+
+    if (bucket > metap->hashm_maxbucket)
+        bucket &= metap->hashm_lowmask;
+
+    return (bucket);
+}
+
+/*
+ * _hash_log2 -- returns ceil(lg2(num))
+ *
+ * That is, the smallest i such that 2^i >= num.  Both 0 and 1 yield 0.
+ *
+ * The power of two is tracked in an unsigned 'limit'.  When num is
+ * larger than the largest power of two that 'limit' can hold, doubling
+ * it wraps around to 0; without a check the comparison 'limit < num'
+ * would then stay true forever.  The wraparound is detected and the
+ * loop stops with the correct count.
+ */
+uint32
+_hash_log2(uint32 num)
+{
+    uint32 i, limit;
+
+    i = 0;
+    limit = 1;
+    while (limit < num) {
+        i++;
+        limit = limit << 1;
+        if (limit == 0)     /* shifted the top bit out: no larger power */
+            break;
+    }
+    return (i);
+}
+
+/*
+ * _hash_checkpage -- sanity checks on the format of all hash pages
+ *
+ * All checks are Asserts.  The page header bounds are always checked;
+ * when 'flags' is nonzero the page's hasho_flag must additionally
+ * have at least one of the given bits set.
+ */
+void
+_hash_checkpage(Page page, int flags)
+{
+    PageHeader ph = (PageHeader) page;
+
+    Assert(page);
+    Assert(ph->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
+#if 1
+    Assert(ph->pd_upper <=
+           (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
+    Assert(ph->pd_special ==
+           (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
+    Assert(ph->pd_opaque.od_pagesize == BLCKSZ);
+#endif
+
+    /* the page-type check is optional */
+    if (flags) {
+        HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+        Assert(opaque->hasho_flag & flags);
+    }
+}
diff --git a/src/backend/access/heap/Makefile.inc b/src/backend/access/heap/Makefile.inc

new file mode 100644 (file)

index 0000000..f4f4bbb
--- /dev/null
+++ b/src/backend/access/heap/Makefile.inc
@@ -0,0 +1,14 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/heap
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/heap/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+# Append the access/heap source files to SUBSRCS.  (SUBSRCS is defined
+# and consumed by whichever makefile includes this fragment -- not
+# visible from here.)
+SUBSRCS+= heapam.c hio.c stats.c
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c

new file mode 100644 (file)

index 0000000..4bf31ef
--- /dev/null
+++ b/src/backend/access/heap/heapam.c
@@ -0,0 +1,1507 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapam.c--
+ *    heap access method code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+ *
+ *
+ * INTERFACE ROUTINES
+ * heapgettup  - fetch next heap tuple from a scan
+ * heap_open   - open a heap relation by relationId
+ * heap_openr  - open a heap relation by name
+ * heap_close  - close a heap relation
+ * heap_beginscan  - begin relation scan
+ * heap_rescan - restart a relation scan
+ * heap_endscan    - end relation scan
+ * heap_getnext    - retrieve next tuple in scan
+ * heap_fetch  - retrieve tuple with tid
+ * heap_insert - insert tuple into a relation
+ * heap_delete - delete a tuple from a relation
+ * heap_replace    - replace a tuple in a relation with another tuple
+ * heap_markpos    - mark scan position
+ * heap_restrpos   - restore position to marked location
+ * 
+ * NOTES
+ *    This file contains the heap_ routines which implement
+ *    the POSTGRES heap access method used for all POSTGRES
+ *    relations.  
+ *
+ * OLD COMMENTS
+ * struct relscan hints:  (struct should be made AM independent?)
+ *
+ * rs_ctid is the tid of the last tuple returned by getnext.
+ * rs_ptid and rs_ntid are the tids of the previous and next tuples
+ * returned by getnext, respectively.  NULL indicates an end of
+ * scan (either direction); NON indicates an unknown value.
+ *
+ * possible combinations:
+ * rs_p    rs_c    rs_n        interpretation
+ * NULL    NULL    NULL        empty scan
+ * NULL    NULL    NON     at beginning of scan
+ * NULL    NULL    t1      at beginning of scan (with cached tid)
+ * NON NULL    NULL        at end of scan
+ * t1  NULL    NULL        at end of scan (with cached tid)
+ * NULL    t1  NULL        just returned only tuple
+ * NULL    t1  NON     just returned first tuple
+ * NULL    t1  t2      returned first tuple (with cached tid)
+ * NON t1  NULL        just returned last tuple
+ * t2  t1  NULL        returned last tuple (with cached tid)
+ * t1  t2  NON     in the middle of a forward scan
+ * NON t2  t1      in the middle of a reverse scan
+ * ti  tj  tk      in the middle of a scan (w cached tid)
+ *
+ * Here NULL is ...tup == NULL && ...buf == InvalidBuffer,
+ * and NON is ...tup == NULL && ...buf == UnknownBuffer.
+ *
+ * Currently, the NONTID values are not cached with their actual
+ * values by getnext.  Values may be cached by markpos since it stores
+ * all three tids.
+ *
+ * NOTE:  the calls to elog() must stop.  Should decide on an interface
+ * between the general and specific AM calls.
+ *
+ *     XXX probably do not need a free tuple routine for heaps.
+ *     Huh?  Free tuple is not necessary for tuples returned by scans, but
+ *     is necessary for tuples which are returned by
+ * RelationGetTupleByItemPointer. -hirohama
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <sys/file.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/heapam.h"
+#include "access/hio.h"
+#include "access/htup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+
+#include "utils/tqual.h"
+#include "access/valid.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+#include "catalog/catname.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "storage/itemptr.h"
+#include "storage/lmgr.h"
+
+#include "tcop/tcopdebug.h"
+#include "miscadmin.h"
+
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+#include "fmgr.h"
+#include "utils/inval.h"
+#include "utils/elog.h"
+#include "utils/mcxt.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+
+/*
+ * File-local flag; as a static object it starts out zero (false).
+ * NOTE(review): the code that sets and tests it lies outside the part
+ * of this file examined here -- presumably it controls whether cache
+ * invalidation happens immediately; confirm at the use sites.
+ */
+static bool    ImmediateInvalidation;
+
+/* ----------------------------------------------------------------
+ *                       heap support routines
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * initsdesc - sdesc code common to heap_beginscan and heap_rescan
+ *
+ * Resets the previous/current/next tuple pointers and buffers of
+ * 'sdesc', clears the marked position, and copies 'nkeys' scan keys
+ * from 'key' into the descriptor when 'key' is not NULL.
+ * ----------------
+ */
+static void
+initsdesc(HeapScanDesc sdesc,
+     Relation relation,
+     int atend,
+     unsigned nkeys,
+     ScanKey key)
+{
+    /*
+     * All three tuple pointers always start out NULL and the current
+     * buffer always starts out invalid.  Only the buffer on the side
+     * the scan will move toward differs: for a non-empty relation it
+     * is marked unknown -- the previous buffer for a reverse scan
+     * (atend), the next buffer for a forward scan.
+     */
+    Buffer prevbuf = InvalidBuffer;
+    Buffer nextbuf = InvalidBuffer;
+
+    if (RelationGetNumberOfBlocks(relation)) {
+        if (atend)
+            prevbuf = UnknownBuffer;    /* reverse scan */
+        else
+            nextbuf = UnknownBuffer;    /* forward scan */
+    }
+
+    sdesc->rs_ptup = NULL;
+    sdesc->rs_ctup = NULL;
+    sdesc->rs_ntup = NULL;
+
+    sdesc->rs_pbuf = prevbuf;
+    sdesc->rs_cbuf = InvalidBuffer;
+    sdesc->rs_nbuf = nextbuf;
+
+    /* we don't have a marked position... */
+    ItemPointerSetInvalid(&(sdesc->rs_mptid));
+    ItemPointerSetInvalid(&(sdesc->rs_mctid));
+    ItemPointerSetInvalid(&(sdesc->rs_mntid));
+    ItemPointerSetInvalid(&(sdesc->rs_mcd));
+
+    /* copy the scan key, if one was supplied */
+    if (key != NULL)
+        memmove(sdesc->rs_key, key, nkeys * sizeof(ScanKeyData));
+}
+
+/* ----------------
+ * unpinsdesc - code common to heap_rescan and heap_endscan
+ *
+ * Releases whichever of the scan's previous, current and next buffers
+ * are valid, in that order.  The scan pins a buffer once for each
+ * non-NULL tuple pointer (ptup, ctup, ntup), so each buffer slot is
+ * released separately even when several slots name the same buffer.
+ * ----------------
+ */
+static void
+unpinsdesc(HeapScanDesc sdesc)
+{
+    Buffer held[3];
+    int slot;
+
+    held[0] = sdesc->rs_pbuf;
+    held[1] = sdesc->rs_cbuf;
+    held[2] = sdesc->rs_nbuf;
+
+    for (slot = 0; slot < 3; slot++) {
+        if (BufferIsValid(held[slot]))
+            ReleaseBuffer(held[slot]);
+    }
+}
+
+/* ------------------------------------------
+ * nextpage
+ *
+ * Return the page to scan after 'page': the preceding page when
+ * 'dir' is negative, the following page otherwise.
+ * ------------------------------------------
+ */
+static int
+nextpage(int page, int dir)
+{
+    if (dir < 0)
+        return (page - 1);
+
+    return (page + 1);
+}
+
+/* ----------------
+ * heapgettup - fetch next heap tuple
+ *
+ * routine used by heap_getnext() which does most of the
+ * real work in scanning tuples.
+ * ----------------
+ */
+static HeapTuple 
+heapgettup(Relation relation,
+      ItemPointer tid,
+      int dir,
+      Buffer *b,
+      TimeQual timeQual,
+      int nkeys,
+      ScanKey key)
+{
+    ItemId     lpp;
+    Page       dp;
+    int            page;
+    int            pages;
+    int            lines;
+    HeapTuple      rtup;
+    OffsetNumber   lineoff;
+    int            linesleft;
+    
+    /* ----------------
+     * increment access statistics
+     * ----------------
+     */
+    IncrHeapAccessStat(local_heapgettup);
+    IncrHeapAccessStat(global_heapgettup);
+    
+    /* ----------------
+     * debugging stuff 
+     *
+     * check validity of arguments, here and for other functions too
+     * Note: no locking manipulations needed--this is a local function
+     * ----------------
+     */
+#ifdef HEAPDEBUGALL
+    if (ItemPointerIsValid(tid)) {
+   elog(DEBUG, "heapgettup(%.16s, tid=0x%x[%d,%d], dir=%d, ...)",
+        RelationGetRelationName(relation), tid, tid->ip_blkid,
+        tid->ip_posid, dir);
+    } else {
+   elog(DEBUG, "heapgettup(%.16s, tid=0x%x, dir=%d, ...)",
+        RelationGetRelationName(relation), tid, dir);
+    }
+    elog(DEBUG, "heapgettup(..., b=0x%x, timeQ=0x%x, nkeys=%d, key=0x%x",
+    b, timeQual, nkeys, key);
+    if (timeQual == SelfTimeQual) {
+   elog(DEBUG, "heapgettup: relation(%c)=`%.16s', SelfTimeQual",
+        relation->rd_rel->relkind, &relation->rd_rel->relname);
+    } else {
+   elog(DEBUG, "heapgettup: relation(%c)=`%.16s', timeQual=%d",
+        relation->rd_rel->relkind, &relation->rd_rel->relname,
+        timeQual);
+    }
+#endif /* !defined(HEAPDEBUGALL) */
+    
+    if (!ItemPointerIsValid(tid)) {
+   Assert(!PointerIsValid(tid));
+    }
+    
+    /* ----------------
+     * return null immediately if relation is empty
+     * ----------------
+     */
+    if (!(pages = relation->rd_nblocks))
+   return (NULL);
+    
+    /* ----------------
+     * calculate next starting lineoff, given scan direction
+     * ----------------
+     */
+    if (!dir) {
+   /* ----------------
+    * ``no movement'' scan direction
+    * ----------------
+    */
+   /* assume it is a valid TID XXX */
+   if (ItemPointerIsValid(tid) == false) {
+       *b = InvalidBuffer;
+       return (NULL);
+   }
+   *b = RelationGetBufferWithBuffer(relation,
+                            ItemPointerGetBlockNumber(tid),
+                            *b);
+   
+#ifndef NO_BUFFERISVALID
+   if (!BufferIsValid(*b)) {
+       elog(WARN, "heapgettup: failed ReadBuffer");
+   }
+#endif
+   
+   dp = (Page) BufferGetPage(*b);
+   lineoff = ItemPointerGetOffsetNumber(tid);
+   lpp = PageGetItemId(dp, lineoff);
+   
+   rtup = (HeapTuple)PageGetItem((Page) dp, lpp);
+   return (rtup);
+   
+    } else if (dir < 0) {
+   /* ----------------
+    *  reverse scan direction
+    * ----------------
+    */
+   if (ItemPointerIsValid(tid) == false) {
+       tid = NULL;
+   }
+   if (tid == NULL) {
+       page = pages - 1;               /* final page */
+   } else {
+       page = ItemPointerGetBlockNumber(tid);  /* current page */
+   }
+   if (page < 0) { 
+       *b = InvalidBuffer;
+       return (NULL);
+   }
+   
+   *b = RelationGetBufferWithBuffer(relation, page, *b);
+#ifndef NO_BUFFERISVALID
+   if (!BufferIsValid(*b)) {
+       elog(WARN, "heapgettup: failed ReadBuffer");
+   }
+#endif
+   
+   dp = (Page) BufferGetPage(*b);
+   lines = PageGetMaxOffsetNumber(dp);
+   if (tid == NULL) {
+       lineoff = lines;                /* final offnum */
+   } else {
+       lineoff =                   /* previous offnum */
+       OffsetNumberPrev(ItemPointerGetOffsetNumber(tid));
+   }
+   /* page and lineoff now reference the physically previous tid */
+
+    } else {
+   /* ----------------
+    *  forward scan direction
+    * ----------------
+    */
+   if (ItemPointerIsValid(tid) == false) {
+       page = 0;                   /* first page */
+       lineoff = FirstOffsetNumber;        /* first offnum */
+   } else {
+       page = ItemPointerGetBlockNumber(tid);  /* current page */
+       lineoff =                   /* next offnum */
+       OffsetNumberNext(ItemPointerGetOffsetNumber(tid));
+   }
+   
+   if (page >= pages) {
+       *b = InvalidBuffer;
+       return (NULL);
+   }
+   /* page and lineoff now reference the physically next tid */
+
+   *b = RelationGetBufferWithBuffer(relation, page, *b);
+#ifndef NO_BUFFERISVALID
+   if (!BufferIsValid(*b)) {
+       elog(WARN, "heapgettup: failed ReadBuffer");
+   }
+#endif
+   
+   dp = (Page) BufferGetPage(*b);
+   lines = PageGetMaxOffsetNumber(dp);
+    }
+    
+    /* 'dir' is now non-zero */
+
+    /* ----------------
+     * calculate line pointer and number of remaining items
+     *  to check on this page.
+     * ----------------
+     */
+    lpp = PageGetItemId(dp, lineoff);
+    if (dir < 0) {
+   linesleft = lineoff - 1;
+    } else {
+   linesleft = lines - lineoff;
+    }
+
+    /* ----------------
+     * advance the scan until we find a qualifying tuple or
+     *  run out of stuff to scan
+     * ----------------
+     */
+    for (;;) {
+   while (linesleft >= 0) {
+       /* ----------------
+        *  if current tuple qualifies, return it.
+        * ----------------
+        */
+       if ((rtup = heap_tuple_satisfies(lpp, relation, (PageHeader) dp,
+                        timeQual, nkeys, key)) != NULL) {
+       ItemPointer iptr = &(rtup->t_ctid); 
+       if (ItemPointerGetBlockNumber(iptr) != page) {
+           /*
+            * set block id to the correct page number
+            * --- this is a hack to support the virtual fragment
+            * concept
+            */
+           ItemPointerSetBlockNumber(iptr, page);
+       }
+       return (rtup);
+       }
+       
+       /* ----------------
+        *  otherwise move to the next item on the page
+        * ----------------
+        */
+       --linesleft;
+       if (dir < 0) {
+       --lpp;  /* move back in this page's ItemId array */
+       } else {
+       ++lpp;  /* move forward in this page's ItemId array */
+       }
+   }
+   
+   /* ----------------
+    *  if we get here, it means we've exhausted the items on
+    *  this page and it's time to move to the next..
+    * ----------------
+    */
+   page = nextpage(page, dir);
+   
+   /* ----------------
+    *  return NULL if we've exhausted all the pages..
+    * ----------------
+    */
+   if (page < 0 || page >= pages) {
+       if (BufferIsValid(*b))
+       ReleaseBuffer(*b);
+       *b = InvalidBuffer;
+       return (NULL);
+   }
+   
+   *b = ReleaseAndReadBuffer(*b, relation, page);
+   
+#ifndef NO_BUFFERISVALID
+   if (!BufferIsValid(*b)) {
+       elog(WARN, "heapgettup: failed ReadBuffer");
+   }
+#endif
+   dp = (Page) BufferGetPage(*b);
+   lines = lineoff = PageGetMaxOffsetNumber((Page) dp);
+   linesleft = lines - 1;
+   if (dir < 0) {
+       lpp = PageGetItemId(dp, lineoff);
+   } else {
+       lpp = PageGetItemId(dp, FirstOffsetNumber);
+   }
+    }
+}
+
+/*
+ * doinsert - store a tuple in a relation
+ *
+ * Thin wrapper: the tuple is handed straight to
+ * RelationPutHeapTupleAtEnd(), which does all of the work.
+ */
+void
+doinsert(Relation relation, HeapTuple tup)
+{
+    RelationPutHeapTupleAtEnd(relation, tup);
+}
+
+/* 
+ * HeapScanIsValid is now a macro in relscan.h -cim 4/27/91
+ */
+
+/* ----------------
+ * SetHeapAccessMethodImmediateInvalidation
+ *
+ * Store the caller's choice in the ImmediateInvalidation flag.
+ * heap_insert, heap_delete and heap_replace hand this flag to
+ * SetRefreshWhenInvalidate() around their cache invalidation calls.
+ * ----------------
+ */
+void
+SetHeapAccessMethodImmediateInvalidation(bool on)
+{
+    ImmediateInvalidation = on;
+}
+
+/* ----------------------------------------------------------------
+ *                   heap access method interface
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * heap_open - open a heap relation given its relation id
+ *
+ * The relation cache does the real work.  This routine counts the
+ * call in the access statistics, fetches the descriptor and raises
+ * elog(WARN) when the descriptor turns out to describe an index.
+ *
+ * Returns whatever RelationIdGetRelation() produced.
+ * ----------------
+ */
+Relation
+heap_open(Oid relationId)
+{
+    Relation    rel;
+
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_open);
+    IncrHeapAccessStat(global_open);
+
+    rel = (Relation) RelationIdGetRelation(relationId);
+
+    /* nothing to complain about unless we hold a valid index descriptor */
+    if (!RelationIsValid(rel) || rel->rd_rel->relkind != RELKIND_INDEX)
+        return (rel);
+
+    elog(WARN, "%s is an index relation", rel->rd_rel->relname.data);
+
+    return (rel);
+}
+
+/* ----------------
+ * heap_openr - open a heap relation given its name
+ *
+ * Same contract as heap_open, except that the descriptor is looked
+ * up with RelationNameGetRelation().  An index descriptor triggers
+ * elog(WARN).
+ *
+ * Returns whatever RelationNameGetRelation() produced.
+ * ----------------
+ */
+Relation
+heap_openr(char *relationName)
+{
+    Relation    rel;
+
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_openr);
+    IncrHeapAccessStat(global_openr);
+
+    rel = RelationNameGetRelation(relationName);
+
+    /* nothing to complain about unless we hold a valid index descriptor */
+    if (!RelationIsValid(rel) || rel->rd_rel->relkind != RELKIND_INDEX)
+        return (rel);
+
+    elog(WARN, "%s is an index relation", rel->rd_rel->relname.data);
+
+    return (rel);
+}
+
+/* ----------------
+ * heap_close - close a heap relation
+ *
+ * Counts the call in the access statistics and passes the
+ * descriptor to RelationClose(); the relation cache routines do
+ * everything else.
+ * ----------------
+ */
+void
+heap_close(Relation relation)
+{
+    /* statistics: local counter, then global counter */
+    IncrHeapAccessStat(local_close);
+    IncrHeapAccessStat(global_close);
+
+    /* the result of RelationClose() is deliberately discarded */
+    (void) RelationClose(relation);
+}
+
+
+/* ----------------
+ * heap_beginscan - begin relation scan
+ *
+ * relation  - relation to scan; elog(WARN) if the descriptor is invalid
+ * atend     - handed to initsdesc() and remembered in rs_atend
+ * timeQual  - time qualification remembered in rs_tr; replaced by
+ *             SelfTimeQual when the relation is RELKIND_UNCATALOGED
+ * nkeys/key - scan keys, handed to initsdesc()
+ *
+ * Takes the relation level read lock, increments the relation
+ * reference count, refreshes rd_nblocks from the storage manager
+ * and returns a newly palloc'd scan descriptor.
+ * ----------------
+ */
+HeapScanDesc
+heap_beginscan(Relation relation,
+               int atend,
+               TimeQual timeQual,
+               unsigned nkeys,
+               ScanKey key)
+{
+    HeapScanDesc    scan;
+
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_beginscan);
+    IncrHeapAccessStat(global_beginscan);
+
+    /* refuse to scan through an invalid descriptor */
+    if (!RelationIsValid(relation))
+        elog(WARN, "heap_beginscan: !RelationIsValid(relation)");
+
+    /* relation level read lock */
+    RelationSetLockForRead(relation);
+
+    /* XXX someday assert SelfTimeQual if relkind == RELKIND_UNCATALOGED */
+    if (relation->rd_rel->relkind == RELKIND_UNCATALOGED)
+        timeQual = SelfTimeQual;
+
+    /* the relation stays referenced for as long as the scan exists */
+    RelationIncrementReferenceCount(relation);
+
+    /* build the scan descriptor */
+    scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
+
+    relation->rd_nblocks = smgrnblocks(relation->rd_rel->relsmgr, relation);
+    scan->rs_rd = relation;
+
+    /*
+     * Key storage is allocated here rather than in initsdesc():
+     * heap_rescan calls initsdesc() as well and must not allocate
+     * the array a second time.
+     */
+    scan->rs_key = (nkeys != 0)
+        ? (ScanKey) palloc(sizeof(ScanKeyData) * nkeys)
+        : (ScanKey) NULL;
+
+    initsdesc(scan, relation, atend, nkeys, key);
+
+    scan->rs_atend = atend;
+    scan->rs_tr = timeQual;
+    scan->rs_nkeys = (short) nkeys;
+
+    return (scan);
+}
+
+/* ----------------
+ * heap_rescan - restart a relation scan
+ *
+ * sdesc       - scan descriptor to reset
+ * scanFromEnd - handed to initsdesc() and remembered in rs_atend
+ * key         - scan keys handed to initsdesc(); the key count is
+ *               taken from the descriptor (rs_nkeys)
+ * ----------------
+ */
+void
+heap_rescan(HeapScanDesc sdesc, bool scanFromEnd, ScanKey key)
+{
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_rescan);
+    IncrHeapAccessStat(global_rescan);
+
+    /* Note: the relation level read lock is still set */
+
+    /* let go of the buffers pinned for the old scan position */
+    unpinsdesc(sdesc);
+
+    /* start over on the same relation with the same number of keys */
+    initsdesc(sdesc, sdesc->rs_rd, scanFromEnd, sdesc->rs_nkeys, key);
+    sdesc->rs_atend = (bool) scanFromEnd;
+}
+
+/* ----------------
+ * heap_endscan - end relation scan
+ *
+ * Unpins the scan buffers, drops the reference the scan held on
+ * its relation, releases the read lock when the relation has a
+ * system relation name, and frees the descriptor itself.
+ *
+ * See how to integrate with index scans.
+ * Check handling if reldesc caching.
+ * ----------------
+ */
+void
+heap_endscan(HeapScanDesc sdesc)
+{
+    Relation    rel;
+
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_endscan);
+    IncrHeapAccessStat(global_endscan);
+
+    /* Note: no locking manipulations needed */
+
+    /* let go of the buffers pinned by the scan */
+    unpinsdesc(sdesc);
+
+    /* the scan no longer references its relation */
+    rel = sdesc->rs_rd;
+    RelationDecrementReferenceCount(rel);
+
+    /* non 2-phase read locks on catalog relations: release right away */
+    if (IsSystemRelationName(RelationGetRelationName(rel)->data)) {
+        RelationUnsetLockForRead(rel);
+    }
+
+    pfree(sdesc);   /* XXX */
+}
+
+/* ----------------
+ * heap_getnext    - retrieve next tuple in scan
+ *
+ * Fix to work with index relations.
+ * ----------------
+ */
+
+/*
+ * Trace hooks for heap_getnext.
+ *
+ * With HEAPDEBUGALL defined each HEAPDEBUG_n expands to one
+ * elog(DEBUG, ...) call; otherwise every hook expands to nothing.
+ * No macro body ends in a semicolon: the call site supplies it
+ * ("HEAPDEBUG_3;"), so each hook is exactly one statement in both
+ * configurations.
+ *
+ * HEAPDEBUG_1 refers to the heap_getnext locals sdesc, backw and b
+ * by name, so it can only be used where those names are in scope.
+ * NOTE(review): it prints the pointer b with "0x%x" -- confirm that
+ * this is acceptable on platforms where pointers are wider than int.
+ */
+#ifdef HEAPDEBUGALL
+#define HEAPDEBUG_1 \
+elog(DEBUG, "heap_getnext([%s,nkeys=%d],backw=%d,0x%x) called", \
+     sdesc->rs_rd->rd_rel->relname.data, sdesc->rs_nkeys, backw, b)
+
+#define HEAPDEBUG_2 \
+     elog(DEBUG, "heap_getnext called with backw (no tracing yet)")
+
+#define HEAPDEBUG_3 \
+     elog(DEBUG, "heap_getnext returns NULL at end")
+
+#define HEAPDEBUG_4 \
+     elog(DEBUG, "heap_getnext valid buffer UNPIN'd")
+
+#define HEAPDEBUG_5 \
+     elog(DEBUG, "heap_getnext next tuple was cached")
+
+#define HEAPDEBUG_6 \
+     elog(DEBUG, "heap_getnext returning EOS")
+
+#define HEAPDEBUG_7 \
+     elog(DEBUG, "heap_getnext returning tuple")
+#else
+#define HEAPDEBUG_1
+#define HEAPDEBUG_2
+#define HEAPDEBUG_3
+#define HEAPDEBUG_4
+#define HEAPDEBUG_5
+#define HEAPDEBUG_6
+#define HEAPDEBUG_7
+#endif /* !defined(HEAPDEBUGALL) */
+     
+     
+/*
+ * heap_getnext - advance a heap scan by one tuple
+ *
+ * scandesc - scan descriptor; elog(WARN) if NULL
+ * backw    - non-zero to step backward, zero to step forward
+ * b        - if a valid pointer, receives the buffer of the returned
+ *            tuple (InvalidBuffer when NULL is returned); if not, a
+ *            local scratch variable is used instead
+ *
+ * The descriptor carries three tuple/buffer pairs: previous
+ * (rs_ptup/rs_pbuf), current (rs_ctup/rs_cbuf) and next
+ * (rs_ntup/rs_nbuf).  One step shifts the pairs in the scan direction:
+ * the current pair is copied over the pair being left behind, and the
+ * new current pair is taken from the pair ahead if one is loaded or
+ * fetched with heapgettup() otherwise.  The pair ahead is then emptied
+ * and its buffer set to UnknownBuffer.
+ *
+ * Returns the new current tuple, or NULL when the scan is exhausted.
+ */
+HeapTuple
+heap_getnext(HeapScanDesc scandesc,
+        int backw,
+        Buffer *b)
+{
+    register HeapScanDesc sdesc = scandesc;
+    Buffer       localb;
+    
+    /* ----------------
+     * increment access statistics
+     * ----------------
+     */
+    IncrHeapAccessStat(local_getnext);    
+    IncrHeapAccessStat(global_getnext);    
+    
+    /* Note: no locking manipulations needed */
+    
+    /* ----------------
+     * argument checks
+     * ----------------
+     */
+    if (sdesc == NULL)
+   elog(WARN, "heap_getnext: NULL relscan");
+    
+    /* ----------------
+     * initialize return buffer to InvalidBuffer
+     * (callers that passed no usable b get the local scratch variable)
+     * ----------------
+     */
+    if (! PointerIsValid(b)) b = &localb;
+    (*b) = InvalidBuffer;
+    
+    HEAPDEBUG_1; /* heap_getnext( info ) */
+    
+    if (backw) {
+   /* ----------------
+    *  handle reverse scan
+    * ----------------
+    */
+   HEAPDEBUG_2; /* heap_getnext called with backw */
+   
+   /*
+    * previous equals current and there is no previous buffer:
+    * nothing lies behind us, so drop the "next" pin and stop.
+    */
+   if (sdesc->rs_ptup == sdesc->rs_ctup &&
+       BufferIsInvalid(sdesc->rs_pbuf))
+       {
+       if (BufferIsValid(sdesc->rs_nbuf))
+           ReleaseBuffer(sdesc->rs_nbuf);
+       return (NULL);
+       }
+   
+   /*
+    * Copy the "current" tuple/buffer
+    * to "next". Pin/unpin the buffers
+    * accordingly
+    */
+   if (sdesc->rs_nbuf != sdesc->rs_cbuf) {
+       if (BufferIsValid(sdesc->rs_nbuf))
+       ReleaseBuffer(sdesc->rs_nbuf);
+       if (BufferIsValid(sdesc->rs_cbuf))
+       IncrBufferRefCount(sdesc->rs_cbuf);
+   }
+   sdesc->rs_ntup = sdesc->rs_ctup;
+   sdesc->rs_nbuf = sdesc->rs_cbuf;
+   
+   /* a loaded "previous" tuple becomes current; otherwise fetch one */
+   if (sdesc->rs_ptup != NULL) {
+       if (sdesc->rs_cbuf != sdesc->rs_pbuf) {
+       if (BufferIsValid(sdesc->rs_cbuf))
+           ReleaseBuffer(sdesc->rs_cbuf);
+       if (BufferIsValid(sdesc->rs_pbuf))
+           IncrBufferRefCount(sdesc->rs_pbuf);
+       }
+       sdesc->rs_ctup = sdesc->rs_ptup;
+       sdesc->rs_cbuf = sdesc->rs_pbuf;
+   } else { /* NONTUP */
+       ItemPointer iptr;
+       
+       /* start from the current tuple's tid, or from NULL if none */
+       iptr = (sdesc->rs_ctup != NULL) ?
+       &(sdesc->rs_ctup->t_ctid) : (ItemPointer) NULL;
+       
+            /* Don't release sdesc->rs_cbuf at this point, because
+               heapgettup doesn't increase PrivateRefCount if it
+               is already set. On a backward scan, both rs_ctup and rs_ntup
+               usually point to the same buffer page, so
+               PrivateRefCount[rs_cbuf] should be 2 (or more, if for instance
+               ctup is stored in a TupleTableSlot).  - 01/09/94 */
+       
+       sdesc->rs_ctup = (HeapTuple)
+       heapgettup(sdesc->rs_rd,
+              iptr,
+              -1,
+              &(sdesc->rs_cbuf),
+              sdesc->rs_tr,
+              sdesc->rs_nkeys,
+              sdesc->rs_key);
+   }
+   
+   /* no tuple and no buffer: scan is exhausted, clear both neighbours */
+   if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf))
+       {
+       if (BufferIsValid(sdesc->rs_pbuf))
+           ReleaseBuffer(sdesc->rs_pbuf);
+       sdesc->rs_ptup = NULL;
+       sdesc->rs_pbuf = InvalidBuffer;
+       if (BufferIsValid(sdesc->rs_nbuf))
+           ReleaseBuffer(sdesc->rs_nbuf);
+       sdesc->rs_ntup = NULL;
+       sdesc->rs_nbuf = InvalidBuffer;
+       return (NULL);
+       }
+   
+   /* "previous" is now unloaded: empty tuple, buffer marked unknown */
+   if (BufferIsValid(sdesc->rs_pbuf))
+       ReleaseBuffer(sdesc->rs_pbuf);
+   sdesc->rs_ptup = NULL;
+   sdesc->rs_pbuf = UnknownBuffer;
+   
+    } else {
+   /* ----------------
+    *  handle forward scan
+    * ----------------
+    */
+   /*
+    * current equals next and there is no next buffer:
+    * nothing lies ahead, so drop the "previous" pin and stop.
+    */
+   if (sdesc->rs_ctup == sdesc->rs_ntup &&
+       BufferIsInvalid(sdesc->rs_nbuf)) {
+       if (BufferIsValid(sdesc->rs_pbuf))
+       ReleaseBuffer(sdesc->rs_pbuf);
+       HEAPDEBUG_3; /* heap_getnext returns NULL at end */
+       return (NULL);
+   }
+   
+   /*
+    * Copy the "current" tuple/buffer
+    * to "previous". Pin/unpin the buffers
+    * accordingly
+    */
+   if (sdesc->rs_pbuf != sdesc->rs_cbuf) {
+       if (BufferIsValid(sdesc->rs_pbuf))
+       ReleaseBuffer(sdesc->rs_pbuf);
+       if (BufferIsValid(sdesc->rs_cbuf))
+       IncrBufferRefCount(sdesc->rs_cbuf);
+   }
+   sdesc->rs_ptup = sdesc->rs_ctup;
+   sdesc->rs_pbuf = sdesc->rs_cbuf;
+   
+   /* a loaded "next" tuple becomes current; otherwise fetch one */
+   if (sdesc->rs_ntup != NULL) {
+       if (sdesc->rs_cbuf != sdesc->rs_nbuf) {
+       if (BufferIsValid(sdesc->rs_cbuf))
+           ReleaseBuffer(sdesc->rs_cbuf);
+       if (BufferIsValid(sdesc->rs_nbuf))
+           IncrBufferRefCount(sdesc->rs_nbuf);
+       }
+       sdesc->rs_ctup = sdesc->rs_ntup;
+       sdesc->rs_cbuf = sdesc->rs_nbuf;
+       HEAPDEBUG_5; /* heap_getnext next tuple was cached */
+   } else { /* NONTUP */
+       ItemPointer iptr;
+       
+       /* start from the current tuple's tid, or from NULL if none */
+       iptr = (sdesc->rs_ctup != NULL) ?
+       &sdesc->rs_ctup->t_ctid : (ItemPointer) NULL;
+       
+            /* Don't release sdesc->rs_cbuf at this point, because
+               heapgettup doesn't increase PrivateRefCount if it
+               is already set. On a forward scan, both rs_ctup and rs_ptup
+               usually point to the same buffer page, so
+               PrivateRefCount[rs_cbuf] should be 2 (or more, if for instance
+               ctup is stored in a TupleTableSlot).  - 01/09/93 */
+       
+       sdesc->rs_ctup = (HeapTuple)
+       heapgettup(sdesc->rs_rd,
+              iptr,
+              1,
+              &sdesc->rs_cbuf,
+              sdesc->rs_tr,
+              sdesc->rs_nkeys,
+              sdesc->rs_key);
+   }
+   
+   /* no tuple and no buffer: scan is exhausted, clear both neighbours */
+   if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf)) {
+       if (BufferIsValid(sdesc->rs_nbuf))
+       ReleaseBuffer(sdesc->rs_nbuf);
+       sdesc->rs_ntup = NULL;
+       sdesc->rs_nbuf = InvalidBuffer;
+       if (BufferIsValid(sdesc->rs_pbuf))
+       ReleaseBuffer(sdesc->rs_pbuf);
+       sdesc->rs_ptup = NULL;
+       sdesc->rs_pbuf = InvalidBuffer;
+       HEAPDEBUG_6; /* heap_getnext returning EOS */
+       return (NULL);
+   }
+   
+   /* "next" is now unloaded: empty tuple, buffer marked unknown */
+   if (BufferIsValid(sdesc->rs_nbuf))
+       ReleaseBuffer(sdesc->rs_nbuf);
+   sdesc->rs_ntup = NULL;
+   sdesc->rs_nbuf = UnknownBuffer;
+    }
+    
+    /* ----------------
+     * if we get here it means we have a new current scan tuple, so
+     *  point to the proper return buffer and return the tuple.
+     * ----------------
+     */
+    (*b) = sdesc->rs_cbuf;
+    
+    HEAPDEBUG_7; /* heap_getnext returning tuple */
+    
+    return (sdesc->rs_ctup);
+}
+
+/* ----------------
+ * heap_fetch  - retrive tuple with tid
+ *
+ * Currently ignores LP_IVALID during processing!
+ * ----------------
+ */
+HeapTuple
+heap_fetch(Relation relation,
+      TimeQual timeQual,
+      ItemPointer tid,
+      Buffer *b)
+{
+    ItemId     lp;
+    Buffer     buffer;
+    PageHeader     dp;
+    HeapTuple      tuple;
+    OffsetNumber   offnum;
+    
+    /* ----------------
+     * increment access statistics
+     * ----------------
+     */
+    IncrHeapAccessStat(local_fetch);    
+    IncrHeapAccessStat(global_fetch);    
+    
+    /*
+     * Note: This is collosally expensive - does two system calls per
+     * indexscan tuple fetch.  Not good, and since we should be doing
+     * page level locking by the scanner anyway, it is commented out.
+     */
+    
+    /* RelationSetLockForTupleRead(relation, tid); */
+    
+    /* ----------------
+     * get the buffer from the relation descriptor
+     *  Note that this does a buffer pin.
+     * ----------------
+     */
+    
+    buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+    
+#ifndef NO_BUFFERISVALID
+    if (!BufferIsValid(buffer)) {
+   elog(WARN, "heap_fetch: %s relation: ReadBuffer(%lx) failed",
+        &relation->rd_rel->relname, (long)tid);
+    }
+#endif
+    
+    /* ----------------
+     * get the item line pointer corresponding to the requested tid
+     * ----------------
+     */
+    dp = (PageHeader) BufferGetPage(buffer);
+    offnum = ItemPointerGetOffsetNumber(tid);
+    lp = PageGetItemId(dp, offnum);
+    
+    /* ----------------
+     * more sanity checks
+     * ----------------
+     */
+    
+    Assert(ItemIdIsUsed(lp)); 
+    
+    /* ----------------
+     * check time qualification of tid
+     * ----------------
+     */
+    
+    tuple = heap_tuple_satisfies(lp, relation, dp,
+                timeQual, 0,(ScanKey)NULL);
+    
+    if (tuple == NULL)
+   {
+       ReleaseBuffer(buffer);
+       return (NULL);
+   }
+    
+    /* ----------------
+     * all checks passed, now either return a copy of the tuple
+     *  or pin the buffer page and return a pointer, depending on
+     *  whether caller gave us a valid b.
+     * ----------------
+     */
+    
+    if (PointerIsValid(b)) {
+   *b = buffer;
+    } else {
+   tuple = heap_copytuple(tuple);
+   ReleaseBuffer(buffer);
+    }
+    return (tuple);
+}
+
+/* ----------------
+ * heap_insert - insert tuple
+ *
+ * Stamps the tuple header (oid if not yet assigned, xmin/cmin of the
+ * current transaction and command, invalid xmax, tmin/tmax) and
+ * stores the tuple with doinsert().  Returns the tuple's oid.
+ *
+ * The assignment of t_min (and thus the others) should be
+ * removed eventually.
+ *
+ * Currently places the tuple onto the last page.  If there is no room,
+ * it is placed on new pages.  (Heap relations)
+ * Note that concurrent inserts during a scan will probably have
+ * unexpected results, though this will be fixed eventually.
+ *
+ * Fix to work with indexes.
+ * ----------------
+ */
+Oid
+heap_insert(Relation relation, HeapTuple tup)
+{
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_insert);
+    IncrHeapAccessStat(global_insert);
+
+    /*
+     * Relation level write lock.  A "local" relation (not visible to
+     * others) does not need one.
+     */
+    if (!relation->rd_islocal)
+        RelationSetLockForWrite(relation);
+
+    /*
+     * An oid that is already assigned is trusted.  This can happen in
+     * a couple of ways: at initial db creation the backend program
+     * sets oids for tuples; when an index is defined the oid is set;
+     * and in the future users may be allowed to set their own object
+     * ids in order to support a persistent object store (objects need
+     * to contain pointers to one another).  Otherwise generate a new
+     * oid and remember it as the last one processed.
+     */
+    if (!OidIsValid(tup->t_oid))
+        LastOidProcessed = tup->t_oid = newoid();
+
+    /* creation stamps; the tuple has no deleting transaction yet */
+    TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin));
+    tup->t_cmin = GetCurrentCommandId();
+    StoreInvalidTransactionId(&(tup->t_xmax));
+    tup->t_tmin = INVALID_ABSTIME;
+    tup->t_tmax = CURRENT_ABSTIME;
+
+    doinsert(relation, tup);
+
+    if (IsSystemRelationName(RelationGetRelationName(relation)->data)) {
+        RelationUnsetLockForWrite(relation);
+
+        /* invalidate caches (only works for system relations) */
+        SetRefreshWhenInvalidate(ImmediateInvalidation);
+        RelationInvalidateHeapTuple(relation, tup);
+        SetRefreshWhenInvalidate((bool)!ImmediateInvalidation);
+    }
+
+    return (tup->t_oid);
+}
+
+/* ----------------
+ * heap_delete - delete a tuple
+ *
+ * Must decide how to handle errors.
+ * ----------------
+ */
+void
+heap_delete(Relation relation, ItemPointer tid)
+{
+    ItemId     lp;
+    HeapTuple      tp;
+    PageHeader     dp;
+    Buffer     b;
+    
+    /* ----------------
+     * increment access statistics
+     * ----------------
+     */
+    IncrHeapAccessStat(local_delete);
+    IncrHeapAccessStat(global_delete);
+    
+    /* ----------------
+     * sanity check
+     * ----------------
+     */
+    Assert(ItemPointerIsValid(tid));
+    
+    /* ----------------
+     * set relation level write lock
+     * ----------------
+     */
+    RelationSetLockForWrite(relation);
+    
+    b = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+    
+#ifndef NO_BUFFERISVALID
+    if (!BufferIsValid(b)) { /* XXX L_SH better ??? */
+   elog(WARN, "heap_delete: failed ReadBuffer");
+    }
+#endif /* NO_BUFFERISVALID */
+    
+    dp = (PageHeader) BufferGetPage(b);
+    lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
+    
+    /* ----------------
+     * check that we're deleteing a valid item
+     * ----------------
+     */
+    if (!(tp = heap_tuple_satisfies(lp, relation, dp,
+                   NowTimeQual, 0, (ScanKey) NULL))) {
+   
+   /* XXX call something else */
+   ReleaseBuffer(b);
+   
+   elog(WARN, "heap_delete: (am)invalid tid");
+    }
+    
+    /* ----------------
+     * get the tuple and lock tell the buffer manager we want
+     *  exclusive access to the page
+     * ----------------
+     */
+    
+    /* ----------------
+     * store transaction information of xact deleting the tuple
+     * ----------------
+     */
+    TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax));
+    tp->t_cmax = GetCurrentCommandId();
+    ItemPointerSetInvalid(&tp->t_chain);
+    
+    /* ----------------
+     * invalidate caches
+     * ----------------
+     */
+    SetRefreshWhenInvalidate(ImmediateInvalidation);
+    RelationInvalidateHeapTuple(relation, tp);
+    SetRefreshWhenInvalidate((bool)!ImmediateInvalidation);
+    
+    WriteBuffer(b);
+    if ( IsSystemRelationName(RelationGetRelationName(relation)->data) )
+   RelationUnsetLockForWrite(relation);
+}
+
+/* ----------------
+ * heap_replace    - replace a tuple
+ *
+ * Must decide how to handle errors.
+ *
+ * Fix arguments, work with indexes.
+ * 
+ *      12/30/93 - modified the return value to be 1 when
+ *            a non-functional update is detected. This
+ *        prevents the calling routine from updating
+ *        indices unnecessarily. -kw
+ *
+ * ----------------
+ */
+int
+heap_replace(Relation relation, ItemPointer otid, HeapTuple tup)
+{
+    ItemId     lp;
+    HeapTuple      tp;
+    Page       dp;
+    Buffer     buffer;
+    
+    /* ----------------
+     * increment access statistics
+     * ----------------
+     */
+    IncrHeapAccessStat(local_replace);
+    IncrHeapAccessStat(global_replace);
+    
+    /* ----------------
+     * sanity checks
+     * ----------------
+     */
+    Assert(ItemPointerIsValid(otid));
+    
+    /* ----------------
+     * set relation level write lock
+     * ----------------
+     */
+    if (!relation->rd_islocal)
+   RelationSetLockForWrite(relation);
+    
+    buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(otid));
+#ifndef NO_BUFFERISVALID
+    if (!BufferIsValid(buffer)) {
+   /* XXX L_SH better ??? */
+   elog(WARN, "amreplace: failed ReadBuffer");
+    }  
+#endif /* NO_BUFFERISVALID */
+    
+    dp = (Page) BufferGetPage(buffer);
+    lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(otid));
+    
+    /* ----------------
+     * logically delete old item
+     * ----------------
+     */
+    
+    tp = (HeapTuple) PageGetItem(dp, lp);
+    Assert(HeapTupleIsValid(tp));
+    
+    /* -----------------
+     *  the following test should be able to catch all non-functional
+     *  update attempts and shut out all ghost tuples.
+     *  XXX In the future, Spyros may need to update the rule lock on a tuple
+     *  more than once within the same command and same transaction.
+     *  He will have to introduce a new flag to override the following check.
+     *  -- Wei
+     *
+     * -----------------
+     */
+    
+    if (TupleUpdatedByCurXactAndCmd(tp)) {
+   elog(NOTICE, "Non-functional update, only first update is performed");
+   if ( IsSystemRelationName(RelationGetRelationName(relation)->data) )
+       RelationUnsetLockForWrite(relation);
+   ReleaseBuffer(buffer);
+   return(1);
+    }
+    
+    /* ----------------
+     * check that we're replacing a valid item -
+     *
+     *  NOTE that this check must follow the non-functional update test
+     *       above as it can happen that we try to 'replace' the same tuple
+     *       twice in a single transaction.  The second time around the
+     *       tuple will fail the NowTimeQual.  We don't want to abort the
+     *       xact, we only want to flag the 'non-functional' NOTICE. -mer
+     * ----------------
+     */
+    if (!heap_tuple_satisfies(lp,
+                 relation,
+                 (PageHeader)dp,
+                 NowTimeQual,
+                 0,
+                 (ScanKey)NULL))
+   {
+       ReleaseBuffer(buffer);
+       elog(WARN, "heap_replace: (am)invalid otid");
+   }
+    
+    /* XXX order problems if not atomic assignment ??? */
+    tup->t_oid = tp->t_oid;
+    TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin));
+    tup->t_cmin = GetCurrentCommandId();
+    StoreInvalidTransactionId(&(tup->t_xmax));
+    tup->t_tmin = INVALID_ABSTIME;
+    tup->t_tmax = CURRENT_ABSTIME;
+    ItemPointerSetInvalid(&tup->t_chain);
+    
+    /* ----------------
+     * insert new item
+     * ----------------
+     */
+    if ((unsigned)DOUBLEALIGN(tup->t_len) <= PageGetFreeSpace((Page) dp)) {
+   RelationPutHeapTuple(relation, BufferGetBlockNumber(buffer), tup);
+    } else {
+   /* ----------------
+    *  new item won't fit on same page as old item, have to look
+    *  for a new place to put it.
+    * ----------------
+    */
+   doinsert(relation, tup);
+    }
+
+    /* ----------------
+     * new item in place, now record transaction information
+     * ----------------
+     */
+    TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax));
+    tp->t_cmax = GetCurrentCommandId();
+    tp->t_chain = tup->t_ctid;
+    
+    /* ----------------
+     * invalidate caches
+     * ----------------
+     */
+    SetRefreshWhenInvalidate(ImmediateInvalidation);
+    RelationInvalidateHeapTuple(relation, tp);
+    SetRefreshWhenInvalidate((bool)!ImmediateInvalidation);
+    
+    WriteBuffer(buffer);
+    
+    if ( IsSystemRelationName(RelationGetRelationName(relation)->data) )
+   RelationUnsetLockForWrite(relation);
+    
+    return(0);
+}
+
+/* ----------------
+ * heap_markpos - mark scan position
+ *
+ * Saves the tids of the previous, current and next tuples of the
+ * scan in rs_mptid, rs_mctid and rs_mntid; a slot whose tuple is
+ * NULL gets an invalid tid.  Before saving, one neighbouring tuple
+ * is loaded with heapgettup() if it is missing and its buffer is
+ * marked unknown (the previous one is tried first, else the next).
+ *
+ * Note:
+ *     Should only one mark be maintained per scan at one time.
+ * Check if this can be done generally--say calls to get the
+ * next/previous tuple and NEVER pass struct scandesc to the
+ * user AM's.  Now, the mark is sent to the executor for safekeeping.
+ * Probably can store this info into a GENERAL scan structure.
+ *
+ * May be best to change this call to store the marked position
+ * (up to 2?) in the scan structure itself.
+ * Fix to use the proper caching structure.
+ * ----------------
+ */
+void
+heap_markpos(HeapScanDesc sdesc)
+{
+    ItemPointer curTid;
+
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_markpos);
+    IncrHeapAccessStat(global_markpos);
+
+    /* Note: no locking manipulations needed */
+
+    /* tid of the current tuple, or NULL when there is no current tuple */
+    curTid = (sdesc->rs_ctup != NULL) ?
+        &sdesc->rs_ctup->t_ctid : (ItemPointer) NULL;
+
+    if (sdesc->rs_ptup == NULL && BufferIsUnknown(sdesc->rs_pbuf)) {
+        /* == NONTUP: load the previous tuple */
+        sdesc->rs_ptup = (HeapTuple) heapgettup(sdesc->rs_rd,
+                                                curTid,
+                                                -1,
+                                                &sdesc->rs_pbuf,
+                                                sdesc->rs_tr,
+                                                sdesc->rs_nkeys,
+                                                sdesc->rs_key);
+    } else if (sdesc->rs_ntup == NULL && BufferIsUnknown(sdesc->rs_nbuf)) {
+        /* == NONTUP: load the next tuple */
+        sdesc->rs_ntup = (HeapTuple) heapgettup(sdesc->rs_rd,
+                                                curTid,
+                                                1,
+                                                &sdesc->rs_nbuf,
+                                                sdesc->rs_tr,
+                                                sdesc->rs_nkeys,
+                                                sdesc->rs_key);
+    }
+
+    /*
+     * Record the three positions.  The buffer pages are not unpinned
+     * here; they may still be in use.
+     */
+    if (sdesc->rs_ptup == NULL)
+        ItemPointerSetInvalid(&sdesc->rs_mptid);
+    else
+        sdesc->rs_mptid = sdesc->rs_ptup->t_ctid;
+
+    if (sdesc->rs_ctup == NULL)
+        ItemPointerSetInvalid(&sdesc->rs_mctid);
+    else
+        sdesc->rs_mctid = sdesc->rs_ctup->t_ctid;
+
+    if (sdesc->rs_ntup == NULL)
+        ItemPointerSetInvalid(&sdesc->rs_mntid);
+    else
+        sdesc->rs_mntid = sdesc->rs_ntup->t_ctid;
+}
+
+/* ----------------
+ * heap_restrpos - restore position to marked location
+ *
+ * Unpins the scan's buffers, then reloads the previous, current and
+ * next tuples from the tids saved by heap_markpos (rs_mptid,
+ * rs_mctid, rs_mntid).  A slot whose saved tid is invalid is set
+ * to NULL.
+ *
+ * Note:  there are bad side effects here.  If we were past the end
+ * of a relation when heap_markpos was called and the relation is
+ * then extended via insert, the next call to heap_restrpos will
+ * cause the added tuples to be visible when the scan continues.
+ * Problems also arise if the TID's are rearranged!!!
+ *
+ * Now pins buffer once for each valid tuple pointer (rs_ptup,
+ * rs_ctup, rs_ntup) referencing it.
+ *  - 01/13/94
+ *
+ * XXX might be better to do direct access instead of
+ * using the generality of heapgettup().
+ *
+ * XXX It is very possible that when a scan is restored, a tuple
+ * XXX which previously qualified may fail for time range purposes,
+ * XXX unless some form of locking exists (i.e., portals currently
+ * XXX can act funny).
+ * ----------------
+ */
+void
+heap_restrpos(HeapScanDesc sdesc)
+{
+    /* count this call, locally and globally */
+    IncrHeapAccessStat(local_restrpos);
+    IncrHeapAccessStat(global_restrpos);
+
+    /* XXX no amrestrpos checking that ammarkpos called */
+
+    /* Note: no locking manipulations needed */
+
+    unpinsdesc(sdesc);
+
+    /* force heapgettup to pin buffer for each loaded tuple */
+    sdesc->rs_pbuf = InvalidBuffer;
+    sdesc->rs_cbuf = InvalidBuffer;
+    sdesc->rs_nbuf = InvalidBuffer;
+
+    /* previous tuple */
+    if (ItemPointerIsValid(&sdesc->rs_mptid)) {
+        sdesc->rs_ptup = (HeapTuple) heapgettup(sdesc->rs_rd,
+                                                &sdesc->rs_mptid,
+                                                0,
+                                                &sdesc->rs_pbuf,
+                                                NowTimeQual,
+                                                0,
+                                                (ScanKey) NULL);
+    } else {
+        sdesc->rs_ptup = NULL;
+    }
+
+    /* current tuple */
+    if (ItemPointerIsValid(&sdesc->rs_mctid)) {
+        sdesc->rs_ctup = (HeapTuple) heapgettup(sdesc->rs_rd,
+                                                &sdesc->rs_mctid,
+                                                0,
+                                                &sdesc->rs_cbuf,
+                                                NowTimeQual,
+                                                0,
+                                                (ScanKey) NULL);
+    } else {
+        sdesc->rs_ctup = NULL;
+    }
+
+    /* next tuple */
+    if (ItemPointerIsValid(&sdesc->rs_mntid)) {
+        sdesc->rs_ntup = (HeapTuple) heapgettup(sdesc->rs_rd,
+                                                &sdesc->rs_mntid,
+                                                0,
+                                                &sdesc->rs_nbuf,
+                                                NowTimeQual,
+                                                0,
+                                                (ScanKey) NULL);
+    } else {
+        sdesc->rs_ntup = NULL;
+    }
+}
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c

new file mode 100644 (file)

index 0000000..457e117
--- /dev/null
+++ b/src/backend/access/heap/hio.c
@@ -0,0 +1,195 @@
+/*-------------------------------------------------------------------------
+ *
+ * hio.c--
+ *    POSTGRES heap access method input/output code.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Id: hio.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+
+#include "c.h"
+
+#include "access/heapam.h"
+#include "access/hio.h"
+#include "access/htup.h"
+
+#include "storage/block.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "storage/itemptr.h"
+#include "storage/off.h"
+
+#include "utils/memutils.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+/*
+ * amputunique - place tuple at tid
+ *   Currently on errors, calls elog.  Perhaps should return -1?
+ *   Possible errors include the addition of a tuple to the page
+ *   between the time the linep is chosen and the page is L_UP'd.
+ *
+ *   This should be coordinated with the B-tree code.
+ *   Probably needs to have an amdelunique to allow for
+ *   internal index records to be deleted and reordered as needed.
+ *   For the heap AM, this should never be needed.
+ */
+void
+RelationPutHeapTuple(Relation relation,
+            BlockNumber blockIndex,
+            HeapTuple tuple)
+{
+    Buffer     buffer;
+    Page       pageHeader;
+    BlockNumber        numberOfBlocks;
+    OffsetNumber   offnum;
+    unsigned int   len;
+    ItemId     itemId;
+    Item       item;
+    
+    /* ----------------
+     * increment access statistics
+     * ----------------
+     */
+    IncrHeapAccessStat(local_RelationPutHeapTuple);
+    IncrHeapAccessStat(global_RelationPutHeapTuple);
+    
+    Assert(RelationIsValid(relation));
+    Assert(HeapTupleIsValid(tuple));
+    
+    numberOfBlocks = RelationGetNumberOfBlocks(relation);
+    Assert(blockIndex < numberOfBlocks);
+    
+    buffer = ReadBuffer(relation, blockIndex);
+#ifndef NO_BUFFERISVALID
+    if (!BufferIsValid(buffer)) {
+   elog(WARN, "RelationPutHeapTuple: no buffer for %ld in %s",
+        blockIndex, &relation->rd_rel->relname);
+    }
+#endif
+    
+    pageHeader = (Page)BufferGetPage(buffer);
+    len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */
+    Assert((int)len <= PageGetFreeSpace(pageHeader));
+    
+    offnum = PageAddItem((Page)pageHeader, (Item)tuple,
+            tuple->t_len, InvalidOffsetNumber, LP_USED);
+    
+    itemId = PageGetItemId((Page)pageHeader, offnum);
+    item = PageGetItem((Page)pageHeader, itemId);
+    
+    ItemPointerSet(&((HeapTuple)item)->t_ctid, blockIndex, offnum);
+    
+    WriteBuffer(buffer);
+    /* return an accurate tuple */
+    ItemPointerSet(&tuple->t_ctid, blockIndex, offnum);
+}
+
+/*
+ * A BlockExtend operation hands back a buffer page that is all zeroes,
+ * and the heap_insert routines rely on that: a pd_upper of zero is taken
+ * to mean the page has not been initialized yet.
+ */
+
+#define PageIsNew(page) (0 == (page)->pd_upper)
+
+/*
+ * This routine is another in the series of attempts to reduce the number
+ * of I/O's and system calls executed in the various benchmarks.  In
+ * particular, this routine is used to append data to the end of a relation
+ * file without excessive lseeks.  This code should do no more than 2 semops
+ * in the ideal case.
+ *
+ * Eventually, we should cache the number of blocks in a relation somewhere.
+ * Until that time, this code will have to do an lseek to determine the number
+ * of blocks in a relation.
+ * 
+ * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
+ * to do an append; it's possible to eliminate 2 of the semops if we do direct
+ * buffer stuff (!); the lseek and the write can go if we get
+ * RelationGetNumberOfBlocks to be useful.
+ *
+ * NOTE: This code presumes that we have a write lock on the relation.
+ *
+ * Also note that this routine probably shouldn't have to exist, and does
+ * screw up the call graph rather badly, but we are wasting so much time and
+ * system resources being massively general that we are losing badly in our
+ * performance benchmarks.
+ */
+void
+RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
+{
+    Buffer          buffer;
+    Page            pageHeader;
+    BlockNumber     lastblock;
+    OffsetNumber    offnum;
+    unsigned int    len;
+    ItemId          itemId;
+    Item            item;
+
+    Assert(RelationIsValid(relation));
+    Assert(HeapTupleIsValid(tuple));
+
+    /*
+     * XXX This does an lseek - VERY expensive - but at the moment it
+     * is the only way to accurately determine how many blocks are in
+     * a relation.  A good optimization would be to get this to actually
+     * work properly.
+     */
+
+    lastblock = RelationGetNumberOfBlocks(relation);
+
+    if (lastblock == 0)
+    {
+        /*
+         * No blocks reported: read block 0 and, if the page looks
+         * untouched (pd_upper == 0), swap it for a P_NEW buffer and
+         * initialize that page.
+         */
+        buffer = ReadBuffer(relation, lastblock);
+        pageHeader = (Page)BufferGetPage(buffer);
+        if (PageIsNew((PageHeader) pageHeader))
+        {
+            buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
+            pageHeader = (Page)BufferGetPage(buffer);
+            PageInit(pageHeader, BufferGetPageSize(buffer), 0);
+        }
+    }
+    else
+        buffer = ReadBuffer(relation, lastblock - 1);   /* last existing block */
+
+    pageHeader = (Page)BufferGetPage(buffer);
+    len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */
+
+    /*
+     * Note that this is true if the above returned a bogus page, which
+     * it will do for a completely empty relation.
+     */
+
+    if (len > PageGetFreeSpace(pageHeader))
+    {
+        /* not enough room on that page: move on to a fresh P_NEW page */
+        buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
+        pageHeader = (Page)BufferGetPage(buffer);
+        PageInit(pageHeader, BufferGetPageSize(buffer), 0);
+
+        /* a tuple that does not even fit on an empty page is an error */
+        if (len > PageGetFreeSpace(pageHeader))
+            elog(WARN, "Tuple is too big: size %u", len);   /* len is unsigned */
+    }
+
+    /* the item is added with its exact t_len, not the aligned length */
+    offnum = PageAddItem((Page)pageHeader, (Item)tuple,
+                         tuple->t_len, InvalidOffsetNumber, LP_USED);
+
+    itemId = PageGetItemId((Page)pageHeader, offnum);
+    item = PageGetItem((Page)pageHeader, itemId);
+
+    /* from here on lastblock is the block the tuple actually landed on */
+    lastblock = BufferGetBlockNumber(buffer);
+
+    ItemPointerSet(&((HeapTuple)item)->t_ctid, lastblock, offnum);
+
+    /* return an accurate tuple */
+    ItemPointerSet(&tuple->t_ctid, lastblock, offnum);
+
+    WriteBuffer(buffer);
+}
diff --git a/src/backend/access/heap/stats.c b/src/backend/access/heap/stats.c

new file mode 100644 (file)

index 0000000..d41d01a
--- /dev/null
+++ b/src/backend/access/heap/stats.c
@@ -0,0 +1,329 @@
+/*-------------------------------------------------------------------------
+ *
+ * stats.c--
+ *    heap access method debugging statistic collection routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/heap/Attic/stats.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+ *
+ * NOTES
+ *    initam should be moved someplace else.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+#include "utils/elog.h"
+#include "utils/mcxt.h"
+
+/* ----------------
+ *      InitHeapAccessStatistics
+ *
+ *      Allocates the statistics structure in TopMemoryContext, zeroes
+ *      every counter, stamps the three timestamps with the current time
+ *      and publishes the result in heap_access_stats.  A second call is
+ *      a no-op.
+ * ----------------
+ */
+HeapAccessStatistics heap_access_stats = (HeapAccessStatistics) NULL;
+
+void
+InitHeapAccessStatistics()
+{
+    MemoryContext        savedContext;
+    HeapAccessStatistics fresh;
+
+    /* already set up: nothing to do */
+    if (heap_access_stats != NULL)
+        return;
+
+    /* the structure is allocated from the top memory context */
+    savedContext = MemoryContextSwitchTo(TopMemoryContext);
+
+    fresh = (HeapAccessStatistics) palloc(sizeof(HeapAccessStatisticsData));
+
+    /* every counter starts at zero, local and global alike */
+    fresh->local_open = fresh->global_open = 0;
+    fresh->local_openr = fresh->global_openr = 0;
+    fresh->local_close = fresh->global_close = 0;
+    fresh->local_beginscan = fresh->global_beginscan = 0;
+    fresh->local_rescan = fresh->global_rescan = 0;
+    fresh->local_endscan = fresh->global_endscan = 0;
+    fresh->local_getnext = fresh->global_getnext = 0;
+    fresh->local_fetch = fresh->global_fetch = 0;
+    fresh->local_insert = fresh->global_insert = 0;
+    fresh->local_delete = fresh->global_delete = 0;
+    fresh->local_replace = fresh->global_replace = 0;
+    fresh->local_markpos = fresh->global_markpos = 0;
+    fresh->local_restrpos = fresh->global_restrpos = 0;
+    fresh->local_BufferGetRelation = fresh->global_BufferGetRelation = 0;
+    fresh->local_RelationIdGetRelation =
+        fresh->global_RelationIdGetRelation = 0;
+    fresh->local_RelationIdGetRelation_Buf =
+        fresh->global_RelationIdGetRelation_Buf = 0;
+    fresh->local_RelationNameGetRelation =
+        fresh->global_RelationNameGetRelation = 0;
+    fresh->local_getreldesc = fresh->global_getreldesc = 0;
+    fresh->local_heapgettup = fresh->global_heapgettup = 0;
+    fresh->local_RelationPutHeapTuple =
+        fresh->global_RelationPutHeapTuple = 0;
+    fresh->local_RelationPutLongHeapTuple =
+        fresh->global_RelationPutLongHeapTuple = 0;
+
+    /* record init times */
+    time(&fresh->init_global_timestamp);
+    time(&fresh->local_reset_timestamp);
+    time(&fresh->last_request_timestamp);
+
+    /* back to the memory context that was current on entry */
+    (void) MemoryContextSwitchTo(savedContext);
+
+    heap_access_stats = fresh;
+}
+
+/* ----------------
+ *      ResetHeapAccessStatistics
+ *
+ *      Zeroes every local_* counter and refreshes the local reset and
+ *      last request timestamps.  The global_* counters and
+ *      init_global_timestamp are left alone.  Does nothing when the
+ *      statistics have not been initialized.
+ * ----------------
+ */
+void
+ResetHeapAccessStatistics()
+{
+    HeapAccessStatistics stats;
+
+    /* ----------------
+     *  do nothing if stats aren't initialized
+     * ----------------
+     */
+    if (heap_access_stats == NULL)
+        return;
+
+    stats = heap_access_stats;
+
+    /* ----------------
+     *  reset local counts
+     * ----------------
+     */
+    stats->local_open = 0;
+    stats->local_openr = 0;
+    stats->local_close = 0;
+    stats->local_beginscan = 0;
+    stats->local_rescan = 0;
+    stats->local_endscan = 0;
+    stats->local_getnext = 0;
+    stats->local_fetch = 0;
+    stats->local_insert = 0;
+    stats->local_delete = 0;
+    stats->local_replace = 0;
+    stats->local_markpos = 0;
+    stats->local_restrpos = 0;
+    stats->local_BufferGetRelation = 0;
+    stats->local_RelationIdGetRelation = 0;
+    stats->local_RelationIdGetRelation_Buf = 0;
+    /* this local counter used to be skipped, so it survived a reset */
+    stats->local_RelationNameGetRelation = 0;
+    stats->local_getreldesc = 0;
+    stats->local_heapgettup = 0;
+    stats->local_RelationPutHeapTuple = 0;
+    stats->local_RelationPutLongHeapTuple = 0;
+
+    /* ----------------
+     *  reset local timestamps
+     * ----------------
+     */
+    time(&stats->local_reset_timestamp);
+    time(&stats->last_request_timestamp);
+}
+
+/* ----------------
+ *      GetHeapAccessStatistics
+ *
+ *      Returns a palloc'd copy of the current statistics, or NULL when
+ *      the statistics have not been initialized.  Each successful call
+ *      also updates last_request_timestamp in the live structure before
+ *      the copy is taken.
+ * ----------------
+ */
+HeapAccessStatistics GetHeapAccessStatistics()
+{
+    HeapAccessStatistics snapshot = NULL;
+
+    if (heap_access_stats != NULL)
+    {
+        /* note when the statistics were last asked for */
+        time(&heap_access_stats->last_request_timestamp);
+
+        /* the caller gets its own copy, not the live structure */
+        snapshot = (HeapAccessStatistics)
+            palloc(sizeof(HeapAccessStatisticsData));
+
+        memmove(snapshot,
+                heap_access_stats,
+                sizeof(HeapAccessStatisticsData));
+    }
+
+    return snapshot;
+}
+
+/* ----------------
+ *      PrintHeapAccessStatistics
+ *
+ *      Writes the three timestamps and every local/global counter pair
+ *      of *stats to stdout.  A NULL stats pointer prints nothing.
+ * ----------------
+ */
+void
+PrintHeapAccessStatistics(HeapAccessStatistics stats)
+{
+    /* ----------------
+     *  return nothing if stats aren't valid
+     * ----------------
+     */
+    if (stats == NULL)
+        return;
+
+    printf("======== heap am statistics ========\n");
+
+    /* no "\n" in these formats: the ctime() result supplies the line end */
+    printf("init_global_timestamp:      %s",
+           ctime(&(stats->init_global_timestamp)));
+
+    printf("local_reset_timestamp:      %s",
+           ctime(&(stats->local_reset_timestamp)));
+
+    printf("last_request_timestamp:     %s",
+           ctime(&(stats->last_request_timestamp)));
+
+    printf("local/global_open:                        %6d/%6d\n",
+           stats->local_open, stats->global_open);
+
+    printf("local/global_openr:                       %6d/%6d\n",
+           stats->local_openr, stats->global_openr);
+
+    printf("local/global_close:                       %6d/%6d\n",
+           stats->local_close, stats->global_close);
+
+    printf("local/global_beginscan:                   %6d/%6d\n",
+           stats->local_beginscan, stats->global_beginscan);
+
+    printf("local/global_rescan:                      %6d/%6d\n",
+           stats->local_rescan, stats->global_rescan);
+
+    printf("local/global_endscan:                     %6d/%6d\n",
+           stats->local_endscan, stats->global_endscan);
+
+    printf("local/global_getnext:                     %6d/%6d\n",
+           stats->local_getnext, stats->global_getnext);
+
+    printf("local/global_fetch:                       %6d/%6d\n",
+           stats->local_fetch, stats->global_fetch);
+
+    printf("local/global_insert:                      %6d/%6d\n",
+           stats->local_insert, stats->global_insert);
+
+    printf("local/global_delete:                      %6d/%6d\n",
+           stats->local_delete, stats->global_delete);
+
+    printf("local/global_replace:                     %6d/%6d\n",
+           stats->local_replace, stats->global_replace);
+
+    printf("local/global_markpos:                     %6d/%6d\n",
+           stats->local_markpos, stats->global_markpos);
+
+    printf("local/global_restrpos:                    %6d/%6d\n",
+           stats->local_restrpos, stats->global_restrpos);
+
+    printf("================\n");
+
+    printf("local/global_BufferGetRelation:             %6d/%6d\n",
+           stats->local_BufferGetRelation,
+           stats->global_BufferGetRelation);
+
+    printf("local/global_RelationIdGetRelation:         %6d/%6d\n",
+           stats->local_RelationIdGetRelation,
+           stats->global_RelationIdGetRelation);
+
+    printf("local/global_RelationIdGetRelation_Buf:     %6d/%6d\n",
+           stats->local_RelationIdGetRelation_Buf,
+           stats->global_RelationIdGetRelation_Buf);
+
+    /* this pair was the only one in the structure that was never printed */
+    printf("local/global_RelationNameGetRelation:       %6d/%6d\n",
+           stats->local_RelationNameGetRelation,
+           stats->global_RelationNameGetRelation);
+
+    printf("local/global_getreldesc:                    %6d/%6d\n",
+           stats->local_getreldesc, stats->global_getreldesc);
+
+    printf("local/global_heapgettup:                    %6d/%6d\n",
+           stats->local_heapgettup, stats->global_heapgettup);
+
+    printf("local/global_RelationPutHeapTuple:          %6d/%6d\n",
+           stats->local_RelationPutHeapTuple,
+           stats->global_RelationPutHeapTuple);
+
+    printf("local/global_RelationPutLongHeapTuple:      %6d/%6d\n",
+           stats->local_RelationPutLongHeapTuple,
+           stats->global_RelationPutLongHeapTuple);
+
+    printf("===================================\n");
+
+    printf("\n");
+}
+
+/* ----------------
+ *      PrintAndFreeHeapAccessStatistics
+ *
+ *      Prints stats via PrintHeapAccessStatistics and then pfree's it.
+ *      A NULL pointer is passed to the printer but is not freed.
+ * ----------------
+ */
+void
+PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats)
+{
+    PrintHeapAccessStatistics(stats);
+
+    /* nothing to release for a NULL pointer */
+    if (stats == NULL)
+        return;
+
+    pfree(stats);
+}
+
+/* ----------------------------------------------------------------
+ *         access method initialization
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * initam - currently only sets up the heap access statistics.
+ *
+ * This routine should someday be moved someplace else.
+ * ----------------
+ */
+void
+initam()
+{
+    /* heap statistics are the only thing initialized here */
+    InitHeapAccessStatistics();
+}
diff --git a/src/backend/access/heapam.h b/src/backend/access/heapam.h

new file mode 100644 (file)

index 0000000..9938dbe
--- /dev/null
+++ b/src/backend/access/heapam.h
@@ -0,0 +1,149 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapam.h--
+ *    POSTGRES heap access method definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: heapam.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    HEAPAM_H
+#define HEAPAM_H
+
+#include <sys/types.h>
+
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/htup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "utils/tqual.h"
+#include "access/tupdesc.h"
+#include "storage/smgr.h"
+#include "utils/rel.h"
+
+/* ----------------------------------------------------------------
+ *     heap access method statistics
+ *
+ *     One int counter per instrumented routine, kept twice: a global_*
+ *     copy and a local_* copy.  The counters are bumped through the
+ *     IncrHeapAccessStat() macro.
+ * ----------------------------------------------------------------
+ */
+
+typedef struct HeapAccessStatisticsData {
+    time_t  init_global_timestamp; /* time global statistics started */
+    time_t  local_reset_timestamp; /* last time local reset was done */
+    time_t  last_request_timestamp;    /* last time stats were requested */
+
+    /*
+     * global_* counters: zeroed by InitHeapAccessStatistics() and left
+     * alone by ResetHeapAccessStatistics().
+     */
+    int global_open;           
+    int global_openr;
+    int global_close;
+    int global_beginscan;
+    int global_rescan;
+    int global_endscan;
+    int global_getnext;
+    int global_fetch;
+    int global_insert;
+    int global_delete;
+    int global_replace; 
+    int global_markpos; 
+    int global_restrpos;
+    int global_BufferGetRelation;
+    int global_RelationIdGetRelation;
+    int global_RelationIdGetRelation_Buf;
+    int global_RelationNameGetRelation;
+    int global_getreldesc;
+    int global_heapgettup;
+    int global_RelationPutHeapTuple;
+    int global_RelationPutLongHeapTuple;
+
+    /*
+     * local_* counters: same set of events; this is the group that
+     * ResetHeapAccessStatistics() is intended to clear, together with
+     * local_reset_timestamp.
+     */
+    int local_open;            
+    int local_openr;
+    int local_close;
+    int local_beginscan;
+    int local_rescan;
+    int local_endscan;
+    int local_getnext;
+    int local_fetch;
+    int local_insert;
+    int local_delete;
+    int local_replace; 
+    int local_markpos; 
+    int local_restrpos;
+    int local_BufferGetRelation;
+    int local_RelationIdGetRelation;
+    int local_RelationIdGetRelation_Buf;
+    int local_RelationNameGetRelation;
+    int local_getreldesc;
+    int local_heapgettup;
+    int local_RelationPutHeapTuple;
+    int local_RelationPutLongHeapTuple;
+} HeapAccessStatisticsData;
+
+/* pointer form; this is what the statistics routines pass around */
+typedef HeapAccessStatisticsData *HeapAccessStatistics;
+
+/*
+ * IncrHeapAccessStat(x) - post-increment counter x of the statistics
+ * structure; evaluates to 0 and touches nothing while heap_access_stats
+ * is still NULL.
+ */
+#define IncrHeapAccessStat(x) \
+    ((heap_access_stats != NULL) ? (heap_access_stats->x)++ : 0)
+
+extern HeapAccessStatistics heap_access_stats; /* in stats.c */
+
+/* ----------------
+ * function prototypes for heap access method
+ * ----------------
+ */
+/* heap_create, heap_creatr, and heap_destroy are declared in catalog/heap.h */
+#include "catalog/heap.h"
+
+/* heapam.c */
+extern void doinsert(Relation relation, HeapTuple tup);
+extern void SetHeapAccessMethodImmediateInvalidation(bool on);
+
+/* opening and closing a relation */
+extern Relation heap_open(Oid relationId);
+extern Relation heap_openr(char *relationName);
+extern void heap_close(Relation relation);
+/* scans */
+extern HeapScanDesc heap_beginscan(Relation relation, int atend,
+               TimeQual timeQual, unsigned nkeys, ScanKey key);
+extern void heap_rescan(HeapScanDesc sdesc, bool scanFromEnd, ScanKey key);
+extern void heap_endscan(HeapScanDesc sdesc);
+extern HeapTuple heap_getnext(HeapScanDesc scandesc, int backw, Buffer *b);
+extern HeapTuple heap_fetch(Relation relation, TimeQual timeQual,
+               ItemPointer tid, Buffer *b);
+/* tuple modification */
+extern Oid heap_insert(Relation relation, HeapTuple tup);
+extern void heap_delete(Relation relation, ItemPointer tid);
+extern int heap_replace(Relation relation, ItemPointer otid,
+           HeapTuple tup);
+/* scan position: heap_restrpos reloads the tuples from the marked TIDs;
+ * heap_markpos is presumably what records them -- confirm in heapam.c */
+extern void heap_markpos(HeapScanDesc sdesc);
+extern void heap_restrpos(HeapScanDesc sdesc);
+
+/* in common/heaptuple.c */
+extern Size ComputeDataSize(TupleDesc tupleDesc, Datum value[], char nulls[]);
+extern void DataFill(char *data, TupleDesc tupleDesc,
+            Datum value[], char nulls[], char *infomask,
+            bits8 bit[]);
+extern int heap_attisnull(HeapTuple tup, int attnum);
+extern int heap_sysattrlen(AttrNumber attno);
+extern bool heap_sysattrbyval(AttrNumber attno);
+extern char *heap_getsysattr(HeapTuple tup, Buffer b, int attnum);
+extern char *fastgetattr(HeapTuple tup, unsigned attnum,
+            TupleDesc att, bool *isnull);
+extern char *heap_getattr(HeapTuple tup, Buffer b, int attnum,
+             TupleDesc att, bool *isnull);
+extern HeapTuple heap_copytuple(HeapTuple tuple);
+extern void heap_deformtuple(HeapTuple tuple, TupleDesc tdesc,
+                Datum values[], char nulls[]);
+extern HeapTuple heap_formtuple(TupleDesc tupleDescriptor, 
+               Datum value[], char nulls[]);
+extern HeapTuple heap_modifytuple(HeapTuple tuple, Buffer buffer,
+   Relation relation, Datum replValue[], char replNull[], char repl[]);
+/* declared extern like every other prototype in this header */
+extern HeapTuple heap_addheader(uint32 natts, int structlen, char *structure);
+
+/* in heap/stats.c */
+extern void InitHeapAccessStatistics(void);
+extern void ResetHeapAccessStatistics(void);
+/* returns a palloc'd copy (or NULL); PrintAndFree... pfree's what it is given */
+extern HeapAccessStatistics GetHeapAccessStatistics(void);
+extern void PrintHeapAccessStatistics(HeapAccessStatistics stats);
+extern void PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats);
+extern void initam(void);
+
+#endif /* HEAPAM_H */
diff --git a/src/backend/access/hio.h b/src/backend/access/hio.h

new file mode 100644 (file)

index 0000000..4a699ff
--- /dev/null
+++ b/src/backend/access/hio.h
@@ -0,0 +1,26 @@
+/*-------------------------------------------------------------------------
+ *
+ * hio.h--
+ *    POSTGRES heap access method input/output definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: hio.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    HIO_H
+#define HIO_H
+
+#include "c.h"
+
+#include "storage/block.h"
+#include "access/htup.h"
+#include "utils/rel.h"
+
+/*
+ * in heap/hio.c
+ *
+ * RelationPutHeapTuple adds the tuple to the given block;
+ * RelationPutHeapTupleAtEnd adds it to the last block of the relation,
+ * moving on to a new block when it does not fit.  Both set tuple->t_ctid
+ * to the position the tuple was given.
+ */
+extern void RelationPutHeapTuple(Relation relation, BlockNumber blockIndex,
+                HeapTuple tuple);
+extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple);
+
+#endif /* HIO_H */
diff --git a/src/backend/access/htup.h b/src/backend/access/htup.h

new file mode 100644 (file)

index 0000000..7cf1ecf
--- /dev/null
+++ b/src/backend/access/htup.h
@@ -0,0 +1,115 @@
+/*-------------------------------------------------------------------------
+ *
+ * htup.h--
+ *    POSTGRES heap tuple definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: htup.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    HTUP_H
+#define HTUP_H
+
+#include "access/attnum.h"
+#include "storage/bufpage.h"       /* just to reduce levels of #include */
+#include "storage/itemptr.h"
+#include "utils/nabstime.h"
+
+/* size, in bits, of the t_bits array declared in HeapTupleData (4 bytes) */
+#define MinHeapTupleBitmapSize 32      /* 8 * 4 */
+
+/* check these, they are likely to be more severely limited by t_hoff */
+
+/* NOTE(review): t_hoff is a uint8, so the real limit is probably lower */
+#define MaxHeapAttributeNumber 1600        /* 8 * 200 */
+
+/*
+ * Heap tuple header.
+ *
+ * To avoid wasting space, the attributes should be laid out in such a
+ * way as to reduce structure padding.
+ */
+typedef struct HeapTupleData {
+
+    unsigned int   t_len;      /* length of entire tuple */
+
+    ItemPointerData    t_ctid;     /* current TID of this tuple */
+
+    ItemPointerData    t_chain;    /* replaced tuple TID */
+
+    Oid            t_oid;      /* OID of this tuple -- 4 bytes */
+
+    CommandId      t_cmin;     /* insert CID stamp -- 2 bytes each */
+    CommandId      t_cmax;     /* delete CommandId stamp */
+
+    TransactionId  t_xmin;     /* insert XID stamp -- 4 bytes each */
+    TransactionId  t_xmax;     /* delete XID stamp */
+
+    AbsoluteTime   t_tmin;     /* time stamps -- 4 bytes each */
+    AbsoluteTime   t_tmax; 
+
+    int16      t_natts;    /* number of attributes */
+    char       t_vtype;    /* not used - padding */
+
+    char       t_infomask; /* whether tuple has null or variable
+                    * length attributes (HEAP_HASNULL,
+                    * HEAP_HASVARLENA)
+                    */
+
+    uint8      t_hoff;     /* sizeof tuple header; GETSTRUCT() adds
+                    * this to the tuple address */
+
+    bits8      t_bits[MinHeapTupleBitmapSize / 8];
+                   /* bit map of domains */
+
+    /* MORE DATA FOLLOWS AT END OF STRUCT */
+} HeapTupleData;   
+
+/* pointer form of the tuple header */
+typedef HeapTupleData  *HeapTuple;
+
+
+/*
+ * Negative attribute numbers.  The names line up with fields of the
+ * tuple header (t_ctid, t_oid, t_xmin, ...); presumably these select the
+ * system attributes handled by heap_getsysattr() -- confirm there.
+ * FirstLowInvalidHeapAttributeNumber is one below the lowest number
+ * defined here.
+ */
+#define SelfItemPointerAttributeNumber     (-1)
+#define ObjectIdAttributeNumber            (-2)
+#define MinTransactionIdAttributeNumber        (-3)
+#define MinCommandIdAttributeNumber        (-4)
+#define MaxTransactionIdAttributeNumber        (-5)
+#define MaxCommandIdAttributeNumber        (-6)
+#define ChainItemPointerAttributeNumber        (-7)
+#define AnchorItemPointerAttributeNumber   (-8)
+#define MinAbsoluteTimeAttributeNumber     (-9)
+#define MaxAbsoluteTimeAttributeNumber     (-10)
+#define VersionTypeAttributeNumber     (-11)
+#define FirstLowInvalidHeapAttributeNumber (-12)
+
+
+/* ----------------
+ * support macros
+ * ----------------
+ */
+/*
+ * GETSTRUCT(TUP) -
+ * Address t_hoff bytes past the start of the tuple, as a char *.
+ */
+#define GETSTRUCT(TUP) (((char *)(TUP)) + ((HeapTuple)(TUP))->t_hoff)
+
+
+/*
+ * BITMAPLEN(NATTS) - 
+ * Computes minimum size of bitmap given number of domains.
+ *
+ * The result is a byte count: one bit per domain, rounded up to a
+ * multiple of 4 bytes and never less than MinHeapTupleBitmapSize / 8
+ * (4 bytes for 1..32 domains, 8 bytes for 33..64, and so on).
+ * NOTE(review): for NATTS == 0 the expression right-shifts -1; callers
+ * presumably never pass 0 -- confirm.
+ */
+#define BITMAPLEN(NATTS) \
+   ((((((int)(NATTS) - 1) >> 3) + 4 - (MinHeapTupleBitmapSize >> 3)) \
+     & ~03) + (MinHeapTupleBitmapSize >> 3))
+
+/*
+ * HeapTupleIsValid
+ * A heap tuple is valid exactly when its pointer is.
+ */
+#define    HeapTupleIsValid(tuple) PointerIsValid(tuple)
+
+/*
+ * flag bits kept in t_infomask
+ */
+#define HEAP_HASNULL       0x01    /* has null attribute(s) */
+#define    HEAP_HASVARLENA     0x02    /* has variable length attribute(s) */
+
+/* true when the HEAP_HASNULL bit is clear */
+#define HeapTupleNoNulls(tuple) \
+   ((((HeapTuple) (tuple))->t_infomask & HEAP_HASNULL) == 0)
+
+/* true when the HEAP_HASVARLENA bit is clear */
+#define HeapTupleAllFixed(tuple) \
+   ((((HeapTuple) (tuple))->t_infomask & HEAP_HASVARLENA) == 0)
+
+#endif /* HTUP_H */
diff --git a/src/backend/access/ibit.h b/src/backend/access/ibit.h

new file mode 100644 (file)

index 0000000..990c23a
--- /dev/null
+++ b/src/backend/access/ibit.h
@@ -0,0 +1,34 @@
+/*-------------------------------------------------------------------------
+ *
+ * ibit.h--
+ *    POSTGRES index valid attribute bit map definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: ibit.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    IBIT_H
+#define IBIT_H
+
+#include "c.h"
+#include "utils/memutils.h"
+
+/*
+ * Bit map with room for MaxIndexAttributeNumber bits, rounded up to a
+ * whole number of bytes.
+ */
+typedef struct IndexAttributeBitMapData {
+   char    bits[(MaxIndexAttributeNumber + MaxBitsPerByte - 1)
+       / MaxBitsPerByte];
+} IndexAttributeBitMapData;
+
+/* pointer form of the bit map */
+typedef IndexAttributeBitMapData   *IndexAttributeBitMap;
+
+/* size of the bit map structure in bytes */
+#define IndexAttributeBitMapSize   sizeof(IndexAttributeBitMapData)
+
+/*
+ * IndexAttributeBitMapIsValid --
+ * True iff attribute bit map is valid.
+ * (Only the pointer is checked, via PointerIsValid.)
+ */
+#define    IndexAttributeBitMapIsValid(bits) PointerIsValid(bits)
+
+#endif /* IBIT_H */
diff --git a/src/backend/access/index/Makefile.inc b/src/backend/access/index/Makefile.inc

new file mode 100644 (file)

index 0000000..0bc5883
--- /dev/null
+++ b/src/backend/access/index/Makefile.inc
@@ -0,0 +1,14 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/index
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/index/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+# Append the access/index source files to the SUBSRCS list.
+SUBSRCS+= genam.c indexam.c istrat.c
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c

new file mode 100644 (file)

index 0000000..3d02ba5
--- /dev/null
+++ b/src/backend/access/index/genam.c
@@ -0,0 +1,275 @@
+/*-------------------------------------------------------------------------
+ *
+ * genam.c--
+ *    general index access method routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+ *
+ * NOTES
+ *    many of the old access method routines have been turned into
+ *    macros and moved to genam.h -cim 4/30/91
+ *
+ *-------------------------------------------------------------------------
+ */
+/*
+ * OLD COMMENTS
+ * Scans are implemented as follows:
+ *
+ * `0' represents an invalid item pointer.
+ * `-' represents an unknown item pointer.
+ * `X' represents a known item pointer.
+ * `+' represents known or invalid item pointers.
+ * `*' represents any item pointers.
+ *
+ * State is represented by a triple of these symbols in the order of
+ * previous, current, next.  Note that the case of reverse scans works
+ * identically.
+ *
+ * State   Result
+ * (1) + + -   + 0 0       (if the next item pointer is invalid)
+ * (2)     + X -       (otherwise)
+ * (3) * 0 0   * 0 0       (no change)
+ * (4) + X 0   X 0 0       (shift)
+ * (5) * + X   + X -       (shift, add unknown)
+ *
+ * All other states cannot occur.
+ *
+ * Note:
+ * It would be possible to cache the status of the previous and
+ * next item pointers using the flags.
+ * ----------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/sdir.h"
+#include "access/skey.h"
+
+#include "storage/bufmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+
+#include "catalog/index.h"
+
+/* ----------------------------------------------------------------
+ * general access method routines
+ *
+ * All indexed access methods use an identical scan structure.
+ * We don't know how the various AMs do locking, however, so we don't
+ * do anything about that here.
+ *
+ * The intent is that an AM implementor will define a front-end routine
+ * that calls this one, to fill in the scan, and then does whatever kind
+ * of locking he wants.
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ *  RelationGetIndexScan -- Create and fill an IndexScanDesc.
+ *
+ * Allocates a scan descriptor with palloc, records the index relation
+ * and the key count, marks all six item pointers (previous/current/next
+ * for both the live position and the mark) invalid, allocates room for
+ * the scan keys when there are any, and finally hands the descriptor to
+ * index_rescan together with scanFromEnd and key.
+ *
+ * Parameters:
+ *     relation -- index relation for scan.
+ *     scanFromEnd -- passed through to index_rescan.
+ *     numberOfKeys -- count of scan keys; sizes the keyData array.
+ *     key -- passed through to index_rescan.
+ *
+ * Returns:
+ *     The newly allocated IndexScanDesc.
+ *
+ * Errors:
+ *     elog(WARN) when relation fails RelationIsValid.
+ *
+ * NOTE(review): earlier commentary said this bumps the relation's
+ * reference count; nothing in this routine does so directly --
+ * confirm whether that happens elsewhere.
+ * ----------------
+ */
+IndexScanDesc
+RelationGetIndexScan(Relation relation,
+                     bool scanFromEnd,
+                     uint16 numberOfKeys,
+                     ScanKey key)
+{
+    IndexScanDesc desc;
+
+    if (!RelationIsValid(relation)) {
+        elog(WARN, "RelationGetIndexScan: relation invalid");
+    }
+
+    desc = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
+
+    desc->relation = relation;
+    desc->opaque = NULL;
+    desc->numberOfKeys = numberOfKeys;
+
+    /* no position is known yet, neither live nor marked */
+    ItemPointerSetInvalid(&desc->previousItemData);
+    ItemPointerSetInvalid(&desc->currentItemData);
+    ItemPointerSetInvalid(&desc->nextItemData);
+    ItemPointerSetInvalid(&desc->previousMarkData);
+    ItemPointerSetInvalid(&desc->currentMarkData);
+    ItemPointerSetInvalid(&desc->nextMarkData);
+
+    /* key storage exists only when the scan actually has keys */
+    desc->keyData = (numberOfKeys > 0)
+        ? (ScanKey) palloc(sizeof(ScanKeyData) * numberOfKeys)
+        : NULL;
+
+    index_rescan(desc, scanFromEnd, key);
+
+    return desc;
+}
+
+/* ----------------
+ *  IndexScanRestart -- Restart an index scan.
+ *
+ * Resets the previous/current/next item pointers to invalid,
+ * recomputes the scan flags, records scanFromEnd in the descriptor
+ * and copies the caller's keys over the descriptor's key array.
+ *
+ * Flags after the call:
+ *     ScanUnmarked alone when the relation has zero blocks;
+ *     ScanUnmarked | ScanUncheckedPrevious when scanFromEnd is true;
+ *     ScanUnmarked | ScanUncheckedNext otherwise.
+ *
+ * This routine isn't used by any existing access method.  It's
+ * appropriate if relation level locks are what you want.
+ *
+ *  Returns:
+ * None.
+ *
+ *  Errors:
+ * elog(WARN) when scan fails IndexScanIsValid.
+ * ----------------
+ */
+void
+IndexScanRestart(IndexScanDesc scan,
+                 bool scanFromEnd,
+                 ScanKey key)
+{
+    int newFlags;
+
+    if (!IndexScanIsValid(scan)) {
+        elog(WARN, "IndexScanRestart: invalid scan");
+    }
+
+    ItemPointerSetInvalid(&scan->previousItemData);
+    ItemPointerSetInvalid(&scan->currentItemData);
+    ItemPointerSetInvalid(&scan->nextItemData);
+
+    /* every restarted scan is unmarked; a non-empty relation also   */
+    /* gets the "unchecked" bit for the direction we will move in    */
+    newFlags = ScanUnmarked;
+    if (RelationGetNumberOfBlocks(scan->relation) != 0) {
+        if (scanFromEnd) {
+            newFlags |= ScanUncheckedPrevious;
+        } else {
+            newFlags |= ScanUncheckedNext;
+        }
+    }
+    scan->flags = newFlags;
+
+    scan->scanFromEnd = (bool) scanFromEnd;
+
+    if (scan->numberOfKeys > 0) {
+        memmove(scan->keyData, key,
+                scan->numberOfKeys * sizeof(ScanKeyData));
+    }
+}
+
+/* ----------------
+ *  IndexScanEnd -- End an index scan.
+ *
+ * Releases the storage owned by a scan descriptor: the key array,
+ * when one was allocated, and then the descriptor itself.
+ *
+ * This routine is not used by any existing access method, but is
+ * suitable for use if you don't want to do sophisticated locking.
+ *
+ *  Returns:
+ * None.
+ *
+ *  Errors:
+ * elog(WARN) when scan fails IndexScanIsValid.
+ * ----------------
+ */
+void
+IndexScanEnd(IndexScanDesc scan)
+{
+    if (! IndexScanIsValid(scan)) {
+        elog(WARN, "IndexScanEnd: invalid scan");
+    }
+
+    /*
+     * RelationGetIndexScan pallocs keyData separately from the
+     * descriptor (or leaves it NULL when there are no keys), so it
+     * has to be released separately or it is leaked.
+     */
+    if (scan->keyData != NULL) {
+        pfree(scan->keyData);
+    }
+
+    pfree(scan);
+}
+
+/* ----------------
+ *  IndexScanMarkPosition -- Mark current position in a scan.
+ *
+ * If the previous (or else the next) item pointer is still flagged
+ * as unchecked, it is resolved first by fetching one tuple in that
+ * direction with index_getnext; a NULL result leaves that pointer
+ * invalid.  The previous/current/next item pointers are then copied
+ * into the corresponding mark fields and the flags are cleared.
+ *
+ * This routine isn't used by any existing access method, but is the
+ * one that AM implementors should use, if they don't want to do any
+ * special locking.  If relation-level locking is sufficient, this is
+ * the routine for you.
+ *
+ *  Returns:
+ * None.
+ * ----------------
+ */
+void
+IndexScanMarkPosition(IndexScanDesc scan)
+{
+    RetrieveIndexResult fetched;
+
+    if (scan->flags & ScanUncheckedPrevious) {
+        fetched = index_getnext(scan, BackwardScanDirection);
+
+        if (fetched == NULL) {
+            ItemPointerSetInvalid(&scan->previousItemData);
+        } else {
+            scan->previousItemData = fetched->index_iptr;
+        }
+    } else if (scan->flags & ScanUncheckedNext) {
+        fetched = index_getnext(scan, ForwardScanDirection);
+
+        if (fetched == NULL) {
+            ItemPointerSetInvalid(&scan->nextItemData);
+        } else {
+            scan->nextItemData = fetched->index_iptr;
+        }
+    }
+
+    /* snapshot the live position into the mark */
+    scan->previousMarkData = scan->previousItemData;
+    scan->currentMarkData = scan->currentItemData;
+    scan->nextMarkData = scan->nextItemData;
+
+    scan->flags = 0x0; /* XXX should have a symbolic name */
+}
+
+/* ----------------
+ *  IndexScanRestorePosition -- Restore position on a marked scan.
+ *
+ * Copies the marked previous/current/next item pointers back into the
+ * live position fields and clears the scan flags.
+ *
+ * This routine isn't used by any existing access method, but is the
+ * one that AM implementors should use if they don't want to do any
+ * special locking.  If relation-level locking is sufficient, then
+ * this is the one you want.
+ *
+ *  Returns:
+ * None.
+ *
+ *  Errors:
+ * elog(WARN) when the scan still carries the ScanUnmarked flag.
+ * ----------------
+ */
+void
+IndexScanRestorePosition(IndexScanDesc scan)
+{
+    if (scan->flags & ScanUnmarked) {
+        elog(WARN, "IndexScanRestorePosition: no mark to restore");
+    }
+
+    /* the three copies are independent of one another */
+    scan->nextItemData = scan->nextMarkData;
+    scan->currentItemData = scan->currentMarkData;
+    scan->previousItemData = scan->previousMarkData;
+
+    scan->flags = 0x0; /* XXX should have a symbolic name */
+}
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c

new file mode 100644 (file)

index 0000000..bffe3a4
--- /dev/null
+++ b/src/backend/access/index/indexam.c
@@ -0,0 +1,411 @@
+/*-------------------------------------------------------------------------
+ *
+ * indexam.c--
+ *    general index access method routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+ *
+ * INTERFACE ROUTINES
+ * index_open  - open an index relation by relationId
+ * index_openr     - open an index relation by name
+ * index_close     - close an index relation
+ * index_beginscan - start a scan of an index
+ * index_rescan    - restart a scan of an index
+ * index_endscan   - end a scan
+ * index_insert    - insert an index tuple into a relation
+ * index_delete    - delete an item from an index relation
+ * index_markpos   - mark a scan position
+ * index_restrpos  - restore a scan position
+ * index_getnext   - get the next tuple from a scan
+ * **  index_fetch - retrieve tuple with tid
+ * **  index_replace   - replace a tuple
+ * **  index_getattr   - get an attribute from an index tuple
+ * index_getprocid - get a support procedure id from the rel tuple
+ * 
+ * IndexScanIsValid - check index scan
+ *
+ * NOTES
+ * This file contains the index_ routines which used
+ * to be a scattered collection of stuff in access/genam.
+ *
+ * The ** routines: index_fetch, index_replace, and index_getattr
+ * have not yet been implemented.  They may not be needed.
+ *
+ * old comments
+ *     Scans are implemented as follows:
+ *
+ *     `0' represents an invalid item pointer.
+ *     `-' represents an unknown item pointer.
+ *     `X' represents a known item pointer.
+ *     `+' represents known or invalid item pointers.
+ *     `*' represents any item pointers.
+ *
+ *     State is represented by a triple of these symbols in the order of
+ *     previous, current, next.  Note that the case of reverse scans works
+ *     identically.
+ *
+ *     State   Result
+ *     (1) + + -   + 0 0       (if the next item pointer is invalid)
+ *     (2)     + X -       (otherwise)
+ *     (3) * 0 0   * 0 0       (no change)
+ *     (4) + X 0   X 0 0       (shift)
+ *     (5) * + X   + X -       (shift, add unknown)
+ *
+ *     All other states cannot occur.
+ *
+ *     Note: It would be possible to cache the status of the previous and
+ *       next item pointer using the flags.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/sdir.h"
+#include "access/skey.h"
+#include "access/funcindex.h"
+
+#include "storage/lmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+
+#include "catalog/index.h"
+
+#include "fmgr.h"
+
+/* ----------------
+ *   undefine macros we aren't going to use that would otherwise
+ *   get in our way..  delete is defined in c.h and the am's are
+ *   defined in heapam.h
+ *
+ *   These identifiers are passed below as arguments to
+ *   GET_REL_PROCEDURE / GET_SCAN_PROCEDURE, where they serve as
+ *   rd_am field names and are stringified into error messages, so
+ *   they must not be expanded as macros.
+ * ----------------
+ */
+#undef delete
+#undef aminsert
+#undef amdelete
+#undef ambeginscan
+#undef amrescan
+#undef amendscan
+#undef ammarkpos
+#undef amrestrpos
+#undef amgettuple
+
+/* ----------------------------------------------------------------
+ *         macros used in index_ routines
+ *
+ *  RELATION_CHECKS and GET_REL_PROCEDURE expect a local variable
+ *  named `relation'; SCAN_CHECKS and GET_SCAN_PROCEDURE expect a
+ *  local named `scan'.  The GET_ macros store the requested rd_am
+ *  field into a local named `procedure' and elog(WARN) when it fails
+ *  RegProcedureIsValid.  In GET_xxx_PROCEDURE(x,y), x is only used
+ *  to build the "index_<x>" prefix of the message and y is the
+ *  rd_am field name.
+ *
+ *  Each macro is wrapped in do { ... } while (0) so that it expands
+ *  to exactly one statement and stays correct inside an unbraced
+ *  if/else.  The field is selected with a plain member access
+ *  instead of token pasting, because gluing `->' onto an identifier
+ *  does not form a single valid preprocessing token.
+ * ----------------------------------------------------------------
+ */
+#define RELATION_CHECKS \
+    do { \
+        Assert(RelationIsValid(relation)); \
+        Assert(PointerIsValid(relation->rd_am)); \
+    } while (0)
+
+#define SCAN_CHECKS \
+    do { \
+        Assert(IndexScanIsValid(scan)); \
+        Assert(RelationIsValid(scan->relation)); \
+        Assert(PointerIsValid(scan->relation->rd_am)); \
+    } while (0)
+
+#define GET_REL_PROCEDURE(x,y) \
+    do { \
+        procedure = relation->rd_am->y; \
+        if (! RegProcedureIsValid(procedure)) \
+            elog(WARN, "index_%s: invalid %s regproc", \
+                 CppAsString(x), CppAsString(y)); \
+    } while (0)
+
+#define GET_SCAN_PROCEDURE(x,y) \
+    do { \
+        procedure = scan->relation->rd_am->y; \
+        if (! RegProcedureIsValid(procedure)) \
+            elog(WARN, "index_%s: invalid %s regproc", \
+                 CppAsString(x), CppAsString(y)); \
+    } while (0)
+     
+     
+/* ----------------------------------------------------------------
+ *        index_ interface functions
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * index_open - open an index relation by relationId
+ *
+ * Thin wrapper: the lookup is delegated entirely to
+ * RelationIdGetRelation and its result is returned unchanged.
+ * ----------------
+ */
+Relation
+index_open(Oid relationId)
+{
+    Relation opened;
+
+    opened = RelationIdGetRelation(relationId);
+    return opened;
+}
+
+/* ----------------
+ * index_openr - open an index relation by name
+ *
+ * Thin wrapper: the lookup is delegated entirely to
+ * RelationNameGetRelation and its result is returned unchanged.
+ * ----------------
+ */
+Relation
+index_openr(char *relationName)
+{
+    Relation opened;
+
+    opened = RelationNameGetRelation(relationName);
+    return opened;
+}
+
+/* ----------------
+ * index_close - close an index relation
+ *
+ * Thin wrapper around RelationClose; whatever that call returns is
+ * deliberately discarded.
+ * ----------------
+ */
+void
+index_close(Relation relation)
+{
+    (void) RelationClose(relation);
+}
+
+/* ----------------
+ * index_insert - insert an index tuple into a relation
+ *
+ * Looks up the access method's aminsert procedure for the relation
+ * and calls it through fmgr with the relation, the index tuple and a
+ * trailing NULL argument.  Whatever the procedure hands back is
+ * returned to the caller as the InsertIndexResult.
+ *
+ * History: there used to be a separate "general result" that was
+ * built from the access method's "specific result"; that layer was
+ * removed (-ay 3/6/95), so the access method's result is now
+ * returned directly.
+ * ----------------
+ */
+InsertIndexResult
+index_insert(Relation relation,
+             IndexTuple indexTuple)
+{
+    RegProcedure procedure;
+
+    RELATION_CHECKS;
+    GET_REL_PROCEDURE(insert,aminsert);
+
+    /* the am's insert proc does all the work */
+    return (InsertIndexResult)
+        fmgr(procedure, relation, indexTuple, NULL);
+}
+
+/* ----------------
+ * index_delete - delete an item from an index relation
+ *
+ * Looks up the access method's amdelete procedure and calls it
+ * through fmgr with the relation and the item pointer; the call's
+ * result is discarded.
+ * ----------------
+ */
+void
+index_delete(Relation relation, ItemPointer indexItem)
+{
+    RegProcedure procedure;
+
+    RELATION_CHECKS;
+    GET_REL_PROCEDURE(delete,amdelete);
+
+    /* the am's delete proc does all the work */
+    (void) fmgr(procedure, relation, indexItem);
+}
+
+/* ----------------
+ * index_beginscan - start a scan of an index
+ *
+ * Looks up the access method's ambeginscan procedure, applies
+ * RelationSetRIntentLock to the relation, and then calls the
+ * procedure through fmgr with (relation, scanFromEnd, numberOfKeys,
+ * key).  The procedure's result is returned as the scan descriptor.
+ * ----------------
+ */
+IndexScanDesc
+index_beginscan(Relation relation,
+                bool scanFromEnd,
+                uint16 numberOfKeys,
+                ScanKey key)
+{
+    RegProcedure procedure;
+
+    RELATION_CHECKS;
+    GET_REL_PROCEDURE(beginscan,ambeginscan);
+
+    RelationSetRIntentLock(relation);
+
+    return (IndexScanDesc)
+        fmgr(procedure, relation, scanFromEnd, numberOfKeys, key);
+}
+
+/* ----------------
+ * index_rescan  - restart a scan of an index
+ *
+ * Looks up the amrescan procedure of the scan's relation and calls
+ * it through fmgr with (scan, scanFromEnd, key); the call's result
+ * is discarded.
+ * ----------------
+ */
+void
+index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key)
+{
+    RegProcedure procedure;
+
+    SCAN_CHECKS;
+    GET_SCAN_PROCEDURE(rescan,amrescan);
+
+    /* the am's rescan proc does all the work */
+    (void) fmgr(procedure, scan, scanFromEnd, key);
+}
+
+/* ----------------
+ * index_endscan - end a scan
+ *
+ * Looks up the amendscan procedure of the scan's relation, calls it
+ * through fmgr with the scan, and afterwards applies
+ * RelationUnsetRIntentLock to scan->relation.
+ *
+ * Note that scan is still dereferenced after the access method's
+ * endscan procedure has run.
+ * ----------------
+ */
+void
+index_endscan(IndexScanDesc scan)
+{
+    RegProcedure procedure;
+
+    SCAN_CHECKS;
+    GET_SCAN_PROCEDURE(endscan,amendscan);
+
+    /* let the access method finish its part first */
+    (void) fmgr(procedure, scan);
+
+    RelationUnsetRIntentLock(scan->relation);
+}
+
+/* ----------------
+ * index_markpos  - mark a scan position
+ *
+ * Looks up the ammarkpos procedure of the scan's relation and calls
+ * it through fmgr with the scan; the call's result is discarded.
+ * ----------------
+ */
+void
+index_markpos(IndexScanDesc scan)
+{
+    RegProcedure procedure;
+
+    SCAN_CHECKS;
+    GET_SCAN_PROCEDURE(markpos,ammarkpos);
+
+    /* the am's markpos proc does all the work */
+    (void) fmgr(procedure, scan);
+}
+
+/* ----------------
+ * index_restrpos  - restore a scan position
+ *
+ * Looks up the amrestrpos procedure of the scan's relation and calls
+ * it through fmgr with the scan; the call's result is discarded.
+ * ----------------
+ */
+void
+index_restrpos(IndexScanDesc scan)
+{
+    RegProcedure procedure;
+
+    SCAN_CHECKS;
+    GET_SCAN_PROCEDURE(restrpos,amrestrpos);
+
+    /* the am's restrpos proc does all the work */
+    (void) fmgr(procedure, scan);
+}
+
+/* ----------------
+ * index_getnext - get the next tuple from a scan
+ *
+ * Looks up the amgettuple procedure of the scan's relation and calls
+ * it through fmgr with (scan, direction).  The procedure's result is
+ * returned as the RetrieveIndexResult.
+ *
+ *     A RetrieveIndexResult is a index tuple/heap tuple pair
+ * ----------------
+ */
+RetrieveIndexResult
+index_getnext(IndexScanDesc scan,
+              ScanDirection direction)
+{
+    RegProcedure procedure;
+
+    SCAN_CHECKS;
+    GET_SCAN_PROCEDURE(getnext,amgettuple);
+
+    /* the am's gettuple proc does all the work */
+    return (RetrieveIndexResult)
+        fmgr(procedure, scan, direction);
+}
+
+/* ----------------
+ * index_getprocid
+ *
+ * Some indexed access methods may require support routines that are
+ * not in the operator class/operator model imposed by pg_am.  These
+ * access methods may store the OIDs of registered procedures they
+ * need in pg_amproc.  The general index code doesn't know anything
+ * about the routines involved; it just keeps an ordered list of them
+ * for each attribute on which an index is defined.
+ *
+ * This routine returns the requested procedure OID for a particular
+ * indexed attribute.  Both attnum and procnum are 1-based; the entry
+ * is read from irel->rd_support at
+ *     relnatts * (procnum - 1) + (attnum - 1)
+ * No range check is made on either number.
+ * ----------------
+ */
+RegProcedure
+index_getprocid(Relation irel,
+                AttrNumber attnum,
+                uint16 procnum)
+{
+    int nattrs = irel->rd_rel->relnatts;
+    RegProcedure *support = irel->rd_support;
+    int slot;
+
+    Assert(support != NULL);
+
+    slot = (nattrs * (procnum - 1)) + (attnum - 1);
+
+    return support[slot];
+}
+
+/* ----------------
+ * GetIndexValue - fetch the value to be indexed for one index column
+ *
+ * Plain index (fInfo is not a valid pointer, or carries no procedure
+ * OID): returns heap attribute attrNums[attOff] of the tuple, with
+ * *attNull set by heap_getattr.
+ *
+ * Functional index (fInfo carries a procedure OID): fetches the
+ * first FIgetnArgs(fInfo) attributes listed in attrNums, calls the
+ * procedure on them through fmgr_array_args, and returns its result.
+ * *attNull then reports the null flag handed to the function call;
+ * it was previously forced to FALSE and the flag was never read.
+ * attOff is not used on this path.
+ *
+ * Parameters:
+ *     tuple, hTupDesc, buffer -- passed through to heap_getattr.
+ *     attOff -- position in attrNums of the attribute to fetch.
+ *     attrNums -- heap attribute numbers used by the index.
+ *     fInfo -- functional index description, may be invalid.
+ *     attNull -- output: whether the returned value is null.
+ *
+ * NOTE(review): null flags of the individual function arguments are
+ * still overwritten on each loop iteration and not acted upon --
+ * confirm whether a null argument should short-circuit the call.
+ * ----------------
+ */
+Datum
+GetIndexValue(HeapTuple tuple,
+              TupleDesc hTupDesc,
+              int attOff,
+              AttrNumber attrNums[],
+              FuncIndexInfo *fInfo,
+              bool *attNull,
+              Buffer buffer)
+{
+    Datum returnVal;
+
+    if (PointerIsValid(fInfo) && FIgetProcOid(fInfo) != InvalidOid) {
+        int i;
+        /* start from "not null" in case the callee never writes it */
+        bool isNull = FALSE;
+        Datum *attData = (Datum *)palloc(FIgetnArgs(fInfo)*sizeof(Datum));
+
+        for (i = 0; i < FIgetnArgs(fInfo); i++) {
+            attData[i] = (Datum) heap_getattr(tuple,
+                                              buffer,
+                                              attrNums[i],
+                                              hTupDesc,
+                                              attNull);
+        }
+        returnVal = (Datum)fmgr_array_args(FIgetProcOid(fInfo),
+                                           FIgetnArgs(fInfo),
+                                           (char **) attData,
+                                           &isNull);
+        pfree(attData);
+        /* propagate the function's null flag instead of dropping it */
+        *attNull = isNull;
+    } else {
+        returnVal = (Datum) heap_getattr(tuple, buffer, attrNums[attOff],
+                                         hTupDesc, attNull);
+    }
+    return returnVal;
+}
diff --git a/src/backend/access/index/istrat.c b/src/backend/access/index/istrat.c

new file mode 100644 (file)

index 0000000..602d2bd
--- /dev/null
+++ b/src/backend/access/index/istrat.c
@@ -0,0 +1,679 @@
+/*-------------------------------------------------------------------------
+ *
+ * istrat.c--
+ *    index scan strategy manipulation code and index strategy manipulation
+ *    operator code.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/index/Attic/istrat.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/heapam.h"
+#include "access/istrat.h"
+#include "access/itup.h"   /* for MaxIndexAttributeNumber */
+#include "access/skey.h"
+#include "utils/tqual.h"   /* for NowTimeQual */
+
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+
+/* ----------------------------------------------------------------
+ *            misc strategy support routines
+ * ----------------------------------------------------------------
+ */
+     
+/* 
+ * StrategyNumberIsValid
+ * StrategyNumberIsInBounds
+ * StrategyMapIsValid
+ * StrategyTransformMapIsValid
+ * IndexStrategyIsValid
+ *
+ *     ... are now macros in istrat.h -cim 4/27/91
+ */
+     
+/*
+ * StrategyMapGetScanKeyEntry --
+ * Returns the address of the scan key entry that a strategy map
+ * holds for the given strategy number.  Strategy numbers are
+ * 1-based, so number n selects entry n - 1.
+ *
+ * Note:
+ * The map and the strategy number are only checked by assertions.
+ * Bounds checking should be done outside this routine.
+ */
+ScanKey
+StrategyMapGetScanKeyEntry(StrategyMap map,
+                           StrategyNumber strategyNumber)
+{
+    ScanKey slot;
+
+    Assert(StrategyMapIsValid(map));
+    Assert(StrategyNumberIsValid(strategyNumber));
+
+    slot = &map->entry[strategyNumber - 1];
+    return slot;
+}
+
+/*
+ * IndexStrategyGetStrategyMap --
+ * Returns the strategy map belonging to one attribute of an index
+ * strategy.  Attribute numbers are 1-based; the map for attribute
+ * n starts at element AMStrategies(maxStrategyNum) * (n - 1) of
+ * strategyMapData.
+ *
+ * Note:
+ * The index strategy, the strategy count and the attribute number
+ * are only checked by assertions.
+ * Bounds checking should be done outside this routine.
+ */
+StrategyMap
+IndexStrategyGetStrategyMap(IndexStrategy indexStrategy,
+                            StrategyNumber maxStrategyNum,
+                            AttrNumber attrNum)
+{
+    StrategyNumber perAttribute;
+
+    Assert(IndexStrategyIsValid(indexStrategy));
+    Assert(StrategyNumberIsValid(maxStrategyNum));
+    Assert(AttributeNumberIsValid(attrNum));
+
+    perAttribute = AMStrategies(maxStrategyNum);   /* XXX */
+
+    return &indexStrategy->strategyMapData[perAttribute * (attrNum - 1)];
+}
+
+/*
+ * AttributeNumberGetIndexStrategySize --
+ * Computes the size in bytes of an index strategy: one ScanKeyData
+ * for each of AMStrategies(maxStrategyNumber) strategies, for each
+ * of maxAttributeNumber attributes.
+ */
+Size
+AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber,
+                                    StrategyNumber maxStrategyNumber)
+{
+    StrategyNumber perAttribute;
+
+    perAttribute = AMStrategies(maxStrategyNumber);    /* XXX */
+
+    return maxAttributeNumber * perAttribute * sizeof (ScanKeyData);
+}
+
+/* 
+ * StrategyTransformMapIsValid is now a macro in istrat.h -cim 4/27/91
+ */
+
+/* ----------------
+ * StrategyOperatorIsValid
+ *
+ * True iff the operator pointer is valid, its strategy number is
+ * within bounds for maxStrategy, and its flags contain no bits other
+ * than SK_NEGATE and SK_COMMUTE.
+ * ----------------
+ */
+bool
+StrategyOperatorIsValid(StrategyOperator operator,
+                        StrategyNumber maxStrategy)
+{
+    if (!PointerIsValid(operator))
+        return (bool) 0;
+
+    if (!StrategyNumberIsInBounds(operator->strategy, maxStrategy))
+        return (bool) 0;
+
+    /* any flag bit outside the two known ones makes it invalid */
+    if (operator->flags & ~(SK_NEGATE | SK_COMMUTE))
+        return (bool) 0;
+
+    return (bool) 1;
+}
+
+/* ----------------
+ * StrategyTermIsValid
+ *
+ * True iff the term pointer is valid, the term has a non-zero
+ * degree, and every one of its `degree' operators passes
+ * StrategyOperatorIsValid for maxStrategy.
+ * ----------------
+ */
+bool
+StrategyTermIsValid(StrategyTerm term,
+                    StrategyNumber maxStrategy)
+{
+    Index opno;
+
+    if (!PointerIsValid(term))
+        return false;
+    if (term->degree == 0)
+        return false;
+
+    opno = 0;
+    while (opno < term->degree) {
+        if (!StrategyOperatorIsValid(&term->operatorData[opno],
+                                     maxStrategy))
+            return false;
+        opno += 1;
+    }
+
+    return true;
+}
+
+/* ----------------
+ * StrategyExpressionIsValid
+ *
+ * An invalid (absent) expression pointer counts as valid.  Otherwise
+ * the first term must be a valid term, and the term array is walked
+ * from the second element until a term fails StrategyTermIsValid;
+ * the expression is valid iff that stopping element is an invalid
+ * pointer, i.e. the list terminator, rather than a malformed term.
+ * ----------------
+ */
+bool
+StrategyExpressionIsValid(StrategyExpression expression,
+                          StrategyNumber maxStrategy)
+{
+    StrategyTerm *cursor;
+
+    if (!PointerIsValid(expression))
+        return true;
+
+    if (!StrategyTermIsValid(expression->term[0], maxStrategy))
+        return false;
+
+    /* skip over every well-formed term after the first */
+    for (cursor = &expression->term[1];
+         StrategyTermIsValid(*cursor, maxStrategy);
+         cursor += 1)
+        ;
+
+    return (bool) (! PointerIsValid(*cursor));
+}
+
+/* ----------------
+ * StrategyEvaluationIsValid
+ *
+ * True iff the evaluation pointer is valid, its maxStrategy is a
+ * valid strategy number, its negate, commute and negate-commute
+ * transform maps are all valid, and each of its first maxStrategy
+ * expressions passes StrategyExpressionIsValid.
+ * ----------------
+ */
+bool
+StrategyEvaluationIsValid(StrategyEvaluation evaluation)
+{
+    Index exprno;
+
+    if (!PointerIsValid(evaluation))
+        return false;
+    if (!StrategyNumberIsValid(evaluation->maxStrategy))
+        return false;
+    if (!StrategyTransformMapIsValid(evaluation->negateTransform))
+        return false;
+    if (!StrategyTransformMapIsValid(evaluation->commuteTransform))
+        return false;
+    if (!StrategyTransformMapIsValid(evaluation->negateCommuteTransform))
+        return false;
+
+    for (exprno = 0; exprno < evaluation->maxStrategy; exprno++) {
+        if (!StrategyExpressionIsValid(evaluation->expression[exprno],
+                                       evaluation->maxStrategy))
+            return false;
+    }
+
+    return true;
+}
+
+/* ----------------
+ * StrategyTermEvaluate
+ *
+ * Evaluates a term as the conjunction of its operators applied to
+ * (left, right).  For each operator the scan key entry for its
+ * strategy is taken from the map and the entry's procedure is
+ * called; the operator's flags XORed with the entry's flags decide
+ * whether the arguments are swapped (SK_COMMUTE) and/or the result
+ * is negated (SK_NEGATE).  Evaluation stops at the first operator
+ * that yields false.
+ *
+ * Returns:
+ * true iff every operator yields true.  A term of degree zero has
+ * nothing to evaluate and yields false (the result used to be
+ * read uninitialized in that case).
+ *
+ * Errors:
+ * elog(FATAL) when the combined flags hold bits other than
+ * SK_NEGATE and SK_COMMUTE.
+ * ----------------
+ */
+static bool
+StrategyTermEvaluate(StrategyTerm term,
+                     StrategyMap map,
+                     Datum left,
+                     Datum right)
+{
+    Index index;
+    long tmpres = 0;        /* defined even on the "impossible" path */
+    bool result = false;    /* defined even when the loop never runs */
+    StrategyOperator operator;
+    ScanKey entry;
+
+    for (index = 0, operator = &term->operatorData[0];
+         index < term->degree; index += 1, operator += 1) {
+
+        /* strategy numbers are 1-based */
+        entry = &map->entry[operator->strategy - 1];
+
+        Assert(RegProcedureIsValid(entry->sk_procedure));
+
+        switch (operator->flags ^ entry->sk_flags) {
+        case 0x0:
+            tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+                                      left, right);
+            break;
+
+        case SK_NEGATE:
+            tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+                                       left, right);
+            break;
+
+        case SK_COMMUTE:
+            tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+                                      right, left);
+            break;
+
+        case SK_NEGATE | SK_COMMUTE:
+            tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+                                       right, left);
+            break;
+
+        default:
+            elog(FATAL, "StrategyTermEvaluate: impossible case %d",
+                 operator->flags ^ entry->sk_flags);
+        }
+
+        result = (bool) tmpres;
+        if (!result)
+            return result;
+    }
+
+    return result;
+}
+
+
+/* ----------------
+ * RelationGetStrategy
+ *
+ * Finds the strategy number under which `procedure' is registered
+ * for one attribute of an index relation.
+ *
+ * The attribute's strategy map is searched for the first entry whose
+ * sk_procedure equals `procedure'; InvalidStrategy is returned when
+ * none of the first maxStrategy entries matches.  When the matching
+ * entry carries SK_NEGATE and/or SK_COMMUTE, the strategy number is
+ * translated through the evaluation's negate, commute or
+ * negate-commute transform before it is returned.
+ *
+ * Errors:
+ * elog(FATAL) on flag combinations outside SK_NEGATE/SK_COMMUTE;
+ * elog(WARN) when the translated strategy is neither within bounds
+ * nor a valid strategy number.
+ * ----------------
+ */
+StrategyNumber
+RelationGetStrategy(Relation relation,
+                    AttrNumber attributeNumber,
+                    StrategyEvaluation evaluation,
+                    RegProcedure procedure)
+{
+    StrategyNumber strategy;
+    StrategyMap strategyMap;
+    ScanKey entry;
+    Index slot;
+    int numattrs;
+
+    Assert(RelationIsValid(relation));
+    numattrs = RelationGetNumberOfAttributes(relation);
+
+    Assert(relation->rd_rel->relkind == RELKIND_INDEX);    /* XXX use accessor */
+    Assert(AttributeNumberIsValid(attributeNumber));
+    Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs));
+
+    Assert(StrategyEvaluationIsValid(evaluation));
+    Assert(RegProcedureIsValid(procedure));
+
+    strategyMap =
+        IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
+                                    evaluation->maxStrategy,
+                                    attributeNumber);
+
+    /* get a strategy number for the procedure ignoring flags for now */
+    slot = 0;
+    while (slot < evaluation->maxStrategy &&
+           strategyMap->entry[slot].sk_procedure != procedure) {
+        slot += 1;
+    }
+
+    if (slot == evaluation->maxStrategy) {
+        return InvalidStrategy;
+    }
+
+    strategy = 1 + slot;
+    entry = StrategyMapGetScanKeyEntry(strategyMap, strategy);
+
+    Assert(!(entry->sk_flags & ~(SK_NEGATE | SK_COMMUTE)));
+
+    /* translate the number when the entry is stored negated/commuted */
+    switch (entry->sk_flags & (SK_NEGATE | SK_COMMUTE)) {
+    case 0x0:
+        return strategy;
+
+    case SK_NEGATE:
+        strategy = evaluation->negateTransform->strategy[strategy - 1];
+        break;
+
+    case SK_COMMUTE:
+        strategy = evaluation->commuteTransform->strategy[strategy - 1];
+        break;
+
+    case SK_NEGATE | SK_COMMUTE:
+        strategy = evaluation->negateCommuteTransform->strategy[strategy - 1];
+        break;
+
+    default:
+        elog(FATAL, "RelationGetStrategy: impossible case %d", entry->sk_flags);
+    }
+
+    if (! StrategyNumberIsInBounds(strategy, evaluation->maxStrategy) &&
+        ! StrategyNumberIsValid(strategy)) {
+        elog(WARN, "RelationGetStrategy: corrupted evaluation");
+    }
+
+    return strategy;
+}
+
+/* ----------------
+ * RelationInvokeStrategy
+ *
+ * Evaluates strategy `strategy' on (left, right) for one attribute
+ * of an index relation, using whichever registered procedure can
+ * express it.  The attempts are made in this order, and the first
+ * one that finds a valid procedure decides the result:
+ *
+ *   1. the procedure registered for the strategy itself;
+ *   2. the procedure of the strategy's negateTransform, negated;
+ *   3. the procedure of the strategy's commuteTransform, commuted;
+ *   4. the procedure of the strategy's negateCommuteTransform,
+ *      negated and commuted;
+ *   5. the first term of the strategy's expression (if any) for
+ *      which every operator has a valid procedure.
+ *
+ * Transforms are skipped when they map the strategy to itself or to
+ * an invalid strategy number.
+ *
+ * Returns:
+ * the result of StrategyTermEvaluate for the chosen term.
+ *
+ * Errors:
+ * elog(WARN) when none of the attempts applies.
+ * ----------------
+ */
+bool       /* XXX someday, this may return Datum */
+RelationInvokeStrategy(Relation relation,
+              StrategyEvaluation evaluation,
+              AttrNumber attributeNumber,
+              StrategyNumber strategy,
+              Datum left,
+              Datum right)
+{
+    StrategyNumber newStrategy;
+    StrategyMap        strategyMap;
+    ScanKey        entry;
+    StrategyTermData   termData;
+    int            numattrs;
+    
+    Assert(RelationIsValid(relation));
+    Assert(relation->rd_rel->relkind == RELKIND_INDEX);    /* XXX use accessor */
+    numattrs = RelationGetNumberOfAttributes(relation);
+    
+    Assert(StrategyEvaluationIsValid(evaluation));
+    Assert(AttributeNumberIsValid(attributeNumber));
+    Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs));
+
+    Assert(StrategyNumberIsInBounds(strategy, evaluation->maxStrategy));
+    
+    /* attempts 1-4 all evaluate a one-operator term built in termData */
+    termData.degree = 1;
+    
+    strategyMap =
+   IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
+                   evaluation->maxStrategy,
+                   attributeNumber);
+    
+    entry = StrategyMapGetScanKeyEntry(strategyMap, strategy);
+    
+    /* attempt 1: the strategy has a procedure of its own */
+    if (RegProcedureIsValid(entry->sk_procedure)) {
+   termData.operatorData[0].strategy = strategy;
+   termData.operatorData[0].flags = 0x0;
+   
+   return
+       StrategyTermEvaluate(&termData, strategyMap, left, right);
+    }
+    
+    
+    /* attempt 2: negate the result of the negate-transformed strategy */
+    newStrategy = evaluation->negateTransform->strategy[strategy - 1];
+    if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
+   
+   entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
+   
+   if (RegProcedureIsValid(entry->sk_procedure)) {
+       termData.operatorData[0].strategy = newStrategy;
+       termData.operatorData[0].flags = SK_NEGATE;
+       
+       return
+       StrategyTermEvaluate(&termData, strategyMap, left, right);
+   }
+    }
+    
+    /* attempt 3: swap the arguments of the commute-transformed strategy */
+    newStrategy = evaluation->commuteTransform->strategy[strategy - 1];
+    if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
+   
+   entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
+   
+   if (RegProcedureIsValid(entry->sk_procedure)) {
+       termData.operatorData[0].strategy = newStrategy;
+       termData.operatorData[0].flags = SK_COMMUTE;
+       
+       return
+       StrategyTermEvaluate(&termData, strategyMap, left, right);
+   }
+    }
+    
+    /* attempt 4: both negate and swap, via the negate-commute transform */
+    newStrategy = evaluation->negateCommuteTransform->strategy[strategy - 1];
+    if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
+   
+   entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
+   
+   if (RegProcedureIsValid(entry->sk_procedure)) {
+       termData.operatorData[0].strategy = newStrategy;
+       termData.operatorData[0].flags = SK_NEGATE | SK_COMMUTE;
+       
+       return
+       StrategyTermEvaluate(&termData, strategyMap, left, right);
+   }
+    }
+    
+    /* attempt 5: walk the expression's terms up to the invalid-pointer end */
+    if (PointerIsValid(evaluation->expression[strategy - 1])) {
+   StrategyTerm        *termP;
+   
+   termP = &evaluation->expression[strategy - 1]->term[0];
+   while (PointerIsValid(*termP)) {
+       Index   index;
+       
+       /* stop early at the first operator that lacks a procedure */
+       for (index = 0; index < (*termP)->degree; index += 1) {
+       entry = StrategyMapGetScanKeyEntry(strategyMap,
+                          (*termP)->operatorData[index].strategy);
+       
+       if (! RegProcedureIsValid(entry->sk_procedure)) {
+           break;
+       }
+       }
+       
+       /* the loop ran to completion: every operator is usable */
+       if (index == (*termP)->degree) {
+       return
+           StrategyTermEvaluate(*termP, strategyMap, left, right);
+       }
+       
+       termP += 1;
+   }
+    }
+    
+    elog(WARN, "RelationInvokeStrategy: cannot evaluate strategy %d",
+    strategy);
+
+     /* not reached, just to make compiler happy */
+     return FALSE; 
+
+
+}
+
+/* ----------------
+ * OperatorRelationFillScanKeyEntry
+ *
+ *  Scans operatorRelation for the tuple whose object id equals
+ *  operatorObjectId and fills in the caller-supplied scan key entry
+ *  from it: sk_flags is cleared, sk_procedure is set to the operator's
+ *  oprcode, and sk_func/sk_nargs are filled in by fmgr_info().
+ *
+ *  Reports elog(WARN) when no tuple is found for the operator, or when
+ *  the operator's procedure is not valid.  The procedure is validated
+ *  before it is handed to fmgr_info(), so an operator without a
+ *  procedure is reported by the message below instead of being passed
+ *  on as an invalid procedure id.
+ * ----------------
+ */
+static void
+OperatorRelationFillScanKeyEntry(Relation operatorRelation,
+                Oid operatorObjectId,
+                ScanKey entry)
+{
+    HeapScanDesc   scan;
+    ScanKeyData    scanKeyData;
+    HeapTuple      tuple;
+
+    /* one-key scan: object id attribute == operatorObjectId */
+    ScanKeyEntryInitialize(&scanKeyData, 0,
+              ObjectIdAttributeNumber,
+              ObjectIdEqualRegProcedure,
+              ObjectIdGetDatum(operatorObjectId));
+
+    scan = heap_beginscan(operatorRelation, false, NowTimeQual,
+             1, &scanKeyData);
+
+    tuple = heap_getnext(scan, false, (Buffer *)NULL);
+    if (! HeapTupleIsValid(tuple)) {
+        /* cast matches the %lu conversion regardless of the width of Oid */
+        elog(WARN, "OperatorRelationFillScanKeyEntry: unknown operator %lu",
+             (unsigned long) operatorObjectId);
+    }
+
+    entry->sk_flags = 0;
+    entry->sk_procedure =
+        ((OperatorTupleForm) GETSTRUCT(tuple))->oprcode;
+
+    /* validate first: never look up function info for an invalid procedure */
+    if (! RegProcedureIsValid(entry->sk_procedure)) {
+        elog(WARN,
+             "OperatorRelationFillScanKeyEntry: no procedure for operator %lu",
+             (unsigned long) operatorObjectId);
+    }
+
+    fmgr_info(entry->sk_procedure, &entry->sk_func, &entry->sk_nargs);
+
+    heap_endscan(scan);
+}
+
+
+/*
+ * IndexSupportInitialize --
+ * Initializes an index strategy and associated support procedures.
+ *
+ * Reads the pg_index tuple for indexObjectId to collect the operator
+ * class of each index attribute, then:
+ *
+ *   - when maxSupportNumber > 0, fills indexSupport[] from the access
+ *     method procedure relation.  The slots of attribute N start at
+ *     indexSupport[(N - 1) * maxSupportNumber]; each slot is first set
+ *     to InvalidOid and then overwritten for every matching tuple.
+ *
+ *   - fills the strategy map of every attribute in indexStrategy from
+ *     the access method operator relation.  Every entry is first marked
+ *     illegal; entries that have an operator are then filled in.
+ *
+ * Reports elog(WARN) when the pg_index tuple cannot be found or its
+ * first key is invalid.
+ *
+ * NOTE(review): operatorClassObjectId[] has MaxIndexAttributeNumber
+ * slots and is indexed up to maxAttributeNumber - 1; callers are assumed
+ * to pass maxAttributeNumber <= MaxIndexAttributeNumber -- confirm.
+ */
+void
+IndexSupportInitialize(IndexStrategy indexStrategy,
+              RegProcedure *indexSupport,
+              Oid indexObjectId,
+              Oid accessMethodObjectId,
+              StrategyNumber maxStrategyNumber,
+              StrategyNumber maxSupportNumber,
+              AttrNumber maxAttributeNumber)
+{
+    Relation       relation;
+    Relation       operatorRelation;
+    HeapScanDesc   scan;
+    HeapTuple      tuple;
+    ScanKeyData    entry[2];
+    StrategyMap    map;
+    AttrNumber     attributeNumber;
+    int            attributeIndex;
+    Oid            operatorClassObjectId[ MaxIndexAttributeNumber ];
+
+    maxStrategyNumber = AMStrategies(maxStrategyNumber);
+
+    /*
+     * The collection loop below stops at the first invalid key, but the
+     * later loops read one class per attribute up to maxAttributeNumber.
+     * Start every slot at InvalidOid so those reads never see
+     * uninitialized stack contents.
+     */
+    for (attributeIndex = 0; attributeIndex < MaxIndexAttributeNumber;
+         attributeIndex++)
+        operatorClassObjectId[attributeIndex] = InvalidOid;
+
+    ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_index_indexrelid,
+              ObjectIdEqualRegProcedure,
+              ObjectIdGetDatum(indexObjectId));
+
+    relation = heap_openr(IndexRelationName);
+    scan = heap_beginscan(relation, false, NowTimeQual, 1, entry);
+    tuple = heap_getnext(scan, 0, (Buffer *)NULL);
+    if (! HeapTupleIsValid(tuple))
+        elog(WARN, "IndexSupportInitialize: corrupted catalogs");
+
+    /*
+     * XXX note that the following assumes the INDEX tuple is well formed and
+     * that the key[] and class[] are 0 terminated.
+     */
+    for (attributeIndex=0; attributeIndex<maxAttributeNumber; attributeIndex++)
+    {
+        IndexTupleForm  iform;
+
+        iform = (IndexTupleForm) GETSTRUCT(tuple);
+
+        if (!OidIsValid(iform->indkey[attributeIndex])) {
+            if (attributeIndex == 0) {
+                elog(WARN, "IndexSupportInitialize: no pg_index tuple");
+            }
+            break;
+        }
+
+        operatorClassObjectId[attributeIndex]
+            = iform->indclass[attributeIndex];
+    }
+
+    heap_endscan(scan);
+    heap_close(relation);
+
+    /* if support routines exist for this access method, load them */
+    if (maxSupportNumber > 0) {
+
+        ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_amproc_amid,
+                       ObjectIdEqualRegProcedure,
+                       ObjectIdGetDatum(accessMethodObjectId));
+
+        /* second key's argument is set per attribute inside the loop */
+        ScanKeyEntryInitialize(&entry[1], 0, Anum_pg_amproc_amopclaid,
+                       ObjectIdEqualRegProcedure, 0);
+
+        relation = heap_openr(AccessMethodProcedureRelationName);
+
+        for (attributeNumber = maxAttributeNumber; attributeNumber > 0;
+             attributeNumber--) {
+
+            int16           support;
+            Form_pg_amproc  form;
+            RegProcedure    *loc;
+
+            /* first support slot belonging to this attribute */
+            loc = &indexSupport[((attributeNumber - 1) * maxSupportNumber)];
+
+            for (support = maxSupportNumber; --support >= 0; ) {
+                loc[support] = InvalidOid;
+            }
+
+            entry[1].sk_argument =
+                ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]);
+
+            scan = heap_beginscan(relation, false, NowTimeQual, 2, entry);
+
+            while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
+                   HeapTupleIsValid(tuple)) {
+
+                form = (Form_pg_amproc) GETSTRUCT(tuple);
+                /* amprocnum is 1-based; slots are 0-based */
+                loc[(form->amprocnum - 1)] = form->amproc;
+            }
+
+            heap_endscan(scan);
+        }
+        heap_close(relation);
+    }
+
+    ScanKeyEntryInitialize(&entry[0], 0,
+              Anum_pg_amop_amopid,
+              ObjectIdEqualRegProcedure,
+              ObjectIdGetDatum(accessMethodObjectId));
+
+    /* second key's argument is set per attribute inside the loop */
+    ScanKeyEntryInitialize(&entry[1], 0,
+              Anum_pg_amop_amopclaid,
+              ObjectIdEqualRegProcedure, 0);
+
+    relation = heap_openr(AccessMethodOperatorRelationName);
+    operatorRelation = heap_openr(OperatorRelationName);
+
+    for (attributeNumber = maxAttributeNumber; attributeNumber > 0;
+         attributeNumber--) {
+
+        StrategyNumber  strategy;
+
+        entry[1].sk_argument =
+            ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]);
+
+        map = IndexStrategyGetStrategyMap(indexStrategy,
+                          maxStrategyNumber,
+                          attributeNumber);
+
+        /* mark every strategy illegal until an operator is found for it */
+        for (strategy = 1; strategy <= maxStrategyNumber; strategy++)
+            ScanKeyEntrySetIllegal(StrategyMapGetScanKeyEntry(map, strategy));
+
+        scan = heap_beginscan(relation, false, NowTimeQual, 2, entry);
+
+        while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
+               HeapTupleIsValid(tuple)) {
+            Form_pg_amop form;
+
+            form = (Form_pg_amop) GETSTRUCT(tuple);
+
+            OperatorRelationFillScanKeyEntry(operatorRelation,
+                             form->amopopr,
+                             StrategyMapGetScanKeyEntry(map, form->amopstrategy));
+        }
+
+        heap_endscan(scan);
+    }
+
+    heap_close(operatorRelation);
+    heap_close(relation);
+}
+
+/* ----------------
+ * IndexStrategyDisplay
+ *
+ *  Debugging aid, compiled only when ISTRATDEBUG is defined.  For every
+ *  attribute 1..numberOfAttributes and every strategy
+ *  1..AMStrategies(numberOfStrategies) it prints the attribute number,
+ *  the strategy number and the sk_procedure stored in the strategy map
+ *  (in hex and in decimal).
+ *
+ *  Always returns 0.  The function is declared int but previously fell
+ *  off its end without returning a value.
+ *  NOTE(review): no caller is visible here; confirm nothing relies on a
+ *  particular return value.
+ * ----------------
+ */
+#ifdef ISTRATDEBUG
+int
+IndexStrategyDisplay(IndexStrategy indexStrategy,
+            StrategyNumber numberOfStrategies,
+            int numberOfAttributes)
+{
+    StrategyMap    strategyMap;
+    AttrNumber     attributeNumber;
+    StrategyNumber strategyNumber;
+
+    for (attributeNumber = 1; attributeNumber <= numberOfAttributes;
+         attributeNumber += 1) {
+
+        strategyMap = IndexStrategyGetStrategyMap(indexStrategy,
+                              numberOfStrategies,
+                              attributeNumber);
+
+        for (strategyNumber = 1;
+             strategyNumber <= AMStrategies(numberOfStrategies);
+             strategyNumber += 1) {
+
+            /* entry[] is 0-based, strategy numbers are 1-based */
+            printf(":att %d\t:str %d\t:opr 0x%x(%d)\n",
+               attributeNumber, strategyNumber,
+               strategyMap->entry[strategyNumber - 1].sk_procedure,
+               strategyMap->entry[strategyNumber - 1].sk_procedure);
+        }
+    }
+
+    return 0;
+}
+#endif /* defined(ISTRATDEBUG) */
+
+
diff --git a/src/backend/access/iqual.h b/src/backend/access/iqual.h

new file mode 100644 (file)

index 0000000..5fab98a
--- /dev/null
+++ b/src/backend/access/iqual.h
@@ -0,0 +1,32 @@
+/*-------------------------------------------------------------------------
+ *
+ * iqual.h--
+ *    Index scan key qualification definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: iqual.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    IQUAL_H
+#define IQUAL_H
+
+#include "c.h"
+
+#include "storage/itemid.h"
+#include "utils/rel.h"
+#include "access/skey.h"
+
+/* ----------------
+ * index tuple qualification support
+ * ----------------
+ */
+
+extern int NIndexTupleProcessed;
+
+extern bool index_keytest(IndexTuple tuple, TupleDesc tupdesc,
+             int scanKeySize, ScanKey key);
+
+#endif /* IQUAL_H */
diff --git a/src/backend/access/istrat.h b/src/backend/access/istrat.h

new file mode 100644 (file)

index 0000000..201e70e
--- /dev/null
+++ b/src/backend/access/istrat.h
@@ -0,0 +1,80 @@
+/*-------------------------------------------------------------------------
+ *
+ * istrat.h--
+ *    POSTGRES index strategy definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: istrat.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    ISTRAT_H
+#define ISTRAT_H
+
+#include "postgres.h"
+#include "access/attnum.h"
+#include "access/skey.h"
+#include "access/strat.h"
+#include "utils/rel.h"     /* for Relation */
+
+/*
+ * StrategyNumberIsValid --
+ * True iff the strategy number is valid, i.e. it differs from
+ * InvalidStrategy.  No upper bound is checked here; see
+ * StrategyNumberIsInBounds for that.
+ */
+#define StrategyNumberIsValid(strategyNumber) \
+    ((bool) ((strategyNumber) != InvalidStrategy))
+
+/*
+ * StrategyNumberIsInBounds --
+ * True iff strategy number is within given bounds.
+ *
+ * Note:
+ * Assumes StrategyNumber is an unsigned type.
+ * Assumes the bounded interval to be (0,max].
+ * The strategyNumber argument is expanded twice, so it must not
+ * have side effects.
+ */
+#define StrategyNumberIsInBounds(strategyNumber, maxStrategyNumber) \
+    ((bool)(InvalidStrategy < (strategyNumber) && \
+       (strategyNumber) <= (maxStrategyNumber)))
+
+/*
+ * StrategyMapIsValid --
+ * True iff the index strategy mapping is valid.
+ * This is only a pointer check (PointerIsValid); the contents of
+ * the map are not examined.
+ */
+#define    StrategyMapIsValid(map) PointerIsValid(map)
+
+/*
+ * IndexStrategyIsValid --
+ * True iff the index strategy is valid.
+ * This is only a pointer check (PointerIsValid); the contents of
+ * the strategy are not examined.
+ */
+#define    IndexStrategyIsValid(s) PointerIsValid(s)
+
+extern ScanKey StrategyMapGetScanKeyEntry(StrategyMap map,
+                     StrategyNumber strategyNumber);
+extern StrategyMap IndexStrategyGetStrategyMap(IndexStrategy indexStrategy,
+   StrategyNumber maxStrategyNum, AttrNumber attrNum);
+
+extern Size
+AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber,
+                   StrategyNumber maxStrategyNumber);
+extern bool StrategyOperatorIsValid(StrategyOperator operator,
+                   StrategyNumber maxStrategy);
+extern bool StrategyTermIsValid(StrategyTerm term,
+               StrategyNumber maxStrategy);
+extern bool StrategyExpressionIsValid(StrategyExpression expression,
+                     StrategyNumber maxStrategy);
+extern bool StrategyEvaluationIsValid(StrategyEvaluation evaluation);
+extern StrategyNumber RelationGetStrategy(Relation relation,
+   AttrNumber attributeNumber, StrategyEvaluation evaluation,
+   RegProcedure procedure);
+extern bool RelationInvokeStrategy(Relation relation,
+   StrategyEvaluation evaluation, AttrNumber attributeNumber,
+   StrategyNumber strategy, Datum left, Datum right);
+extern void IndexSupportInitialize(IndexStrategy indexStrategy,
+   RegProcedure *indexSupport, Oid indexObjectId,
+   Oid accessMethodObjectId, StrategyNumber maxStrategyNumber,
+   StrategyNumber maxSupportNumber, AttrNumber maxAttributeNumber);
+
+
+#endif /* ISTRAT_H */
diff --git a/src/backend/access/itup.h b/src/backend/access/itup.h

new file mode 100644 (file)

index 0000000..028bf43
--- /dev/null
+++ b/src/backend/access/itup.h
@@ -0,0 +1,104 @@
+/*-------------------------------------------------------------------------
+ *
+ * itup.h--
+ *    POSTGRES index tuple definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: itup.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ITUP_H
+#define ITUP_H
+
+#include "c.h"
+#include "access/ibit.h"
+#include "access/tupdesc.h"    /* for TupleDesc */
+#include "storage/itemptr.h"
+
+/*
+ * NOTE(review): presumably the maximum number of attributes in an index;
+ * confirm against the users of this constant.
+ */
+#define MaxIndexAttributeNumber    7
+
+/*
+ * IndexTupleData -- fixed-size header of an index tuple.  The data of
+ * the tuple follows the header directly (see the note on the closing
+ * brace).
+ */
+typedef struct IndexTupleData {
+    ItemPointerData        t_tid; /* reference TID to base tuple */
+
+    /*
+     * t_info is laid out in the following fashion:
+     *
+     * 15th (leftmost) bit: "has nulls" bit (0x8000)
+     * 14th bit: "has varlenas" bit (0x4000)
+     * 13th bit: "has rules" bit - (removed ay 11/94)
+     * bits 12-0: size of tuple (0x1FFF).
+     */
+
+    unsigned short     t_info; /* various info about tuple */
+
+    /*
+     * please make sure sizeof(IndexTupleData) is MAXALIGN'ed.
+     * See IndexInfoFindDataOffset() for the reason.
+     */
+    
+} IndexTupleData;      /* MORE DATA FOLLOWS AT END OF STRUCT */
+
+/* pointer form, used by the accessor macros and the routines below */
+typedef IndexTupleData *IndexTuple;
+
+
+/*
+ * InsertIndexResultData -- result of an index insertion (it is the
+ * return type of the btree insert routines declared in nbtree.h).
+ */
+typedef struct InsertIndexResultData {
+    ItemPointerData    pointerData;    /* NOTE(review): presumably the
+                                        * location of the inserted index
+                                        * tuple -- confirm */
+} InsertIndexResultData;
+
+typedef InsertIndexResultData *InsertIndexResult;
+
+
+/*
+ * RetrieveIndexResultData -- result of an index retrieval; built by
+ * FormRetrieveIndexResult() from an index item pointer and a heap item
+ * pointer.
+ */
+typedef struct RetrieveIndexResultData {
+    ItemPointerData    index_iptr;     /* item pointer into the index */
+    ItemPointerData    heap_iptr;      /* item pointer into the heap */
+} RetrieveIndexResultData;
+
+typedef RetrieveIndexResultData    *RetrieveIndexResult;
+
+
+/*-----------------
+ * PredInfo -
+ *    used for partial indices
+ *
+ *    NOTE(review): pred/oldPred presumably hold the new and the previous
+ *    index predicate; confirm against the code that builds a PredInfo.
+ *-----------------
+ */
+typedef struct PredInfo {
+    Node       *pred;
+    Node       *oldPred;
+} PredInfo;
+
+
+/* ----------------
+ * externs 
+ * ----------------
+ */
+
+/*
+ * Bit masks for IndexTupleData.t_info: the low 13 bits hold the tuple
+ * size, bit 15 is the "has nulls" bit and bit 14 the "has varlenas" bit.
+ */
+#define INDEX_SIZE_MASK 0x1FFF
+#define INDEX_NULL_MASK 0x8000
+#define INDEX_VAR_MASK  0x4000
+
+/*
+ * t_info accessors, written in terms of the masks above so the bit
+ * layout is defined in exactly one place.
+ *
+ * IndexTupleSize     -- tuple size, given a pointer to the tuple
+ * IndexTupleDSize    -- tuple size, given the tuple struct itself
+ * IndexTupleNoNulls  -- true iff the "has nulls" bit is clear
+ * IndexTupleAllFixed -- true iff the "has varlenas" bit is clear
+ */
+#define IndexTupleSize(itup) \
+    (((IndexTuple) (itup))->t_info & INDEX_SIZE_MASK)
+#define IndexTupleDSize(itup) \
+    ((itup).t_info & INDEX_SIZE_MASK)
+#define IndexTupleNoNulls(itup) \
+    (!(((IndexTuple) (itup))->t_info & INDEX_NULL_MASK))
+#define IndexTupleAllFixed(itup) \
+    (!(((IndexTuple) (itup))->t_info & INDEX_VAR_MASK))
+
+/* a tuple has the minimal header exactly when it has no nulls */
+#define IndexTupleHasMinHeader(itup) (IndexTupleNoNulls(itup))
+
+
+/* indextuple.h */
+extern IndexTuple index_formtuple(TupleDesc tupleDescriptor,
+                 Datum value[], char null[]);
+extern char *fastgetiattr(IndexTuple tup, int attnum,
+   TupleDesc att, bool *isnull);
+extern Datum index_getattr(IndexTuple tuple, AttrNumber attNum,
+   TupleDesc tupDesc, bool *isNullOutP);
+extern RetrieveIndexResult
+FormRetrieveIndexResult(ItemPointer indexItemPointer,
+           ItemPointer heapItemPointer);
+extern void CopyIndexTuple(IndexTuple source, IndexTuple *target);
+
+
+#endif /* ITUP_H */
+
diff --git a/src/backend/access/nbtree.h b/src/backend/access/nbtree.h

new file mode 100644 (file)

index 0000000..d5c37a2
--- /dev/null
+++ b/src/backend/access/nbtree.h
@@ -0,0 +1,264 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtree.h--
+ *    header file for postgres btree access method implementation.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: nbtree.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    NBTREE_H
+#define    NBTREE_H
+
+#include "access/attnum.h"
+#include "access/itup.h"
+#include "access/htup.h"
+#include "access/tupdesc.h"
+
+#include "access/istrat.h"
+#include "access/funcindex.h"
+#include "access/relscan.h"
+#include "access/sdir.h"
+#include "nodes/pg_list.h"
+
+/*
+ *  BTPageOpaqueData -- At the end of every page, we store a pointer
+ *  to both siblings in the tree.  See Lehman and Yao's paper for more
+ *  info.  In addition, we need to know what sort of page this is
+ *  (leaf or internal), and whether the page is available for reuse.
+ *
+ *  Lehman and Yao's algorithm requires a ``high key'' on every page.
+ *  The high key on a page is guaranteed to be greater than or equal
+ *  to any key that appears on this page.  Our insertion algorithm
+ *  guarantees that we can use the initial least key on our right
+ *  sibling as the high key.  We allocate space for the line pointer
+ *  to the high key in the opaque data at the end of the page.
+ *
+ *  Rightmost pages in the tree have no high key.
+ */
+
+typedef struct BTPageOpaqueData {
+    BlockNumber    btpo_prev;  /* left sibling; P_NONE on the leftmost page
+                                * (see P_LEFTMOST) */
+    BlockNumber    btpo_next;  /* right sibling; P_NONE on the rightmost page
+                                * (see P_RIGHTMOST) */
+    uint16 btpo_flags;         /* page type/state; presumably an OR of the
+                                * BTP_* bits below */
+
+#define BTP_LEAF   (1 << 0)    /* leaf page (as opposed to internal) */
+#define BTP_ROOT   (1 << 1)    /* presumably: page is the root */
+#define BTP_FREE   (1 << 2)    /* page is available for reuse */
+#define BTP_META   (1 << 3)    /* presumably: the meta-data page */
+
+} BTPageOpaqueData;
+
+typedef BTPageOpaqueData   *BTPageOpaque;
+
+/*
+ *  ScanOpaqueData is used to remember which buffers we're currently
+ *  examining in the scan.  We keep these buffers locked and pinned
+ *  and recorded in the opaque entry of the scan in order to avoid
+ *  doing a ReadBuffer() for every tuple in the index.  This avoids
+ *  semop() calls, which are expensive.
+ */
+
+typedef struct BTScanOpaqueData {
+    Buffer btso_curbuf;    /* NOTE(review): presumably the buffer of the
+                            * scan's current position -- confirm */
+    Buffer btso_mrkbuf;    /* NOTE(review): presumably the buffer of the
+                            * marked position (btmarkpos/btrestrpos) --
+                            * confirm */
+} BTScanOpaqueData;
+
+typedef BTScanOpaqueData   *BTScanOpaque;
+
+/*
+ *  BTItems are what we store in the btree.  Each item has an index
+ *  tuple, including key and pointer values.  In addition, we must
+ *  guarantee that all tuples in the index are unique, in order to
+ *  satisfy some assumptions in Lehman and Yao.  The way that we do
+ *  this is by generating a new OID for every insertion that we do in
+ *  the tree.  This adds eight bytes to the size of btree index
+ *  tuples.  Note that we do not use the OID as part of a composite
+ *  key; the OID only serves as a unique identifier for a given index
+ *  tuple (logical position within a page).
+ */
+
+typedef struct BTItemData {
+    Oid                bti_oid;    /* unique identifier of this index tuple;
+                                    * not part of the key (see above) */
+    int32          bti_dummy;  /* padding to make bti_itup
+                        * align at 8-byte boundary
+                        */
+    IndexTupleData     bti_itup;   /* header of the index tuple proper */
+} BTItemData;
+
+typedef BTItemData *BTItem;
+
+/*
+ *  BTStackData -- As we descend a tree, we push the (key, pointer)
+ *  pairs from internal nodes onto a private stack.  If we split a
+ *  leaf, we use this stack to walk back up the tree and insert data
+ *  into parent nodes (and possibly to split them, too).  Lehman and
+ *  Yao's update algorithm guarantees that under no circumstances can
+ *  our private stack give us an irredeemably bad picture up the tree.
+ *  Again, see the paper for details.
+ */
+
+/*
+ * One entry of the descent stack described above; entries are linked
+ * through bts_parent.
+ * NOTE(review): the field meanings below are inferred from the names and
+ * the description above -- confirm against nbtsearch.c / nbtinsert.c.
+ */
+typedef struct BTStackData {
+    BlockNumber        bts_blkno;      /* presumably: block of the internal page */
+    OffsetNumber   bts_offset;     /* presumably: item offset on that page */
+    BTItem     bts_btitem;     /* presumably: the (key, pointer) item followed */
+    struct BTStackData *bts_parent;    /* next entry up the stack */
+} BTStackData;
+
+typedef BTStackData    *BTStack;
+
+/*
+ *  We need to be able to tell the difference between read and write
+ *  requests for pages, in order to do locking correctly.
+ */
+
+#define    BT_READ     0
+#define    BT_WRITE    1
+
+/*
+ *  Similarly, the difference between insertion and non-insertion binary
+ *  searches on a given page makes a difference when we're descending the
+ *  tree.
+ */
+
+#define BT_INSERTION   0
+#define BT_DESCENT 1
+
+/*
+ *  In general, the btree code tries to localize its knowledge about
+ *  page layout to a couple of routines.  However, we need a special
+ *  value to indicate "no page number" in those places where we expect
+ *  page numbers.
+ */
+
+/* "no page": stored in btpo_prev/btpo_next when there is no such sibling */
+#define P_NONE     0
+/* true iff the page described by opaque has no left / right sibling */
+#define    P_LEFTMOST(opaque)  ((opaque)->btpo_prev == P_NONE)
+#define    P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE)
+
+/*
+ * Offsets of the high key and of the first key on a page.
+ * NOTE(review): per the BTPageOpaqueData comment, rightmost pages have no
+ * high key, so P_HIKEY presumably does not hold a high key there --
+ * confirm.
+ */
+#define    P_HIKEY     ((OffsetNumber) 1)
+#define    P_FIRSTKEY  ((OffsetNumber) 2)
+
+/*
+ *  Strategy numbers -- ordering of these is <, <=, =, >=, > 
+ */
+
+#define BTLessStrategyNumber       1
+#define BTLessEqualStrategyNumber  2
+#define BTEqualStrategyNumber      3
+#define BTGreaterEqualStrategyNumber   4
+#define BTGreaterStrategyNumber        5
+#define BTMaxStrategyNumber        5
+
+/*
+ *  When a new operator class is declared, we require that the user
+ *  supply us with an amproc procedure for determining whether, for
+ *  two keys a and b, a < b, a = b, or a > b.  This routine must
+ *  return < 0, 0, > 0, respectively, in these three cases.  Since we
+ *  only have one such proc in amproc, it's number 1.
+ */
+
+#define BTORDER_PROC   1
+
+
+/*
+ * prototypes for functions in nbtinsert.c
+ */
+extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem);
+extern bool _bt_itemcmp(Relation rel, Size keysz, BTItem item1, BTItem item2,
+           StrategyNumber strat);
+
+/*
+ * prototypes for functions in nbtpage.c
+ */
+extern void _bt_metapinit(Relation rel);
+extern void _bt_checkmeta(Relation rel);
+extern Buffer _bt_getroot(Relation rel, int access);
+extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
+extern void _bt_relbuf(Relation rel, Buffer buf, int access);
+extern void _bt_wrtbuf(Relation rel, Buffer buf);
+extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
+extern void _bt_pageinit(Page page, Size size);
+extern void _bt_metaproot(Relation rel, BlockNumber rootbknum);
+extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
+extern void _bt_setpagelock(Relation rel, BlockNumber blkno, int access);
+extern void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access);
+extern void _bt_pagedel(Relation rel, ItemPointer tid);
+
+/*
+ * prototypes for functions in nbtree.c
+ */
+extern bool BuildingBtree; /* in nbtree.c */
+
+extern void btbuild(Relation heap, Relation index, int natts,
+   AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
+   Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
+extern InsertIndexResult btinsert(Relation rel, IndexTuple itup);
+extern char *btgettuple(IndexScanDesc scan, ScanDirection dir);
+extern char *btbeginscan(Relation rel, bool fromEnd, uint16 keysz,
+            ScanKey scankey);
+
+extern void btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
+extern void btmovescan(IndexScanDesc scan, Datum v);
+extern void btendscan(IndexScanDesc scan);
+extern void btmarkpos(IndexScanDesc scan);
+extern void btrestrpos(IndexScanDesc scan);
+extern void btdelete(Relation rel, ItemPointer tid);
+
+/*
+ * prototypes for functions in nbtscan.c
+ */
+extern void _bt_regscan(IndexScanDesc scan);
+extern void _bt_dropscan(IndexScanDesc scan);
+extern void _bt_adjscans(Relation rel, ItemPointer tid);
+extern void _bt_scandel(IndexScanDesc scan, BlockNumber blkno,
+           OffsetNumber offno);
+extern bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno,
+               OffsetNumber offno);
+
+/*
+ * prototypes for functions in nbtsearch.c
+ */
+extern BTStack _bt_search(Relation rel, int keysz, ScanKey scankey,
+             Buffer *bufP);
+extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
+               ScanKey scankey, int access);
+extern bool _bt_skeycmp(Relation rel, Size keysz, ScanKey scankey,
+           Page page, ItemId itemid, StrategyNumber strat);
+extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
+               ScanKey scankey, int srchtype);
+extern RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir);
+extern RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir);
+extern bool _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
+
+/*
+ * prototypes for functions in nbtstrat.c
+ */
+extern StrategyNumber _bt_getstrat(Relation rel, AttrNumber attno,
+                  RegProcedure proc);
+extern bool _bt_invokestrat(Relation rel, AttrNumber attno,
+               StrategyNumber strat, Datum left, Datum right);
+
+/*
+ * prototypes for functions in nbtutils.c
+ */
+extern ScanKey  _bt_mkscankey(Relation rel, IndexTuple itup);
+extern void _bt_freeskey(ScanKey skey);
+extern void _bt_freestack(BTStack stack);
+extern void _bt_orderkeys(Relation relation, uint16 *numberOfKeys,
+             ScanKey key);
+extern bool _bt_checkqual(IndexScanDesc scan, IndexTuple itup);
+extern BTItem _bt_formitem(IndexTuple itup);
+
+/*
+ * prototypes for functions in nbtsort.c
+ */
+extern void *_bt_spoolinit(Relation index, int ntapes);
+extern void _bt_spooldestroy(void *spool);
+extern void _bt_spool(Relation index, BTItem btitem, void *spool);
+extern void _bt_upperbuild(Relation index, BlockNumber blk, int level);
+extern void _bt_leafbuild(Relation index, void *spool);
+
+#endif /* NBTREE_H */
diff --git a/src/backend/access/nbtree/Makefile.inc b/src/backend/access/nbtree/Makefile.inc

new file mode 100644 (file)

index 0000000..5085400
--- /dev/null
+++ b/src/backend/access/nbtree/Makefile.inc
@@ -0,0 +1,15 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/nbtree (btree access methods)
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+# Append the btree access method sources to SUBSRCS.
+# NOTE(review): SUBSRCS is presumably defined and consumed by the makefile
+# that includes this fragment -- confirm.
+SUBSRCS+= nbtcompare.c nbtinsert.c nbtpage.c nbtree.c nbtscan.c nbtsearch.c \
+   nbtstrat.c nbtutils.c nbtsort.c
diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README

new file mode 100644 (file)

index 0000000..a204ad4
--- /dev/null
+++ b/src/backend/access/nbtree/README
@@ -0,0 +1,68 @@
+$Header: /cvsroot/pgsql/src/backend/access/nbtree/README,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+
+This directory contains a correct implementation of Lehman and Yao's
+btree management algorithm that supports concurrent access for Postgres.
+We have made the following changes in order to incorporate their algorithm
+into Postgres:
+
+   +  The requirement that all btree keys be unique is too onerous,
+      but the algorithm won't work correctly without it.  As a result,
+      this implementation adds an OID (guaranteed to be unique) to
+      every key in the index.  This guarantees uniqueness within a set
+      of duplicates.  Space overhead is eight bytes per index tuple
+      (the OID plus alignment padding; see BTItemData in nbtree.h).
+
+      For this reason, when we're passed an index tuple to store by the
+      common access method code, we allocate a larger one and copy the
+      supplied tuple into it.  No Postgres code outside of the btree
+      access method knows about this OID.
+
+   +  Lehman and Yao don't require read locks, but assume that in-
+      memory copies of tree nodes are unshared.  Postgres shares
+      in-memory buffers among backends.  As a result, we do page-
+      level read locking on btree nodes in order to guarantee that
+      no record is modified while we are examining it.  This reduces
+      concurrency but guarantees correct behavior.
+
+   +  Read locks on a page are held for as long as a scan has a pointer
+      to the page.  However, locks are always surrendered before the
+      sibling page lock is acquired (for readers), so we remain deadlock-
+      free.  I will do a formal proof if I get bored anytime soon.
+
+In addition, the following things are handy to know:
+
+   +  Page zero of every btree is a meta-data page.  This page stores
+      the location of the root page, a pointer to a list of free
+      pages, and other stuff that's handy to know.
+
+   +  This algorithm doesn't really work, since it requires ordered
+      writes, and UNIX doesn't support ordered writes.
+
+   +  There's one other case where we may screw up in this
+      implementation.  When we start a scan, we descend the tree
+      to the key nearest the one in the qual, and once we get there,
+      position ourselves correctly for the qual type (eg, <, >=, etc).
+      If we happen to step off a page, decide we want to get back to
+      it, and fetch the page again, and if some bad person has split
+      the page and moved the last tuple we saw off of it, then the
+      code complains about botched concurrency in an elog(WARN, ...)
+      and gives up the ghost.  This is the ONLY violation of Lehman
+      and Yao's guarantee of correct behavior that I am aware of in
+      this code.
+
+Notes to operator class implementors:
+
+   With this implementation, we require the user to supply us with
+   a procedure for pg_amproc.  This procedure should take two keys
+   A and B and return < 0, 0, or > 0 if A < B, A = B, or A > B,
+   respectively.  See the contents of that relation for the btree
+   access method for some samples.
+
+Notes to mao for implementation document:
+
+   On deletions, we need to adjust the position of active scans on
+   the index.  The code in nbtscan.c handles this.  We don't need to
+   do this for splits because of the way splits are handled; if they
+   happen behind us, we'll automatically go to the next page, and if
+   they happen in front of us, we're not affected by them.  For
+   insertions, if we inserted a tuple behind the current scan location
+   on the current scan page, we move one space ahead.
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c

new file mode 100644 (file)

index 0000000..e567b3c
--- /dev/null
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -0,0 +1,173 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtcompare.c--
+ *    Comparison functions for btree access method.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *  NOTES
+ * These functions are stored in pg_amproc.  For each operator class
+ * defined on btrees, they compute
+ *
+ *     compare(a, b):
+ *         < 0 if a < b,
+ *         = 0 if a == b,
+ *         > 0 if a > b.
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+#include "postgres.h"
+#include "utils/nabstime.h"
+
+/*
+ * btint2cmp -- three-way comparison of two int16 keys.
+ * Returns the difference a - b: negative, zero or positive when a is
+ * less than, equal to or greater than b.
+ */
+int32
+btint2cmp(int16 a, int16 b)
+{
+    int32 difference;
+
+    difference = (int32) (a - b);
+    return difference;
+}
+
+/*
+ * btint4cmp -- three-way comparison of two int32 keys.
+ * Returns 1, 0 or -1 when a is greater than, equal to or less than b.
+ *
+ * The operands are compared rather than subtracted: a - b overflows
+ * when the operands are far apart (e.g. a large positive a and a
+ * negative b) and would then yield a result with the wrong sign.
+ */
+int32
+btint4cmp(int32 a, int32 b)
+{
+    if (a > b)
+        return (1);
+    else if (a == b)
+        return (0);
+    else
+        return (-1);
+}
+
+/*
+ * btint24cmp -- three-way comparison of an int16 key with an int32 key.
+ * Returns 1, 0 or -1 when a is greater than, equal to or less than b.
+ *
+ * The int16 operand is widened and the values are compared rather than
+ * subtracted, because the subtraction can overflow int32 and produce a
+ * result with the wrong sign.
+ */
+int32
+btint24cmp(int16 a, int32 b)
+{
+    int32 wide_a = (int32) a;
+
+    if (wide_a > b)
+        return (1);
+    else if (wide_a == b)
+        return (0);
+    else
+        return (-1);
+}
+
+/*
+ * btint42cmp -- three-way comparison of an int32 key with an int16 key.
+ * Returns 1, 0 or -1 when a is greater than, equal to or less than b.
+ *
+ * The int16 operand is widened and the values are compared rather than
+ * subtracted, because the subtraction can overflow int32 and produce a
+ * result with the wrong sign.
+ */
+int32
+btint42cmp(int32 a, int16 b)
+{
+    int32 wide_b = (int32) b;
+
+    if (a > wide_b)
+        return (1);
+    else if (a == wide_b)
+        return (0);
+    else
+        return (-1);
+}
+
+/*
+ * btfloat4cmp -- three-way comparison of the values a and b point to.
+ * Returns 0 when they compare equal, 1 when *a is greater, -1 otherwise.
+ */
+int32
+btfloat4cmp(float32 a, float32 b)
+{
+    if (*a == *b)
+        return (0);
+
+    return ((*a > *b) ? 1 : -1);
+}
+
+/*
+ * btfloat8cmp -- three-way comparison of the values a and b point to.
+ * Returns 0 when they compare equal, 1 when *a is greater, -1 otherwise.
+ */
+int32
+btfloat8cmp(float64 a, float64 b)
+{
+    if (*a == *b)
+        return (0);
+
+    return ((*a > *b) ? 1 : -1);
+}
+
+/*
+ * btoidcmp -- three-way comparison of two object ids.
+ * Returns 0 when they are equal, 1 when a is greater, -1 otherwise.
+ */
+int32
+btoidcmp(Oid a, Oid b)
+{
+    if (a == b)
+        return (0);
+
+    return ((a > b) ? 1 : -1);
+}
+
+/*
+ * btabstimecmp -- three-way comparison of two absolute times.
+ * Returns -1 when a is before b, 1 when b is before a, and 0 otherwise,
+ * matching the compare(a, b) contract stated at the top of this file
+ * (< 0 if a < b, > 0 if a > b).  The signs used to be inverted.
+ *
+ * NOTE(review): btree indexes on this type built with the previous,
+ * inverted ordering would presumably have to be rebuilt.
+ */
+int32
+btabstimecmp(AbsoluteTime a, AbsoluteTime b)
+{
+    if (AbsoluteTimeIsBefore(a, b))
+        return (-1);
+    else if (AbsoluteTimeIsBefore(b, a))
+        return (1);
+    else
+        return (0);
+}
+
+/*
+ * btcharcmp -- three-way comparison of two single-character keys.
+ * Returns the difference a - b.
+ */
+int32
+btcharcmp(char a, char b)
+{
+    int32 difference;
+
+    difference = (int32) (a - b);
+    return difference;
+}
+
+/*
+ * btchar2cmp -- compares two 2-character keys passed by value, by
+ * running strncmp() over the (at most) 2 bytes of each argument.
+ */
+int32
+btchar2cmp(uint16 a, uint16 b)
+{
+    char *left = (char *) &a;
+    char *right = (char *) &b;
+
+    return (strncmp(left, right, 2));
+}
+
+/*
+ * btchar4cmp -- compares two 4-character keys passed by value, by
+ * running strncmp() over the (at most) 4 bytes of each argument.
+ */
+int32
+btchar4cmp(uint32 a, uint32 b)
+{
+    char *left = (char *) &a;
+    char *right = (char *) &b;
+
+    return (strncmp(left, right, 4));
+}
+
+/*
+ * btchar8cmp -- compares at most the first 8 characters of a and b
+ * with strncmp().
+ */
+int32
+btchar8cmp(char *a, char *b)
+{
+    int32 outcome;
+
+    outcome = strncmp(a, b, 8);
+    return (outcome);
+}
+
+/*
+ * btchar16cmp -- compares at most the first 16 characters of a and b
+ * with strncmp().
+ */
+int32
+btchar16cmp(char *a, char *b)
+{
+    int32 outcome;
+
+    outcome = strncmp(a, b, 16);
+    return (outcome);
+}
+
+/*
+ * btnamecmp -- compares the data of two names with strncmp(), looking
+ * at no more than NAMEDATALEN characters.
+ */
+int32
+btnamecmp(NameData *a, NameData *b)
+{
+    int32 outcome;
+
+    outcome = strncmp(a->data, b->data, NAMEDATALEN);
+    return (outcome);
+}
+
+/*
+ * bttextcmp -- three-way comparison of two variable-length values.
+ *
+ * The data bytes are compared pairwise over the length of the shorter
+ * value; the first differing pair decides and the difference of those
+ * two bytes is returned.  When that prefix is identical, equal sizes
+ * yield 0 and otherwise the shorter value sorts first (-1 if a is
+ * shorter, 1 if b is shorter).
+ */
+int32
+bttextcmp(struct varlena *a, struct varlena *b)
+{
+    char *left = VARDATA(a);
+    char *right = VARDATA(b);
+    int   common;
+    int   pos;
+    int   diff = 0;
+
+    /* size of the shorter value, header included */
+    common = VARSIZE(a);
+    if (common > VARSIZE(b))
+        common = VARSIZE(b);
+
+    /* strip the bytes in which the length itself is stored */
+    common -= sizeof(VARSIZE(a));
+
+    /* walk the common prefix until the first difference */
+    for (pos = 0; pos < common && diff == 0; pos++)
+        diff = (int) (left[pos] - right[pos]);
+
+    /* a difference, or identical values of identical size: done */
+    if (diff != 0 || VARSIZE(a) == VARSIZE(b))
+        return (diff);
+
+    /* identical over the common prefix: the shorter value sorts first */
+    return ((VARSIZE(a) < VARSIZE(b)) ? -1 : 1);
+}
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c

new file mode 100644 (file)

index 0000000..536c0aa
--- /dev/null
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -0,0 +1,831 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtinsert.c--
+ *    Item insertion in Lehman and Yao btrees for Postgres.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/nbtree.h"
+
+static InsertIndexResult _bt_insertonpg(Relation rel, Buffer buf, BTStack stack, int keysz, ScanKey scankey, BTItem btitem, BTItem afteritem);
+static Buffer _bt_split(Relation rel, Buffer buf);
+static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber start, OffsetNumber maxoff, Size llimit);
+static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
+static OffsetNumber _bt_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, BTItem btitem, BTItem afteritem);
+static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem);
+static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, Oid bti_oid, BTItem newItem);
+
+/*
+ *  _bt_doinsert() -- Handle insertion of a single btitem in the tree.
+ *
+ * This routine is called by the public interface routines, btbuild
+ * and btinsert.  By here, btitem is filled in, and has a unique
+ * (xid, seqno) pair.
+ */
+InsertIndexResult
+_bt_doinsert(Relation rel, BTItem btitem)
+{
+    IndexTuple tuple = &(btitem->bti_itup);
+    ScanKey skey;
+    BTStack parents;
+    Buffer leafbuf;
+    BlockNumber leafblk;
+    InsertIndexResult result;
+    int nkeys;
+
+    /* the descent is driven by a scan key built from the tuple itself */
+    skey = _bt_mkscankey(rel, tuple);
+    nkeys = rel->rd_rel->relnatts;
+
+    /*
+     *  Locate the page for this key.  The buffer comes back through
+     *  &leafbuf; the return value is the stack that is later handed to
+     *  _bt_insertonpg.
+     */
+    parents = _bt_search(rel, nkeys, skey, &leafbuf);
+    leafblk = BufferGetBlockNumber(leafbuf);
+
+    /* swap the read lock for a write lock: let go, then re-fetch the block */
+    _bt_relbuf(rel, leafbuf, BT_READ);
+    leafbuf = _bt_getbuf(rel, leafblk, BT_WRITE);
+
+    /*
+     *  Between dropping the read lock and getting the write lock the
+     *  page may have been split, in which case our key may now belong
+     *  further right.  Walk right as needed (Lehman and Yao).
+     */
+    leafbuf = _bt_moveright(rel, leafbuf, nkeys, skey, BT_WRITE);
+
+    /* perform the actual insertion; no afteritem at this level */
+    result = _bt_insertonpg(rel, leafbuf, parents, nkeys, skey,
+                            btitem, (BTItem) NULL);
+
+    /* release the stack and the scan key */
+    _bt_freestack(parents);
+    _bt_freeskey(skey);
+
+    return (result);
+}
+
+/*
+ *  _bt_insertonpg() -- Insert a tuple on a particular page in the index.
+ *
+ * This recursive procedure does the following things:
+ *
+ *     +  if necessary, splits the target page.
+ *     +  finds the right place to insert the tuple (taking into
+ *        account any changes induced by a split).
+ *     +  inserts the tuple.
+ *     +  if the page was split, pops the parent stack, and finds the
+ *        right place to insert the new child pointer (by walking
+ *        right using information stored in the parent stack).
+ *     +  invoking itself with the appropriate tuple for the right
+ *        child page on the parent.
+ *
+ * On entry, we must have the right buffer on which to do the
+ * insertion, and the buffer must be pinned and locked.  On return,
+ * we will have dropped both the pin and the write lock on the buffer.
+ *
+ * The locking interactions in this code are critical.  You should
+ * grok Lehman and Yao's paper before making any changes.  In addition,
+ * you need to understand how we disambiguate duplicate keys in this
+ * implementation, in order to be able to find our location using
+ * L&Y "move right" operations.  Since we may insert duplicate user
+ * keys, and since these dups may propagate up the tree, we use the
+ * 'afteritem' parameter to position ourselves correctly for the
+ * insertion on internal pages.
+ */
+static InsertIndexResult
+_bt_insertonpg(Relation rel,
+          Buffer buf,
+          BTStack stack,
+          int keysz,
+          ScanKey scankey,
+          BTItem btitem,
+          BTItem afteritem)
+{
+    InsertIndexResult res;
+    Page page;
+    Buffer rbuf;
+    Buffer pbuf;
+    Page rpage;
+    ScanKey newskey;
+    BTItem ritem;
+    BTPageOpaque rpageop;
+    BlockNumber rbknum, itup_blkno;
+    OffsetNumber itup_off;
+    int itemsz;
+    InsertIndexResult newres;
+    BTItem new_item = (BTItem) NULL;
+    BTItem lowLeftItem;
+    
+    page = BufferGetPage(buf);
+
+    /*
+     *  Size of the item as it will be stored: the index tuple's own size
+     *  plus whatever BTItemData carries beyond its embedded IndexTupleData.
+     */
+    itemsz = IndexTupleDSize(btitem->bti_itup)
+   + (sizeof(BTItemData) - sizeof(IndexTupleData));
+
+    itemsz = DOUBLEALIGN(itemsz);  /* be safe, PageAddItem will do this
+                      but we need to be consistent */
+    
+    /* not enough room on the target page: split it before inserting */
+    if (PageGetFreeSpace(page) < itemsz) {
+   
+   /* split the buffer into left and right halves */
+   rbuf = _bt_split(rel, buf);
+   
+   /* which new page (left half or right half) gets the tuple? */
+   if (_bt_goesonpg(rel, buf, keysz, scankey, afteritem)) {
+       /* left page */
+       itup_off = _bt_pgaddtup(rel, buf, keysz, scankey,
+                   itemsz, btitem, afteritem);
+       itup_blkno = BufferGetBlockNumber(buf);
+   } else {
+       /* right page */
+       itup_off = _bt_pgaddtup(rel, rbuf, keysz, scankey,
+                   itemsz, btitem, afteritem);
+       itup_blkno = BufferGetBlockNumber(rbuf);
+   }
+   
+   /*
+    *  By here,
+    *
+    *  +  our target page has been split;
+    *  +  the original tuple has been inserted;
+    *  +  we have write locks on both the old (left half) and new
+    *     (right half) buffers, after the split; and
+    *  +  we have the key we want to insert into the parent.
+    *
+    *  Do the parent insertion.  We need to hold onto the locks for
+    *  the child pages until we locate the parent, but we can release
+    *  them before doing the actual insertion (see Lehman and Yao for
+    *  the reasoning).
+    */
+   
+   /* an empty parent stack means the page we just split had no parent */
+   if (stack == (BTStack) NULL) {
+       
+       /* create a new root node and release the split buffers */
+       _bt_newroot(rel, buf, rbuf);
+       _bt_relbuf(rel, buf, BT_WRITE);
+       _bt_relbuf(rel, rbuf, BT_WRITE);
+       
+   } else {
+
+       /* form an index tuple that points at the new right page */
+       rbknum = BufferGetBlockNumber(rbuf);
+       rpage = BufferGetPage(rbuf);
+       rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage);
+       
+       /*
+        *  By convention, the first entry (0) on every
+        *  non-rightmost page is the high key for that page.  In
+        *  order to get the lowest key on the new right page, we
+        *  actually look at its second (1) entry.
+        */
+       
+       if (! P_RIGHTMOST(rpageop)) {
+       ritem = (BTItem) PageGetItem(rpage,
+                        PageGetItemId(rpage, P_FIRSTKEY));
+       } else {
+       ritem = (BTItem) PageGetItem(rpage,
+                        PageGetItemId(rpage, P_HIKEY));
+       }
+       
+       /* get a unique btitem for this key */
+       new_item = _bt_formitem(&(ritem->bti_itup));
+       
+       /* the parent entry's tid names the right page's block number */
+       ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY);
+       
+       /* find the parent buffer */
+       pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);
+       
+       /*
+        *  If the key of new_item is < than the key of the item
+        *  in the parent page pointing to the left page
+        *  (stack->bts_btitem), we have to update the latter key;
+        *  otherwise the keys on the parent page wouldn't be
+        *  monotonically increasing after we inserted the new
+        *  pointer to the right page (new_item). This only
+        *  happens if our left page is the leftmost page and a
+        *  new minimum key had been inserted before, which is not
+        *  reflected in the parent page but didn't matter so
+        *  far. If there are duplicate keys and this new minimum
+        *  key spills over to our new right page, we get an
+        *  inconsistency if we don't update the left key in the
+        *  parent page.
+        */
+       
+       if (_bt_itemcmp(rel, keysz, stack->bts_btitem, new_item,
+                       BTGreaterStrategyNumber)) {
+       lowLeftItem =
+           (BTItem) PageGetItem(page,
+                    PageGetItemId(page, P_FIRSTKEY));
+       /* page must have right pointer after split */
+       _bt_updateitem(rel, keysz, pbuf, stack->bts_btitem->bti_oid,
+                      lowLeftItem);
+       }
+       
+       /* don't need the children anymore */
+       _bt_relbuf(rel, buf, BT_WRITE);
+       _bt_relbuf(rel, rbuf, BT_WRITE);
+       
+       /*
+        *  Recurse one level up.  The parent's existing entry for the
+        *  left page is passed as afteritem, the item the new pointer
+        *  is expected to follow.
+        */
+       newskey = _bt_mkscankey(rel, &(new_item->bti_itup));
+       newres = _bt_insertonpg(rel, pbuf, stack->bts_parent,
+                   keysz, newskey, new_item,
+                   stack->bts_btitem);
+       
+       /* be tidy */
+       pfree(newres);
+       pfree(newskey);
+       pfree(new_item);
+   }
+    } else {
+   /* the page has room: insert directly, then release the buffer */
+   itup_off = _bt_pgaddtup(rel, buf, keysz, scankey,
+               itemsz, btitem, afteritem);
+   itup_blkno = BufferGetBlockNumber(buf);
+   
+   _bt_relbuf(rel, buf, BT_WRITE);
+    }
+    
+    /* by here, the new tuple is inserted */
+    /* the result is palloc'd here; the recursive call site above pfree()s its copy */
+    res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+    ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
+    
+    return (res);
+}
+
+/*
+ *  _bt_split() -- split a page in the btree.
+ *
+ * On entry, buf is the page to split, and is write-locked and pinned.
+ * Returns the new right sibling of buf, pinned and write-locked.  The
+ * pin and lock on buf are maintained.
+ */
+static Buffer
+_bt_split(Relation rel, Buffer buf)
+{
+    Buffer rbuf;
+    Page origpage;
+    Page leftpage, rightpage;
+    BTPageOpaque ropaque, lopaque, oopaque;
+    Buffer sbuf;
+    Page spage;
+    BTPageOpaque sopaque;
+    Size itemsz;
+    ItemId itemid;
+    BTItem item;
+    OffsetNumber leftoff, rightoff;
+    OffsetNumber start;
+    OffsetNumber maxoff;
+    OffsetNumber firstright;
+    OffsetNumber i;
+    Size llimit;
+    
+    /*
+     *  The right half lives in a brand-new buffer.  The left half is
+     *  assembled in a temporary page and copied back over the original
+     *  at the end (see PageRestoreTempPage below).
+     */
+    rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
+    origpage = BufferGetPage(buf);
+    leftpage = PageGetTempPage(origpage, sizeof(BTPageOpaqueData));
+    rightpage = BufferGetPage(rbuf);
+    
+    _bt_pageinit(rightpage, BufferGetPageSize(rbuf));
+    _bt_pageinit(leftpage, BufferGetPageSize(buf));
+    
+    /* init btree private data */
+    oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage);
+    lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage);
+    ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage);
+    
+    /* if we're splitting this page, it won't be the root when we're done */
+    oopaque->btpo_flags &= ~BTP_ROOT;
+    lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags;
+
+    /* link the halves: orig-prev <- left <-> right -> orig-next */
+    lopaque->btpo_prev = oopaque->btpo_prev;
+    ropaque->btpo_prev = BufferGetBlockNumber(buf);
+    lopaque->btpo_next = BufferGetBlockNumber(rbuf);
+    ropaque->btpo_next = oopaque->btpo_next;
+    
+    /*
+     *  If the page we're splitting is not the rightmost page at its
+     *  level in the tree, then the first (0) entry on the page is the
+     *  high key for the page.  We need to copy that to the right
+     *  half.  Otherwise (meaning the rightmost page case), we should
+     *  treat the line pointers beginning at zero as user data.
+     *
+     *  We leave a blank space at the start of the line table for the
+     *  left page.  We'll come back later and fill it in with the high
+     *  key item we get from the right key.
+     */
+    
+    leftoff = P_FIRSTKEY;
+    /* NOTE(review): redundant -- the same assignment was already made above */
+    ropaque->btpo_next = oopaque->btpo_next;
+    if (! P_RIGHTMOST(oopaque)) {
+   /* splitting a non-rightmost page, start at the first data item */
+   start = P_FIRSTKEY;
+
+   /* copy the original high key to the new page */
+   itemid = PageGetItemId(origpage, P_HIKEY);
+   itemsz = ItemIdGetLength(itemid);
+   item = (BTItem) PageGetItem(origpage, itemid);
+   (void) PageAddItem(rightpage, (Item) item, itemsz, P_HIKEY, LP_USED);
+   rightoff = P_FIRSTKEY;
+    } else {
+   /* splitting a rightmost page, "high key" is the first data item */
+   start = P_HIKEY;
+
+   /* the new rightmost page will not have a high key */
+   rightoff = P_HIKEY;
+    }
+    maxoff = PageGetMaxOffsetNumber(origpage);
+
+    /* byte budget for the left half: half the free space of an empty page */
+    llimit = PageGetFreeSpace(leftpage) / 2;
+    firstright = _bt_findsplitloc(rel, origpage, start, maxoff, llimit);
+    
+    /* distribute the data items: offsets below firstright go left, the rest right */
+    for (i = start; i <= maxoff; i = OffsetNumberNext(i)) {
+   itemid = PageGetItemId(origpage, i);
+   itemsz = ItemIdGetLength(itemid);
+   item = (BTItem) PageGetItem(origpage, itemid);
+   
+   /* decide which page to put it on */
+   if (i < firstright) {
+       (void) PageAddItem(leftpage, (Item) item, itemsz, leftoff,
+                  LP_USED);
+       leftoff = OffsetNumberNext(leftoff);
+   } else {
+       (void) PageAddItem(rightpage, (Item) item, itemsz, rightoff,
+                  LP_USED);
+       rightoff = OffsetNumberNext(rightoff);
+   }
+    }
+    
+    /*
+     *  Okay, page has been split, high key on right page is correct.  Now
+     *  set the high key on the left page to be the min key on the right
+     *  page.
+     */
+    
+    /* the right page's first data item is at P_HIKEY when it has no high key */
+    if (P_RIGHTMOST(ropaque)) {
+   itemid = PageGetItemId(rightpage, P_HIKEY);
+    } else {
+   itemid = PageGetItemId(rightpage, P_FIRSTKEY);
+    }
+    itemsz = ItemIdGetLength(itemid);
+    item = (BTItem) PageGetItem(rightpage, itemid);
+    
+    /*
+     *  We left a hole for the high key on the left page; fill it.  The
+     *  modal crap is to tell the page manager to put the new item on the
+     *  page and not screw around with anything else.  Whoever designed
+     *  this interface has presumably crawled back into the dung heap they
+     *  came from.  No one here will admit to it.
+     */
+    
+    PageManagerModeSet(OverwritePageManagerMode);
+    (void) PageAddItem(leftpage, (Item) item, itemsz, P_HIKEY, LP_USED);
+    PageManagerModeSet(ShufflePageManagerMode);
+    
+    /*
+     *  By here, the original data page has been split into two new halves,
+     *  and these are correct.  The algorithm requires that the left page
+     *  never move during a split, so we copy the new left page back on top
+     *  of the original.  Note that this is not a waste of time, since we
+     *  also require (in the page management code) that the center of a
+     *  page always be clean, and the most efficient way to guarantee this
+     *  is just to compact the data by reinserting it into a new left page.
+     */
+    
+    PageRestoreTempPage(leftpage, origpage);
+    
+    /* write these guys out */
+    _bt_wrtnorelbuf(rel, rbuf);
+    _bt_wrtnorelbuf(rel, buf);
+    
+    /*
+     *  Finally, we need to grab the right sibling (if any) and fix the
+     *  prev pointer there.  We are guaranteed that this is deadlock-free
+     *  since no other writer will be holding a lock on that page
+     *  and trying to move left, and all readers release locks on a page
+     *  before trying to fetch its neighbors.
+     */
+    
+    if (! P_RIGHTMOST(ropaque)) {
+   sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE);
+   spage = BufferGetPage(sbuf);
+   sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
+   sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
+   
+   /* write and release the old right sibling */
+   _bt_wrtbuf(rel, sbuf);
+    }
+    
+    /* split's done */
+    return (rbuf);
+}
+
+/*
+ *  _bt_findsplitloc() -- find a safe place to split a page.
+ *
+ * In order to guarantee the proper handling of searches for duplicate
+ * keys, the first duplicate in the chain must either be the first
+ * item on the page after the split, or the entire chain must be on
+ * one of the two pages.  That is,
+ *     [1 2 2 2 3 4 5]
+ * must become
+ *     [1] [2 2 2 3 4 5]
+ * or
+ *     [1 2 2 2] [3 4 5]
+ * but not
+ *     [1 2 2] [2 3 4 5].
+ * However,
+ *     [2 2 2 2 2 3 4]
+ * may be split as
+ *     [2 2 2 2] [2 3 4].
+ */
+/*
+ * Parameters:
+ *     page   - the page being split
+ *     start  - offset at which scanning begins
+ *     maxoff - last offset on the page (see the review note in the body)
+ *     llimit - byte budget: scanning stops once the items seen so far
+ *              (each counted with one line pointer) reach this total
+ *
+ * Returns the chosen split offset.
+ */
+static OffsetNumber
+_bt_findsplitloc(Relation rel,
+        Page page,
+        OffsetNumber start,
+        OffsetNumber maxoff,
+        Size llimit)
+{
+    OffsetNumber i;
+    OffsetNumber saferight;
+    ItemId nxtitemid, safeitemid;
+    BTItem safeitem, nxtitem;
+    IndexTuple safetup, nxttup;
+    Size nbytes;
+    TupleDesc itupdesc;
+    int natts;
+    int attno;
+    Datum attsafe;
+    Datum attnext;
+    bool null;
+    
+    itupdesc = RelationGetTupleDescriptor(rel);
+    natts = rel->rd_rel->relnatts;
+    
+    /* the first item is trivially a safe split point; account for its bytes */
+    saferight = start;
+    safeitemid = PageGetItemId(page, saferight);
+    nbytes = ItemIdGetLength(safeitemid) + sizeof(ItemIdData);
+    safeitem = (BTItem) PageGetItem(page, safeitemid);
+    safetup = &(safeitem->bti_itup);
+    
+    i = OffsetNumberNext(start);
+    
+    /*
+     *  NOTE(review): maxoff is never consulted below; the scan is bounded
+     *  only by the byte budget.  This presumably relies on the items from
+     *  'start' onward totalling at least llimit bytes -- confirm, since
+     *  otherwise i would advance past the last item on the page.
+     */
+    while (nbytes < llimit) {
+   
+   /* check the next item on the page */
+   nxtitemid = PageGetItemId(page, i);
+   nbytes += (ItemIdGetLength(nxtitemid) + sizeof(ItemIdData));
+   nxtitem = (BTItem) PageGetItem(page, nxtitemid);
+   nxttup = &(nxtitem->bti_itup);
+   
+   /* test against last known safe item */
+   /* (the null flag returned by index_getattr is overwritten and never examined) */
+   for (attno = 1; attno <= natts; attno++) {
+       attsafe = index_getattr(safetup, attno, itupdesc, &null);
+       attnext = index_getattr(nxttup, attno, itupdesc, &null);
+
+       /*
+        *  If the tuple we're looking at isn't equal to the last safe one
+        *  we saw, then it's our new safe tuple.
+        */
+       
+       if (!_bt_invokestrat(rel, attno, BTEqualStrategyNumber,
+                attsafe, attnext)) {
+       safetup = nxttup;
+       saferight = i;
+       
+       /* break is for the attno for loop */
+       break;
+       }
+   }
+   i = OffsetNumberNext(i);
+    }
+    
+    /*
+     *  If the chain of dups starts at the beginning of the page and extends
+     *  past the halfway mark, we can split it in the middle.
+     */
+    
+    /* here i is one past the last item examined by the loop above */
+    if (saferight == start)
+   saferight = i;
+    
+    return (saferight);
+}
+
+/*
+ *  _bt_newroot() -- Create a new root page for the index.
+ *
+ * We've just split the old root page and need to create a new one.
+ * In order to do this, we add a new root page to the file, then lock
+ * the metadata page and update it.  This is guaranteed to be deadlock-
+ * free, because all readers release their locks on the metadata page
+ * before trying to lock the root, and all writers lock the root before
+ * trying to lock the metadata page.  We have a write lock on the old
+ * root page, so we have not introduced any cycles into the waits-for
+ * graph.
+ *
+ * On entry, lbuf (the old root) and rbuf (its new peer) are write-
+ * locked.  We don't drop the locks in this routine; that's done by
+ * the caller.  On exit, a new root page exists with entries for the
+ * two new children.  The new root page is neither pinned nor locked.
+ */
+static void
+_bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
+{
+    Buffer rootbuf;
+    Page lpage, rpage, rootpage;
+    BlockNumber lbkno, rbkno;
+    BlockNumber rootbknum;
+    BTPageOpaque rootopaque;
+    ItemId itemid;
+    BTItem item;
+    Size itemsz;
+    BTItem new_item;
+    
+    /* get a new root page */
+    rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
+    rootpage = BufferGetPage(rootbuf);
+    _bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
+    
+    /* set btree special data */
+    /* no siblings at the root level; flag the page as the root */
+    rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
+    rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
+    rootopaque->btpo_flags |= BTP_ROOT;
+    
+    /*
+     *  Insert the internal tuple pointers.
+     */
+    
+    lbkno = BufferGetBlockNumber(lbuf);
+    rbkno = BufferGetBlockNumber(rbuf);
+    lpage = BufferGetPage(lbuf);
+    rpage = BufferGetPage(rbuf);
+    
+    /*
+     * step over the high key on the left page while building the 
+     * left page pointer.
+     */
+    itemid = PageGetItemId(lpage, P_FIRSTKEY);
+    itemsz = ItemIdGetLength(itemid);
+    item = (BTItem) PageGetItem(lpage, itemid);
+    new_item = _bt_formitem(&(item->bti_itup));
+    ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_FIRSTKEY);
+    
+    /*
+     * insert the left page pointer into the new root page.  the root
+     * page is the rightmost page on its level so the "high key" item
+     * is the first data item.
+     */
+    /*
+     * NOTE(review): itemsz is the stored length of the child's item, while
+     * new_item comes from _bt_formitem; this assumes the copy is at least
+     * itemsz bytes long -- TODO confirm against _bt_formitem.
+     */
+    (void) PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED);
+    pfree(new_item);
+    
+    /*
+     * the right page is the rightmost page on the second level, so 
+     * the "high key" item is the first data item on that page as well.
+     */
+    itemid = PageGetItemId(rpage, P_HIKEY);
+    itemsz = ItemIdGetLength(itemid);
+    item = (BTItem) PageGetItem(rpage, itemid);
+    new_item = _bt_formitem(&(item->bti_itup));
+    ItemPointerSet(&(new_item->bti_itup.t_tid), rbkno, P_HIKEY);
+    
+    /*
+     * insert the right page pointer into the new root page.
+     */
+    (void) PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED);
+    pfree(new_item);
+    
+    /* write and let go of the root buffer */
+    /* (the block number is fetched first, while the buffer is still held) */
+    rootbknum = BufferGetBlockNumber(rootbuf);
+    _bt_wrtbuf(rel, rootbuf);
+    
+    /* update metadata page with new root block number */
+    _bt_metaproot(rel, rootbknum);
+}
+
+/*
+ *  _bt_pgaddtup() -- add a tuple to a particular page in the index.
+ *
+ * This routine adds the tuple to the page as requested, and keeps the
+ * write lock and reference associated with the page's buffer.  It is
+ * an error to call pgaddtup() without a write lock and reference.  If
+ * afteritem is non-null, it's the item that we expect our new item
+ * to follow.  Otherwise, we do a binary search for the correct place
+ * and insert the new item there.
+ */
+static OffsetNumber
+_bt_pgaddtup(Relation rel,
+        Buffer buf,
+        int keysz,
+        ScanKey itup_scankey,
+        Size itemsize,
+        BTItem btitem,
+        BTItem afteritem)
+{
+    Page pg = BufferGetPage(buf);
+    BTPageOpaque pgopaque = (BTPageOpaque) PageGetSpecialPointer(pg);
+    OffsetNumber slot;
+    BTItem probe;
+
+    if (afteritem != (BTItem) NULL) {
+        Oid wanted = afteritem->bti_oid;
+
+        /*
+         *  Walk forward from the first data item (P_HIKEY on a rightmost
+         *  page, P_FIRSTKEY otherwise) until the item carrying the wanted
+         *  oid has been passed; slot then names the position just after it.
+         */
+        slot = P_RIGHTMOST(pgopaque) ? P_HIKEY : P_FIRSTKEY;
+        for (;;) {
+            probe = (BTItem) PageGetItem(pg, PageGetItemId(pg, slot));
+            slot = OffsetNumberNext(slot);
+            if (probe->bti_oid == wanted)
+                break;
+        }
+    } else {
+        /* no predecessor given: let the binary search pick the position */
+        slot = _bt_binsrch(rel, buf, keysz, itup_scankey, BT_INSERTION);
+    }
+
+    (void) PageAddItem(pg, (Item) btitem, itemsize, slot, LP_USED);
+
+    /* write the buffer out; the lock and reference stay with the caller */
+    _bt_wrtnorelbuf(rel, buf);
+
+    return (slot);
+}
+
+/*
+ *  _bt_goesonpg() -- Does a new tuple belong on this page?
+ *
+ * This is part of the complexity introduced by allowing duplicate
+ * keys into the index.  The tuple belongs on this page if:
+ *
+ *     + there is no page to the right of this one; or
+ *     + it is less than the high key on the page; or
+ *     + the item it is to follow ("afteritem") appears on this
+ *       page.
+ */
+static bool
+_bt_goesonpg(Relation rel,
+        Buffer buf,
+        Size keysz,
+        ScanKey scankey,
+        BTItem afteritem)
+{
+    Page pg = BufferGetPage(buf);
+    BTPageOpaque pgopaque = (BTPageOpaque) PageGetSpecialPointer(pg);
+    ItemId highkey;
+    OffsetNumber off, last;
+    Oid wanted;
+    BTItem probe;
+
+    /* with no page to the right, the tuple has nowhere else to go */
+    if (P_RIGHTMOST(pgopaque))
+        return (true);
+
+    /*
+     *  A non-rightmost page carries a high key (the min key of the next
+     *  page).  Strictly below it: belongs here.  Strictly above it:
+     *  does not.
+     */
+    highkey = PageGetItemId(pg, P_HIKEY);
+    if (_bt_skeycmp(rel, keysz, scankey, pg, highkey, BTLessStrategyNumber))
+        return (true);
+    if (_bt_skeycmp(rel, keysz, scankey, pg, highkey, BTGreaterStrategyNumber))
+        return (false);
+
+    /*
+     *  The key equals the high key.  Without adjacency information the
+     *  item does not belong on this page.
+     */
+    if (afteritem == (BTItem) NULL)
+        return (false);
+
+    /*
+     *  Otherwise the answer is whether afteritem sits on this page.  The
+     *  whole page is scanned linearly rather than binary-searched, because
+     *  a binary search does the wrong thing when the key is the leftmost
+     *  one at this level; splits are rare enough to afford it.
+     */
+    wanted = afteritem->bti_oid;
+    last = PageGetMaxOffsetNumber(pg);
+    off = P_FIRSTKEY;
+    while (off <= last) {
+        probe = (BTItem) PageGetItem(pg, PageGetItemId(pg, off));
+        if (probe->bti_oid == wanted)
+            return (true);
+        off = OffsetNumberNext(off);
+    }
+
+    return (false);
+}
+
+/*
+ * _bt_itemcmp() -- compare item1 to item2 using a requested
+ *              strategy (<, <=, =, >=, >)
+ *
+ */
+bool
+_bt_itemcmp(Relation rel,
+       Size keysz,
+       BTItem item1,
+       BTItem item2,
+       StrategyNumber strat)
+{
+    TupleDesc desc = RelationGetTupleDescriptor(rel);
+    IndexTuple lhs = &(item1->bti_itup);
+    IndexTuple rhs = &(item2->bti_itup);
+    bool nullflag;
+    int attno;
+
+    /*
+     *  The strategy must hold for each of the first keysz attributes;
+     *  the first attribute for which it fails ends the comparison.
+     */
+    attno = 1;
+    while (attno <= keysz) {
+        Datum d1 = index_getattr(lhs, attno, desc, &nullflag);
+        Datum d2 = index_getattr(rhs, attno, desc, &nullflag);
+
+        if (!_bt_invokestrat(rel, attno, strat, d1, d2))
+            return (false);
+        attno++;
+    }
+
+    return (true);
+}
+
+/*
+ * _bt_updateitem() -- updates the key of the item identified by the
+ *             oid with the key of newItem (done in place)
+ *
+ */
+static void
+_bt_updateitem(Relation rel,
+          Size keysz,
+          Buffer buf,
+          Oid bti_oid,
+          BTItem newItem)
+{
+    Page pg = BufferGetPage(buf);
+    OffsetNumber last = PageGetMaxOffsetNumber(pg);
+    OffsetNumber off = P_HIKEY;
+    BTItem cur;
+    IndexTuple target, source;
+    ItemPointerData savedTid;
+
+    /*
+     *  Scan forward from the first slot; stop once the item with the
+     *  requested oid has been fetched or the page is exhausted.
+     */
+    for (;;) {
+        cur = (BTItem) PageGetItem(pg, PageGetItemId(pg, off));
+        off = OffsetNumberNext(off);
+        if (off > last || cur->bti_oid == bti_oid)
+            break;
+    }
+
+    /* ran off the page without a match: should never happen (in theory) */
+    if (cur->bti_oid != bti_oid) {
+        elog(FATAL, "_bt_getstackbuf was lying!!");
+    }
+
+    target = &(cur->bti_itup);
+    source = &(newItem->bti_itup);
+
+    /* overwrite the tuple on the page, but put its original tid back afterwards */
+    ItemPointerCopy(&(target->t_tid), &savedTid);
+    CopyIndexTuple(source, &target);
+    ItemPointerCopy(&savedTid, &(target->t_tid));
+}
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c

new file mode 100644 (file)

index 0000000..ce411a8
--- /dev/null
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -0,0 +1,523 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtpage.c--
+ *    BTree-specific page management code for the Postgres btree access
+ *    method.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *  NOTES
+ *     Postgres btree pages look like ordinary relation pages.  The opaque
+ *     data at high addresses includes pointers to left and right siblings
+ *     and flag data describing page state.  The first page in a btree, page
+ *     zero, is special -- it stores meta-information describing the tree.
+ *     Pages one and higher store the actual tree data.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/nbtree.h"
+
+/* block number of the metadata page (page zero of the index file) */
+#define BTREE_METAPAGE 0
+/* value stored in btm_magic; _bt_checkmeta() complains if it is absent */
+#define BTREE_MAGIC    0x053162
+/* value stored in btm_version; _bt_checkmeta() complains on mismatch */
+#define BTREE_VERSION  0
+
+/*
+ *  Contents of the metadata page, as written by _bt_metapinit() and
+ *  updated by _bt_getroot() / _bt_metaproot().
+ */
+typedef struct BTMetaPageData {
+    uint32 btm_magic;          /* BTREE_MAGIC */
+    uint32 btm_version;        /* BTREE_VERSION */
+    BlockNumber    btm_root;   /* block of the current root, P_NONE if none */
+} BTMetaPageData;
+
+/*
+ *  Locate the metadata inside a metapage: it starts at the address of
+ *  the first line pointer (pd_linp[0]) of the page header.  The macro
+ *  argument is parenthesized so that the PageHeader cast applies to the
+ *  whole expression passed in, not just to its first operand.
+ */
+#define    BTPageGetMeta(p) \
+    ((BTMetaPageData *) &((PageHeader) (p))->pd_linp[0])
+
+/* defined in nbtree.c */
+extern bool    BuildingBtree;
+
+/*
+ *  We use high-concurrency locking on btrees.  There are two cases in
+ *  which we don't do locking.  One is when we're building the btree.
+ *  Since the creating transaction has not committed, no one can see
+ *  the index, and there's no reason to share locks.  The second case
+ *  is when we're just starting up the database system.  We use some
+ *  special-purpose initialization code in the relation cache manager
+ *  (see utils/cache/relcache.c) to allow us to do indexed scans on
+ *  the system catalogs before we'd normally be able to.  This happens
+ *  before the lock table is fully initialized, so we can't use it.
+ *  Strictly speaking, this violates 2pl, but we don't do 2pl on the
+ *  system catalogs anyway, so I declare this to be okay.
+ */
+
+#define USELOCKING (!BuildingBtree && !IsInitProcessingMode())
+
+/*
+ *  _bt_metapinit() -- Initialize the metadata page of a btree.
+ *
+ * The relation must be empty; otherwise elog(WARN) is raised.  A new
+ * block is obtained, initialized as a btree page, stamped with the
+ * magic number and version, given a root of P_NONE, flagged BTP_META
+ * and written out.  The relation is write-locked around the work
+ * whenever USELOCKING is true.
+ */
+void
+_bt_metapinit(Relation rel)
+{
+    Buffer metabuf;
+    Page metapage;
+    BTMetaPageData initial;
+    BTPageOpaque special;
+
+    /* nobody else may be looking at the relation while we set it up */
+    if (USELOCKING)
+        RelationSetLockForWrite(rel);
+
+    if (RelationGetNumberOfBlocks(rel) != 0) {
+        elog(WARN, "Cannot initialize non-empty btree %s",
+             RelationGetRelationName(rel));
+    }
+
+    /* grab a fresh block and make it look like a btree page */
+    metabuf = ReadBuffer(rel, P_NEW);
+    metapage = BufferGetPage(metabuf);
+    _bt_pageinit(metapage, BufferGetPageSize(metabuf));
+
+    /* build the metadata locally, then copy it onto the page */
+    initial.btm_root = P_NONE;
+    initial.btm_version = BTREE_VERSION;
+    initial.btm_magic = BTREE_MAGIC;
+    memmove((char *) BTPageGetMeta(metapage), (char *) &initial,
+            sizeof(initial));
+
+    /* mark the page as the metapage */
+    special = (BTPageOpaque) PageGetSpecialPointer(metapage);
+    special->btpo_flags = BTP_META;
+
+    WriteBuffer(metabuf);
+
+    /* finished; let go of the relation lock */
+    if (USELOCKING)
+        RelationUnsetLockForWrite(rel);
+}
+
+/*
+ *  _bt_checkmeta() -- Sanity-check the metadata page of a btree.
+ *
+ * An empty relation is accepted silently (nothing has been set up
+ * yet).  Otherwise the metapage is read-locked and three things are
+ * checked in turn, each reporting through elog(WARN) on failure: the
+ * BTP_META flag, the magic number, and the version number.
+ */
+void
+_bt_checkmeta(Relation rel)
+{
+    Buffer mbuf;
+    Page mpage;
+    BTMetaPageData *meta;
+    BTPageOpaque special;
+
+    /* nothing to verify in a relation with no blocks */
+    if (RelationGetNumberOfBlocks(rel) == 0)
+        return;
+
+    mbuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
+    mpage = BufferGetPage(mbuf);
+
+    special = (BTPageOpaque) PageGetSpecialPointer(mpage);
+    if ((special->btpo_flags & BTP_META) == 0) {
+        elog(WARN, "Invalid metapage for index %s",
+             RelationGetRelationName(rel));
+    }
+
+    meta = BTPageGetMeta(mpage);
+
+    if (meta->btm_magic != BTREE_MAGIC) {
+        elog(WARN, "Index %s is not a btree",
+             RelationGetRelationName(rel));
+    }
+
+    if (meta->btm_version != BTREE_VERSION) {
+        elog(WARN, "Version mismatch on %s:  version %d file, version %d code",
+             RelationGetRelationName(rel),
+             meta->btm_version, BTREE_VERSION);
+    }
+
+    /* drop the read lock and our reference */
+    _bt_relbuf(rel, mbuf, BT_READ);
+}
+
+/*
+ *  _bt_getroot() -- Get the root page of the btree.
+ *
+ * Since the root page can move around the btree file, we have to read
+ * its location from the metadata page, and then read the root page
+ * itself.  If no root page exists yet, we have to create one.  The
+ * standard class of race conditions exists here; the sequence of lock
+ * requests below is intended to cover them all.
+ *
+ * We pass in the access type (BT_READ or BT_WRITE), and return the
+ * root page's buffer with the appropriate lock type set.  Reference
+ * count on the root page gets bumped by ReadBuffer.  The metadata
+ * page is unlocked and unreferenced by this process when this routine
+ * returns.
+ *
+ * Whenever a race is detected (someone else created the root first, or
+ * the block we fetched does not carry BTP_ROOT), what we hold is
+ * released and the routine calls itself to start over.
+ */
+Buffer
+_bt_getroot(Relation rel, int access)
+{
+    Buffer metabuf;
+    Page metapg;
+    BTPageOpaque metaopaque;
+    Buffer rootbuf;
+    Page rootpg;
+    BTPageOpaque rootopaque;
+    BlockNumber rootblkno;
+    BTMetaPageData *metad;
+    
+    /* start with only a read lock on the metapage */
+    metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
+    metapg = BufferGetPage(metabuf);
+    metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
+    Assert(metaopaque->btpo_flags & BTP_META);
+    metad = BTPageGetMeta(metapg);
+    
+    /* if no root page initialized yet, do it */
+    if (metad->btm_root == P_NONE) {
+   
+   /* turn our read lock in for a write lock */
+   _bt_relbuf(rel, metabuf, BT_READ);
+   metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
+   metapg = BufferGetPage(metabuf);
+   metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
+   Assert(metaopaque->btpo_flags & BTP_META);
+   metad = BTPageGetMeta(metapg);
+   
+   /*
+    *  Race condition:  if someone else initialized the metadata between
+    *  the time we released the read lock and acquired the write lock,
+    *  above, we want to avoid doing it again.
+    */
+   
+   if (metad->btm_root == P_NONE) {
+       
+       /*
+        *  Get, initialize, write, and leave a lock of the appropriate
+        *  type on the new root page.  Since this is the first page in
+        *  the tree, it's a leaf.
+        */
+       
+       rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
+       rootblkno = BufferGetBlockNumber(rootbuf);
+       rootpg = BufferGetPage(rootbuf);
+       metad->btm_root = rootblkno;
+       /*
+        *  _bt_getbuf() already ran _bt_pageinit() on a P_NEW page, so
+        *  this call initializes the page a second time.
+        */
+       _bt_pageinit(rootpg, BufferGetPageSize(rootbuf));
+       rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
+       rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
+       _bt_wrtnorelbuf(rel, rootbuf);
+       
+       /* swap write lock for read lock, if appropriate */
+       /* (the read lock is taken before the write lock is dropped) */
+       if (access != BT_WRITE) {
+       _bt_setpagelock(rel, rootblkno, BT_READ);
+       _bt_unsetpagelock(rel, rootblkno, BT_WRITE);
+       }
+       
+       /* okay, metadata is correct */
+       /* _bt_wrtbuf() writes the metapage and drops our write lock on it */
+       _bt_wrtbuf(rel, metabuf);
+   } else {
+       
+       /*
+        *  Metadata initialized by someone else.  In order to guarantee
+        *  no deadlocks, we have to release the metadata page and start
+        *  all over again.
+        */
+       
+       _bt_relbuf(rel, metabuf, BT_WRITE);
+       return (_bt_getroot(rel, access));
+   }
+    } else {
+   /* a root is recorded: lock and fetch it while still holding the metapage */
+   rootbuf = _bt_getbuf(rel, metad->btm_root, access);
+   
+   /* done with the meta page */
+   _bt_relbuf(rel, metabuf, BT_READ);
+    }
+    
+    /*
+     *  Race condition:  If the root page split between the time we looked
+     *  at the metadata page and got the root buffer, then we got the wrong
+     *  buffer.
+     */
+    
+    rootpg = BufferGetPage(rootbuf);
+    rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
+    if (!(rootopaque->btpo_flags & BTP_ROOT)) {
+   
+   /* it happened, try again */
+   _bt_relbuf(rel, rootbuf, access);
+   return (_bt_getroot(rel, access));
+    }
+    
+    /*
+     *  By here, we have a correct lock on the root block, its reference
+     *  count is correct, and we have no lock set on the metadata page.
+     *  Return the root block.
+     */
+    
+    return (rootbuf);
+}
+
+/*
+ *  _bt_getbuf() -- Fetch a buffer by block number, locked for read or
+ *          write.
+ *
+ * For an existing block the page lock is set first and the buffer is
+ * read afterwards.  For P_NEW the order is reversed, because the block
+ * number to lock is only known once the buffer exists; the new page is
+ * also initialized as an empty btree page before it is locked.
+ *
+ * Any access value other than BT_WRITE is treated as BT_READ.  On
+ * return the lock is set and the buffer's reference count is correct.
+ */
+Buffer
+_bt_getbuf(Relation rel, BlockNumber blkno, int access)
+{
+    Buffer result;
+    Page newpage;
+    int locktype;
+
+    locktype = (access == BT_WRITE) ? BT_WRITE : BT_READ;
+
+    if (blkno == P_NEW) {
+        /* instantiate the block, find out where it landed, then lock it */
+        result = ReadBuffer(rel, blkno);
+        blkno = BufferGetBlockNumber(result);
+        newpage = BufferGetPage(result);
+        _bt_pageinit(newpage, BufferGetPageSize(result));
+        _bt_setpagelock(rel, blkno, locktype);
+        return (result);
+    }
+
+    /* known block: lock first, read second */
+    _bt_setpagelock(rel, blkno, locktype);
+    result = ReadBuffer(rel, blkno);
+
+    return (result);
+}
+
+/*
+ *  _bt_relbuf() -- drop the page lock on a buffer and release it.
+ *
+ * access names the lock being given up; anything other than BT_WRITE
+ * is handled as BT_READ.  The lock is removed before ReleaseBuffer()
+ * is called.
+ */
+void
+_bt_relbuf(Relation rel, Buffer buf, int access)
+{
+    int locktype;
+
+    locktype = (access == BT_WRITE) ? BT_WRITE : BT_READ;
+
+    _bt_unsetpagelock(rel, BufferGetBlockNumber(buf), locktype);
+    ReleaseBuffer(buf);
+}
+
+/*
+ *  _bt_wrtbuf() -- write a btree page out and let go of it.
+ *
+ * The buffer is handed to WriteBuffer() and the BT_WRITE page lock is
+ * then removed, so afterwards the caller holds neither the lock nor
+ * its reference.  Calling this without a write lock or without a
+ * reference to the buffer is an error.
+ */
+void
+_bt_wrtbuf(Relation rel, Buffer buf)
+{
+    BlockNumber lockedblk;
+
+    /* look the block number up while we still own the buffer */
+    lockedblk = BufferGetBlockNumber(buf);
+
+    WriteBuffer(buf);
+    _bt_unsetpagelock(rel, lockedblk, BT_WRITE);
+}
+
+/*
+ *  _bt_wrtnorelbuf() -- write a btree page to disk, but do not release
+ *          our reference or lock.
+ *
+ * The buffer is passed to WriteNoReleaseBuffer() and nothing else is
+ * touched: no page lock is changed here.  rel is not consulted; it is
+ * kept so the signature parallels _bt_wrtbuf().
+ *
+ * It is an error to call _bt_wrtnorelbuf() without a write lock
+ * or a reference to the buffer.
+ */
+void
+_bt_wrtnorelbuf(Relation rel, Buffer buf)
+{
+    /* no lock bookkeeping happens here, so no block number is needed */
+    WriteNoReleaseBuffer(buf);
+}
+
+/*
+ *  _bt_pageinit() -- Set up a fresh, empty btree page.
+ *
+ * The whole page (size bytes) is zeroed and then handed to PageInit()
+ * with room reserved for a BTPageOpaqueData in the special space.
+ */
+void
+_bt_pageinit(Page page, Size size)
+{
+    /*
+     *  The zero fill is not strictly required.  New pages are created
+     *  rarely, though, and starting from a known-empty page is cheap
+     *  reassurance.
+     */
+    memset(page, 0, size);
+
+    /* lay out the page header and the btree-specific special area */
+    PageInit(page, size, sizeof(BTPageOpaqueData));
+}
+
+/*
+ *  _bt_metaproot() -- Record a new root block in the metapage.
+ *
+ * Lehman and Yao require that the root page move around in order to
+ * guarantee deadlock-free short-term, fine-granularity locking.  When
+ * the root page is split, the new parent has to be recorded in the
+ * relation's metadata page; that is what happens here.
+ *
+ * There are no direct preconditions, but calling this without a write
+ * lock on at least the old root page is a big mistake.  On exit the
+ * metapage data is correct and we hold neither a reference to nor a
+ * lock on the metapage.
+ */
+void
+_bt_metaproot(Relation rel, BlockNumber rootbknum)
+{
+    Buffer mbuf;
+    Page mpage;
+    BTPageOpaque special;
+    BTMetaPageData *meta;
+
+    /* write-lock the metapage and make sure it really is one */
+    mbuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
+    mpage = BufferGetPage(mbuf);
+    special = (BTPageOpaque) PageGetSpecialPointer(mpage);
+    Assert(special->btpo_flags & BTP_META);
+
+    /* point the metadata at the new root */
+    meta = BTPageGetMeta(mpage);
+    meta->btm_root = rootbknum;
+
+    /* write it out; this also drops the lock and our reference */
+    _bt_wrtbuf(rel, mbuf);
+}
+
+/*
+ *  _bt_getstackbuf() -- Step back up to the parent recorded in a stack
+ *          entry and find the page that now holds the item we
+ *          last looked at there.
+ *
+ * The stack entry remembers a block, an offset and a bit image of the
+ * item.  Items above us in the tree only ever move right, so we look
+ * at the remembered offset first, then at the rest of that page, then
+ * follow right-sibling links page by page.  Items are matched on
+ * bti_oid.
+ *
+ * The buffer holding the item is returned locked according to access.
+ * Running past the rightmost page without a match is elog(FATAL).
+ */
+Buffer
+_bt_getstackbuf(Relation rel, BTStack stack, int access)
+{
+    Buffer curbuf;
+    BlockNumber nextblk;
+    OffsetNumber off, lastoff;
+    Page pg;
+    BTItem cand;
+    BTPageOpaque special;
+
+    curbuf = _bt_getbuf(rel, stack->bts_blkno, access);
+    pg = BufferGetPage(curbuf);
+    special = (BTPageOpaque) PageGetSpecialPointer(pg);
+    lastoff = PageGetMaxOffsetNumber(pg);
+
+    /*
+     *  Look at the remembered slot and everything to its right on this
+     *  page.  If the page has shrunk below the remembered offset the
+     *  loop body never runs.
+     */
+    for (off = stack->bts_offset; off <= lastoff; off = OffsetNumberNext(off)) {
+        cand = (BTItem) PageGetItem(pg, PageGetItemId(pg, off));
+        if (cand->bti_oid == stack->bts_btitem->bti_oid)
+            return (curbuf);
+    }
+
+    /* not on the original page: chase right siblings until it shows up */
+    for (;;) {
+        nextblk = special->btpo_next;
+        if (P_RIGHTMOST(special))
+            elog(FATAL, "my bits moved right off the end of the world!");
+
+        /* trade the current page for its right neighbor */
+        _bt_relbuf(rel, curbuf, access);
+        curbuf = _bt_getbuf(rel, nextblk, access);
+        pg = BufferGetPage(curbuf);
+        lastoff = PageGetMaxOffsetNumber(pg);
+        special = (BTPageOpaque) PageGetSpecialPointer(pg);
+
+        /* a page with a right sibling starts with a high key; skip it */
+        off = P_RIGHTMOST(special) ? P_HIKEY : P_FIRSTKEY;
+
+        while (off <= lastoff) {
+            cand = (BTItem) PageGetItem(pg, PageGetItemId(pg, off));
+            if (cand->bti_oid == stack->bts_btitem->bti_oid)
+                return (curbuf);
+            off = OffsetNumberNext(off);
+        }
+    }
+}
+
+/*
+ *  _bt_setpagelock() -- take a page-level lock on one btree block.
+ *
+ * Does nothing when USELOCKING is false.  Otherwise the lock is keyed
+ * on the item pointer (blkno, P_HIKEY): a write lock for BT_WRITE, a
+ * read lock for any other access value.
+ */
+void
+_bt_setpagelock(Relation rel, BlockNumber blkno, int access)
+{
+    ItemPointerData lockkey;
+
+    if (!USELOCKING)
+        return;
+
+    ItemPointerSet(&lockkey, blkno, P_HIKEY);
+
+    if (access != BT_WRITE)
+        RelationSetSingleRLockPage(rel, &lockkey);
+    else
+        RelationSetSingleWLockPage(rel, &lockkey);
+}
+
+/*
+ *  _bt_unsetpagelock() -- give up a page-level lock on one btree block.
+ *
+ * Does nothing when USELOCKING is false.  Otherwise the lock keyed on
+ * the item pointer (blkno, P_HIKEY) is released: the write lock for
+ * BT_WRITE, the read lock for any other access value.
+ */
+void
+_bt_unsetpagelock(Relation rel, BlockNumber blkno, int access)
+{
+    ItemPointerData lockkey;
+
+    if (!USELOCKING)
+        return;
+
+    ItemPointerSet(&lockkey, blkno, P_HIKEY);
+
+    if (access != BT_WRITE)
+        RelationUnsetSingleRLockPage(rel, &lockkey);
+    else
+        RelationUnsetSingleWLockPage(rel, &lockkey);
+}
+
+/*
+ *  _bt_pagedel() -- delete one index item from its page.
+ *
+ * tid supplies the block number and the offset of the item within
+ * that block.  The page is write-locked, the item is removed with
+ * PageIndexTupleDelete(), and the buffer is written and unlocked.
+ */
+void
+_bt_pagedel(Relation rel, ItemPointer tid)
+{
+    Buffer target;
+    Page pg;
+    BlockNumber blk;
+    OffsetNumber off;
+
+    /* take the address apart */
+    blk = ItemPointerGetBlockNumber(tid);
+    off = ItemPointerGetOffsetNumber(tid);
+
+    target = _bt_getbuf(rel, blk, BT_WRITE);
+    pg = BufferGetPage(target);
+
+    PageIndexTupleDelete(pg, off);
+
+    /* flush the change; _bt_wrtbuf also drops the write lock */
+    _bt_wrtbuf(rel, target);
+}
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

new file mode 100644 (file)

index 0000000..0601611
--- /dev/null
+++ b/src/backend/access/nbtree/nbtree.c
@@ -0,0 +1,516 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtree.c--
+ *    Implementation of Lehman and Yao's btree management algorithm for
+ *    Postgres.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ * NOTES
+ *    This file contains only the public interface routines.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/nbtree.h"
+#include "access/funcindex.h"
+
+#include "nodes/execnodes.h"
+#include "nodes/plannodes.h"
+
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+
+#include "catalog/index.h"
+
+/*
+ * BuildingBtree is set to true by btbuild() for the duration of an index
+ * build and reset to false when it returns.  FastBuild selects, inside
+ * btbuild(), the spool-based bottom-up build (_bt_spool / _bt_leafbuild)
+ * instead of inserting items one at a time with _bt_doinsert().
+ */
+bool   BuildingBtree = false;
+bool   FastBuild = false; /* turn this on to make bulk builds work*/
+
+/*
+ *  btbuild() -- build a new btree index.
+ *
+ * We use a global variable to record the fact that we're creating
+ * a new index.  This is used to avoid high-concurrency locking,
+ * since the index won't be visible until this transaction commits
+ * and since building is guaranteed to be single-threaded.
+ *
+ * Parameters, as used by the code below:
+ *   heap     - relation that is scanned for tuples to index
+ *   index    - btree relation being filled
+ *   natts    - number of key attributes in each index tuple
+ *   attnum   - passed through to GetIndexValue() for every attribute
+ *   finfo    - passed through to GetIndexValue() for every attribute
+ *   predInfo - supplies pred (partial-index predicate) and oldPred
+ *              (predicate of an existing partial index being extended);
+ *              dereferenced unconditionally
+ *   istrat, pcount, params - not referenced in this routine
+ *
+ * The heap is scanned once.  Each qualifying tuple is turned into a
+ * BTItem and either inserted directly (_bt_doinsert) or, when FastBuild
+ * is set, spooled for a bottom-up build afterwards.  In normal
+ * processing mode both relations are closed and their statistics are
+ * updated before returning.
+ */
+void
+btbuild(Relation heap,
+   Relation index,
+   int natts,
+   AttrNumber *attnum,
+   IndexStrategy istrat,
+   uint16 pcount,
+   Datum *params,
+   FuncIndexInfo *finfo,
+   PredInfo *predInfo)
+{
+    HeapScanDesc hscan;
+    Buffer buffer;
+    HeapTuple htup;
+    IndexTuple itup;
+    TupleDesc htupdesc, itupdesc;
+    Datum *attdata;
+    bool *nulls;
+    InsertIndexResult res;
+    int nhtups, nitups;
+    int i;
+    BTItem btitem;
+    ExprContext *econtext;
+    TupleTable tupleTable;
+    TupleTableSlot *slot;
+    Oid hrelid, irelid;
+    Node *pred, *oldPred;
+    void *spool;
+    
+    /* note that this is a new btree */
+    /*
+     * NOTE(review): the flag is cleared only at the bottom of this
+     * function; if any callee leaves non-locally (presumably possible
+     * via elog -- confirm) it would stay set.
+     */
+    BuildingBtree = true;
+    
+    pred = predInfo->pred;
+    oldPred = predInfo->oldPred;
+
+    /* initialize the btree index metadata page (if this is a new index) */
+    if (oldPred == NULL)
+   _bt_metapinit(index);
+    
+    /* get tuple descriptors for heap and index relations */
+    htupdesc = RelationGetTupleDescriptor(heap);
+    itupdesc = RelationGetTupleDescriptor(index);
+    
+    /* get space for data items that'll appear in the index tuple */
+    attdata = (Datum *) palloc(natts * sizeof(Datum));
+    nulls = (bool *) palloc(natts * sizeof(bool));
+    
+    /*
+     * If this is a predicate (partial) index, we will need to evaluate the
+     * predicate using ExecQual, which requires the current tuple to be in a
+     * slot of a TupleTable.  In addition, ExecQual must have an ExprContext
+     * referring to that slot.  Here, we initialize dummy TupleTable and
+     * ExprContext objects for this purpose. --Nels, Feb '92
+     */
+#ifndef OMIT_PARTIAL_INDEX
+    if (pred != NULL || oldPred != NULL) {
+   tupleTable = ExecCreateTupleTable(1);
+   slot = ExecAllocTableSlot(tupleTable);
+   econtext = makeNode(ExprContext);
+   FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer);
+    }
+#endif /* OMIT_PARTIAL_INDEX */
+    
+    /* start a heap scan */
+    hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
+    htup = heap_getnext(hscan, 0, &buffer);
+    
+    /* build the index */
+    nhtups = nitups = 0;
+    
+    /*
+     * In FastBuild mode nothing is inserted inside the loop, so res is
+     * pinned to NULL here and the pfree(res) at the bottom of the loop
+     * is skipped.  The meaning of the constant 7 passed to
+     * _bt_spoolinit() is not visible from this file.
+     */
+    if (FastBuild) {
+   spool = _bt_spoolinit(index, 7);
+   res = (InsertIndexResult) NULL;
+    }
+
+    for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
+   
+   nhtups++;
+   
+   /*
+    * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+    * this tuple if it was already in the existing partial index
+    */
+   /* (such tuples are still counted in nitups before being skipped) */
+   if (oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+
+       /*SetSlotContents(slot, htup);*/
+       slot->val = htup;
+       if (ExecQual((List*)oldPred, econtext) == true) {
+       nitups++;
+       continue;
+       }
+#endif /* OMIT_PARTIAL_INDEX */        
+   }
+   
+   /* Skip this tuple if it doesn't satisfy the partial-index predicate */
+   if (pred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+       /* SetSlotContents(slot, htup); */
+       slot->val = htup;
+       if (ExecQual((List*)pred, econtext) == false)
+       continue;
+#endif /* OMIT_PARTIAL_INDEX */        
+   }
+   
+   /*
+    * NOTE(review): nitups is bumped here, before the null-key check
+    * further down, so a tuple whose key is null is counted even though
+    * it is never inserted.
+    */
+   nitups++;
+   
+   /*
+    *  For the current heap tuple, extract all the attributes
+    *  we use in this index, and note which are null.
+    */
+   
+   for (i = 1; i <= natts; i++) {
+       int  attoff;
+       bool attnull;
+       
+       /*
+        *  Offsets are from the start of the tuple, and are
+        *  zero-based; indices are one-based.  The next call
+        *  returns i - 1.  That's data hiding for you.
+        */
+       
+       attoff = AttrNumberGetAttrOffset(i);
+       attdata[attoff] = GetIndexValue(htup, 
+                       htupdesc,
+                       attoff, 
+                       attnum, 
+                       finfo, 
+                       &attnull,
+                       buffer);
+       /* nulls[] holds a character per attribute: 'n' for null, ' ' otherwise */
+       nulls[attoff] = (attnull ? 'n' : ' ');
+   }
+   
+   /* form an index tuple and point it at the heap tuple */
+   itup = index_formtuple(itupdesc, attdata, nulls);
+   
+   /*
+    *  If the single index key is null, we don't insert it into
+    *  the index.  Btrees support scans on <, <=, =, >=, and >.
+    *  Relational algebra says that A op B (where op is one of the
+    *  operators above) returns null if either A or B is null.  This
+    *  means that no qualification used in an index scan could ever
+    *  return true on a null attribute.  It also means that indices
+    *  can't be used by ISNULL or NOTNULL scans, but that's an
+    *  artifact of the strategy map architecture chosen in 1986, not
+    *  of the way nulls are handled here.
+    */
+   
+   if (itup->t_info & INDEX_NULL_MASK) {
+       pfree(itup);
+       continue;
+   }
+   
+   itup->t_tid = htup->t_ctid;
+   btitem = _bt_formitem(itup);
+
+   /*
+    * if we are doing bottom-up btree build, we insert the index
+    * into a spool page for subsequent processing.  otherwise, we
+    * insert into the btree.
+    */
+   if (FastBuild) {
+       _bt_spool(index, btitem, spool);
+   } else {
+       res = _bt_doinsert(index, btitem);
+   }
+
+   /* per-tuple scratch storage is released on every iteration */
+   pfree(btitem);
+   pfree(itup);
+   if (res) {
+       pfree(res);
+   }
+    }
+    
+    /* okay, all heap tuples are indexed */
+    heap_endscan(hscan);
+    
+    if (pred != NULL || oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+   ExecDestroyTupleTable(tupleTable, true);
+   pfree(econtext);
+#endif /* OMIT_PARTIAL_INDEX */        
+    }
+    
+    /*
+     * if we are doing bottom-up btree build, we now have a bunch of
+     * sorted runs in the spool pages.  finish the build by (1)
+     * merging the runs, (2) inserting the sorted tuples into btree
+     * pages and (3) building the upper levels.
+     */
+    if (FastBuild) {
+   _bt_spool(index, (BTItem) NULL, spool); /* flush spool */
+   _bt_leafbuild(index, spool);
+   _bt_spooldestroy(spool);
+    }
+
+    /*
+     *  Since we just counted the tuples in the heap, we update its
+     *  stats in pg_class to guarantee that the planner takes advantage
+     *  of the index we just created. Finally, only update statistics
+     *  during normal index definitions, not for indices on system catalogs
+     *  created during bootstrap processing.  We must close the relations
+     *  before updating statistics to guarantee that the relcache entries
+     *  are flushed when we increment the command counter in UpdateStats().
+     */
+    if (IsNormalProcessingMode())
+   {
+       /* remember the relation ids first: the relations are closed below */
+       hrelid = heap->rd_id;
+       irelid = index->rd_id;
+       heap_close(heap);
+       index_close(index);
+       UpdateStats(hrelid, nhtups, true);
+       UpdateStats(irelid, nitups, false);
+       if (oldPred != NULL) {
+       /* every heap tuple was counted as indexed: pass a NULL predicate */
+       if (nitups == nhtups) pred = NULL;
+       UpdateIndexPredicate(irelid, oldPred, pred);
+       }  
+   }
+    
+    /* be tidy */
+    pfree(nulls);
+    pfree(attdata);
+    
+    /* all done */
+    BuildingBtree = false;
+}
+
+/*
+ *  btinsert() -- insert an index tuple into a btree.
+ *
+ * A tuple whose t_info has INDEX_NULL_MASK set is not inserted and a
+ * NULL result is returned.  Otherwise the tuple is wrapped in a BTItem,
+ * handed to _bt_doinsert(), and the InsertIndexResult produced there is
+ * returned to the caller.  The temporary BTItem is freed here.
+ */
+InsertIndexResult
+btinsert(Relation rel, IndexTuple itup)
+{
+    BTItem wrapped;
+    InsertIndexResult outcome;
+
+    outcome = (InsertIndexResult) NULL;
+
+    if (!(itup->t_info & INDEX_NULL_MASK)) {
+        wrapped = _bt_formitem(itup);
+        outcome = _bt_doinsert(rel, wrapped);
+        pfree(wrapped);
+    }
+
+    return (outcome);
+}
+
+/*
+ *  btgettuple() -- Fetch the next tuple of a scan.
+ *
+ * A scan whose currentItemData is not yet valid has not been
+ * positioned, so the first item is located with _bt_first().  A scan
+ * that already has a position is simply advanced with _bt_next() in
+ * the requested direction.  The result is returned as a char pointer.
+ */
+char *
+btgettuple(IndexScanDesc scan, ScanDirection dir)
+{
+    RetrieveIndexResult hit;
+
+    if (!ItemPointerIsValid(&(scan->currentItemData)))
+        hit = _bt_first(scan, dir);
+    else
+        hit = _bt_next(scan, dir);
+
+    return ((char *) hit);
+}
+
+/*
+ *  btbeginscan() -- start a scan on a btree index
+ *
+ * With more than one key, the keys are first put in order by
+ * _bt_orderkeys(), which may also change keysz.  A generic scan
+ * descriptor is then obtained and given a BTScanOpaque with no current
+ * and no marked buffer.
+ *
+ * scanFromEnd is recomputed here whatever fromEnd says: it is true
+ * when there are no keys, or when the strategy of the first key is
+ * "less" or "less or equal"; otherwise it is false.  The scan is
+ * registered with _bt_regscan() before it is returned as a char
+ * pointer.
+ */
+char *
+btbeginscan(Relation rel, bool fromEnd, uint16 keysz, ScanKey scankey)
+{
+    IndexScanDesc desc;
+    BTScanOpaque opq;
+    StrategyNumber firststrat;
+
+    /* order the qualification keys when there are several */
+    if (keysz > 1)
+        _bt_orderkeys(rel, &keysz, scankey);
+
+    /* build the scan descriptor and its btree-private part */
+    desc = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
+    opq = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
+    opq->btso_mrkbuf = InvalidBuffer;
+    opq->btso_curbuf = InvalidBuffer;
+    desc->opaque = opq;
+
+    /* make the scan direction follow the tree order */
+    desc->flags = 0x0;
+    if (keysz == 0) {
+        desc->scanFromEnd = true;
+    } else {
+        firststrat = _bt_getstrat(desc->relation, 1 /* XXX */,
+                                  scankey[0].sk_procedure);
+
+        if (firststrat == BTLessStrategyNumber
+            || firststrat == BTLessEqualStrategyNumber)
+            desc->scanFromEnd = true;
+        else
+            desc->scanFromEnd = false;
+    }
+
+    /* remember the scan so page changes can adjust it */
+    _bt_regscan(desc);
+
+    return ((char *) desc);
+}
+
+/*
+ *  btrescan() -- restart a scan on an index relation
+ *
+ * Both the current and the marked position are dropped: for each one
+ * that is valid, its buffer is released with a BT_READ unlock, the
+ * buffer field is set to InvalidBuffer and the item pointer is
+ * invalidated.  If the scan has keys, they are then overwritten with
+ * the ones in scankey.  fromEnd is not consulted.
+ */
+void
+btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
+{
+    BTScanOpaque opq;
+    ItemPointer pos;
+
+    opq = (BTScanOpaque) scan->opaque;
+
+    /* the current page of the scan is read-locked; give it back */
+    pos = &(scan->currentItemData);
+    if (ItemPointerIsValid(pos)) {
+        _bt_relbuf(scan->relation, opq->btso_curbuf, BT_READ);
+        opq->btso_curbuf = InvalidBuffer;
+        ItemPointerSetInvalid(pos);
+    }
+
+    /* same for the page of the last marked item */
+    pos = &(scan->currentMarkData);
+    if (ItemPointerIsValid(pos)) {
+        _bt_relbuf(scan->relation, opq->btso_mrkbuf, BT_READ);
+        opq->btso_mrkbuf = InvalidBuffer;
+        ItemPointerSetInvalid(pos);
+    }
+
+    /* install the new scan keys */
+    if (scan->numberOfKeys > 0) {
+        memmove(scan->keyData,
+                scankey,
+                scan->numberOfKeys * sizeof(ScanKeyData));
+    }
+}
+
+/*
+ *  btmovescan() -- give a scan a new argument for its first key
+ *
+ * If the scan has a valid current position, the buffer behind it is
+ * released (BT_READ) and the position is invalidated.  The argument
+ * of keyData[0] is then replaced by v.
+ */
+void
+btmovescan(IndexScanDesc scan, Datum v)
+{
+    BTScanOpaque opq;
+    ItemPointer pos;
+
+    opq = (BTScanOpaque) scan->opaque;
+
+    /* let go of whatever the scan is still holding */
+    pos = &(scan->currentItemData);
+    if (ItemPointerIsValid(pos)) {
+        _bt_relbuf(scan->relation, opq->btso_curbuf, BT_READ);
+        opq->btso_curbuf = InvalidBuffer;
+        ItemPointerSetInvalid(pos);
+    }
+
+    scan->keyData[0].sk_argument = v;
+}
+
+/*
+ *  btendscan() -- close down a scan
+ */
+void
+btendscan(IndexScanDesc scan)
+{
+    ItemPointer iptr;
+    BTScanOpaque so;
+    
+    so = (BTScanOpaque) scan->opaque;
+    
+    /* release any locks we still hold */
+    if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
+   if (BufferIsValid(so->btso_curbuf))
+       _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
+   so->btso_curbuf = InvalidBuffer;
+   ItemPointerSetInvalid(iptr);
+    }
+    
+    if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
+   if (BufferIsValid(so->btso_mrkbuf))
+       _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
+   so->btso_mrkbuf = InvalidBuffer;
+   ItemPointerSetInvalid(iptr);
+    }
+    
+    /* don't need scan registered anymore */
+    _bt_dropscan(scan);
+    
+    /* be tidy */
+#ifdef PERFECT_MMGR
+    pfree (scan->opaque);
+#endif /* PERFECT_MMGR */
+}
+
+/*
+ *  btmarkpos() -- remember the current scan position
+ *
+ * Any previously marked position is dropped first (buffer released
+ * with BT_READ, pointer invalidated).  If the scan has a valid current
+ * position, the block of the current buffer is fetched once more with
+ * a read lock, that buffer becomes the marked buffer, and the current
+ * item pointer is copied into the mark.
+ */
+void
+btmarkpos(IndexScanDesc scan)
+{
+    BTScanOpaque opq;
+    ItemPointer oldmark;
+
+    opq = (BTScanOpaque) scan->opaque;
+
+    /* forget the old mark, if there is one */
+    oldmark = &(scan->currentMarkData);
+    if (ItemPointerIsValid(oldmark)) {
+        _bt_relbuf(scan->relation, opq->btso_mrkbuf, BT_READ);
+        opq->btso_mrkbuf = InvalidBuffer;
+        ItemPointerSetInvalid(oldmark);
+    }
+
+    /* take an extra lock on the current page and record the position */
+    if (ItemPointerIsValid(&(scan->currentItemData))) {
+        opq->btso_mrkbuf = _bt_getbuf(scan->relation,
+                                      BufferGetBlockNumber(opq->btso_curbuf),
+                                      BT_READ);
+        scan->currentMarkData = scan->currentItemData;
+    }
+}
+
+/*
+ *  btrestrpos() -- return a scan to its last marked position
+ *
+ * The current position, if valid, is dropped first (buffer released
+ * with BT_READ, pointer invalidated).  If a valid mark exists, the
+ * block of the marked buffer is fetched once more with a read lock,
+ * that buffer becomes the current buffer, and the marked item pointer
+ * is copied into the current position.
+ */
+void
+btrestrpos(IndexScanDesc scan)
+{
+    BTScanOpaque opq;
+    ItemPointer oldcur;
+
+    opq = (BTScanOpaque) scan->opaque;
+
+    /* give up the page we are currently on, if any */
+    oldcur = &(scan->currentItemData);
+    if (ItemPointerIsValid(oldcur)) {
+        _bt_relbuf(scan->relation, opq->btso_curbuf, BT_READ);
+        opq->btso_curbuf = InvalidBuffer;
+        ItemPointerSetInvalid(oldcur);
+    }
+
+    /* take an extra lock on the marked page and make it current */
+    if (ItemPointerIsValid(&(scan->currentMarkData))) {
+        opq->btso_curbuf = _bt_getbuf(scan->relation,
+                                      BufferGetBlockNumber(opq->btso_mrkbuf),
+                                      BT_READ);
+
+        scan->currentItemData = scan->currentMarkData;
+    }
+}
+
+/*
+ *  btdelete() -- delete the index item addressed by tid.
+ *
+ * Active scans are adjusted first (_bt_adjscans), then the item is
+ * removed from its page (_bt_pagedel).
+ */
+void
+btdelete(Relation rel, ItemPointer tid)
+{
+    /* fix up scans that would be disturbed by the removal */
+    _bt_adjscans(rel, tid);
+
+    /* now take the item off the page */
+    _bt_pagedel(rel, tid);
+}
diff --git a/src/backend/access/nbtree/nbtscan.c b/src/backend/access/nbtree/nbtscan.c

new file mode 100644 (file)

index 0000000..62a029b
--- /dev/null
+++ b/src/backend/access/nbtree/nbtscan.c
@@ -0,0 +1,164 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtscan.c--
+ *    manage scans on btrees.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *
+ * NOTES
+ *   Because we can be doing an index scan on a relation while we update
+ *   it, we need to avoid missing data that moves around in the index.
+ *   The routines and global variables in this file guarantee that all
+ *   scans in the local address space stay correctly positioned.  This
+ *   is all we need to worry about, since write locking guarantees that
+ *   no one else will be on the same page at the same time as we are.
+ *
+ *   The scheme is to manage a list of active scans in the current backend.
+ *   Whenever we add or remove records from an index, or whenever we
+ *   split a leaf page, we check the list of active scans to see if any
+ *   has been affected.  A scan is affected only if it is on the same
+ *   relation, and the same page, as the update.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/nbtree.h"
+
+/*
+ *  One cell of the singly linked list of btree scans registered in this
+ *  backend (see _bt_regscan / _bt_dropscan).
+ */
+typedef struct BTScanListData {
+    IndexScanDesc      btsl_scan;  /* the registered scan */
+    struct BTScanListData  *btsl_next; /* next cell, or NULL at end of list */
+} BTScanListData;
+
+typedef BTScanListData *BTScanList;
+
+/* head of the list; NULL when no scan is registered */
+static BTScanList  BTScans = (BTScanList) NULL;
+     
+/*
+ *  _bt_regscan() -- add a scan to the list of registered btree scans.
+ *
+ * A fresh list cell is palloc'd and pushed onto the front of BTScans.
+ */
+void
+_bt_regscan(IndexScanDesc scan)
+{
+    BTScanList cell = (BTScanList) palloc(sizeof(BTScanListData));
+
+    cell->btsl_next = BTScans;
+    cell->btsl_scan = scan;
+    BTScans = cell;
+}
+
+/*
+ *  _bt_dropscan() -- drop a scan from the scan list
+ */
+void
+_bt_dropscan(IndexScanDesc scan)
+{
+    BTScanList chk, last;
+    
+    last = (BTScanList) NULL;
+    for (chk = BTScans;
+    chk != (BTScanList) NULL && chk->btsl_scan != scan;
+    chk = chk->btsl_next) {
+   last = chk;
+    }
+    
+    if (chk == (BTScanList) NULL)
+   elog(WARN, "btree scan list trashed; can't find 0x%lx", scan);
+    
+    if (last == (BTScanList) NULL)
+   BTScans = chk->btsl_next;
+    else
+   last->btsl_next = chk->btsl_next;
+    
+#ifdef PERFECT_MEM
+    pfree (chk);
+#endif /* PERFECT_MEM */
+}
+
+/*
+ *  _bt_adjscans() -- fix up registered scans for a deletion at tid.
+ *
+ * Every registered scan whose relation has the same rd_id as rel is
+ * handed to _bt_scandel() together with the block and offset of tid.
+ */
+void
+_bt_adjscans(Relation rel, ItemPointer tid)
+{
+    BTScanList cell;
+    Oid target = rel->rd_id;
+
+    cell = BTScans;
+    while (cell != (BTScanList) NULL) {
+        IndexScanDesc candidate = cell->btsl_scan;
+
+        if (candidate->relation->rd_id == target)
+            _bt_scandel(candidate,
+                        ItemPointerGetBlockNumber(tid),
+                        ItemPointerGetOffsetNumber(tid));
+        cell = cell->btsl_next;
+    }
+}
+
+/*
+ *  _bt_scandel() -- adjust one scan for the deletion of item
+ *           (blkno, offno).
+ *
+ * If the scan's current position and/or its marked position is on block
+ * blkno at an offset >= offno, that position is stepped backward by one
+ * item with _bt_step().  Scans not touched by the deletion are left
+ * alone.
+ *
+ * _bt_step() always works on scan->currentItemData and stores the buffer
+ * it ends up on in so->btso_curbuf.  To move the *mark* we therefore
+ * temporarily swap the mark into the "current" slot -- item pointer and
+ * buffer both -- step, and then swap everything back.
+ */
+void
+_bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
+{
+    ItemPointer current;
+    Buffer buf;
+    BTScanOpaque so;
+    
+    if (!_bt_scantouched(scan, blkno, offno))
+   return;
+    
+    so = (BTScanOpaque) scan->opaque;
+    buf = so->btso_curbuf;
+    
+    /* current position affected?  back it up by one item */
+    current = &(scan->currentItemData);
+    if (ItemPointerIsValid(current)
+   && ItemPointerGetBlockNumber(current) == blkno
+   && ItemPointerGetOffsetNumber(current) >= offno) {
+   _bt_step(scan, &buf, BackwardScanDirection);
+   so->btso_curbuf = buf;
+    }
+    
+    /* marked position affected?  back it up too */
+    current = &(scan->currentMarkData);
+    if (ItemPointerIsValid(current)
+   && ItemPointerGetBlockNumber(current) == blkno
+   && ItemPointerGetOffsetNumber(current) >= offno) {
+   ItemPointerData tmp;
+   Buffer curbuf;
+   
+   /* remember the real current buffer; _bt_step overwrites the slot */
+   curbuf = so->btso_curbuf;
+   
+   /* make the mark look like the current position */
+   tmp = *current;
+   *current = scan->currentItemData;
+   scan->currentItemData = tmp;
+   
+   /*
+    *  Step using the mark's own buffer.  Using the current position's
+    *  buffer here would examine the wrong page whenever the two
+    *  positions are on different pages (or the current position is
+    *  invalid), and could release a buffer the current position still
+    *  needs.
+    */
+   buf = so->btso_mrkbuf;
+   so->btso_curbuf = buf;
+   _bt_step(scan, &buf, BackwardScanDirection);
+   so->btso_mrkbuf = buf;
+   
+   /* put the current position back the way it was */
+   so->btso_curbuf = curbuf;
+   tmp = *current;
+   *current = scan->currentItemData;
+   scan->currentItemData = tmp;
+    }
+}
+
+/*
+ *  _bt_scantouched() -- would a change at (blkno, offno) affect this scan?
+ *
+ * Returns true if either the current or the marked item pointer of the
+ * scan is valid, lies on block blkno, and has an offset >= offno;
+ * otherwise returns false.
+ */
+bool
+_bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
+{
+    ItemPointer positions[2];
+    int which;
+
+    positions[0] = &(scan->currentItemData);
+    positions[1] = &(scan->currentMarkData);
+
+    for (which = 0; which < 2; which++) {
+        ItemPointer pos = positions[which];
+
+        if (!ItemPointerIsValid(pos))
+            continue;
+        if (ItemPointerGetBlockNumber(pos) != blkno)
+            continue;
+        if (ItemPointerGetOffsetNumber(pos) < offno)
+            continue;
+        return (true);
+    }
+
+    return (false);
+}
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c

new file mode 100644 (file)

index 0000000..d7a7fc7
--- /dev/null
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -0,0 +1,1133 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtsearch.c--
+ *    search code for postgres btrees.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "fmgr.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/skey.h"
+#include "access/sdir.h"
+#include "access/nbtree.h"
+
+static BTStack _bt_searchr(Relation rel, int keysz, ScanKey scankey, Buffer *bufP, BTStack stack_in);
+static OffsetNumber _bt_firsteq(Relation rel, TupleDesc itupdesc, Page page, Size keysz, ScanKey scankey, OffsetNumber offnum);
+static int _bt_compare(Relation rel, TupleDesc itupdesc, Page page, int keysz, ScanKey scankey, OffsetNumber offnum);
+static bool _bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
+static RetrieveIndexResult _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
+
+/*
+ *  _bt_search() -- Search for a scan key in the index.
+ *
+ * Entry point for a descent of the tree: fetches the root with BT_READ
+ * access into *bufP and lets _bt_searchr() do the actual descent,
+ * starting from an empty parent stack.  Whatever stack _bt_searchr()
+ * produces is returned to the caller.
+ */
+BTStack
+_bt_search(Relation rel, int keysz, ScanKey scankey, Buffer *bufP)
+{
+    BTStack parents;
+
+    *bufP = _bt_getroot(rel, BT_READ);
+    parents = _bt_searchr(rel, keysz, scankey, bufP, (BTStack) NULL);
+
+    return (parents);
+}
+
+/*
+ *  _bt_searchr() -- Descend the tree from *bufP to a leaf for a scankey.
+ *
+ * Starting from the page in *bufP, follows child pointers until a page
+ * flagged BTP_LEAF is reached.  For every internal page passed through,
+ * a BTStackData entry (block number, offset, and a private copy of the
+ * chosen index item) is pushed on top of stack_in.  On return *bufP is
+ * the leaf buffer and the result is the top of the stack (stack_in
+ * itself if the starting page was already a leaf).
+ *
+ * The descent is written as a loop; each iteration handles one level.
+ */
+static BTStack
+_bt_searchr(Relation rel,
+            int keysz,
+            ScanKey scankey,
+            Buffer *bufP,
+            BTStack stack_in)
+{
+    BTStack top = stack_in;
+
+    for (;;) {
+        Page page;
+        BTPageOpaque opaque;
+        BlockNumber parent_blkno;
+        BlockNumber child_blkno;
+        OffsetNumber off;
+        ItemId iid;
+        BTItem chosen;
+        BTItem chosen_copy;
+        int nbytes;
+        BTStack entry;
+
+        /* reached a leaf?  then the descent is over */
+        page = BufferGetPage(*bufP);
+        opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+        if (opaque->btpo_flags & BTP_LEAF)
+            break;
+
+        /* pick the downlink to follow on this internal page */
+        parent_blkno = BufferGetBlockNumber(*bufP);
+        off = _bt_binsrch(rel, *bufP, keysz, scankey, BT_DESCENT);
+        iid = PageGetItemId(page, off);
+        chosen = (BTItem) PageGetItem(page, iid);
+        child_blkno = ItemPointerGetBlockNumber(&(chosen->bti_itup.t_tid));
+
+        /*
+         *  Keep a bit-for-bit copy of the item we followed.  Should a
+         *  split later have to propagate upward, this image lets the
+         *  real parent be found again even if the parent page itself
+         *  split in the meantime (see Lehman and Yao).
+         */
+        nbytes = ItemIdGetLength(iid);
+        chosen_copy = (BTItem) palloc(nbytes);
+        memmove((char *) chosen_copy, (char *) chosen, nbytes);
+
+        entry = (BTStack) palloc(sizeof(BTStackData));
+        entry->bts_blkno = parent_blkno;
+        entry->bts_offset = off;
+        entry->bts_btitem = chosen_copy;
+        entry->bts_parent = top;
+        top = entry;
+
+        /* trade the parent's read lock for one on the child */
+        _bt_relbuf(rel, *bufP, BT_READ);
+        *bufP = _bt_getbuf(rel, child_blkno, BT_READ);
+
+        /*
+         *  The child may have split after we read its pointer in the
+         *  parent; if so, slide right to the sibling that now covers
+         *  the key.
+         */
+        *bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ);
+    }
+
+    return (top);
+}
+
+/*
+ *  _bt_moveright() -- move right in the btree if necessary.
+ *
+ * When we drop and reacquire a pointer to a page, it is possible that
+ * the page has changed in the meanwhile.  If this happens, we're
+ * guaranteed that the page has "split right" -- that is, that any
+ * data that appeared on the page originally is either on the page
+ * or strictly to the right of it.
+ *
+ * This routine decides whether or not we need to move right in the
+ * tree by examining the high key entry on the page.  If that entry
+ * is strictly less than one we expect to be on the page, then our
+ * picture of the page is incorrect and we need to move right.
+ *
+ * On entry, we have the buffer pinned and a lock of the proper type.
+ * If we move right, we release the buffer and lock and acquire the
+ * same on the right sibling.
+ *
+ *  rel     - the index relation
+ *  buf     - buffer of the page we arrived at
+ *  keysz   - number of entries in scankey
+ *  scankey - the key being searched for
+ *  access  - access mode handed to _bt_relbuf/_bt_getbuf when stepping
+ *        right (callers in this file pass BT_READ)
+ *
+ * Returns the buffer we ended up on: buf itself if no move was needed,
+ * otherwise the buffer of a page further to the right.
+ */
+Buffer
+_bt_moveright(Relation rel,
+         Buffer buf,
+         int keysz,
+         ScanKey scankey,
+         int access)
+{
+    Page page;
+    BTPageOpaque opaque;
+    ItemId hikey;
+    ItemId itemid;
+    BlockNumber rblkno;
+    
+    page = BufferGetPage(buf);
+    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    
+    /* if we're on a rightmost page, we don't need to move right */
+    if (P_RIGHTMOST(opaque))
+   return (buf);
+    
+    /* by convention, item 0 on non-rightmost pages is the high key */
+    hikey = PageGetItemId(page, P_HIKEY);
+    
+    /*
+     *  If the scan key that brought us to this page is >= the high key
+     *  stored on the page, then the page has split and we need to move
+     *  right.
+     */
+    
+    if (_bt_skeycmp(rel, keysz, scankey, page, hikey,
+           BTGreaterEqualStrategyNumber)) {
+   
+   /* move right as long as we need to */
+   do {
+       /*
+        *  If this page consists of all duplicate keys (hikey and first
+        *  key on the page have the same value), then we don't need to
+        *  step right.
+        */
+       /* only look at P_FIRSTKEY if the page holds more than a high key */
+       if (PageGetMaxOffsetNumber(page) > P_HIKEY) {
+       itemid = PageGetItemId(page, P_FIRSTKEY);
+       if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
+               BTEqualStrategyNumber)) {
+           /* break is for the "move right" while loop */
+           break;
+       }
+       }
+       
+       /* step right one page */
+       /* read the sibling link before the page is released */
+       rblkno = opaque->btpo_next;
+       _bt_relbuf(rel, buf, access);
+       buf = _bt_getbuf(rel, rblkno, access);
+       page = BufferGetPage(buf);
+       opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+       hikey = PageGetItemId(page, P_HIKEY);
+       
+   /* stop at the rightmost page, or once scankey < high key */
+   } while (! P_RIGHTMOST(opaque)
+        && _bt_skeycmp(rel, keysz, scankey, page, hikey,
+               BTGreaterEqualStrategyNumber));
+    }
+    return (buf);
+}
+
+/*
+ *  _bt_skeycmp() -- test "scankey <op> item" for one item on a page, where
+ *          <op> is the requested strategy (<, <=, =, >=, >).
+ *
+ * The unique OIDs stored in the btree item play no part here; they are
+ * for internal use only (repositioning a scan after a page split) and
+ * impose no meaningful ordering.
+ *
+ * The left operand of each comparison is the scan key argument, the
+ * right operand is the matching attribute of the tuple that itemid
+ * points at.  The result is true only if the strategy holds for every
+ * one of the keysz key attributes; the first failure ends the test.
+ */
+bool
+_bt_skeycmp(Relation rel,
+            Size keysz,
+            ScanKey scankey,
+            Page page,
+            ItemId itemid,
+            StrategyNumber strat)
+{
+    BTItem onpage = (BTItem) PageGetItem(page, itemid);
+    IndexTuple tuple = &(onpage->bti_itup);
+    TupleDesc desc = RelationGetTupleDescriptor(rel);
+    bool isNull;
+    int k;
+
+    for (k = 0; k < keysz; k++) {
+        ScanKey key = &scankey[k];
+        Datum attrDatum;
+
+        attrDatum = index_getattr(tuple, key->sk_attno, desc, &isNull);
+
+        /* strategies are looked up by 1-based key position */
+        if (!_bt_invokestrat(rel, k + 1, strat, key->sk_argument, attrDatum))
+            return (false);
+    }
+
+    return (true);
+}
+
+/*
+ *  _bt_binsrch() -- Do a binary search for a key on a particular page.
+ *
+ * The scankey we get has the compare function stored in the procedure
+ * entry of each data struct.  We invoke this regproc to do the
+ * comparison for every key in the scankey.  _bt_binsrch() returns
+ * the OffsetNumber of the first matching key on the page, or the
+ * OffsetNumber at which the matching key would appear if it were
+ * on this page.
+ *
+ * By the time this procedure is called, we're sure we're looking
+ * at the right page -- don't need to walk right.  _bt_binsrch() has
+ * no lock or refcount side effects on the buffer.
+ *
+ *  rel      - the index relation
+ *  buf      - buffer holding the page to search
+ *  keysz    - number of entries in scankey
+ *  scankey  - the key being searched for
+ *  srchtype - only BT_DESCENT is distinguished here: on a non-leaf page
+ *         it selects the "last key <, or first key =" result
+ *
+ * Note that the result can be one past the last item on the page
+ * (OffsetNumberNext of the highest offset examined).
+ */
+OffsetNumber
+_bt_binsrch(Relation rel,
+       Buffer buf,
+       int keysz,
+       ScanKey scankey,
+       int srchtype)
+{
+    TupleDesc itupdesc;
+    Page page;
+    BTPageOpaque opaque;
+    OffsetNumber low, mid, high;
+    bool match;
+    int result;
+    
+    page = BufferGetPage(buf);
+    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    
+    /* by convention, item 0 on any non-rightmost page is the high key */
+    low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+    
+    high = PageGetMaxOffsetNumber(page);
+    
+    /*
+     *  Since for non-rightmost pages, the zeroeth item on the page is the
+     *  high key, there are two notions of emptiness.  One is if nothing
+     *  appears on the page.  The other is if nothing but the high key does.
+     *  The reason we test high <= low, rather than high == low, is that
+     *  after vacuuming there may be nothing *but* the high key on a page.
+     *  In that case, given the scheme above, low = 1 and high = 0.
+     */
+    
+    if (PageIsEmpty(page) || (! P_RIGHTMOST(opaque) && high <= low))
+   return (low);
+    
+    itupdesc = RelationGetTupleDescriptor(rel);
+    match = false;
+    
+    /*
+     *  Narrow [low, high] until the bounds are equal or adjacent, or an
+     *  exact match is hit.  _bt_compare() > 0 means scankey is greater
+     *  than the item at mid.
+     */
+    while ((high - low) > 1) {
+   mid = low + ((high - low) / 2);
+   result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid);
+   
+   if (result > 0)
+       low = mid;
+   else if (result < 0)
+       high = mid - 1;
+   else {
+       match = true;
+       break;
+   }
+    }
+    
+    /* if we found a match, we want to find the first one on the page */
+    /* (mid is only read here when match was set inside the loop) */
+    if (match) {
+   return (_bt_firsteq(rel, itupdesc, page, keysz, scankey, mid));
+    } else {
+   
+   /*
+    *  We terminated because the endpoints got too close together.  There
+    *  are two cases to take care of.
+    *
+    *  For non-insertion searches on internal pages, we want to point at
+    *  the last key <, or first key =, the scankey on the page.  This
+    *  guarantees that we'll descend the tree correctly.
+    *
+    *  For all other cases, we want to point at the first key >=
+    *  the scankey on the page.  This guarantees that scans and
+    *  insertions will happen correctly.
+    */
+   
+   opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+   if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) {
+       
+       /*
+        *  We want the last key <, or first key ==, the scan key.
+        */
+       
+       result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
+       
+       if (result == 0) {
+       return (_bt_firsteq(rel, itupdesc, page, keysz, scankey, high));
+       } else if (result > 0) {
+       /* scankey > item at high: high is the last key < scankey */
+       return (high);
+       } else {
+       return (low);
+       }
+   } else {
+       
+       /* we want the first key >= the scan key */
+       result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
+       if (result <= 0) {
+       return (low);
+       } else {
+       /* scankey > item at low; with a single candidate, answer is past it */
+       if (low == high)
+           return (OffsetNumberNext(low));
+       
+       result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
+       if (result <= 0)
+           return (high);
+       else
+           return (OffsetNumberNext(high));
+       }
+   }
+    }
+}
+
+/*
+ *  _bt_firsteq() -- back up to the first of a run of equal keys.
+ *
+ * Given an offset whose item compares equal to scankey, move toward the
+ * front of the page for as long as the preceding item also compares
+ * equal (per _bt_compare), never going below the first data item of the
+ * page.  Returns the offset reached.
+ */
+static OffsetNumber
+_bt_firsteq(Relation rel,
+            TupleDesc itupdesc,
+            Page page,
+            Size keysz,
+            ScanKey scankey,
+            OffsetNumber offnum)
+{
+    BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    OffsetNumber lowest;
+    OffsetNumber before;
+
+    /* on non-rightmost pages the high key slot is not a data item */
+    lowest = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+    for (;;) {
+        if (offnum <= lowest)
+            break;
+
+        before = OffsetNumberPrev(offnum);
+        if (_bt_compare(rel, itupdesc, page, keysz, scankey, before) != 0)
+            break;
+
+        offnum = before;
+    }
+
+    return (offnum);
+}
+
+/*
+ *  _bt_compare() -- Compare scankey to a particular tuple on the page.
+ *
+ * This routine returns:
+ *     -1 if scankey < tuple at offnum;
+ *      0 if scankey == tuple at offnum;
+ *     +1 if scankey > tuple at offnum.
+ *
+ * NOTE(review): for unequal keys the value actually returned is whatever
+ * the key's comparison procedure produced (narrowed to int), so callers
+ * should rely on the sign only, unless every comparison procedure is
+ * known to return exactly -1/0/+1 -- confirm.
+ *
+ * In order to avoid having to propagate changes up the tree any time
+ * a new minimal key is inserted, the leftmost entry on the leftmost
+ * page is less than all possible keys, by definition.
+ *
+ *  rel      - the index relation
+ *  itupdesc - tuple descriptor used to extract index attributes
+ *  page     - page holding the tuple
+ *  keysz    - number of entries in scankey
+ *  scankey  - the key to compare; each entry carries its own comparison
+ *         procedure (sk_func / sk_procedure) and argument
+ *  offnum   - offset of the tuple on the page
+ */
+static int
+_bt_compare(Relation rel,
+       TupleDesc itupdesc,
+       Page page,
+       int keysz,
+       ScanKey scankey,
+       OffsetNumber offnum)
+{
+    Datum datum;
+    BTItem btitem;
+    ItemId itemid;
+    IndexTuple itup;
+    BTPageOpaque opaque;
+    ScanKey entry;
+    AttrNumber attno;
+    int result;
+    int i;
+    bool null;
+    
+    /*
+     *  If this is a leftmost internal page, and if our comparison is
+     *  with the first key on the page, then the item at that position is
+     *  by definition less than the scan key.
+     */
+    
+    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    if (!(opaque->btpo_flags & BTP_LEAF)
+   && P_LEFTMOST(opaque)
+   && offnum == P_HIKEY) {
+   itemid = PageGetItemId(page, offnum);
+   
+   /*
+    *  we just have to believe that this will only be called with
+    *  offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the
+    *  first actual data key (i.e., this is also a rightmost
+    *  page).  there doesn't seem to be any code that implies
+    *  that the leftmost page is normally missing a high key as
+    *  well as the rightmost page.  but that implies that this
+    *  code path only applies to the root -- which seems
+    *  unlikely..
+    */
+   if (! P_RIGHTMOST(opaque)) {
+       elog(WARN, "_bt_compare: invalid comparison to high key");
+   }
+
+   /*
+    *  If the item on the page is equal to the scankey, that's
+    *  okay to admit.  We just can't claim that the first key on
+    *  the page is greater than anything.
+    */
+   
+   if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
+           BTEqualStrategyNumber)) {
+       return (0);
+   }
+   /* otherwise report scankey > item, whatever the item really holds */
+   return (1);
+    }
+    
+    btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+    itup = &(btitem->bti_itup);
+    
+    /*
+     *  The scan key is set up with the attribute number associated with each
+     *  term in the key.  It is important that, if the index is multi-key,
+     *  the scan contain the first k key attributes, and that they be in
+     *  order.  If you think about how multi-key ordering works, you'll
+     *  understand why this is.
+     *
+     *  We don't test for violation of this condition here.
+     */
+    
+    for (i = 1; i <= keysz; i++) {
+   long tmpres;
+   
+   entry = &scankey[i - 1];
+   attno = entry->sk_attno;
+   /* the null flag set by index_getattr is not examined here */
+   datum = index_getattr(itup, attno, itupdesc, &null);
+   /* scan key argument is the left operand, index datum the right */
+   tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+                 entry->sk_argument, datum);
+   /* long -> int narrowing of the comparison result */
+   result = tmpres;
+   
+   /* if the keys are unequal, return the difference */
+   if (result != 0)
+       return (result);
+    }
+    
+    /* by here, the keys are equal */
+    return (0);
+}
+
+/*
+ *  _bt_next() -- Get the next item in a scan.
+ *
+ * On entry, we have a valid currentItemData in the scan, and a
+ * read lock on the page that contains that item.  We return the next
+ * item in the scan, in direction dir, if it satisfies the scan's
+ * qualification (_bt_checkqual).
+ *
+ * NOTE(review): this header used to say the page is "locked but not
+ * pinned", while the code below treats the cached buffer as "pinned and
+ * locked".  Which of the two describes the buffer manager contract
+ * cannot be told from this file -- confirm.
+ *
+ * Returns a RetrieveIndexResult for the next item, or NULL if there is
+ * no further item in that direction or the item fails the
+ * qualification.  In the NULL cases the scan's current position is
+ * invalidated and its buffer released.
+ */
+RetrieveIndexResult
+_bt_next(IndexScanDesc scan, ScanDirection dir)
+{
+    Relation rel;
+    Buffer buf;
+    Page page;
+    OffsetNumber offnum;
+    RetrieveIndexResult res;
+    ItemPointer current;
+    ItemPointer iptr;
+    BTItem btitem;
+    IndexTuple itup;
+    BTScanOpaque so;
+    
+    rel = scan->relation;
+    so = (BTScanOpaque) scan->opaque;
+    current = &(scan->currentItemData);
+    
+    /*
+     *  XXX 10 may 91:  somewhere there's a bug in our management of the
+     *  cached buffer for this scan.  wei discovered it.  the following
+     *  is a workaround so he can work until i figure out what's going on.
+     */
+    
+    if (!BufferIsValid(so->btso_curbuf))
+   so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current),
+                    BT_READ);
+    
+    /* we still have the buffer pinned and locked */
+    buf = so->btso_curbuf;
+    
+    /* step one tuple in the appropriate direction */
+    if (!_bt_step(scan, &buf, dir))
+   return ((RetrieveIndexResult) NULL);
+    
+    /* by here, current is the tuple we want to return */
+    offnum = ItemPointerGetOffsetNumber(current);
+    page = BufferGetPage(buf);
+    btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+    itup = &btitem->bti_itup;
+    
+    if (_bt_checkqual(scan, itup)) {
+   iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
+   memmove((char *) iptr, (char *) &(itup->t_tid),
+       sizeof(ItemPointerData));
+   res = FormRetrieveIndexResult(current, iptr);
+   
+   /*
+    *  The scratch copy is not needed once the result is formed;
+    *  _bt_first() frees it at exactly this point, so do the same
+    *  here instead of leaking one ItemPointerData per returned tuple.
+    */
+   pfree(iptr);
+   
+   /* remember which buffer we have pinned and locked */
+   so->btso_curbuf = buf;
+    } else {
+   /* qualification failed: the scan ends here */
+   ItemPointerSetInvalid(current);
+   so->btso_curbuf = InvalidBuffer;
+   _bt_relbuf(rel, buf, BT_READ);
+   res = (RetrieveIndexResult) NULL;
+    }
+    
+    return (res);
+}
+
+/*
+ *  _bt_first() -- Find the first item in a scan.
+ *
+ * We need to be clever about the type of scan, the operation it's
+ * performing, and the tree ordering.  We return the RetrieveIndexResult
+ * of the first item in the tree that satisfies the qualification
+ * associated with the scan descriptor.  On exit, the page containing
+ * the current index tuple is read locked and pinned, and the scan's
+ * opaque data entry is updated to include the buffer.
+ *
+ * Only the first scan key (scan->keyData[0]) is used to position the
+ * scan; the complete qualification is checked with _bt_checkqual() on
+ * the item finally chosen.
+ *
+ * Returns NULL, with the current position invalidated and the buffer
+ * released, when the leaf page reached is empty, when an equality scan
+ * finds no equal key, or when the chosen item fails the qualification.
+ */
+RetrieveIndexResult
+_bt_first(IndexScanDesc scan, ScanDirection dir)
+{
+    Relation rel;
+    TupleDesc itupdesc;
+    Buffer buf;
+    Page page;
+    BTStack stack;
+    OffsetNumber offnum, maxoff;
+    BTItem btitem;
+    IndexTuple itup;
+    ItemPointer current;
+    ItemPointer iptr;
+    BlockNumber blkno;
+    StrategyNumber strat;
+    RetrieveIndexResult res;
+    RegProcedure proc;
+    int result;
+    BTScanOpaque so;
+    ScanKeyData skdata;
+    
+    /* if we just need to walk down one edge of the tree, do that */
+    if (scan->scanFromEnd)
+   return (_bt_endpoint(scan, dir));
+    
+    rel = scan->relation;
+    itupdesc = RelationGetTupleDescriptor(scan->relation);
+    current = &(scan->currentItemData);
+    so = (BTScanOpaque) scan->opaque;
+    
+    /*
+     *  Okay, we want something more complicated.  What we'll do is use
+     *  the first item in the scan key passed in (which has been correctly
+     *  ordered to take advantage of index ordering) to position ourselves
+     *  at the right place in the scan.
+     */
+    
+    /*
+     *  XXX -- The attribute number stored in the scan key is the attno
+     *        in the heap relation.  We need to transmogrify this into
+     *         the index relation attno here.  For the moment, we have
+     *        hardwired attno == 1.
+     */
+    /* build a one-entry key using the ordering procedure of attribute 1 */
+    proc = index_getprocid(rel, 1, BTORDER_PROC);
+    ScanKeyEntryInitialize(&skdata, 0x0, 1, proc,
+              scan->keyData[0].sk_argument);
+    
+    /* descend to the leaf; the parent stack is of no use to a scan */
+    stack = _bt_search(rel, 1, &skdata, &buf);
+    _bt_freestack(stack);
+    
+    /* find the nearest match to the manufactured scan key on the page */
+    offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT);
+    page = BufferGetPage(buf);
+    
+    /*
+     *  This will happen if the tree we're searching is entirely empty,
+     *  or if we're doing a search for a key that would appear on an
+     *  entirely empty internal page.  In either case, there are no
+     *  matching tuples in the index.
+     */
+    
+    if (PageIsEmpty(page)) {
+   ItemPointerSetInvalid(current);
+   so->btso_curbuf = InvalidBuffer;
+   _bt_relbuf(rel, buf, BT_READ);
+   return ((RetrieveIndexResult) NULL);
+    }
+    
+    maxoff = PageGetMaxOffsetNumber(page);
+    
+    /* _bt_binsrch may point one past the last item; clamp to the page */
+    if (offnum > maxoff)
+   offnum = maxoff;
+    
+    blkno = BufferGetBlockNumber(buf);
+    ItemPointerSet(current, blkno, offnum);
+    
+    /*
+     *  Now find the right place to start the scan.  Result is the
+     *  value we're looking for minus the value we're looking at
+     *  in the index.
+     */
+    
+    result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+    strat = _bt_getstrat(rel, 1, scan->keyData[0].sk_procedure);
+    
+    /*
+     *  Adjust the starting item according to the operator of the first
+     *  scan key.  In each loop below, a false return from _bt_twostep
+     *  leaves result at its previous value.
+     *
+     *  NOTE(review): _bt_twostep is not visible from here.  If it can
+     *  release buf when it fails, the code after the switch would read
+     *  a page through an invalid buffer -- confirm.
+     */
+    switch (strat) {
+    case BTLessStrategyNumber:
+   /* keys < scankey: back up while the item is >= scankey */
+   if (result <= 0) {
+       do {
+       if (!_bt_twostep(scan, &buf, BackwardScanDirection))
+           break;
+       
+       offnum = ItemPointerGetOffsetNumber(current);
+       page = BufferGetPage(buf);
+       result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+       } while (result <= 0);
+       
+       /* if this is true, the key we just looked at is gone */
+       if (result > 0)
+       (void) _bt_twostep(scan, &buf, ForwardScanDirection);
+   }
+   break;
+   
+    case BTLessEqualStrategyNumber:
+   /* keys <= scankey: advance while the item is <= scankey */
+   if (result >= 0) {
+       do {
+       if (!_bt_twostep(scan, &buf, ForwardScanDirection))
+           break;
+       
+       offnum = ItemPointerGetOffsetNumber(current);
+       page = BufferGetPage(buf);
+       result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+       } while (result >= 0);
+       
+       /* went one item too far; step back */
+       if (result < 0)
+       (void) _bt_twostep(scan, &buf, BackwardScanDirection);
+   }
+   break;
+   
+    case BTEqualStrategyNumber:
+   /* no equal key at the search position: the scan is empty */
+   if (result != 0) {
+       _bt_relbuf(scan->relation, buf, BT_READ);
+       so->btso_curbuf = InvalidBuffer;
+       ItemPointerSetInvalid(&(scan->currentItemData));
+       return ((RetrieveIndexResult) NULL);
+   }
+   break;
+   
+    case BTGreaterEqualStrategyNumber:
+   /* keys >= scankey: back up while the item is > scankey */
+   if (result < 0) {
+       do {
+       if (!_bt_twostep(scan, &buf, BackwardScanDirection))
+           break;
+       
+       page = BufferGetPage(buf);
+       offnum = ItemPointerGetOffsetNumber(current);
+       result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+       } while (result < 0);
+       
+       /* went one item too far; step forward again */
+       if (result > 0)
+       (void) _bt_twostep(scan, &buf, ForwardScanDirection);
+   }
+   break;
+   
+    case BTGreaterStrategyNumber:
+   /* keys > scankey: advance while the item is <= scankey */
+   if (result >= 0) {
+       do {
+       if (!_bt_twostep(scan, &buf, ForwardScanDirection))
+           break;
+       
+       offnum = ItemPointerGetOffsetNumber(current);
+       page = BufferGetPage(buf);
+       result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum);
+       } while (result >= 0);
+   }
+   break;
+    }
+    
+    /* okay, current item pointer for the scan is right */
+    offnum = ItemPointerGetOffsetNumber(current);
+    page = BufferGetPage(buf);
+    btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+    itup = &btitem->bti_itup;
+    
+    if (_bt_checkqual(scan, itup)) {
+   /* scratch copy of the heap tid, freed once the result is formed */
+   iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
+   memmove((char *) iptr, (char *) &(itup->t_tid),
+       sizeof(ItemPointerData));
+   res = FormRetrieveIndexResult(current, iptr);
+   pfree(iptr);
+   
+   /* remember which buffer we have pinned */
+   so->btso_curbuf = buf;
+    } else {
+   /* first candidate fails the qualification: nothing to return */
+   ItemPointerSetInvalid(current);
+   so->btso_curbuf = InvalidBuffer;
+   _bt_relbuf(rel, buf, BT_READ);
+   res = (RetrieveIndexResult) NULL;
+    }
+    
+    return (res);
+}
+
+/*
+ *  _bt_step() -- Step one item in the requested direction in a scan on
+ *       the tree.
+ *
+ * If no adjacent record exists in the requested direction, return
+ * false.  Else, return true and set the currentItemData for the
+ * scan to the right thing.
+ */
+bool
+_bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
+{
+    Page page;
+    BTPageOpaque opaque;
+    OffsetNumber offnum, maxoff;
+    OffsetNumber start;
+    BlockNumber blkno;
+    BlockNumber obknum;
+    BTScanOpaque so;
+    ItemPointer current;
+    Relation rel;
+    
+    rel = scan->relation;
+    current = &(scan->currentItemData);
+    offnum = ItemPointerGetOffsetNumber(current);
+    page = BufferGetPage(*bufP);
+    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    so = (BTScanOpaque) scan->opaque;
+    maxoff = PageGetMaxOffsetNumber(page);
+    
+    /* get the next tuple */
+    if (ScanDirectionIsForward(dir)) {
+   if (!PageIsEmpty(page) && offnum < maxoff) {
+       offnum = OffsetNumberNext(offnum);
+   } else {
+       
+       /* if we're at end of scan, release the buffer and return */
+       blkno = opaque->btpo_next;
+       if (P_RIGHTMOST(opaque)) {
+       _bt_relbuf(rel, *bufP, BT_READ);
+       ItemPointerSetInvalid(current);
+       *bufP = so->btso_curbuf = InvalidBuffer;
+       return (false);
+       } else {
+       
+       /* walk right to the next page with data */
+       _bt_relbuf(rel, *bufP, BT_READ);
+       for (;;) {
+           *bufP = _bt_getbuf(rel, blkno, BT_READ);
+           page = BufferGetPage(*bufP);
+           opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+           maxoff = PageGetMaxOffsetNumber(page);
+           start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+           
+           if (!PageIsEmpty(page) && start <= maxoff) {
+           break;
+           } else {
+           blkno = opaque->btpo_next;
+           _bt_relbuf(rel, *bufP, BT_READ);
+           if (blkno == P_NONE) {
+               *bufP = so->btso_curbuf = InvalidBuffer;
+               ItemPointerSetInvalid(current);
+               return (false);
+           }
+           }
+       }
+       offnum = start;
+       }
+   }
+    } else if (ScanDirectionIsBackward(dir)) {
+   
+   /* remember that high key is item zero on non-rightmost pages */
+   start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+   if (offnum > start) {
+       offnum = OffsetNumberPrev(offnum);
+   } else {
+       
+       /* if we're at end of scan, release the buffer and return */
+       blkno = opaque->btpo_prev;
+       if (P_LEFTMOST(opaque)) {
+       _bt_relbuf(rel, *bufP, BT_READ);
+       *bufP = so->btso_curbuf = InvalidBuffer;
+       ItemPointerSetInvalid(current);
+       return (false);
+       } else {
+       
+       obknum = BufferGetBlockNumber(*bufP);
+       
+       /* walk left to the previous page with data */
+       _bt_relbuf(rel, *bufP, BT_READ);
+       for (;;) {
+           *bufP = _bt_getbuf(rel, blkno, BT_READ);
+           page = BufferGetPage(*bufP);
+           opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+           maxoff = PageGetMaxOffsetNumber(page);
+           
+           /*
+            *  If the adjacent page just split, then we may have the
+            *  wrong block.  Handle this case.  Because pages only
+            *  split right, we don't have to worry about this failing
+            *  to terminate.
+            */
+           
+           while (opaque->btpo_next != obknum) {
+           blkno = opaque->btpo_next;
+           _bt_relbuf(rel, *bufP, BT_READ);
+           *bufP = _bt_getbuf(rel, blkno, BT_READ);
+           page = BufferGetPage(*bufP);
+           opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+           maxoff = PageGetMaxOffsetNumber(page);
+           }
+           
+           /* don't consider the high key */
+           start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+           
+           /* anything to look at here? */
+           if (!PageIsEmpty(page) && maxoff >= start) {
+           break;
+           } else {
+           blkno = opaque->btpo_prev;
+           obknum = BufferGetBlockNumber(*bufP);
+           _bt_relbuf(rel, *bufP, BT_READ);
+           if (blkno == P_NONE) {
+               *bufP = so->btso_curbuf = InvalidBuffer;
+               ItemPointerSetInvalid(current);
+               return (false);
+           }
+           }
+       }
+       offnum = maxoff;    /* XXX PageIsEmpty? */
+       }
+   }
+    }
+    blkno = BufferGetBlockNumber(*bufP);
+    so->btso_curbuf = *bufP;
+    ItemPointerSet(current, blkno, offnum);
+    
+    return (true);
+}
+
+/*
+ *  _bt_twostep() -- Move to an adjacent record in a scan on the tree,
+ *          if an adjacent record exists.
+ *
+ * This is like _bt_step, except that if no adjacent record exists
+ * it restores us to where we were before trying the step.  This is
+ * only hairy when you cross page boundaries, since the page you cross
+ * from could have records inserted or deleted, or could even split.
+ * This is unlikely, but we try to handle it correctly here anyway.
+ *
+ * This routine contains the only case in which our changes to Lehman
+ * and Yao's algorithm can be tripped up by concurrent updates.
+ *
+ * Like step, this routine leaves the scan's currentItemData in the
+ * proper state and acquires a lock and pin on *bufP.  If the twostep
+ * succeeded, we return true; otherwise, we return false.
+ */
+static bool
+_bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
+{
+    Page page;
+    BTPageOpaque opaque;
+    OffsetNumber offnum, maxoff;
+    OffsetNumber start;
+    ItemPointer current;
+    ItemId itemid;
+    int itemsz;
+    BTItem btitem;
+    BTItem svitem;
+    BlockNumber blkno;
+
+    blkno = BufferGetBlockNumber(*bufP);
+    page = BufferGetPage(*bufP);
+    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    maxoff = PageGetMaxOffsetNumber(page);
+    current = &(scan->currentItemData);
+    offnum = ItemPointerGetOffsetNumber(current);
+
+    /* first data item: P_HIKEY on the rightmost page, else P_FIRSTKEY */
+    start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+
+    /* if we're safe, just do it */
+    if (ScanDirectionIsForward(dir) && offnum < maxoff) { /* XXX PageIsEmpty? */
+        ItemPointerSet(current, blkno, OffsetNumberNext(offnum));
+        return (true);
+    } else if (ScanDirectionIsBackward(dir) && offnum > start) {
+        ItemPointerSet(current, blkno, OffsetNumberPrev(offnum));
+        return (true);
+    }
+
+    /* if we've hit end of scan we don't have to do any work */
+    if (ScanDirectionIsForward(dir) && P_RIGHTMOST(opaque)) {
+        return (false);
+    } else if (ScanDirectionIsBackward(dir) && P_LEFTMOST(opaque)) {
+        return (false);
+    }
+
+    /*
+     *  Okay, it's off the page; let _bt_step() do the hard work, and we'll
+     *  try to remember where we were.  This is not guaranteed to work; this
+     *  is the only place in the code where concurrency can screw us up,
+     *  and it's because we want to be able to move in two directions in
+     *  the scan.
+     */
+
+    /* keep a private copy of the current item so we can recognize it later */
+    itemid = PageGetItemId(page, offnum);
+    itemsz = ItemIdGetLength(itemid);
+    btitem = (BTItem) PageGetItem(page, itemid);
+    svitem = (BTItem) palloc(itemsz);
+    memmove((char *) svitem, (char *) btitem, itemsz);
+
+    if (_bt_step(scan, bufP, dir)) {
+        pfree(svitem);
+        return (true);
+    }
+
+    /*
+     *  The step failed, and _bt_step() released the buffer on its way
+     *  out.  Re-read the page we started on and try to find our place
+     *  again.
+     */
+    *bufP = _bt_getbuf(scan->relation, blkno, BT_READ);
+    page = BufferGetPage(*bufP);
+    maxoff = PageGetMaxOffsetNumber(page);
+
+    /*
+     *  Search from the remembered offset up to the end of the page for
+     *  the saved item, matching on bti_oid.  offnum must advance on every
+     *  pass so that the search terminates at maxoff when the item is not
+     *  at (or after) the remembered position.
+     */
+    while (offnum <= maxoff) {
+        itemid = PageGetItemId(page, offnum);
+        btitem = (BTItem) PageGetItem(page, itemid);
+        if (btitem->bti_oid == svitem->bti_oid) {
+            pfree(svitem);
+            ItemPointerSet(current, blkno, offnum);
+            return (false);
+        }
+        offnum = OffsetNumberNext(offnum);
+    }
+
+    /*
+     *  XXX crash and burn -- can't find our place.  We can be a little
+     *  smarter -- walk to the next page to the right, for example, since
+     *  that's the only direction that splits happen in.  Deletions screw
+     *  us up less often since they're only done by the vacuum daemon.
+     */
+
+    elog(WARN, "btree synchronization error:  concurrent update botched scan");
+
+    return (false);
+}
+
+/*
+ *  _bt_endpoint() -- Find the first or last key in the index.
+ *
+ *  Descends from the root to a leaf, taking the first child on each
+ *  level for a forward scan and the last child for a backward scan,
+ *  then positions scan->currentItemData on the first (forward) or last
+ *  (backward) item of that leaf and tests it with _bt_checkqual().
+ *
+ *  Returns a RetrieveIndexResult built from the current position and
+ *  the item's t_tid when the item passes the qualification; the buffer
+ *  is then remembered in so->btso_curbuf.  Returns NULL when the item
+ *  fails the qualification (the buffer is released here) or when
+ *  _bt_step() reports that there is nothing to step onto.
+ */
+static RetrieveIndexResult
+_bt_endpoint(IndexScanDesc scan, ScanDirection dir)
+{
+    Relation rel;
+    Buffer buf;
+    Page page;
+    BTPageOpaque opaque;
+    ItemPointer current;
+    ItemPointer iptr;
+    OffsetNumber offnum, maxoff;
+    OffsetNumber start;
+    BlockNumber blkno;
+    BTItem btitem;
+    IndexTuple itup;
+    BTScanOpaque so;
+    RetrieveIndexResult res;
+    
+    rel = scan->relation;
+    current = &(scan->currentItemData);
+    
+    buf = _bt_getroot(rel, BT_READ);
+    blkno = BufferGetBlockNumber(buf);
+    page = BufferGetPage(buf);
+    opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+    
+    /* walk down one level per iteration until we reach a leaf page */
+    for (;;) {
+   if (opaque->btpo_flags & BTP_LEAF)
+       break;
+   
+   /* pick the first data item going forward, the last going backward */
+   if (ScanDirectionIsForward(dir)) {
+       offnum = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+   } else {
+       offnum = PageGetMaxOffsetNumber(page);
+   }
+   
+   btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
+   itup = &(btitem->bti_itup);
+   
+   /* on an internal page the item's t_tid holds the child block number */
+   blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
+   
+   /* release the parent before locking the child */
+   _bt_relbuf(rel, buf, BT_READ);
+   buf = _bt_getbuf(rel, blkno, BT_READ);
+   page = BufferGetPage(buf);
+   opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+   
+   /*
+    *  Race condition: If the child page we just stepped onto is
+    *  in the process of being split, we need to make sure we're
+    *  all the way at the right edge of the tree.  See the paper
+    *  by Lehman and Yao.
+    */
+   
+   if (ScanDirectionIsBackward(dir) && ! P_RIGHTMOST(opaque)) {
+       do {
+       blkno = opaque->btpo_next;
+       _bt_relbuf(rel, buf, BT_READ);
+       buf = _bt_getbuf(rel, blkno, BT_READ);
+       page = BufferGetPage(buf);
+       opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+       } while (! P_RIGHTMOST(opaque));
+   }
+    }
+    
+    /* okay, we've got the {left,right}-most page in the tree */
+    /* NOTE(review): this value is recomputed in the forward branch and unused in the backward branch */
+    maxoff = PageGetMaxOffsetNumber(page);
+    
+    if (ScanDirectionIsForward(dir)) {
+   if (PageIsEmpty(page)) {
+       maxoff = FirstOffsetNumber;
+   } else {
+       maxoff = PageGetMaxOffsetNumber(page);
+   }
+   start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
+   
+   /*
+    * no data item on this page: park the scan position on it and let
+    * _bt_step() look for a neighbouring item.  _bt_step() releases the
+    * buffer itself when it returns false.
+    * NOTE(review): a forward scan steps in BackwardScanDirection here
+    * (and the backward branch below steps forward); this looks
+    * inverted -- confirm the intent.
+    */
+   if (PageIsEmpty(page) || start > maxoff) {
+       ItemPointerSet(current, blkno, maxoff);
+       if (!_bt_step(scan, &buf, BackwardScanDirection))
+       return ((RetrieveIndexResult) NULL);
+       
+       /* _bt_step may have moved to another page; refresh our view */
+       start = ItemPointerGetOffsetNumber(current);
+       page = BufferGetPage(buf);
+   } else {
+       ItemPointerSet(current, blkno, start);
+   }
+    } else if (ScanDirectionIsBackward(dir)) {
+   if (PageIsEmpty(page)) {
+       ItemPointerSet(current, blkno, FirstOffsetNumber);
+       if (!_bt_step(scan, &buf, ForwardScanDirection))
+       return ((RetrieveIndexResult) NULL);
+       
+       /* _bt_step may have moved to another page; refresh our view */
+       start = ItemPointerGetOffsetNumber(current);
+       page = BufferGetPage(buf);
+   } else {
+       /* last item on the rightmost leaf */
+       start = PageGetMaxOffsetNumber(page);
+       ItemPointerSet(current, blkno, start);
+   }
+    } else {
+   /* presumably elog(WARN) does not return; otherwise 'start' is unset below -- TODO confirm */
+   elog(WARN, "Illegal scan direction %d", dir);
+    }
+    
+    btitem = (BTItem) PageGetItem(page, PageGetItemId(page, start));
+    itup = &(btitem->bti_itup);
+    
+    /* see if we picked a winner */
+    if (_bt_checkqual(scan, itup)) {
+   /* hand back a palloc'd copy of the item's t_tid */
+   iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
+   memmove((char *) iptr, (char *) &(itup->t_tid),
+       sizeof(ItemPointerData));
+   res = FormRetrieveIndexResult(current, iptr);
+   
+   /* remember which buffer we have pinned */
+   so = (BTScanOpaque) scan->opaque;
+   so->btso_curbuf = buf;
+    } else {
+   /* endpoint item does not qualify: drop the buffer and report no match */
+   _bt_relbuf(rel, buf, BT_READ);
+   res = (RetrieveIndexResult) NULL;
+    }
+    
+    return (res);
+}
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c

new file mode 100644 (file)

index 0000000..3d26763
--- /dev/null
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -0,0 +1,1196 @@
+/*-------------------------------------------------------------------------
+ * nbtsort.c--
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Id: nbtsort.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ * NOTES
+ *
+ * what we do is:
+ * - generate a set of initial one-block runs, distributed round-robin
+ *   between the output tapes.
+ * - for each pass,
+ *   - swap input and output tape sets, rewinding both and truncating
+ *     the output tapes.
+ *   - merge the current run in each input tape to the current output
+ *     tape.
+ *     - when each input run has been exhausted, switch to another output
+ *       tape and start processing another run.
+ * - when we have fewer runs than tapes, we know we are ready to start
+ *   merging into the btree leaf pages. 
+ * - every time we complete a level of the btree, we can construct the
+ *   next level up.  when we have only one page on a level, it can be
+ *   attached to the btree metapage and we are done.
+ *
+ * conventions:
+ * - external interface routines take in and return "void *" for their
+ *   opaque handles.  this is for modularity reasons (i prefer not to
+ *   export these structures without good reason).
+ *
+ * this code is moderately slow (~10% slower) compared to the regular
+ * btree (insertion) build code on sorted or well-clustered data.  on
+ * random data, however, the insertion build code is unusable -- the
+ * difference on a 60MB heap is a factor of 15 because the random
+ * probes into the btree thrash the buffer pool.
+ *
+ * this code currently packs the pages to 100% of capacity.  this is
+ * not wise, since *any* insertion will cause splitting.  filling to
+ * something like the standard 70% steady-state load factor for btrees
+ * would probably be better.
+ *
+ * somebody desperately needs to figure out how to do a better job of
+ * balancing the merge passes -- the fan-in on the final merges can be
+ * pretty poor, which is bad for performance.
+ *-------------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+
+#include "c.h"
+
+#include "access/nbtree.h"
+
+#include "storage/bufmgr.h"
+#include "storage/fd.h"
+#include "utils/rel.h"
+#include "utils/palloc.h"
+#include "utils/elog.h"
+
+/*#define FASTBUILD_DEBUG*/ /* turn on debugging output */
+
+#define FASTBUILD
+
+#ifdef FASTBUILD
+
+#define    MAXTAPES    (7)
+#define    TAPEBLCKSZ  (BLCKSZ << 2)
+#define    TAPETEMP    "pg_btsortXXXXXX"
+
+
+/*-------------------------------------------------------------------------
+ * sorting comparison routine - returns {-1,0,1} depending on whether
+ * the key in the left BTItem is {<,=,>} the key in the right BTItem.
+ *
+ * we want to use _bt_isortcmp as a comparison function for qsort(3),
+ * but it needs extra arguments, so we "pass them in" as global
+ * variables.  ick.  fortunately, they are the same throughout the
+ * build, so we need do this only once.  this is why you must call
+ * _bt_isortcmpinit before the call to qsort(3).
+ *
+ * a NULL BTItem is always assumed to be greater than any actual
+ * value; our heap routines (see below) assume that the smallest
+ * element in the heap is returned.  that way, NULL values from the
+ * exhausted tapes can sift down to the bottom of the heap.  in point
+ * of fact we just don't replace the elements of exhausted tapes, but
+ * what the heck.
+ *-------------------------------------------------------------------------
+ */
+/* index relation handed to _bt_itemcmp by _bt_isortcmp; set by _bt_isortcmpinit */
+static Relation _bt_sortrel;
+
+/*
+ * remember 'index' as the relation that later _bt_isortcmp calls will
+ * use for their key comparisons.
+ */
+static void
+_bt_isortcmpinit(Relation index)
+{
+    _bt_sortrel = index;
+}
+
+/*
+ * three-way comparison of the BTItems that *bti1p and *bti2p point at,
+ * using the relation stored by _bt_isortcmpinit.
+ *
+ * returns 1 if the first item sorts after the second, -1 if it sorts
+ * before, and 0 if neither is greater.  a NULL item sorts after every
+ * non-NULL item; two NULL items compare equal.
+ */
+static int
+_bt_isortcmp(BTItem *bti1p, BTItem *bti2p)
+{
+    BTItem lhs = *bti1p;
+    BTItem rhs = *bti2p;
+
+    /* dispose of the NULL cases first */
+    if (lhs == (BTItem) NULL || rhs == (BTItem) NULL) {
+        if (lhs == rhs) {
+            return(0);      /* both NULL */
+        }
+        return((lhs == (BTItem) NULL) ? 1 : -1);
+    }
+
+    /* both real: ask "greater than" in each direction */
+    if (_bt_itemcmp(_bt_sortrel, 1, lhs, rhs, BTGreaterStrategyNumber)) {
+        return(1);
+    }
+    if (_bt_itemcmp(_bt_sortrel, 1, rhs, lhs, BTGreaterStrategyNumber)) {
+        return(-1);
+    }
+    return(0);
+}
+
+/*-------------------------------------------------------------------------
+ * priority queue methods
+ *
+ * these were more-or-less lifted from the heap section of the 1984
+ * edition of gonnet's book on algorithms and data structures.  they
+ * are coded so that the smallest element in the heap is returned (we
+ * use them for merging sorted runs).
+ *
+ * XXX these probably ought to be generic library functions.
+ *-------------------------------------------------------------------------
+ */
+
+/* one entry of the merge priority queue: an item and the tape it came from */
+typedef struct {
+    int        btpqe_tape; /* tape identifier */
+    BTItem btpqe_item; /* pointer to BTItem in tape buffer */
+} BTPriQueueElem;
+
+/* queue capacity; defined as MAXTAPES */
+#define    MAXELEM MAXTAPES
+/*
+ * array-based priority queue.  btpq_nelem is the number of slots of
+ * btpq_queue currently in use; _bt_pqnext hands out btpq_queue[0].
+ * btpq_rel is not referenced by the queue routines in this part of the
+ * file.
+ */
+typedef struct {
+    int            btpq_nelem;
+    BTPriQueueElem btpq_queue[MAXELEM];
+    Relation       btpq_rel;
+} BTPriQueue;
+
+/* be sure to call _bt_isortcmpinit first */
+/* true when queue element *a's item sorts after *b's according to _bt_isortcmp */
+#define GREATER(a, b) \
+    (_bt_isortcmp(&((a)->btpqe_item), &((b)->btpqe_item)) > 0)
+
+/*
+ * sift the element in slot 'parent' down the heap: while it is greater
+ * than its smaller child (slots 2*parent+1 and 2*parent+2), swap the
+ * two and continue from the child's slot.
+ *
+ * NOTE(review): when no swap is needed the walk does not stop; it
+ * resumes from slot child+1.  This looks like a mistranslated "leave
+ * the loop" step -- confirm before changing, since it alters which
+ * slots get examined.
+ */
+static void
+_bt_pqsift(BTPriQueue *q, int parent)
+{
+    BTPriQueueElem *heap = q->btpq_queue;
+    BTPriQueueElem swap;
+    int child;
+
+    child = parent * 2 + 1;
+    while (child < q->btpq_nelem) {
+        /* prefer the right sibling when it exists and is the smaller one */
+        if (child + 1 < q->btpq_nelem &&
+            GREATER(&(heap[child]), &(heap[child + 1]))) {
+            ++child;
+        }
+
+        if (!GREATER(&(heap[parent]), &(heap[child]))) {
+            parent = child + 1;
+        } else {
+            swap = heap[child];             /* struct = */
+            heap[child] = heap[parent];     /* struct = */
+            heap[parent] = swap;            /* struct = */
+            parent = child;
+        }
+
+        child = parent * 2 + 1;
+    }
+}
+
+/*
+ * remove the element in slot 0 of the queue and copy it into *e.
+ *
+ * returns -1 (leaving *e untouched) if the queue is empty, 0 otherwise.
+ * when elements remain, the last one is moved into slot 0 and sifted
+ * down with _bt_pqsift.
+ */
+static int
+_bt_pqnext(BTPriQueue *q, BTPriQueueElem *e)
+{
+    int remaining;
+
+    if (q->btpq_nelem < 1) {
+        return(-1);         /* nothing to hand out */
+    }
+
+    *e = q->btpq_queue[0];                  /* struct = */
+
+    q->btpq_nelem -= 1;
+    remaining = q->btpq_nelem;
+    if (remaining >= 1) {
+        /* refill the root from the tail and restore heap order */
+        q->btpq_queue[0] = q->btpq_queue[remaining];    /* struct = */
+        _bt_pqsift(q, 0);
+    }
+    return(0);
+}
+
+/*
+ * insert a copy of *e into the priority queue.
+ *
+ * the queue is a 0-based binary heap whose children of slot p live in
+ * slots 2p+1 and 2p+2 (the layout _bt_pqsift walks), so the parent of
+ * slot c is (c - 1) / 2.  the new element starts in the first free
+ * slot and parents are pulled down until one is found that is not
+ * greater than the new element.
+ *
+ * reports "_bt_pqadd: queue overflow" through elog(WARN) when the
+ * queue already holds MAXELEM elements.
+ */
+static void
+_bt_pqadd(BTPriQueue *q, BTPriQueueElem *e)
+{
+    int child, parent;
+
+    /* presumably elog(WARN) does not return -- TODO confirm */
+    if (q->btpq_nelem >= MAXELEM) {
+        elog(WARN, "_bt_pqadd: queue overflow");
+    }
+
+    child = q->btpq_nelem++;
+    while (child > 0) {
+        /* 0-based heap: the parent of slot c is (c - 1) / 2 */
+        parent = (child - 1) / 2;
+        if (GREATER(e, &(q->btpq_queue[parent]))) {
+            break;
+        } else {
+            q->btpq_queue[child] = q->btpq_queue[parent];   /* struct = */
+            child = parent;
+        }
+    }
+
+    q->btpq_queue[child] = *e;                  /* struct = */
+}
+
+/*-------------------------------------------------------------------------
+ * tape methods
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * size in bytes of a BTItem: the index tuple's IndexTupleDSize plus the
+ * part of BTItemData that is not the IndexTupleData; 0 for a NULL item.
+ */
+#define    BTITEMSZ(btitem) \
+    ((btitem) ? \
+     (IndexTupleDSize((btitem)->bti_itup) + \
+      (sizeof(BTItemData) - sizeof(IndexTupleData))) : \
+     0)
+/* bytes of bttb_data not yet used in this tape block */
+#define    SPCLEFT(tape) \
+    (sizeof((tape)->bttb_data) - (tape)->bttb_top)
+/* true when the tape block holds no tuples */
+#define    EMPTYTAPE(tape) \
+    ((tape)->bttb_ntup <= 0)
+/* value stored in bttb_magic by _bt_tapecreate and asserted by _bt_taperead */
+#define    BTTAPEMAGIC 0x19660226
+
+/*
+ * this is what we use to shovel BTItems in and out of memory.  it's
+ * bigger than a standard block because we are doing a lot of strictly
+ * sequential i/o.  this is obviously something of a tradeoff since we
+ * are potentially reading a bunch of zeroes off of disk in many
+ * cases.
+ *
+ * BTItems are packed in and DOUBLEALIGN'd.
+ *
+ * the fd should not be going out to disk, strictly speaking, but it's
+ * the only thing like that so i'm not going to worry about wasting a
+ * few bytes.
+ *
+ * the whole structure is read and written as one TAPEBLCKSZ-byte unit
+ * (see _bt_taperead and _bt_tapewrite), header fields included.
+ * NOTE(review): bttb_data is sized as TAPEBLCKSZ - 2 * sizeof(double),
+ * which presumably matches the header size only when int is 4 bytes,
+ * short is 2 bytes and double is 8 bytes -- confirm on other platforms.
+ */
+typedef struct {
+    int        bttb_magic; /* magic number */
+    int        bttb_fd;    /* file descriptor */
+    int        bttb_top;   /* top of free space within bttb_data */
+    short  bttb_ntup;  /* number of tuples in this block */
+    short  bttb_eor;   /* End-Of-Run marker */
+    char   bttb_data[TAPEBLCKSZ - 2 * sizeof(double)];
+} BTTapeBlock;
+
+
+/*
+ * make the in-memory tape block look empty again: no tuples, free
+ * space starting at offset 0, End-Of-Run marker cleared.  the physical
+ * tape file and the block's file descriptor are left alone.
+ */
+static void
+_bt_tapereset(BTTapeBlock *tape)
+{
+    tape->bttb_ntup = 0;    /* no tuples */
+    tape->bttb_top = 0;     /* all of bttb_data is free */
+    tape->bttb_eor = 0;     /* not at End-Of-Run */
+}
+
+/*
+ * rewind the physical tape file: seek the tape's file descriptor back
+ * to offset 0.  the result of FileSeek is deliberately discarded, and
+ * the in-memory block contents are not touched.
+ */
+static void
+_bt_taperewind(BTTapeBlock *tape)
+{
+    (void) FileSeek(tape->bttb_fd, 0, SEEK_SET);
+}
+
+/*
+ * empty a tape without destroying it: the physical file is rewound
+ * and truncated to zero length, and the in-memory block is reset.  the
+ * tape data structure and the file itself survive.
+ *
+ * truncating through the VFD layer instead of unlinking and
+ * recreating the file still leaves the OS to free the file system
+ * blocks, but avoids deleting and reinserting the directory entry.
+ */
+static void
+_bt_tapeclear(BTTapeBlock *tape)
+{
+    /* seek back to the start of the old file ... */
+    (void) FileSeek(tape->bttb_fd, 0, SEEK_SET);
+#if 0
+    FileSync(tape->bttb_fd);
+#endif
+    /* ... and throw its contents away */
+    FileTruncate(tape->bttb_fd, 0);
+
+    /* the buffer starts over empty as well */
+    _bt_tapereset(tape);
+}
+
+/*
+ * create a new BTTapeBlock, allocating memory for the data structure
+ * as well as opening a physical tape file.
+ *
+ * 'fname' is the name of the file to create (or truncate); it is
+ * opened read/write with mode 0600.  returns the new, empty tape
+ * block.  failure to allocate memory or to open the file is reported
+ * through elog(WARN) rather than only through an Assert, so that it is
+ * also caught in builds where assertions are disabled.
+ */
+static BTTapeBlock *
+_bt_tapecreate(char *fname)
+{
+    BTTapeBlock *tape = (BTTapeBlock *) palloc(sizeof(BTTapeBlock));
+
+    if (tape == (BTTapeBlock *) NULL) {
+        elog(WARN, "_bt_tapecreate: out of memory");
+    }
+
+    tape->bttb_magic = BTTAPEMAGIC;
+
+    tape->bttb_fd = FileNameOpenFile(fname, O_RDWR|O_CREAT|O_TRUNC, 0600);
+    if (tape->bttb_fd < 0) {
+        /* don't hand back a tape whose descriptor is unusable */
+        pfree((void *) tape);
+        elog(WARN, "_bt_tapecreate: could not create tape file \"%s\"",
+             fname);
+    }
+
+    /* initialize the buffer */
+    _bt_tapereset(tape);
+
+    return(tape);
+}
+
+/*
+ * destroy the BTTapeBlock structure and its physical tape file.
+ *
+ * the file is disposed of through FileUnlink on the tape's descriptor
+ * (presumably this also closes it -- TODO confirm), then the block's
+ * memory is freed.  'tape' must not be used afterwards.
+ */
+static void
+_bt_tapedestroy(BTTapeBlock *tape)
+{
+    FileUnlink(tape->bttb_fd);
+    pfree((void *) tape);
+}
+
+/*
+ * flush the tape block to the file, marking End-Of-Run if requested.
+ *
+ * 'eor' is stored in the block's End-Of-Run marker before the whole
+ * TAPEBLCKSZ-byte block (header included) is written at the file's
+ * current position.  a write that does not transfer the full block is
+ * reported through elog(WARN) instead of being silently ignored, since
+ * a truncated block would corrupt the sort.  on success the in-memory
+ * block is reset so it can be filled again.
+ */
+static void
+_bt_tapewrite(BTTapeBlock *tape, int eor)
+{
+    int nwritten;
+
+    tape->bttb_eor = eor;
+    nwritten = FileWrite(tape->bttb_fd, (char*)tape, TAPEBLCKSZ);
+    if (nwritten != TAPEBLCKSZ) {
+        elog(WARN, "_bt_tapewrite: could not write tape block");
+    }
+    _bt_tapereset(tape);
+}
+
+/*
+ * load the next block of the tape file into 'tape', replacing
+ * whatever the buffer held before.
+ *
+ * returns:
+ * - 0 if the buffer is already flagged End-Of-Run (nothing is read;
+ *   call _bt_tapereset to clear the marker) or if a full block could
+ *   not be read from the file
+ * - 1 if a valid block was read
+ */
+static int
+_bt_taperead(BTTapeBlock *tape)
+{
+    int savedfd;
+    int nbytes;
+
+    /* the current run is finished; don't run on into the next one */
+    if (tape->bttb_eor) {
+        return(0);
+    }
+
+    /*
+     * the read overwrites the whole block, header included, and the
+     * descriptor stored in the block on disk is bogus, so hold on to
+     * the live VFD and put it back afterwards.
+     */
+    savedfd = tape->bttb_fd;
+    nbytes = FileRead(savedfd, (char*) tape, TAPEBLCKSZ);
+    tape->bttb_fd = savedfd;
+
+    if (nbytes == TAPEBLCKSZ) {
+        Assert(tape->bttb_magic == BTTAPEMAGIC);
+        return(1);
+    }
+
+    Assert(nbytes == 0);    /* we are at EOF */
+    return(0);
+}
+
+/*
+ * fetch the BTItem at '*pos' within a tape block and advance '*pos'
+ * past it.
+ *
+ * returns:
+ * - NULL if '*pos' is at or beyond the used part of the block
+ *   ('*pos' is then left unchanged)
+ * - a pointer to the BTItemData inside the block otherwise
+ *
+ * side effects:
+ * - '*pos' is moved forward by the DOUBLEALIGN'd size of the item
+ *   returned.
+ */
+static BTItem
+_bt_tapenext(BTTapeBlock *tape, char **pos)
+{
+    char *used_end = tape->bttb_data + tape->bttb_top;
+    BTItem found = (BTItem) NULL;
+    Size itemsz;
+
+    if (*pos < used_end) {
+        found = (BTItem) *pos;
+        itemsz = BTITEMSZ(found);
+        *pos += DOUBLEALIGN(itemsz);
+    }
+    return(found);
+}
+
+/*
+ * append a copy of 'item' to a tape block.
+ *
+ * 'itemsz' bytes are copied to the start of the block's free space.
+ * the caller must already have checked that the block has room.
+ *
+ * side effects:
+ * - the block's tuple count goes up by one.
+ * - the 'top' offset in the block header advances by
+ *   DOUBLEALIGN(itemsz), so it again marks the start of free space.
+ *
+ * NOTE(review): _bt_spool passes a size that is already DOUBLEALIGN'd,
+ * so the copy can extend past the item's nominal size into alignment
+ * padding -- confirm that the source is always readable that far.
+ */
+static void
+_bt_tapeadd(BTTapeBlock *tape, BTItem item, int itemsz)
+{
+    char *dest = tape->bttb_data + tape->bttb_top;
+
+    (void) memcpy(dest, item, itemsz);
+    tape->bttb_top += DOUBLEALIGN(itemsz);
+    ++tape->bttb_ntup;
+}
+
+/*-------------------------------------------------------------------------
+ * spool methods
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * this structure holds the bookkeeping for a simple balanced multiway
+ * merge.  (polyphase merging is hairier than i want to get into right
+ * now, and i don't see why i have to care how many "tapes" i use
+ * right now.  though if psort was in a condition that i could hack it
+ * to do this, you bet i would.)
+ *
+ * bts_ntapes is the length of both tape arrays; bts_tape is the index
+ * of the tape pair currently being filled (_bt_spool advances it
+ * round-robin).
+ */
+typedef struct {
+    int        bts_ntapes;
+    int        bts_tape;
+    BTTapeBlock    **bts_itape;    /* input tape blocks */
+    BTTapeBlock    **bts_otape;    /* output tape blocks */
+} BTSpool;
+
+/*
+ * create and initialize a spool structure, including the underlying
+ * files.
+ *
+ * 'index' is the index relation being built; it is registered with
+ * _bt_isortcmpinit for later comparisons.  'ntapes' is the number of
+ * input/output tape pairs to create and must lie between 1 and
+ * MAXTAPES: zero tapes would make the round-robin step in _bt_spool
+ * divide by zero, and the merge priority queue has room for only
+ * MAXTAPES elements.  an out-of-range value is reported through
+ * elog(WARN).
+ *
+ * returns the new spool as an opaque "void *" handle.
+ *
+ * NOTE(review): tape file names come from mktemp(), which only picks
+ * a name that does not exist yet; another process could create the
+ * same file before we open it.
+ */
+void *
+_bt_spoolinit(Relation index, int ntapes)
+{
+    char *mktemp();
+
+    BTSpool *btspool;
+    int i;
+    char *fname;
+
+    if (ntapes < 1 || ntapes > MAXTAPES) {
+        elog(WARN, "_bt_spoolinit: invalid number of tapes %d", ntapes);
+    }
+
+    btspool = (BTSpool *) palloc(sizeof(BTSpool));
+    fname = (char *) palloc(sizeof(TAPETEMP) + 1);
+    if (btspool == (BTSpool *) NULL || fname == (char *) NULL) {
+        elog(WARN, "_bt_spoolinit: out of memory");
+    }
+    (void) memset((char *) btspool, 0, sizeof(BTSpool));
+    btspool->bts_ntapes = ntapes;
+    btspool->bts_tape = 0;
+
+    btspool->bts_itape =
+        (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes);
+    btspool->bts_otape =
+        (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes);
+    if (btspool->bts_itape == (BTTapeBlock **) NULL ||
+        btspool->bts_otape == (BTTapeBlock **) NULL) {
+        elog(WARN, "_bt_spoolinit: out of memory");
+    }
+
+    /*
+     * each tape gets its own file; the template is copied afresh into
+     * fname before every mktemp() call because mktemp() overwrites it.
+     */
+    for (i = 0; i < ntapes; ++i) {
+        btspool->bts_itape[i] =
+            _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP)));
+        btspool->bts_otape[i] =
+            _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP)));
+    }
+    pfree((void *) fname);
+
+    _bt_isortcmpinit(index);
+
+    return((void *) btspool);
+}
+
+/*
+ * clean up a spool structure and its substructures.
+ *
+ * 'spool' is the handle returned by _bt_spoolinit.  every input and
+ * output tape is destroyed (which also disposes of its file), then the
+ * two tape pointer arrays and the spool structure itself are freed.
+ * the handle must not be used afterwards.
+ */
+void
+_bt_spooldestroy(void *spool)
+{
+    BTSpool *btspool = (BTSpool *) spool;
+    int i;
+
+    for (i = 0; i < btspool->bts_ntapes; ++i) {
+        _bt_tapedestroy(btspool->bts_otape[i]);
+        _bt_tapedestroy(btspool->bts_itape[i]);
+    }
+
+    /* the pointer arrays were palloc'd separately in _bt_spoolinit */
+    pfree((void *) btspool->bts_otape);
+    pfree((void *) btspool->bts_itape);
+    pfree((void *) btspool);
+}
+
+/*
+ * write out every output tape block that still holds tuples, marking
+ * each one End-Of-Run.  empty output blocks are skipped.
+ */
+static void
+_bt_spoolflush(BTSpool *btspool)
+{
+    BTTapeBlock *otape;
+    int tapeno;
+
+    for (tapeno = 0; tapeno < btspool->bts_ntapes; ++tapeno) {
+        otape = btspool->bts_otape[tapeno];
+        if (EMPTYTAPE(otape)) {
+            continue;       /* nothing pending on this tape */
+        }
+        _bt_tapewrite(otape, 1);
+    }
+}
+
+/*
+ * exchange the roles of the input and output tapes by swapping the
+ * file descriptors of each input/output pair.  to get ready for the
+ * next merge pass, each new input tape is rewound and its buffer
+ * reset, and each new output tape is cleared.
+ */
+static void
+_bt_spoolswap(BTSpool *btspool)
+{
+    int tapeno = 0;
+
+    while (tapeno < btspool->bts_ntapes) {
+        BTTapeBlock *in = btspool->bts_itape[tapeno];
+        BTTapeBlock *out = btspool->bts_otape[tapeno];
+        File oldinfd = in->bttb_fd;
+
+        /* trade VFDs: what was just written becomes the next input */
+        in->bttb_fd = out->bttb_fd;
+        out->bttb_fd = oldinfd;
+
+        /* start reading the new input from the top, with an empty buffer */
+        _bt_taperewind(in);
+        _bt_tapereset(in);
+
+        /* the old inputs have been consumed; it's ok to throw them away */
+        _bt_tapeclear(out);
+
+        ++tapeno;
+    }
+}
+
+/*-------------------------------------------------------------------------
+ * sorting routines
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * spool 'btitem' into an initial run.  as tape blocks are filled, the
+ * block BTItems are qsorted and written into some output tape (it
+ * doesn't matter which; we go round-robin for simplicity).  the
+ * initial runs are therefore always just one block.
+ *
+ * 'index' is the index relation used for comparisons, 'btitem' is the
+ * item to add (NULL forces the current buffer to be sorted and written
+ * out without adding anything), and 'spool' is the handle returned by
+ * _bt_spoolinit.
+ */
+void
+_bt_spool(Relation index, BTItem btitem, void *spool)
+{
+    BTSpool *btspool = (BTSpool *) spool;
+    BTTapeBlock *itape;
+    Size itemsz;
+
+    itape = btspool->bts_itape[btspool->bts_tape];
+    /* aligned size of the incoming item; 0 when btitem is NULL */
+    itemsz = BTITEMSZ(btitem);
+    itemsz = DOUBLEALIGN(itemsz);
+
+    /*
+     * if this buffer is too full for this BTItemData, or if we have
+     * run out of BTItems, we need to sort the buffer and write it
+     * out.  in this case, the BTItemData will go into the next tape's
+     * buffer.
+     */
+    if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz) {
+   BTItem *parray;
+   BTTapeBlock *otape;
+   BTItem bti;
+   char *pos;
+   int btisz;
+   int i;
+
+   /*
+    * build an array of pointers to the BTItemDatas on the input
+    * block.
+    */
+   /* NOTE(review): bttb_ntup can be 0 here when btitem is NULL and the buffer is empty; confirm palloc(0) is acceptable */
+   parray = (BTItem *) palloc(itape->bttb_ntup * sizeof(BTItem));
+   if (parray == (BTItem *) NULL) {
+       elog(WARN, "_bt_spool: out of memory");
+   }
+   pos = itape->bttb_data;
+   for (i = 0; i < itape->bttb_ntup; ++i) {
+       parray[i] = _bt_tapenext(itape, &pos);
+   }
+
+   /*
+    * qsort the pointer array.
+    */
+   _bt_isortcmpinit(index);
+   qsort((void *) parray, itape->bttb_ntup, sizeof(BTItem), _bt_isortcmp);
+
+   /*
+    * write the spooled run into the output tape.  we copy the
+    * BTItemDatas in the order dictated by the sorted array of
+    * BTItems, not the original order.
+    *
+    * (since everything was DOUBLEALIGN'd and is all on a single
+    * page, everything had *better* still fit on one page..)
+    */
+   otape = btspool->bts_otape[btspool->bts_tape];
+   for (i = 0; i < itape->bttb_ntup; ++i) {
+       bti = parray[i];
+       btisz = BTITEMSZ(bti);
+       btisz = DOUBLEALIGN(btisz);
+       _bt_tapeadd(otape, bti, btisz);
+#ifdef FASTBUILD_DEBUG
+       {
+       bool isnull;
+       Datum d = index_getattr(&(bti->bti_itup), 1,
+                   RelationGetTupleDescriptor(index),
+                   &isnull);
+       printf("_bt_spool: inserted <%x> into output tape %d\n",
+              d, btspool->bts_tape);
+       }
+#endif /* FASTBUILD_DEBUG */
+   }
+
+   /*
+    * the initial runs are always single tape blocks.  flush the
+    * output block, marking End-Of-Run.
+    */
+   _bt_tapewrite(otape, 1);
+
+   /*
+    * reset the input buffer for the next run.  we don't have to
+    * write it out or anything -- we only use it to hold the
+    * unsorted BTItemDatas, the output tape contains all the
+    * sorted stuff.
+    *
+    * changing bts_tape changes the output tape and input tape;
+    * we change itape for the code below.
+    */
+   _bt_tapereset(itape);
+   btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes;
+   itape = btspool->bts_itape[btspool->bts_tape];
+
+   /*
+    * destroy the pointer array.
+    */
+   pfree((void *) parray);
+    }
+
+    /* insert this item into the current buffer */
+    /*
+     * NOTE(review): the size passed is the DOUBLEALIGN'd one, and after a
+     * flush the free space of the new buffer is not re-checked against
+     * itemsz -- presumably an item always fits in an empty block; confirm.
+     */
+    if (btitem != (BTItem) NULL) {
+   _bt_tapeadd(itape, btitem, itemsz);
+    }
+}
+
+/*
+ * obtain a fresh btree page with no siblings.
+ *
+ * a new buffer is requested from the index with _bt_getbuf(P_NEW,
+ * BT_WRITE) and returned through '*buf'; its page is returned through
+ * '*page'.  the page is initialized, both sibling links are set to
+ * P_NONE and the page flags are set to 'flags'.
+ */
+static void
+_bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags)
+{
+    Buffer newbuf;
+    Page newpage;
+    BTPageOpaque special;
+
+    newbuf = _bt_getbuf(index, P_NEW, BT_WRITE);
+    *buf = newbuf;
+    newpage = BufferGetPage(newbuf);
+    *page = newpage;
+
+    _bt_pageinit(newpage, BufferGetPageSize(newbuf));
+
+    special = (BTPageOpaque) PageGetSpecialPointer(newpage);
+    special->btpo_next = P_NONE;
+    special->btpo_prev = P_NONE;
+    special->btpo_flags = flags;
+}
+
+/*
+ * shift the page's ItemIds down by one slot, so that the one at
+ * P_FIRSTKEY lands on P_HIKEY and so on up to the last offset, then
+ * shrink pd_lower by one ItemIdData.  this is needed when an ItemId
+ * array was built on a page that has turned out to be a P_RIGHTMOST
+ * page.
+ *
+ * only 'page' is used; 'index' and 'buf' are not referenced.
+ */
+static void
+_bt_slideleft(Relation index, Buffer buf, Page page)
+{
+    OffsetNumber lastoff = PageGetMaxOffsetNumber(page);
+    OffsetNumber cur = P_FIRSTKEY;
+    ItemId dst = PageGetItemId(page, P_HIKEY);
+    ItemId src;
+
+    while (cur <= lastoff) {
+        src = PageGetItemId(page, cur);
+        *dst = *src;        /* copy this slot into the one before it */
+        dst = src;
+        cur = OffsetNumberNext(cur);
+    }
+
+    /* the line pointer array is now one entry shorter */
+    ((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
+}
+
+/*
+ * state of the page currently being filled by _bt_buildadd: the buffer
+ * and page being built, the last item and offset added to it, and the
+ * offset at which the current chain of duplicates starts.
+ */
+typedef struct {
+    Buffer     btps_buf;
+    Page       btps_page;
+    BTItem     btps_lastbti;
+    OffsetNumber   btps_lastoff;
+    OffsetNumber   btps_firstoff;
+} BTPageState;
+
+/*
+ * add an item to a disk page from a merge tape block.
+ *
+ * we must be careful to observe the following restrictions, placed
+ * upon us by the conventions in nbtsearch.c:
+ * - rightmost pages start data items at P_HIKEY instead of at
+ *   P_FIRSTKEY.
+ * - duplicates cannot be split among pages unless the chain of
+ *   duplicates starts at the first data item.
+ *
+ * a leaf page being built looks like:
+ *
+ * +----------------+---------------------------------+
+ * | PageHeaderData | linp0 linp1 linp2 ...           |
+ * +-----------+----+---------------------------------+
+ * | ... linpN |                  ^ first             |
+ * +-----------+--------------------------------------+
+ * |     ^ last                                       |
+ * |                                                  |
+ * |               v last                             |
+ * +-------------+------------------------------------+
+ * |             | itemN ...                          |
+ * +-------------+------------------+-----------------+
+ * |          ... item3 item2 item1 | "special space" |
+ * +--------------------------------+-----------------+
+ *                      ^ first
+ *
+ * contrast this with the diagram in bufpage.h; note the mismatch
+ * between linps and items.  this is because we reserve linp0 as a
+ * placeholder for the pointer to the "high key" item; when we have
+ * filled up the page, we will set linp0 to point to itemN and clear
+ * linpN.
+ *
+ * 'last' pointers indicate the last offset/item added to the page.
+ * 'first' pointers indicate the first offset/item that is part of a
+ * chain of duplicates extending from 'first' to 'last'.
+ *
+ * if all keys are unique, 'first' will always be the same as 'last'.
+ */
+static void
+_bt_buildadd(Relation index, BTPageState *state, BTItem bti, int flags)
+{
+    Buffer nbuf;
+    Page npage;
+    BTItem last_bti;
+    OffsetNumber first_off;
+    OffsetNumber last_off;
+    OffsetNumber off;
+    Size pgspc;
+    Size btisz;
+
+    /*
+     * add 'bti' to the page currently being built in 'state'.  if the
+     * item does not fit, a new page is obtained with _bt_blnewpage
+     * (passing 'flags' through), the old page is finished and written
+     * out, and the item goes onto the new page instead.
+     *
+     * we work on local copies of the build state and store them back
+     * into 'state' at the end.
+     */
+    nbuf = state->btps_buf;
+    npage = state->btps_page;
+    first_off = state->btps_firstoff;
+    last_off = state->btps_lastoff;
+    last_bti = state->btps_lastbti;
+
+    /* compare the page's free space against the double-aligned item size */
+    pgspc = PageGetFreeSpace(npage);
+    btisz = BTITEMSZ(bti);
+    btisz = DOUBLEALIGN(btisz);
+    if (pgspc < btisz) {
+   Buffer obuf = nbuf;
+   Page opage = npage;
+   OffsetNumber o, n;
+   ItemId ii;
+   ItemId hii;
+
+   /* from here on nbuf/npage are the new page, obuf/opage the full one */
+   _bt_blnewpage(index, &nbuf, &npage, flags);
+
+   /*
+    * if 'last' is part of a chain of duplicates that does not
+    * start at the beginning of the old page, the entire chain is
+    * copied to the new page; we delete all of the duplicates
+    * from the old page except the first, which becomes the high
+    * key item of the old page.
+    *
+    * if the chain starts at the beginning of the page or there
+    * is no chain ('first' == 'last'), we need only copy 'last'
+    * to the new page.  again, 'first' (== 'last') becomes the
+    * high key of the old page.
+    *
+    * note that in either case, we copy at least one item to the
+    * new page, so 'last_bti' will always be valid.  'bti' will
+    * never be the first data item on the new page.
+    */
+   if (first_off == P_FIRSTKEY) {
+       Assert(last_off != P_FIRSTKEY);
+       first_off = last_off;
+   }
+   /* copy items first_off..last_off to the new page, starting at P_FIRSTKEY */
+   for (o = first_off, n = P_FIRSTKEY;
+        o <= last_off;
+        o = OffsetNumberNext(o), n = OffsetNumberNext(n)) {
+       ii = PageGetItemId(opage, o);
+       (void) PageAddItem(npage, PageGetItem(opage, ii),
+                  ii->lp_len, n, LP_USED);
+#ifdef FASTBUILD_DEBUG
+       {
+       bool isnull;
+       BTItem tmpbti =
+           (BTItem) PageGetItem(npage, PageGetItemId(npage, n));
+       Datum d = index_getattr(&(tmpbti->bti_itup), 1,
+                   RelationGetTupleDescriptor(index),
+                   &isnull);
+       printf("_bt_buildadd: moved <%x> to offset %d\n",
+              d, n);
+       }
+#endif /* FASTBUILD_DEBUG */
+   }
+   /*
+    * remove the copied items from the old page, working backwards
+    * from 'last'; the item at first_off is deliberately left in place.
+    */
+   for (o = last_off; o > first_off; o = OffsetNumberPrev(o)) {
+       PageIndexTupleDelete(opage, o);
+   }
+   /*
+    * turn the item at first_off into the old page's high key: copy
+    * its line pointer into the P_HIKEY slot, mark the original slot
+    * unused, and give back one line pointer's worth of pd_lower.
+    */
+   hii = PageGetItemId(opage, P_HIKEY);
+   ii = PageGetItemId(opage, first_off);
+   *hii = *ii;
+   ii->lp_flags &= ~LP_USED;
+   ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
+
+   /* restart the duplicate-chain bookkeeping relative to the new page */
+   first_off = P_FIRSTKEY;
+   last_off = PageGetMaxOffsetNumber(npage);
+   last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, last_off));
+
+   /*
+    * set the page (side link) pointers.
+    */
+   {
+       BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
+       BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);
+
+       oopaque->btpo_next = BufferGetBlockNumber(nbuf);
+       nopaque->btpo_prev = BufferGetBlockNumber(obuf);
+       nopaque->btpo_next = P_NONE;
+   }
+
+   /*
+    * write out the old stuff.  we never want to see it again, so
+    * we can give up our lock (if we had one; BuildingBtree is
+    * set, so we aren't locking).
+    */
+   _bt_wrtbuf(index, obuf);
+    }
+    
+    /*
+     * if this item is different from the last item added, we start a
+     * new chain of duplicates.
+     */
+    off = OffsetNumberNext(last_off);
+    (void) PageAddItem(npage, (Item) bti, btisz, off, LP_USED);
+#ifdef FASTBUILD_DEBUG
+    {
+   bool isnull;
+   Datum d = index_getattr(&(bti->bti_itup), 1, 
+               RelationGetTupleDescriptor(index),
+               &isnull);
+   printf("_bt_buildadd: inserted <%x> at offset %d\n",
+          d, off);
+    }
+#endif /* FASTBUILD_DEBUG */
+    /*
+     * no previous item at all: the chain starts at P_FIRSTKEY.
+     * otherwise the chain restarts at 'off' when _bt_itemcmp reports
+     * that 'bti' and the previous item are not equal on attribute 1.
+     */
+    if (last_bti == (BTItem) NULL) {
+   first_off = P_FIRSTKEY;
+    } else if (!_bt_itemcmp(index, 1, bti, last_bti, BTEqualStrategyNumber)) {
+   first_off = off;
+    }
+    last_off = off;
+    /* remember the copy of the item that now lives on the page, not 'bti' */
+    last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, off));
+
+    /* publish the updated build state back to the caller */
+    state->btps_buf = nbuf;
+    state->btps_page = npage;
+    state->btps_lastbti = last_bti;
+    state->btps_lastoff = last_off;
+    state->btps_firstoff = first_off;
+}
+
+/*
+ * take the input tapes stored by 'btspool' and perform successive
+ * merging passes until at most one run is left in each tape.  at that
+ * point, merge the final tape runs into a set of btree leaves.
+ *
+ * XXX three nested loops?  gross.  cut me up into smaller routines.
+ */
+static BlockNumber
+_bt_merge(Relation index, BTSpool *btspool)
+{
+    BTPageState state;
+    BlockNumber firstblk;
+    BTPriQueue q;
+    BTPriQueueElem e;
+    BTItem bti;
+    BTTapeBlock *itape;
+    BTTapeBlock *otape;
+    char *tapepos[MAXTAPES];	/* per-tape read cursor handed to _bt_tapenext */
+    int tapedone[MAXTAPES];	/* per-tape flag: set once _bt_taperead returns 0 */
+    int t;
+    int goodtapes;
+    int nruns;
+    Size btisz;
+    bool doleaf = false;	/* true once passes write leaf pages, not tapes */
+
+    /*
+     * initialize state needed for the merge into the btree leaf pages.
+     * the block number of this first leaf page is what we return.
+     */
+    (void) memset((char *) &state, 0, sizeof(BTPageState));
+    _bt_blnewpage(index, &(state.btps_buf), &(state.btps_page), BTP_LEAF);
+    state.btps_lastoff = P_HIKEY;
+    state.btps_lastbti = (BTItem) NULL;
+    firstblk = BufferGetBlockNumber(state.btps_buf);
+
+    do {                           /* pass */
+   /*
+    * each pass starts by flushing the previous outputs and
+    * swapping inputs and outputs.  this process also clears the
+    * new output tapes and rewinds the new input tapes.
+    *
+    * bts_tape is parked on the last tape so that the increment at
+    * the top of the first run wraps around to tape 0.
+    */
+   btspool->bts_tape = btspool->bts_ntapes - 1;
+   _bt_spoolflush(btspool);
+   _bt_spoolswap(btspool);
+
+   nruns = 0;
+
+   for (;;) {                      /* run */
+       /*
+        * each run starts by selecting a new output tape.  the
+        * merged results of a given run are always sent to this
+        * one tape.
+        */
+       btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes;
+       otape = btspool->bts_otape[btspool->bts_tape];
+
+       /*
+        * initialize the priority queue by loading it with the
+        * first element of the given run in each tape.  since we
+        * are starting a new run, we reset the tape (clearing the
+        * End-Of-Run marker) before reading it.  this means that
+        * _bt_taperead will return 0 only if the tape is actually
+        * at EOF.
+        */
+       (void) memset((char *) &q, 0, sizeof(BTPriQueue));
+       goodtapes = 0;
+       for (t = 0; t < btspool->bts_ntapes; ++t) {
+       itape = btspool->bts_itape[t];
+       tapepos[t] = itape->bttb_data;
+       _bt_tapereset(itape);
+       if (_bt_taperead(itape) == 0) {
+           tapedone[t] = 1;
+       } else {
+           ++goodtapes;
+           tapedone[t] = 0;
+           e.btpqe_tape = t;
+           e.btpqe_item = _bt_tapenext(itape, &tapepos[t]);
+           /* a tape that yields no item contributes nothing to the queue */
+           if (e.btpqe_item != (BTItem) NULL) {
+           _bt_pqadd(&q, &e);
+           }
+       }
+       }
+       /*
+        * if we don't have any tapes with any input (i.e., they
+        * are all at EOF), we must be done with this pass.
+        */
+       if (goodtapes == 0) {
+       break;  /* for */
+       }
+       ++nruns;
+   
+       /*
+        * output the smallest element from the queue until there are no
+        * more.
+        */
+       while (_bt_pqnext(&q, &e) >= 0) {           /* item */
+       /*
+        * replace the element taken from priority queue,
+        * fetching a new block if needed.  a tape can run out
+        * if it hits either End-Of-Run or EOF.
+        */
+       t = e.btpqe_tape;
+       bti = e.btpqe_item;
+       if (bti != (BTItem) NULL) {
+           btisz = BTITEMSZ(bti);
+           btisz = DOUBLEALIGN(btisz);
+           if (doleaf) {
+           /* final merge: the item goes straight onto a leaf page */
+           _bt_buildadd(index, &state, bti, BTP_LEAF);
+#ifdef FASTBUILD_DEBUG
+           {
+               bool isnull;
+               Datum d = index_getattr(&(bti->bti_itup), 1,
+                   RelationGetTupleDescriptor(index),
+                           &isnull);
+               printf("_bt_merge: inserted <%x> into block %d\n",
+                  d, BufferGetBlockNumber(state.btps_buf));
+           }
+#endif /* FASTBUILD_DEBUG */
+           } else {
+           if (SPCLEFT(otape) < btisz) {
+               /*
+                * if it's full, write it out and add the
+                * item to the next block.  (since we know
+                * there will be at least one more block,
+                * we know we do *not* want to set
+                * End-Of-Run here!)
+                */
+               _bt_tapewrite(otape, 0);
+           }
+           _bt_tapeadd(otape, bti, btisz);
+#ifdef FASTBUILD_DEBUG
+           {
+               bool isnull;
+               Datum d = index_getattr(&(bti->bti_itup), 1,
+                 RelationGetTupleDescriptor(index), &isnull);
+               printf("_bt_merge: inserted <%x> into tape %d\n",
+                  d, btspool->bts_tape);
+           }
+#endif /* FASTBUILD_DEBUG */
+           }
+       }
+       /*
+        * NOTE(review): unlike the code above, this debug block uses
+        * 'bti' without checking it for NULL first; it is only
+        * compiled when FASTBUILD_DEBUG is defined.
+        */
+#ifdef FASTBUILD_DEBUG
+       {
+           bool isnull;
+           Datum d = index_getattr(&(bti->bti_itup), 1,
+                      RelationGetTupleDescriptor(index),
+                       &isnull);
+           printf("_bt_merge: got <%x> from tape %d\n", d, t);
+       }
+#endif /* FASTBUILD_DEBUG */
+
+       /*
+        * refill the queue from the tape we just consumed from: take
+        * the next item in the current block, or read another block
+        * and take its first item.  a tape whose read returns 0 is
+        * marked done and not consulted again during this run.
+        */
+       itape = btspool->bts_itape[t];
+       if (!tapedone[t]) {
+           BTItem newbti = _bt_tapenext(itape, &tapepos[t]);
+
+           if (newbti == (BTItem) NULL) {
+           if (_bt_taperead(itape) == 0) {
+               tapedone[t] = 1;
+           } else {
+               tapepos[t] = itape->bttb_data;
+               newbti = _bt_tapenext(itape, &tapepos[t]);
+           }
+           }
+           if (newbti != (BTItem) NULL) {
+           BTPriQueueElem nexte;
+           
+           nexte.btpqe_tape = t;
+           nexte.btpqe_item = newbti;
+           _bt_pqadd(&q, &nexte);
+           }
+       }
+       }                           /* item */
+   }                           /* run */
+   
+   /*
+    * we are here because we ran out of input on all of the input
+    * tapes.
+    *
+    * if this pass did not generate more actual output runs than
+    * we have tapes, we know we have at most one run in each
+    * tape.  this means that we are ready to merge into the final
+    * btree leaf pages instead of merging into a tape file.
+    *
+    * doleaf is never reset, so every later pass writes leaf pages;
+    * the loop ends after a pass that finds no runs at all.
+    */
+   if (nruns <= btspool->bts_ntapes) {
+       doleaf = true;
+   }
+    } while (nruns > 0);                   /* pass */
+
+    /*
+     * this is the rightmost page, so the ItemId array needs to be
+     * slid back one slot.
+     */
+    _bt_slideleft(index, state.btps_buf, state.btps_page);
+    _bt_wrtbuf(index, state.btps_buf);
+
+    return(firstblk);
+}
+
+
+/*
+ * given the block number 'blk' of the first page of a set of linked
+ * siblings (i.e., the start of an entire level of the btree),
+ * construct the corresponding next level of the btree.  we do this by
+ * placing minimum keys from each page into this page.  the format of
+ * the internal pages is otherwise the same as for leaf pages.
+ */
+void
+_bt_upperbuild(Relation index, BlockNumber blk, int level)
+{
+    Buffer rbuf;
+    Page rpage;
+    BTPageOpaque ropaque;
+    BTPageState state;
+    BlockNumber firstblk;
+    BTItem bti;
+    BTItem nbti;
+    OffsetNumber off;
+
+    rbuf = _bt_getbuf(index, blk, BT_WRITE);
+    rpage = BufferGetPage(rbuf);
+    ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
+
+    /*
+     * if we only have one page on a level, we can just make it the
+     * root.
+     */
+    if (P_RIGHTMOST(ropaque)) {
+   ropaque->btpo_flags |= BTP_ROOT;
+   _bt_wrtbuf(index, rbuf);
+   _bt_metaproot(index, blk);
+   return;
+    }
+    _bt_relbuf(index, rbuf, BT_WRITE);
+   
+    (void) memset((char *) &state, 0, sizeof(BTPageState));
+    _bt_blnewpage(index, &(state.btps_buf), &(state.btps_page), 0);
+    state.btps_lastoff = P_HIKEY;
+    state.btps_lastbti = (BTItem) NULL;
+    firstblk = BufferGetBlockNumber(state.btps_buf);
+    
+    /* for each page... */
+    do {
+   rbuf = _bt_getbuf(index, blk, BT_READ);
+   rpage = BufferGetPage(rbuf);
+   ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
+   
+   /* for each item... */
+   if (!PageIsEmpty(rpage)) {
+       /*
+        * form a new index tuple corresponding to the minimum key
+        * of the lower page and insert it into a page at this
+        * level.
+        */
+       off = P_RIGHTMOST(ropaque) ? P_HIKEY : P_FIRSTKEY;
+       bti = (BTItem) PageGetItem(rpage, PageGetItemId(rpage, off));
+       nbti = _bt_formitem(&(bti->bti_itup));
+       ItemPointerSet(&(nbti->bti_itup.t_tid), blk, P_HIKEY);
+#ifdef FASTBUILD_DEBUG
+       {
+       bool isnull;
+       Datum d = index_getattr(&(nbti->bti_itup), 1, 
+                   RelationGetTupleDescriptor(index),
+                   &isnull);
+       printf("_bt_upperbuild: inserting <%x> at %d\n",
+              d, level);
+       }
+#endif /* FASTBUILD_DEBUG */
+       _bt_buildadd(index, &state, nbti, 0);
+       pfree((void *) nbti);
+   }
+   blk = ropaque->btpo_next;
+   _bt_relbuf(index, rbuf, BT_READ);
+    } while (blk != P_NONE);
+   
+    /*
+     * this is the rightmost page, so the ItemId array needs to be
+     * slid back one slot.
+     */
+    _bt_slideleft(index, state.btps_buf, state.btps_page);
+    _bt_wrtbuf(index, state.btps_buf);
+    
+    _bt_upperbuild(index, firstblk, level + 1);
+}
+
+/*
+ * given a spool loaded by successive calls to _bt_spool, create an
+ * entire btree: first merge the spooled runs into the leaf level,
+ * then build the upper levels on top of it, starting at level 0.
+ */
+void
+_bt_leafbuild(Relation index, void *spool)
+{
+    /* block number of the first (leftmost) leaf page produced by the merge */
+    BlockNumber leafstart = _bt_merge(index, (BTSpool *) spool);
+
+    _bt_upperbuild(index, leafstart, 0);
+}
+
+#else /* !FASTBUILD */
+
+/*
+ * FASTBUILD is not defined: the spool/build entry points are still
+ * provided, but as do-nothing stand-ins.
+ */
+
+void *
+_bt_spoolinit(Relation index, int ntapes)
+{
+    return (void *) NULL;
+}
+
+void
+_bt_spooldestroy(void *spool)
+{
+}
+
+void
+_bt_spool(Relation index, BTItem btitem, void *spool)
+{
+}
+
+void
+_bt_upperbuild(Relation index, BlockNumber blk, int level)
+{
+}
+
+void
+_bt_leafbuild(Relation index, void *spool)
+{
+}
+
+#endif /* !FASTBUILD */
diff --git a/src/backend/access/nbtree/nbtstrat.c b/src/backend/access/nbtree/nbtstrat.c

new file mode 100644 (file)

index 0000000..2214c60
--- /dev/null
+++ b/src/backend/access/nbtree/nbtstrat.c
@@ -0,0 +1,134 @@
+/*-------------------------------------------------------------------------
+ *
+ * btstrat.c--
+ *    Strategy map entries for the btree indexed access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtstrat.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/nbtree.h"
+
+/*
+ * Note:
+ * StrategyNegate, StrategyCommute, and StrategyNegateCommute
+ * assume <, <=, ==, >=, > ordering.
+ */
+/*
+ * each table below has one slot per btree strategy, in <, <=, ==, >=, >
+ * order; the slot holds the strategy that results from applying the
+ * table's transformation to that slot's operator.
+ */
+
+/* negation: NOT (a op b) expressed as a single strategy */
+static StrategyNumber  BTNegate[5] = {
+    BTGreaterEqualStrategyNumber,	/* !(a <  b)  is  a >= b */
+    BTGreaterStrategyNumber,		/* !(a <= b)  is  a >  b */
+    InvalidStrategy,			/* no entry for == */
+    BTLessStrategyNumber,		/* !(a >= b)  is  a <  b */
+    BTLessEqualStrategyNumber		/* !(a >  b)  is  a <= b */
+};
+
+/* commutation: (b op a) expressed as a strategy on (a, b) */
+static StrategyNumber  BTCommute[5] = {
+    BTGreaterStrategyNumber,		/* b <  a  is  a >  b */
+    BTGreaterEqualStrategyNumber,	/* b <= a  is  a >= b */
+    InvalidStrategy,			/* no entry for == */
+    BTLessEqualStrategyNumber,		/* b >= a  is  a <= b */
+    BTLessStrategyNumber		/* b >  a  is  a <  b */
+};
+
+/* negation and commutation combined: NOT (b op a) */
+static StrategyNumber  BTNegateCommute[5] = {
+    BTLessEqualStrategyNumber,		/* !(b <  a)  is  a <= b */
+    BTLessStrategyNumber,		/* !(b <= a)  is  a <  b */
+    InvalidStrategy,			/* no entry for == */
+    BTGreaterStrategyNumber,		/* !(b >= a)  is  a >  b */
+    BTGreaterEqualStrategyNumber	/* !(b >  a)  is  a >= b */
+};
+
+/*
+ * the four arrays below are cast to StrategyTerm where they are used.
+ * NOTE(review): each looks like a count (2) followed by that many
+ * (strategy number, SK_* flags) pairs -- confirm against the
+ * StrategyTerm definition, which is not part of this file.  read that
+ * way, each array spells one way of testing equality with a single
+ * operator, e.g. for '<':  NOT (a < b)  and  NOT (b < a).
+ */
+static uint16  BTLessTermData[] = {        /* XXX type clash */
+    2,
+    BTLessStrategyNumber,
+    SK_NEGATE,
+    BTLessStrategyNumber,
+    SK_NEGATE | SK_COMMUTE
+};
+
+static uint16  BTLessEqualTermData[] = {   /* XXX type clash */
+    2,
+    BTLessEqualStrategyNumber,
+    0x0,
+    BTLessEqualStrategyNumber,
+    SK_COMMUTE
+};
+
+static uint16  BTGreaterEqualTermData[] = {    /* XXX type clash */
+    2,
+    BTGreaterEqualStrategyNumber,
+    0x0,
+    BTGreaterEqualStrategyNumber,
+    SK_COMMUTE
+    };
+
+static uint16  BTGreaterTermData[] = {     /* XXX type clash */
+    2,
+    BTGreaterStrategyNumber,
+    SK_NEGATE,
+    BTGreaterStrategyNumber,
+    SK_NEGATE | SK_COMMUTE
+};
+
+/*
+ * NULL-terminated list of the term arrays defined in this file (one
+ * each for <, <=, >= and >); it is installed in BTEvaluationData's
+ * expression array.
+ */
+static StrategyTerm    BTEqualExpressionData[] = {
+    (StrategyTerm)BTLessTermData,      /* XXX */
+    (StrategyTerm)BTLessEqualTermData,     /* XXX */
+    (StrategyTerm)BTGreaterEqualTermData,  /* XXX */
+    (StrategyTerm)BTGreaterTermData,       /* XXX */
+    NULL
+};
+
+/*
+ * strategy evaluation data handed to RelationGetStrategy and
+ * RelationInvokeStrategy by the routines in this file.  the initializer
+ * supplies, in order: BTMaxStrategyNumber, the negate / commute /
+ * negate-commute tables, and an array of expressions in which only the
+ * third slot is filled (with BTEqualExpressionData).
+ * NOTE(review): the third slot presumably corresponds to the '=' strategy
+ * -- confirm against the StrategyEvaluationData definition.
+ */
+static StrategyEvaluationData  BTEvaluationData = {
+    /* XXX static for simplicity */
+    
+    BTMaxStrategyNumber,
+    (StrategyTransformMap)BTNegate,    /* XXX */
+    (StrategyTransformMap)BTCommute,   /* XXX */
+    (StrategyTransformMap)BTNegateCommute, /* XXX */
+
+    { NULL, NULL, (StrategyExpression)BTEqualExpressionData, NULL, NULL,
+      NULL,NULL,NULL,NULL,NULL,NULL,NULL}
+};
+
+/* ----------------------------------------------------------------
+ * _bt_getstrat
+ *
+ * Look up the strategy number for procedure 'proc' on attribute
+ * 'attno' of 'rel' via RelationGetStrategy, using the btree
+ * evaluation data.  The result is asserted to be a valid strategy.
+ * ----------------------------------------------------------------
+ */
+
+StrategyNumber
+_bt_getstrat(Relation rel, AttrNumber attno, RegProcedure proc)
+{
+    StrategyNumber found = RelationGetStrategy(rel, attno,
+                                               &BTEvaluationData, proc);
+
+    Assert(StrategyNumberIsValid(found));
+    return found;
+}
+
+/*
+ * _bt_invokestrat
+ *
+ * Evaluate strategy 'strat' for attribute 'attno' of 'rel' on the
+ * operands 'left' and 'right' by delegating to RelationInvokeStrategy
+ * with the btree evaluation data, and return its result.
+ */
+bool
+_bt_invokestrat(Relation rel,
+                AttrNumber attno,
+                StrategyNumber strat,
+                Datum left,
+                Datum right)
+{
+    bool satisfied;
+
+    satisfied = RelationInvokeStrategy(rel, &BTEvaluationData, attno, strat,
+                                       left, right);
+    return satisfied;
+}
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c

new file mode 100644 (file)

index 0000000..695a2b6
--- /dev/null
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -0,0 +1,239 @@
+/*-------------------------------------------------------------------------
+ *
+ * btutils.c--
+ *    Utility code for Postgres btree implementation.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <stdio.h>
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+#include "utils/datum.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/iqual.h"
+#include "access/nbtree.h"
+
+/*
+ * _bt_mkscankey
+ *
+ * Build a palloc'd array of scan keys from index tuple 'itup', one
+ * entry per attribute of 'rel'.  Each entry carries the attribute's
+ * value from the tuple and the attribute's BTORDER_PROC procedure.
+ * The null indicator returned by index_getattr is not examined.
+ * The caller releases the array with _bt_freeskey.
+ */
+ScanKey
+_bt_mkscankey(Relation rel, IndexTuple itup)
+{
+    int nkeys = rel->rd_rel->relnatts;
+    TupleDesc tupdesc = RelationGetTupleDescriptor(rel);
+    ScanKey keys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+    int attno;
+
+    /* attribute numbers are 1-based, array slots 0-based */
+    for (attno = 1; attno <= nkeys; attno++) {
+        bool isnull;
+        Datum value = index_getattr(itup, attno, tupdesc, &isnull);
+        RegProcedure cmpproc = index_getprocid(rel, attno, BTORDER_PROC);
+
+        ScanKeyEntryInitialize(&keys[attno - 1],
+                               0x0, (AttrNumber) attno, cmpproc, value);
+    }
+
+    return keys;
+}
+
+/*
+ * _bt_freeskey -- release a scan key array with pfree.
+ */
+void
+_bt_freeskey(ScanKey skey)
+{
+    pfree(skey);
+}
+
+/*
+ * _bt_freestack -- free every entry of a BTStack.
+ *
+ * Follows the bts_parent links starting at 'stack', freeing each
+ * entry's bts_btitem and then the entry itself.
+ */
+void
+_bt_freestack(BTStack stack)
+{
+    BTStack parent;
+
+    for (; stack != (BTStack) NULL; stack = parent) {
+        /* grab the link before the entry that holds it goes away */
+        parent = stack->bts_parent;
+        pfree(stack->bts_btitem);
+        pfree(stack);
+    }
+}
+
+/*
+ *  _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
+ *
+ * On entry 'key' holds *numberOfKeys scan keys, all of which are
+ * treated as quals on index attribute 1 (the attribute number is
+ * hard-wired below).  Each key is matched to a btree strategy by
+ * comparing its procedure against the strategy map, and redundant
+ * keys are then discarded:
+ *
+ * - of several keys with the same strategy, a later key replaces the
+ *   one kept so far when its operator, applied to (its argument, the
+ *   kept argument), returns true;
+ * - if an = key is present, every other key is dropped;
+ * - of a < key and a <= key only the more restrictive bound is kept,
+ *   and likewise for > and >=.
+ *
+ * The surviving keys are written back to the front of 'key', highest
+ * strategy number first, and *numberOfKeys is set to their count.
+ * A key whose procedure matches no btree strategy is reported with
+ * elog(WARN).
+ *
+ * This routine only needs to be called if there is more than one
+ * qual clause using this index.
+ */
+void
+_bt_orderkeys(Relation relation, uint16 *numberOfKeys, ScanKey key)
+{
+    ScanKey xform;
+    ScanKeyData *cur;
+    StrategyMap map;
+    int nbytes;
+    long test;
+    int i, j;
+    int init[BTMaxStrategyNumber+1];
+
+    /* haven't looked at any strategies yet */
+    for (i = 0; i <= BTMaxStrategyNumber; i++)
+        init[i] = 0;
+
+    /* get space for the modified array of keys (slot = strategy - 1) */
+    nbytes = BTMaxStrategyNumber * sizeof(ScanKeyData);
+    xform = (ScanKey) palloc(nbytes);
+    memset(xform, 0, nbytes);
+
+    /* get the strategy map for this index/attribute pair */
+    /*
+     *  XXX
+     *  When we support multiple keys in a single index, this is what
+     *  we'll want to do.  At present, the planner is hosed, so we
+     *  hard-wire the attribute number below.  Postgres only does single-
+     *  key indices...
+     * map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
+     *                     BTMaxStrategyNumber,
+     *                     key->data[0].attributeNumber);
+     */
+    map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
+                                      BTMaxStrategyNumber,
+                                      1 /* XXX */ );
+
+    /* check each key passed in */
+    for (i = *numberOfKeys; --i >= 0; ) {
+        cur = &key[i];
+
+        /* find the strategy slot whose procedure this key uses */
+        for (j = BTMaxStrategyNumber; --j >= 0; ) {
+            if (cur->sk_procedure == map->entry[j].sk_procedure)
+                break;
+        }
+
+        /*
+         * no slot matched: j is -1 here, and using it below would index
+         * init[] and xform[] out of bounds.  complain instead.
+         */
+        if (j < 0) {
+            pfree(xform);
+            elog(WARN, "_bt_orderkeys: scan key procedure does not match any btree strategy");
+            return;     /* in case elog comes back to us */
+        }
+
+        /* have we seen one of these before? */
+        if (init[j]) {
+            /* yup, use the appropriate value */
+            test =
+                (long) FMGR_PTR2(cur->sk_func, cur->sk_procedure,
+                                 cur->sk_argument, xform[j].sk_argument);
+            if (test)
+                xform[j].sk_argument = cur->sk_argument;
+        } else {
+            /* nope, use this value */
+            memmove(&xform[j], cur, sizeof(*cur));
+
+            init[j] = 1;
+        }
+    }
+
+    /* if = has been specified, no other key will be used */
+    if (init[BTEqualStrategyNumber - 1]) {
+        init[BTLessStrategyNumber - 1] = 0;
+        init[BTLessEqualStrategyNumber - 1] = 0;
+        init[BTGreaterEqualStrategyNumber - 1] = 0;
+        init[BTGreaterStrategyNumber - 1] = 0;
+    }
+
+    /* only one of <, <= */
+    if (init[BTLessStrategyNumber - 1]
+        && init[BTLessEqualStrategyNumber - 1]) {
+
+        ScanKeyData *lt, *le;
+
+        lt = &xform[BTLessStrategyNumber - 1];
+        le = &xform[BTLessEqualStrategyNumber - 1];
+
+        /*
+         *  DO NOT use the cached function stuff here -- this is key
+         *  ordering, happens only when the user expresses a hokey
+         *  qualification, and gets executed only once, anyway.  The
+         *  transform maps are hard-coded, and can't be initialized
+         *  in the correct way.
+         */
+
+        /*
+         * quals are  x < lt  and  x <= le.  when lt <= le, the < bound
+         * already implies the <= bound, so the <= key is redundant;
+         * otherwise the <= bound is the tighter one and < is redundant.
+         */
+        test = (long) fmgr(le->sk_procedure, lt->sk_argument, le->sk_argument);
+
+        if (test)
+            init[BTLessEqualStrategyNumber - 1] = 0;
+        else
+            init[BTLessStrategyNumber - 1] = 0;
+    }
+
+    /* only one of >, >= */
+    if (init[BTGreaterStrategyNumber - 1]
+        && init[BTGreaterEqualStrategyNumber - 1]) {
+
+        ScanKeyData *gt, *ge;
+
+        gt = &xform[BTGreaterStrategyNumber - 1];
+        ge = &xform[BTGreaterEqualStrategyNumber - 1];
+
+        /* see note above on function cache */
+
+        /*
+         * quals are  x > gt  and  x >= ge.  when gt >= ge, the > bound
+         * already implies the >= bound, so the >= key is redundant;
+         * otherwise the >= bound is the tighter one and > is redundant.
+         */
+        test = (long) fmgr(ge->sk_procedure, gt->sk_argument, ge->sk_argument);
+
+        if (test)
+            init[BTGreaterEqualStrategyNumber - 1] = 0;
+        else
+            init[BTGreaterStrategyNumber - 1] = 0;
+    }
+
+    /* okay, reorder and count */
+    j = 0;
+
+    for (i = BTMaxStrategyNumber; --i >= 0; )
+        if (init[i])
+            key[j++] = xform[i];
+
+    *numberOfKeys = j;
+
+    pfree(xform);
+}
+
+/*
+ * _bt_checkqual -- does index tuple 'itup' satisfy the scan's keys?
+ *
+ * A scan without keys accepts every tuple; otherwise the answer comes
+ * from index_keytest applied to the scan's key array.
+ */
+bool
+_bt_checkqual(IndexScanDesc scan, IndexTuple itup)
+{
+    TupleDesc tupdesc;
+
+    /* numberOfKeys is unsigned, so "not > 0" means exactly zero */
+    if (scan->numberOfKeys == 0)
+        return true;
+
+    tupdesc = RelationGetTupleDescriptor(scan->relation);
+    return index_keytest(itup, tupdesc, scan->numberOfKeys, scan->keyData);
+}
+
+/*
+ * _bt_formitem -- wrap an index tuple in a freshly palloc'd BTItem.
+ *
+ * The tuple is copied into the item's bti_itup and the item is given
+ * a new oid from newoid().  A tuple whose t_info has INDEX_NULL_MASK
+ * set is reported with elog(WARN), since btree keys may not be null.
+ * The caller owns (and eventually pfrees) the returned item.
+ */
+BTItem
+_bt_formitem(IndexTuple itup)
+{
+    extern Oid newoid();
+    Size itupsz;
+    int itemsz;
+    BTItem item;
+
+    /* disallow nulls in btree keys */
+    if (itup->t_info & INDEX_NULL_MASK)
+        elog(WARN, "btree indices cannot include null keys");
+
+    /*
+     * the item is the tuple plus whatever BTItemData carries in
+     * addition to its embedded IndexTupleData.
+     */
+    itupsz = IndexTupleSize(itup);
+    itemsz = itupsz + (sizeof(BTItemData) - sizeof(IndexTupleData));
+
+    item = (BTItem) palloc(itemsz);
+    memmove((char *) &(item->bti_itup), (char *) itup, itupsz);
+    item->bti_oid = newoid();
+
+    return item;
+}
diff --git a/src/backend/access/printtup.h b/src/backend/access/printtup.h

new file mode 100644 (file)

index 0000000..b5843da
--- /dev/null
+++ b/src/backend/access/printtup.h
@@ -0,0 +1,26 @@
+/*-------------------------------------------------------------------------
+ *
+ * printtup.h--
+ *    
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: printtup.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    PRINTTUP_H
+#define PRINTTUP_H
+
+#include "access/htup.h"
+#include "access/tupdesc.h"
+
+/*
+ * prototypes only; the definitions are not part of this header.
+ * NOTE(review): judging by the names these are the tuple output
+ * routines (printtup / debugtup / printtup_internal) plus type lookup
+ * helpers (typtoout, gettypelem) -- confirm against the implementation.
+ */
+extern Oid typtoout(Oid type);
+extern void printtup(HeapTuple tuple, TupleDesc typeinfo);
+extern void showatts(char *name, TupleDesc attinfo);
+extern void debugtup(HeapTuple tuple, TupleDesc typeinfo);
+extern void printtup_internal(HeapTuple tuple, TupleDesc typeinfo);
+extern Oid gettypelem(Oid type);
+
+#endif /* PRINTTUP_H */
diff --git a/src/backend/access/relscan.h b/src/backend/access/relscan.h

new file mode 100644 (file)

index 0000000..7899e9d
--- /dev/null
+++ b/src/backend/access/relscan.h
@@ -0,0 +1,87 @@
+/*-------------------------------------------------------------------------
+ *
+ * relscan.h--
+ *    POSTGRES internal relation scan descriptor definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: relscan.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    RELSCAN_H
+#define RELSCAN_H
+
+#include "c.h"
+
+#include "access/skey.h"
+#include "storage/buf.h"
+#include "access/htup.h"
+#include "storage/itemptr.h"
+
+#include "utils/tqual.h"
+#include "utils/rel.h"
+
+
+typedef ItemPointerData    MarkData;
+
+/*
+ * HeapScanDescData --
+ * State of a scan over a heap relation: the relation, a window of
+ * previous/current/next tuples and buffers, the marked positions,
+ * and the time qualification and scan keys the scan was started with.
+ * HeapScanDesc is a pointer to this structure.
+ */
+typedef struct HeapScanDescData {
+   Relation    rs_rd;      /* pointer to relation descriptor */
+   HeapTuple   rs_ptup;    /* previous tuple in scan */
+   HeapTuple   rs_ctup;    /* current tuple in scan */
+   HeapTuple   rs_ntup;    /* next tuple in scan */
+   Buffer      rs_pbuf;    /* previous buffer in scan */
+   Buffer      rs_cbuf;    /* current buffer in scan */
+   Buffer      rs_nbuf;    /* next buffer in scan */
+   ItemPointerData rs_mptid;   /* marked previous tid */
+   ItemPointerData rs_mctid;   /* marked current tid */
+   ItemPointerData rs_mntid;   /* marked next tid */
+   ItemPointerData rs_mcd;     /* marked current delta XXX ??? */
+   bool        rs_atend;   /* restart scan at end? */
+   TimeQual    rs_tr;      /* time qualification */
+   uint16      rs_cdelta;  /* current delta in chain */
+   uint16      rs_nkeys;   /* number of attributes in keys */
+   ScanKey     rs_key;     /* key descriptors */
+} HeapScanDescData;
+
+typedef HeapScanDescData *HeapScanDesc;
+
+/*
+ * IndexScanDescData --
+ * State of a scan over an index relation: the relation, a slot for
+ * access-method private data, the previous/current/next index
+ * pointers together with their marked counterparts, and the scan keys.
+ * IndexScanDesc is a pointer to this structure.
+ */
+typedef struct IndexScanDescData {
+   Relation    relation;       /* relation descriptor */
+   void        *opaque;        /* am-specific slot */
+   ItemPointerData previousItemData;   /* previous index pointer */
+   ItemPointerData currentItemData;    /* current index pointer */
+   ItemPointerData nextItemData;       /* next index pointer */
+   MarkData    previousMarkData;   /* marked previous pointer */
+   MarkData    currentMarkData;    /* marked current  pointer */
+   MarkData    nextMarkData;       /* marked next pointer */
+   uint8       flags;          /* scan position flags */
+   bool        scanFromEnd;        /* restart scan at end? */
+   uint16      numberOfKeys;       /* number of key attributes */
+   ScanKey     keyData;        /* key descriptor */
+} IndexScanDescData;
+
+typedef IndexScanDescData  *IndexScanDesc;
+
+/* ----------------
+ * IndexScanDescPtr is used in the executor where we have to
+ * keep track of several index scans when using several indices
+ * - cim 9/10/89
+ * ----------------
+ */
+typedef IndexScanDesc      *IndexScanDescPtr;
+
+/*
+ * HeapScanIsValid --
+ * True iff the heap scan is valid.
+ */
+#define    HeapScanIsValid(scan) PointerIsValid(scan)
+
+/*
+ * IndexScanIsValid --
+ * True iff the index scan is valid.
+ */
+#define IndexScanIsValid(scan) PointerIsValid(scan)
+
+#endif /* RELSCAN_H */
diff --git a/src/backend/access/rtree.h b/src/backend/access/rtree.h

new file mode 100644 (file)

index 0000000..79f1622
--- /dev/null
+++ b/src/backend/access/rtree.h
@@ -0,0 +1,98 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtree.h--
+ *    common declarations for the rtree access method code.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: rtree.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RTREE_H
+#define RTREE_H
+
+/* see rtstrat.c for what all this is about */
+/* strategy numbers run 1..8; RTNStrategies is how many there are */
+#define RTNStrategies          8
+#define RTLeftStrategyNumber       1
+#define RTOverLeftStrategyNumber   2
+#define RTOverlapStrategyNumber        3
+#define RTOverRightStrategyNumber  4
+#define RTRightStrategyNumber      5
+#define RTSameStrategyNumber       6
+#define RTContainsStrategyNumber   7
+#define RTContainedByStrategyNumber    8
+
+/* support procedure numbers run 1..3; RTNProcs is how many there are */
+#define RTNProcs           3
+#define RT_UNION_PROC          1
+#define RT_INTER_PROC          2
+#define RT_SIZE_PROC           3
+
+/* NOTE(review): presumably a bit for RTreePageOpaqueData.flags -- confirm */
+#define F_LEAF     (1 << 0)
+
+/*
+ * NOTE(review): going by the name, this is what an rtree page keeps in
+ * its opaque (special) space -- confirm against the rtree page code.
+ */
+typedef struct RTreePageOpaqueData {
+   uint32      flags;	/* presumably F_LEAF and friends -- confirm */
+} RTreePageOpaqueData;
+
+typedef RTreePageOpaqueData    *RTreePageOpaque;
+
+/*
+ *  When we descend a tree, we keep a stack of parent pointers.
+ */
+
+/* one entry of the parent-pointer stack; entries link via rts_parent */
+typedef struct RTSTACK {
+   struct RTSTACK  *rts_parent;	/* next entry of the stack */
+   OffsetNumber    rts_child;	/* NOTE(review): presumably the offset followed on rts_blk -- confirm */
+   BlockNumber rts_blk;	/* NOTE(review): presumably the block of the page visited -- confirm */
+} RTSTACK;
+
+/*
+ *  When we're doing a scan, we need to keep track of the parent stack
+ *  for the marked and current items.  Also, rtrees have the following
+ *  property:  if you're looking for the box (1,1,2,2), on the internal
+ *  nodes you have to search for all boxes that *contain* (1,1,2,2), and
+ *  not the ones that match it.  We have a private scan key for internal
+ *  nodes in the opaque structure for rtrees for this reason.  See
+ *  access/index-rtree/rtscan.c and rtstrat.c for how it gets initialized.
+ */
+
+/* per-scan private state for rtree scans */
+typedef struct RTreeScanOpaqueData {
+   struct RTSTACK  *s_stack;	/* presumably the parent stack for the current item -- confirm */
+   struct RTSTACK  *s_markstk;	/* presumably the parent stack for the marked item -- confirm */
+   uint16      s_flags;	/* RTS_* bits, see below */
+   uint16      s_internalNKey;	/* presumably the number of keys in s_internalKey -- confirm */
+   ScanKey     s_internalKey;	/* private scan key for internal nodes */
+} RTreeScanOpaqueData;
+
+typedef RTreeScanOpaqueData    *RTreeScanOpaque;
+
+/*
+ *  When we're doing a scan and updating a tree at the same time, the
+ *  updates may affect the scan.  We use the flags entry of the scan's
+ *  opaque space to record our actual position in response to updates
+ *  that we can't handle simply by adjusting pointers.
+ */
+
+#define RTS_CURBEFORE  ((uint16) (1 << 0))
+#define RTS_MRKBEFORE  ((uint16) (1 << 1))
+
+/* root page of an rtree */
+#define P_ROOT     0
+
+/*
+ *  When we update a relation on which we're doing a scan, we need to
+ *  check the scan and fix it if the update affected any of the pages it
+ *  touches.  Otherwise, we can miss records that we should see.  The only
+ *  times we need to do this are for deletions and splits.  See the code in
+ *  rtscan.c for how the scan is fixed.  These two constants tell us what sort
+ *  of operation changed the index.
+ */
+
+#define    RTOP_DEL    0
+#define    RTOP_SPLIT  1
+
+/* defined in rtree.c */
+extern void freestack(RTSTACK *s);
+
+#endif /* RTREE_H */
diff --git a/src/backend/access/rtree/Makefile.inc b/src/backend/access/rtree/Makefile.inc

new file mode 100644 (file)

index 0000000..a93a5e5
--- /dev/null
+++ b/src/backend/access/rtree/Makefile.inc
@@ -0,0 +1,14 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/rtree (R-Tree access method)
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+# C sources of the R-Tree access method, appended to the SUBSRCS list
+# (presumably collected by the makefile that includes this fragment -- confirm).
+SUBSRCS+= rtget.c rtproc.c rtree.c rtscan.c rtstrat.c
diff --git a/src/backend/access/rtree/rtget.c b/src/backend/access/rtree/rtget.c

new file mode 100644 (file)

index 0000000..fb2e169
--- /dev/null
+++ b/src/backend/access/rtree/rtget.c
@@ -0,0 +1,320 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtget.c--
+ *    fetch tuples from an rtree scan.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtget.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/iqual.h"
+#include "access/rtree.h"
+#include "access/sdir.h"
+
+/* non-export function prototypes */
+static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n,
+                ScanDirection dir);
+static RetrieveIndexResult rtscancache(IndexScanDesc s, ScanDirection dir);
+static RetrieveIndexResult rtfirst(IndexScanDesc s, ScanDirection dir);
+static RetrieveIndexResult rtnext(IndexScanDesc s, ScanDirection dir);
+static ItemPointer rtheapptr(Relation r, ItemPointer itemp);
+
+
+/*
+ *  rtgettuple -- fetch the next tuple of an rtree scan.
+ *
+ *    The scan-descriptor cache (rtscancache) is consulted first.  If it
+ *    yields nothing, the scan either continues from its current item
+ *    (rtnext) or, when no current item has been set yet, starts from the
+ *    root of the tree (rtfirst).
+ */
+RetrieveIndexResult
+rtgettuple(IndexScanDesc s, ScanDirection dir)
+{
+    RetrieveIndexResult cached = rtscancache(s, dir);
+
+    /* a cache hit needs no further work */
+    if (cached != (RetrieveIndexResult) NULL)
+        return (cached);
+
+    /* a valid current item means the scan is already under way */
+    return (ItemPointerIsValid(&(s->currentItemData))
+            ? rtnext(s, dir)
+            : rtfirst(s, dir));
+}
+
+/*
+ *  rtfirst -- start a scan by locating its first matching leaf entry.
+ *
+ *    Begins at the root page (P_ROOT) and descends.  For every internal
+ *    page it leaves, an RTSTACK entry (block number + offset of the child
+ *    pointer followed) is pushed on the scan's s_stack.  When a page has
+ *    no matching entry, the top of the stack is popped and the search
+ *    resumes in that parent page just past the child pointer recorded
+ *    there.
+ *
+ *    On reaching a matching leaf entry, its position is stored in
+ *    s->currentItemData and the result of FormRetrieveIndexResult() on
+ *    that position and a palloc'd copy of the entry's t_tid is returned.
+ *    Returns NULL when the stack runs empty without a match.
+ *
+ *    Only one buffer is held at a time; it is released before the next
+ *    page is read and before returning.
+ */
+static RetrieveIndexResult
+rtfirst(IndexScanDesc s, ScanDirection dir)
+{
+    Buffer b;
+    Page p;
+    OffsetNumber n;
+    OffsetNumber maxoff;
+    RetrieveIndexResult res;
+    RTreePageOpaque po;
+    RTreeScanOpaque so;
+    RTSTACK *stk;
+    BlockNumber blk;
+    IndexTuple it;
+    ItemPointer ip;
+    
+    b = ReadBuffer(s->relation, P_ROOT);
+    p = BufferGetPage(b);
+    po = (RTreePageOpaque) PageGetSpecialPointer(p);
+    so = (RTreeScanOpaque) s->opaque;
+    
+    for (;;) {
+   maxoff = PageGetMaxOffsetNumber(p);
+   /* a freshly entered page is searched from its last entry when scanning backward */
+   if (ScanDirectionIsBackward(dir))
+       n = findnext(s, p, maxoff, dir);
+   else
+       n = findnext(s, p, FirstOffsetNumber, dir);
+   
+   /* n outside [FirstOffsetNumber, maxoff] means nothing matched on this page */
+   while (n < FirstOffsetNumber || n > maxoff) {
+       
+       ReleaseBuffer(b);
+       /* no parent left to resume in: the scan found nothing */
+       if (so->s_stack == (RTSTACK *) NULL)
+       return ((RetrieveIndexResult) NULL);
+       
+       /* pop the parent and reread its page */
+       stk = so->s_stack;
+       b = ReadBuffer(s->relation, stk->rts_blk);
+       p = BufferGetPage(b);
+       po = (RTreePageOpaque) PageGetSpecialPointer(p);
+       maxoff = PageGetMaxOffsetNumber(p);
+       
+       /* step past the child pointer we followed last time */
+       if (ScanDirectionIsBackward(dir)) {
+       n = OffsetNumberPrev(stk->rts_child);
+       } else {
+       n = OffsetNumberNext(stk->rts_child);
+       }
+       so->s_stack = stk->rts_parent;
+       pfree(stk);
+       
+       n = findnext(s, p, n, dir);
+   }
+   if (po->flags & F_LEAF) {
+       /* matching leaf entry: remember it as the scan's current item */
+       ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
+       
+       /* copy the entry's t_tid out of the page before the buffer is released */
+       it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
+       ip = (ItemPointer) palloc(sizeof(ItemPointerData));
+       memmove((char *) ip, (char *) &(it->t_tid),
+           sizeof(ItemPointerData));
+       ReleaseBuffer(b);
+       
+       res = FormRetrieveIndexResult(&(s->currentItemData), ip);
+       
+       return (res);
+   } else {
+       /* internal page: remember where we are, then descend into the matching child */
+       stk = (RTSTACK *) palloc(sizeof(RTSTACK));
+       stk->rts_child = n;
+       stk->rts_blk = BufferGetBlockNumber(b);
+       stk->rts_parent = so->s_stack;
+       so->s_stack = stk;
+       
+       it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
+       blk = ItemPointerGetBlockNumber(&(it->t_tid));
+       
+       ReleaseBuffer(b);
+       b = ReadBuffer(s->relation, blk);
+       p = BufferGetPage(b);
+       po = (RTreePageOpaque) PageGetSpecialPointer(p);
+   }
+    }
+}
+
+/*
+ *  rtnext -- continue a scan from its current item.
+ *
+ *    Starts on the page named by s->currentItemData, one offset past the
+ *    current item in the scan direction, and looks for the next matching
+ *    entry.  When a page has no further match, the scan's s_stack is
+ *    popped and the search resumes in the parent page just past the child
+ *    pointer recorded there; when a matching internal entry is found, the
+ *    position is pushed and the search descends into that child, starting
+ *    at its first (forward) or last (backward) offset.
+ *
+ *    On a matching leaf entry, s->currentItemData is updated and the
+ *    result of FormRetrieveIndexResult() on it and a palloc'd copy of the
+ *    entry's t_tid is returned.  Returns NULL when the stack runs empty.
+ */
+static RetrieveIndexResult
+rtnext(IndexScanDesc s, ScanDirection dir)
+{
+    Buffer b;
+    Page p;
+    OffsetNumber n;
+    OffsetNumber maxoff;
+    RetrieveIndexResult res;
+    RTreePageOpaque po;
+    RTreeScanOpaque so;
+    RTSTACK *stk;
+    BlockNumber blk;
+    IndexTuple it;
+    ItemPointer ip;
+    
+    blk = ItemPointerGetBlockNumber(&(s->currentItemData));
+    n = ItemPointerGetOffsetNumber(&(s->currentItemData));
+    
+    /* move one entry past the item returned last time */
+    if (ScanDirectionIsForward(dir)) {
+   n = OffsetNumberNext(n);
+    } else {
+   n = OffsetNumberPrev(n);
+    }
+
+    b = ReadBuffer(s->relation, blk);
+    p = BufferGetPage(b);
+    po = (RTreePageOpaque) PageGetSpecialPointer(p);
+    so = (RTreeScanOpaque) s->opaque;
+    
+    for (;;) {
+   maxoff = PageGetMaxOffsetNumber(p);
+   n = findnext(s, p, n, dir);
+   
+   /* n outside [FirstOffsetNumber, maxoff] means nothing more matches on this page */
+   while (n < FirstOffsetNumber || n > maxoff) {
+       
+       ReleaseBuffer(b);
+       /* no parent left to resume in: the scan is exhausted */
+       if (so->s_stack == (RTSTACK *) NULL)
+       return ((RetrieveIndexResult) NULL);
+       
+       /* pop the parent and reread its page */
+       stk = so->s_stack;
+       b = ReadBuffer(s->relation, stk->rts_blk);
+       p = BufferGetPage(b);
+       maxoff = PageGetMaxOffsetNumber(p);
+       po = (RTreePageOpaque) PageGetSpecialPointer(p);
+       
+       /* step past the child pointer we followed last time */
+       if (ScanDirectionIsBackward(dir)) {
+       n = OffsetNumberPrev(stk->rts_child);
+       } else {
+       n = OffsetNumberNext(stk->rts_child);
+       }
+       so->s_stack = stk->rts_parent;
+       pfree(stk);
+       
+       n = findnext(s, p, n, dir);
+   }
+   if (po->flags & F_LEAF) {
+       /* matching leaf entry: it becomes the scan's current item */
+       ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
+       
+       /* copy the entry's t_tid out of the page before the buffer is released */
+       it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
+       ip = (ItemPointer) palloc(sizeof(ItemPointerData));
+       memmove((char *) ip, (char *) &(it->t_tid),
+           sizeof(ItemPointerData));
+       ReleaseBuffer(b);
+       
+       res = FormRetrieveIndexResult(&(s->currentItemData), ip);
+       
+       return (res);
+   } else {
+       /* internal page: remember where we are, then descend into the matching child */
+       stk = (RTSTACK *) palloc(sizeof(RTSTACK));
+       stk->rts_child = n;
+       stk->rts_blk = BufferGetBlockNumber(b);
+       stk->rts_parent = so->s_stack;
+       so->s_stack = stk;
+       
+       it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
+       blk = ItemPointerGetBlockNumber(&(it->t_tid));
+       
+       ReleaseBuffer(b);
+       b = ReadBuffer(s->relation, blk);
+       p = BufferGetPage(b);
+       po = (RTreePageOpaque) PageGetSpecialPointer(p);
+       
+       /* search the child from its far end when scanning backward */
+       if (ScanDirectionIsBackward(dir)) {
+       n = PageGetMaxOffsetNumber(p);
+       } else {
+       n = FirstOffsetNumber;
+       }
+   }
+    }
+}
+
+/*
+ *  findnext -- search page p, starting at offset n and moving in
+ *  direction dir, for the first entry that passes index_keytest().
+ *
+ *    Entries on leaf pages are tested against the scan's own keys;
+ *    entries on other pages are tested against the private internal-node
+ *    keys held in the rtree scan opaque data.
+ *
+ *    Returns the offset of the matching entry.  If nothing matches, the
+ *    returned offset lies outside [FirstOffsetNumber, max offset of p];
+ *    callers detect that by range-checking the result.
+ */
+static OffsetNumber
+findnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
+{
+    RTreePageOpaque pageopq = (RTreePageOpaque) PageGetSpecialPointer(p);
+    RTreeScanOpaque scanopq = (RTreeScanOpaque) s->opaque;
+    OffsetNumber lastoff = PageGetMaxOffsetNumber(p);
+    IndexTuple tup;
+
+    /*
+     *  An index update during the scan may have left us pointing at a
+     *  ghost tuple just before the real position.  Consume the flag and
+     *  back up one entry in that case.
+     */
+    if (scanopq->s_flags & RTS_CURBEFORE) {
+        scanopq->s_flags &= ~RTS_CURBEFORE;
+        n = OffsetNumberPrev(n);
+    }
+
+    /* walk the page until an entry matches or we run off either end */
+    for (; n >= FirstOffsetNumber && n <= lastoff;
+         n = ScanDirectionIsBackward(dir) ? OffsetNumberPrev(n)
+                                          : OffsetNumberNext(n)) {
+        tup = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
+
+        if (pageopq->flags & F_LEAF) {
+            /* leaf entries must satisfy the caller's scan keys */
+            if (index_keytest(tup,
+                              RelationGetTupleDescriptor(s->relation),
+                              s->numberOfKeys, s->keyData))
+                break;
+        } else if (index_keytest(tup,
+                                 RelationGetTupleDescriptor(s->relation),
+                                 scanopq->s_internalNKey,
+                                 scanopq->s_internalKey)) {
+            /* internal entries are matched with the internal-node keys */
+            break;
+        }
+    }
+
+    return (n);
+}
+
+/*
+ *  rtscancache -- answer a "no movement" request from the scan's current
+ *  item without walking the tree.
+ *
+ *    Returns NULL unless dir is the no-movement direction and the scan
+ *    has a valid current item.  Otherwise the heap pointer of the current
+ *    index entry is fetched with rtheapptr(); if it is valid, a result is
+ *    formed from it, else NULL is returned.
+ */
+static RetrieveIndexResult
+rtscancache(IndexScanDesc s, ScanDirection dir)
+{
+    ItemPointer heapptr;
+
+    if (!ScanDirectionIsNoMovement(dir))
+        return ((RetrieveIndexResult) NULL);
+    if (!ItemPointerIsValid(&(s->currentItemData)))
+        return ((RetrieveIndexResult) NULL);
+
+    heapptr = rtheapptr(s->relation, &(s->currentItemData));
+    if (!ItemPointerIsValid(heapptr))
+        return ((RetrieveIndexResult) NULL);
+
+    return (FormRetrieveIndexResult(&(s->currentItemData), heapptr));
+}
+
+/*
+ *  rtheapptr -- given the item pointer of an entry in index relation r,
+ *  return a palloc'd copy of the heap item pointer (t_tid) stored in that
+ *  entry.
+ *
+ *    If itemp is not a valid item pointer, the returned pointer is set
+ *    invalid instead and the index is not read.
+ */
+static ItemPointer
+rtheapptr(Relation r, ItemPointer itemp)
+{
+    ItemPointer heapptr = (ItemPointer) palloc(sizeof(ItemPointerData));
+    Buffer buf;
+    Page page;
+    OffsetNumber off;
+    IndexTuple tup;
+
+    if (!ItemPointerIsValid(itemp)) {
+        ItemPointerSetInvalid(heapptr);
+        return (heapptr);
+    }
+
+    /* read the index page and copy the t_tid out while the buffer is held */
+    buf = ReadBuffer(r, ItemPointerGetBlockNumber(itemp));
+    page = BufferGetPage(buf);
+    off = ItemPointerGetOffsetNumber(itemp);
+    tup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+    memmove((char *) heapptr, (char *) &(tup->t_tid),
+            sizeof(ItemPointerData));
+    ReleaseBuffer(buf);
+
+    return (heapptr);
+}
diff --git a/src/backend/access/rtree/rtproc.c b/src/backend/access/rtree/rtproc.c

new file mode 100644 (file)

index 0000000..a2f7bef
--- /dev/null
+++ b/src/backend/access/rtree/rtproc.c
@@ -0,0 +1,150 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtproc.c--
+ *    pg_amproc entries for rtrees.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtproc.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <math.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "utils/elog.h"
+#include "utils/geo-decls.h"
+#include "utils/palloc.h"
+
+/*
+ *  rt_box_union -- return a newly palloc'd box that is the smallest box
+ *  enclosing both a and b.
+ */
+BOX *
+rt_box_union(BOX *a, BOX *b)
+{
+    BOX *result = (BOX *) palloc(sizeof(BOX));
+
+    if (result == (BOX *) NULL)
+        elog(WARN, "Cannot allocate box for union");
+
+    /* high corner takes the maxima, low corner the minima */
+    result->xh = Max(a->xh, b->xh);
+    result->yh = Max(a->yh, b->yh);
+    result->xl = Min(a->xl, b->xl);
+    result->yl = Min(a->yl, b->yl);
+
+    return (result);
+}
+
+/*
+ *  rt_box_inter -- return a newly palloc'd box covering the region
+ *  common to a and b, or NULL if the two boxes do not overlap.
+ *
+ *    Boxes that merely touch (shared edge or corner) yield a degenerate,
+ *    non-NULL box, since only a strictly inverted result is rejected.
+ */
+BOX *
+rt_box_inter(BOX *a, BOX *b)
+{
+    BOX *n;
+    
+    /* the message names the operation actually being performed */
+    if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL)
+   elog(WARN, "Cannot allocate box for intersection");
+    
+    /* high corner takes the minima, low corner the maxima */
+    n->xh = Min(a->xh, b->xh);
+    n->yh = Min(a->yh, b->yh);
+    n->xl = Max(a->xl, b->xl);
+    n->yl = Max(a->yl, b->yl);
+    
+    /* an inverted box means the inputs are disjoint */
+    if (n->xh < n->xl || n->yh < n->yl) {
+   pfree(n);
+   return ((BOX *) NULL);
+    }
+    
+    return (n);
+}
+
+/*
+ *  rt_box_size -- store the area of box a in *size.
+ *
+ *    *size is set to 0.0 when a is NULL or when the box has no positive
+ *    extent in x or in y.
+ */
+void
+rt_box_size(BOX *a, float *size)
+{
+    if (a == (BOX *) NULL || a->xh <= a->xl || a->yh <= a->yl) {
+        *size = 0.0;
+        return;
+    }
+
+    *size = (float) ((a->xh - a->xl) * (a->yh - a->yl));
+}
+
+/*
+ *  rt_bigbox_size() -- Compute a size for big boxes.
+ *
+ * In an earlier release of the system, this routine did something
+ * different from rt_box_size.  We now use floats, rather than ints,
+ * as the return type for the size routine, so we no longer need to
+ * have a special return type for big boxes.
+ *
+ * Today it simply forwards to rt_box_size(): the size of box a is
+ * stored through the caller-supplied pointer `size'.
+ */
+void
+rt_bigbox_size(BOX *a, float *size)
+{
+    rt_box_size(a, size);
+}
+
+/*
+ *  rt_poly_union -- return a newly palloc'd POLYGON whose bounding box
+ *  encloses the bounding boxes of both a and b.
+ *
+ *    The result carries no points (npts is 0, size is sizeof(POLYGON));
+ *    only its bounding box is meaningful.
+ */
+POLYGON *
+rt_poly_union(POLYGON *a, POLYGON *b)
+{
+    POLYGON *result = (POLYGON *) PALLOCTYPE(POLYGON);
+
+    if (!PointerIsValid(result))
+        elog(WARN, "Cannot allocate polygon for union");
+
+    /* clear the whole struct first so padding bytes are zero as well */
+    memset((char *) result, 0, sizeof(POLYGON));
+    result->size = sizeof(POLYGON);
+    result->npts = 0;
+
+    /* high corner takes the maxima, low corner the minima */
+    result->boundbox.xh = Max(a->boundbox.xh, b->boundbox.xh);
+    result->boundbox.yh = Max(a->boundbox.yh, b->boundbox.yh);
+    result->boundbox.xl = Min(a->boundbox.xl, b->boundbox.xl);
+    result->boundbox.yl = Min(a->boundbox.yl, b->boundbox.yl);
+
+    return result;
+}
+
+/*
+ *  rt_poly_size -- store the area of polygon a's bounding box in *size.
+ *
+ *    a:    polygon to measure; may be NULL.
+ *    size: out parameter supplied by the caller.  Receives the area, or
+ *          0.0 when a is NULL or its bounding box has no positive extent
+ *          in x or in y.
+ *
+ *    The result has to be written through the caller's pointer.  This
+ *    routine must not re-point `size' at freshly palloc'd memory: the
+ *    parameter is a local copy, so doing that would leave the caller's
+ *    float unset and leak the allocation.
+ */
+void
+rt_poly_size(POLYGON *a, float *size)
+{
+    double xdim, ydim;
+    
+    if (a == (POLYGON *) NULL || 
+   a->boundbox.xh <= a->boundbox.xl || 
+   a->boundbox.yh <= a->boundbox.yl)
+   *size = 0.0;
+    else {
+   xdim = (a->boundbox.xh - a->boundbox.xl);
+   ydim = (a->boundbox.yh - a->boundbox.yl);
+   
+   *size = (float) (xdim * ydim);
+    }
+    
+    return;
+}
+
+/*
+ *  rt_poly_inter -- return a newly palloc'd POLYGON whose bounding box is
+ *  the overlap of the bounding boxes of a and b, or NULL when those
+ *  bounding boxes do not overlap.
+ *
+ *    The result carries no points (npts is 0, size is sizeof(POLYGON));
+ *    only its bounding box is meaningful.
+ */
+POLYGON *
+rt_poly_inter(POLYGON *a, POLYGON *b)
+{
+    POLYGON *result = (POLYGON *) PALLOCTYPE(POLYGON);
+
+    if (!PointerIsValid(result))
+        elog(WARN, "Cannot allocate polygon for intersection");
+
+    /* clear the whole struct first so padding bytes are zero as well */
+    memset((char *) result, 0, sizeof(POLYGON));
+    result->size = sizeof(POLYGON);
+    result->npts = 0;
+
+    /* high corner takes the minima, low corner the maxima */
+    result->boundbox.xh = Min(a->boundbox.xh, b->boundbox.xh);
+    result->boundbox.yh = Min(a->boundbox.yh, b->boundbox.yh);
+    result->boundbox.xl = Max(a->boundbox.xl, b->boundbox.xl);
+    result->boundbox.yl = Max(a->boundbox.yl, b->boundbox.yl);
+
+    /* an inverted box means the inputs are disjoint */
+    if (result->boundbox.xh < result->boundbox.xl ||
+        result->boundbox.yh < result->boundbox.yl) {
+        pfree(result);
+        return ((POLYGON *) NULL);
+    }
+
+    return (result);
+}
diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c

new file mode 100644 (file)

index 0000000..96efc3b
--- /dev/null
+++ b/src/backend/access/rtree/rtree.c
@@ -0,0 +1,955 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtree.c--
+ *    interface routines for the postgres rtree indexed access method.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/rtree.h"
+#include "access/rtscan.h"
+#include "access/funcindex.h"
+#include "access/tupdesc.h"
+
+#include "nodes/execnodes.h"
+#include "nodes/plannodes.h"
+
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+
+#include "catalog/index.h"
+
+/*
+ *  SPLITVEC -- the outcome of choosing how to split a page: which of the
+ *  page's entries go to the left half and which to the right, plus a key
+ *  datum for each half.  It is filled in by picksplit() and consumed by
+ *  dosplit().
+ */
+typedef struct SPLITVEC {
+    /* offsets, on the page being split, of the entries assigned to the left half */
+    OffsetNumber   *spl_left;
+    /* presumably the number of entries in spl_left -- confirm in picksplit() */
+    int            spl_nleft;
+    /* key datum for the left half; dosplit() forms the parent's left index tuple from it */
+    char       *spl_ldatum;
+    /* offsets, on the page being split, of the entries assigned to the right half */
+    OffsetNumber   *spl_right;
+    /* presumably the number of entries in spl_right -- confirm in picksplit() */
+    int            spl_nright;
+    /* key datum for the right half; dosplit() forms the parent's right index tuple from it */
+    char       *spl_rdatum;
+} SPLITVEC;
+
+/*
+ *  RTSTATE -- the support procedures used while modifying an rtree.
+ *  Filled in by initRtstate() and handed down through the insertion
+ *  routines.  As called in this file: unionFn takes two key datums and
+ *  returns a new datum; sizeFn takes a key datum and a float * through
+ *  which it stores the size.
+ */
+typedef struct RTSTATE {
+    func_ptr unionFn;      /* union function */
+    func_ptr sizeFn;       /* size function */
+    func_ptr interFn;      /* intersection function */
+} RTSTATE;
+
+/* non-export function prototypes */
+static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup,
+                   RTSTATE *rtstate);
+static void rttighten(Relation r, RTSTACK *stk, char *datum, int att_size,
+             RTSTATE *rtstate);
+static InsertIndexResult dosplit(Relation r, Buffer buffer, RTSTACK *stack,
+                IndexTuple itup, RTSTATE *rtstate);
+static void rtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup,
+           IndexTuple rtup, RTSTATE *rtstate);
+static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt);
+static void picksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup,
+             RTSTATE *rtstate);
+static void RTInitBuffer(Buffer b, uint32 f);
+static OffsetNumber choose(Relation r, Page p, IndexTuple it,
+              RTSTATE *rtstate);
+static int nospace(Page p, IndexTuple it);
+static void initRtstate(RTSTATE *rtstate, Relation index);
+
+
+/*
+ *  rtbuild -- build (or extend) an rtree index over a heap relation.
+ *
+ *    Scans every tuple of `heap', forms an index tuple from the indexed
+ *    attributes and inserts it with rtdoinsert().  The index is locked
+ *    for write for the duration of the scan.
+ *
+ *    heap      relation being indexed
+ *    index     the (empty, unless extending) rtree index relation
+ *    natts     number of index attributes
+ *    attnum    heap attribute numbers, handed to GetIndexValue()
+ *    finfo     functional-index information, handed to GetIndexValue()
+ *    predInfo  partial-index predicates: pred (tuples to index) and
+ *              oldPred (non-NULL when extending an existing partial
+ *              index; tuples satisfying it are skipped)
+ *
+ *    istrat, pcount and params are not referenced in this routine.
+ *
+ *    Both relations are closed before returning, and the tuple counts
+ *    gathered here are passed to UpdateStats().
+ */
+void
+rtbuild(Relation heap,
+   Relation index,
+   int natts,
+   AttrNumber *attnum,
+   IndexStrategy istrat,
+   uint16 pcount,
+   Datum *params,
+   FuncIndexInfo *finfo,
+   PredInfo *predInfo)
+{
+    HeapScanDesc scan;
+    Buffer buffer;
+    AttrNumber i;
+    HeapTuple htup;
+    IndexTuple itup;
+    TupleDesc hd, id;
+    InsertIndexResult res;
+    Datum *d;
+    bool *nulls;
+    int nb, nh, ni;
+    ExprContext *econtext;
+    TupleTable tupleTable;
+    TupleTableSlot *slot;
+    Oid hrelid, irelid;
+    Node *pred, *oldPred;
+    RTSTATE rtState;
+
+    initRtstate(&rtState, index);
+       
+    /* rtrees only know how to do stupid locking now */
+    RelationSetLockForWrite(index);
+
+    pred = predInfo->pred;
+    oldPred = predInfo->oldPred;
+
+    /*
+     *  We expect to be called exactly once for any index relation.
+     *  If that's not the case, big trouble's what we have.
+     */
+    
+    if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0)
+   elog(WARN, "%s already contains data", index->rd_rel->relname.data);
+    
+    /* initialize the root page (if this is a new index) */
+    if (oldPred == NULL) {
+   buffer = ReadBuffer(index, P_NEW);
+   RTInitBuffer(buffer, F_LEAF);
+   WriteBuffer(buffer);
+    }
+    
+    /* init the tuple descriptors and get set for a heap scan */
+    hd = RelationGetTupleDescriptor(heap);
+    id = RelationGetTupleDescriptor(index);
+    d = (Datum *)palloc(natts * sizeof (*d));
+    nulls = (bool *)palloc(natts * sizeof (*nulls));
+    
+    /*
+     * If this is a predicate (partial) index, we will need to evaluate the
+     * predicate using ExecQual, which requires the current tuple to be in a
+     * slot of a TupleTable.  In addition, ExecQual must have an ExprContext
+     * referring to that slot.  Here, we initialize dummy TupleTable and
+     * ExprContext objects for this purpose. --Nels, Feb '92
+     */
+#ifndef OMIT_PARTIAL_INDEX
+    if (pred != NULL || oldPred != NULL) {
+   tupleTable = ExecCreateTupleTable(1);
+   slot =  ExecAllocTableSlot(tupleTable);
+   econtext = makeNode(ExprContext);
+   /*
+    * NOTE(review): `buffer' has not been assigned on the oldPred != NULL
+    * path at this point, and on the other path it is the root buffer
+    * already handed to WriteBuffer() -- confirm what
+    * FillDummyExprContext() does with it.
+    */
+   FillDummyExprContext(econtext, slot, hd, buffer);
+    }
+#endif /* OMIT_PARTIAL_INDEX */    
+    scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
+    htup = heap_getnext(scan, 0, &buffer);
+    
+    /* count the tuples as we insert them */
+    /* nh counts heap tuples seen; ni counts tuples that are (or already were) in the index */
+    nh = ni = 0;
+    
+    for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) {
+   
+   nh++;
+   
+   /*
+    * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+    * this tuple if it was already in the existing partial index
+    */
+   if (oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+       /*SetSlotContents(slot, htup); */
+       slot->val = htup;
+       if (ExecQual((List*)oldPred, econtext) == true) {
+       ni++;
+       continue;
+       }
+#endif /* OMIT_PARTIAL_INDEX */        
+   }
+   
+   /* Skip this tuple if it doesn't satisfy the partial-index predicate */
+   if (pred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+       /*SetSlotContents(slot, htup); */
+       slot->val = htup;
+       if (ExecQual((List*)pred, econtext) == false)
+       continue;
+#endif /* OMIT_PARTIAL_INDEX */        
+   }
+   
+   ni++;
+   
+   /*
+    *  For the current heap tuple, extract all the attributes
+    *  we use in this index, and note which are null.
+    */
+   
+   for (i = 1; i <= natts; i++) {
+       int  attoff;
+       bool attnull;
+       
+       /*
+        *  Offsets are from the start of the tuple, and are
+        *  zero-based; indices are one-based.  The next call
+        *  returns i - 1.  That's data hiding for you.
+        */
+       
+       attoff = AttrNumberGetAttrOffset(i);
+       /*
+         d[attoff] = HeapTupleGetAttributeValue(htup, buffer,
+         */
+       d[attoff] = GetIndexValue(htup, 
+                     hd,
+                     attoff, 
+                     attnum, 
+                     finfo, 
+                     &attnull,
+                     buffer);
+       /* null flags are stored as characters: 'n' for null, ' ' otherwise */
+       nulls[attoff] = (attnull ? 'n' : ' ');
+   }
+   
+   /* form an index tuple and point it at the heap tuple */
+   itup = index_formtuple(id, &d[0], nulls);
+   itup->t_tid = htup->t_ctid;
+   
+   /*
+    *  Since we already have the index relation locked, we
+    *  call rtdoinsert directly.  Normal access method calls
+    *  dispatch through rtinsert, which locks the relation
+    *  for write.  This is the right thing to do if you're
+    *  inserting single tups, but not when you're initializing
+    *  the whole index at once.
+    */
+   
+   res = rtdoinsert(index, itup, &rtState);
+   pfree(itup);
+   pfree(res);
+    }
+    
+    /* okay, all heap tuples are indexed */
+    heap_endscan(scan);
+    RelationUnsetLockForWrite(index);
+    
+    if (pred != NULL || oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+   ExecDestroyTupleTable(tupleTable, true);
+   pfree(econtext);
+#endif /* OMIT_PARTIAL_INDEX */        
+    }
+    
+    /*
+     *  Since we just counted the tuples in the heap, we update its
+     *  stats in pg_relation to guarantee that the planner takes
+     *  advantage of the index we just created.  UpdateStats() does a
+     *  CommandCounterIncrement(), which flushes changed entries from
+     *  the system relcache.  The act of constructing an index changes
+     *  these heap and index tuples in the system catalogs, so they
+     *  need to be flushed.  We close them to guarantee that they
+     *  will be.
+     */
+    
+    /* save the oids first: the relation descriptors are closed before they are needed */
+    hrelid = heap->rd_id;
+    irelid = index->rd_id;
+    heap_close(heap);
+    index_close(index);
+    
+    UpdateStats(hrelid, nh, true);
+    UpdateStats(irelid, ni, false);
+    
+    if (oldPred != NULL) {
+   /* every heap tuple is now indexed, so the stored predicate is cleared */
+   if (ni == nh) pred = NULL;
+   UpdateIndexPredicate(irelid, oldPred, pred);
+    }
+    
+    /* be tidy */
+    pfree(nulls);
+    pfree(d);
+}
+
+/*
+ *  rtinsert -- public entry point for inserting one tuple into an rtree.
+ *
+ *    Sets up the support-procedure state for relation r, takes the write
+ *    lock on it and lets rtdoinsert() do the actual insertion.  The
+ *    InsertIndexResult produced by rtdoinsert() is returned unchanged.
+ */
+InsertIndexResult
+rtinsert(Relation r, IndexTuple itup)
+{
+    RTSTATE state;
+
+    initRtstate(&state, r);
+    RelationSetLockForWrite(r);
+
+    /* XXX two-phase locking -- don't unlock the relation until EOT */
+    return (rtdoinsert(r, itup, &state));
+}
+
+/*
+ *  rtdoinsert -- insert itup into rtree r; the caller holds the lock.
+ *
+ *    Walks down from the root, letting choose() pick the child to follow
+ *    on each internal page and recording the path as an RTSTACK.  At the
+ *    leaf, the tuple is either added directly (after which the parents'
+ *    keys are adjusted by rttighten()) or, if the page has no room, the
+ *    page is split by dosplit().
+ *
+ *    Returns a palloc'd InsertIndexResult.  The path stack is freed
+ *    before returning.
+ */
+static InsertIndexResult
+rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate)
+{
+    Buffer curbuf;
+    Page curpage;
+    BlockNumber curblk = P_ROOT;
+    RTSTACK *path = (RTSTACK *) NULL;
+    RTreePageOpaque pageopq;
+    OffsetNumber target;
+    OffsetNumber newoff;
+    InsertIndexResult result;
+
+    /* descend to a leaf, holding only one buffer at a time */
+    for (;;) {
+        RTSTACK *frame;
+        IndexTuple chosen;
+
+        curbuf = ReadBuffer(r, curblk);
+        curpage = (Page) BufferGetPage(curbuf);
+        pageopq = (RTreePageOpaque) PageGetSpecialPointer(curpage);
+
+        if (pageopq->flags & F_LEAF)
+            break;
+
+        /* internal page: push it on the path and pick the child to enter */
+        frame = (RTSTACK *) palloc(sizeof(RTSTACK));
+        frame->rts_parent = path;
+        frame->rts_blk = curblk;
+        frame->rts_child = choose(r, curpage, itup, rtstate);
+        path = frame;
+
+        chosen = (IndexTuple) PageGetItem(curpage,
+                                          PageGetItemId(curpage,
+                                                        frame->rts_child));
+        curblk = ItemPointerGetBlockNumber(&(chosen->t_tid));
+
+        /* let go of this buffer before getting the next */
+        ReleaseBuffer(curbuf);
+    }
+
+    /* no room on the leaf: split it, which also places the new tuple */
+    if (nospace(curpage, itup)) {
+        result = dosplit(r, curbuf, path, itup, rtstate);
+        freestack(path);
+        WriteBuffer(curbuf);  /* don't forget to release buffer! */
+        return (result);
+    }
+
+    /* the tuple goes after the last entry, or first on an empty page */
+    if (PageIsEmpty(curpage))
+        target = FirstOffsetNumber;
+    else
+        target = OffsetNumberNext(PageGetMaxOffsetNumber(curpage));
+
+    newoff = PageAddItem(curpage, (Item) itup, IndexTupleSize(itup),
+                         target, LP_USED);
+    WriteBuffer(curbuf);
+
+    /* now expand the page boundary in the parent to include the new child */
+    rttighten(r, path,
+              ((char *) itup) + sizeof(IndexTupleData),
+              (IndexTupleSize(itup) - sizeof(IndexTupleData)), rtstate);
+    freestack(path);
+
+    /* build and return an InsertIndexResult for this insertion */
+    result = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+    ItemPointerSet(&(result->pointerData), curblk, newoff);
+
+    return (result);
+}
+
+/*
+ *  rttighten -- propagate a key change up the tree.
+ *
+ *    stk names a parent page (rts_blk) and the entry on it (rts_child)
+ *    that points at the child we came from.  The key stored in that entry
+ *    is unioned with `datum'; if the union's size (as reported by the
+ *    size procedure) differs from the old key's size, the union is copied
+ *    over the old key in place, the page is written, and the routine
+ *    recurses on the next stack level.  Otherwise the page is released
+ *    untouched and the recursion stops.
+ *
+ *    r         the index relation
+ *    stk       remaining path to the root; NULL ends the recursion
+ *    datum     key data to be covered by the parent entry (not freed here)
+ *    att_size  number of key bytes to copy for fixed-length keys
+ *    rtstate   support procedures
+ */
+static void
+rttighten(Relation r,
+     RTSTACK *stk,
+     char *datum,
+     int att_size,
+     RTSTATE *rtstate)
+{
+    char *oldud;
+    char *tdatum;
+    Page p;
+    float old_size, newd_size;
+    Buffer b;
+    
+    if (stk == (RTSTACK *) NULL)
+   return;
+    
+    b = ReadBuffer(r, stk->rts_blk);
+    p = BufferGetPage(b);
+    
+    /* oldud points at the key bytes stored after the index tuple header, inside the page */
+    oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->rts_child));
+    oldud += sizeof(IndexTupleData);
+    
+    (*rtstate->sizeFn)(oldud, &old_size);
+    /* from here on `datum' is the union result, which this routine frees at the end */
+    datum = (char *) (*rtstate->unionFn)(oldud, datum);
+    
+    (*rtstate->sizeFn)(datum, &newd_size);
+    
+    /*
+     * NOTE(review): a size change is used as the test for "the key grew".
+     * The box size routine reports 0.0 for any box without positive
+     * extent, so a union of such keys could differ from the old key and
+     * still compare equal here -- confirm this is acceptable.
+     */
+    if (newd_size != old_size) {
+   TupleDesc td = RelationGetTupleDescriptor(r);
+   
+   if (td->attrs[0]->attlen < 0) {
+       /*
+        * This is an internal page, so 'oldud' had better be a
+        * union (constant-length) key, too.  (See comment below.)
+        */
+       Assert(VARSIZE(datum) == VARSIZE(oldud));
+       memmove(oldud, datum, VARSIZE(datum));
+   } else {
+       memmove(oldud, datum, att_size);
+   }
+   WriteBuffer(b);
+   
+   /*
+    *  The user may be defining an index on variable-sized data (like
+    *  polygons).  If so, we need to get a constant-sized datum for
+    *  insertion on the internal page.  We do this by calling the union
+    *  proc, which is guaranteed to return a rectangle.
+    */
+   
+   tdatum = (char *) (*rtstate->unionFn)(datum, datum);
+   rttighten(r, stk->rts_parent, tdatum, att_size, rtstate);
+   pfree(tdatum);
+    } else {
+   ReleaseBuffer(b);
+    }
+    pfree(datum);
+}
+
+/*
+ *  dosplit -- split a page in the tree.
+ *
+ *    This is the quadratic-cost split algorithm Guttman describes in
+ *    his paper.  The reason we chose it is that you can implement this
+ *    with less information about the data types on which you're operating.
+ *
+ *    r        the index relation
+ *    buffer   buffer holding the full page; the caller keeps its own
+ *             reference and writes/releases it afterwards
+ *    stack    path from the root down to the parent of this page
+ *             (NULL when the page being split is the root)
+ *    itup     the new tuple that did not fit
+ *    rtstate  support procedures
+ *
+ *    picksplit() decides which existing entries go left and which go
+ *    right.  The entries are copied onto a left and a right page, the new
+ *    tuple is added to one of them, active scans are adjusted, and index
+ *    tuples pointing at the two pages are handed to rtintinsert() for
+ *    installation in the parent.
+ *
+ *    Returns a palloc'd InsertIndexResult for the new tuple.
+ */
+static InsertIndexResult
+dosplit(Relation r,
+   Buffer buffer,
+   RTSTACK *stack,
+   IndexTuple itup,
+   RTSTATE *rtstate)
+{
+    Page p;
+    Buffer leftbuf, rightbuf;
+    Page left, right;
+    ItemId itemid;
+    IndexTuple item;
+    IndexTuple ltup, rtup;
+    OffsetNumber maxoff;
+    OffsetNumber i;
+    OffsetNumber leftoff, rightoff;
+    BlockNumber lbknum, rbknum;
+    BlockNumber bufblock;
+    RTreePageOpaque opaque;
+    int blank;
+    InsertIndexResult res;
+    char *isnull;
+    SPLITVEC v;
+    TupleDesc tupDesc;
+    
+    /* null-flag array for index_formtuple(): ' ' marks every attribute as not null */
+    isnull = (char *) palloc(r->rd_rel->relnatts);
+    for (blank = 0; blank < r->rd_rel->relnatts; blank++)
+   isnull[blank] = ' ';
+    p = (Page) BufferGetPage(buffer);
+    opaque = (RTreePageOpaque) PageGetSpecialPointer(p);
+    
+    /*
+     *  The root of the tree is the first block in the relation.  If
+     *  we're about to split the root, we need to do some hocus-pocus
+     *  to enforce this guarantee.
+     */
+    
+    if (BufferGetBlockNumber(buffer) == P_ROOT) {
+   /* root split: the left half goes to a brand-new page so block P_ROOT can be reused as the root */
+   leftbuf = ReadBuffer(r, P_NEW);
+   RTInitBuffer(leftbuf, opaque->flags);
+   lbknum = BufferGetBlockNumber(leftbuf);
+   left = (Page) BufferGetPage(leftbuf);
+    } else {
+   /* otherwise the left half is assembled in a temporary page and copied back over p below */
+   leftbuf = buffer;
+   IncrBufferRefCount(buffer);
+   lbknum = BufferGetBlockNumber(buffer);
+   left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData));
+    }
+    
+    /* the right half always goes to a new page with the same flags as the original */
+    rightbuf = ReadBuffer(r, P_NEW);
+    RTInitBuffer(rightbuf, opaque->flags);
+    rbknum = BufferGetBlockNumber(rightbuf);
+    right = (Page) BufferGetPage(rightbuf);
+    
+    picksplit(r, p, &v, itup, rtstate);
+    
+    /*
+     * Distribute the existing entries.  The page is walked in offset
+     * order and compared against the head of the left vector, so this
+     * relies on v.spl_left listing offsets in ascending order -- presumably
+     * guaranteed by picksplit(); confirm there.
+     */
+    leftoff = rightoff = FirstOffsetNumber;
+    maxoff = PageGetMaxOffsetNumber(p);
+    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+   itemid = PageGetItemId(p, i);
+   item = (IndexTuple) PageGetItem(p, itemid);
+   
+   if (i == *(v.spl_left)) {
+       (void) PageAddItem(left, (Item) item, IndexTupleSize(item),
+                  leftoff, LP_USED);
+       leftoff = OffsetNumberNext(leftoff);
+       v.spl_left++;   /* advance in left split vector */
+   } else {
+       (void) PageAddItem(right, (Item) item, IndexTupleSize(item),
+                  rightoff, LP_USED);
+       rightoff = OffsetNumberNext(rightoff);
+       v.spl_right++;  /* advance in right split vector */
+   }
+    }
+    
+    /* build an InsertIndexResult for this insertion */
+    res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+    
+    /*
+     * now insert the new index tuple
+     *
+     * The side is chosen from the value now at the head of the left
+     * vector; the encoding of "the new tuple goes left" is set up by
+     * picksplit() -- confirm there.
+     *
+     * NOTE(review): leftoff/rightoff is advanced before it is stored in
+     * res, so res appears to name the slot after the one just filled --
+     * confirm whether that is intended.
+     */
+    if (*(v.spl_left) != FirstOffsetNumber) {
+   (void) PageAddItem(left, (Item) itup, IndexTupleSize(itup),
+              leftoff, LP_USED);
+   leftoff = OffsetNumberNext(leftoff);
+   ItemPointerSet(&(res->pointerData), lbknum, leftoff);
+    } else {
+   (void) PageAddItem(right, (Item) itup, IndexTupleSize(itup),
+              rightoff, LP_USED);
+   rightoff = OffsetNumberNext(rightoff);
+   ItemPointerSet(&(res->pointerData), rbknum, rightoff);
+    }
+    
+    /* non-root split: copy the assembled left half back over the original page */
+    if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT) {
+   PageRestoreTempPage(left, p);
+    }
+    WriteBuffer(leftbuf);
+    WriteBuffer(rightbuf);
+    
+    /*
+     *  Okay, the page is split.  We have three things left to do:
+     *
+     *    1)  Adjust any active scans on this index to cope with changes
+     *        we introduced in its structure by splitting this page.
+     *
+     *    2)  "Tighten" the bounding box of the pointer to the left
+     *       page in the parent node in the tree, if any.  Since we
+     *       moved a bunch of stuff off the left page, we expect it
+     *       to get smaller.  This happens in the internal insertion
+     *        routine.
+     *
+     *    3)  Insert a pointer to the right page in the parent.  This
+     *       may cause the parent to split.  If it does, we need to
+     *       repeat steps one and two for each split node in the tree.
+     */
+    
+    /* adjust active scans */
+    rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber);
+    
+    /* form the parent-level index tuples from the key datums picksplit() produced */
+    tupDesc = r->rd_att;
+    ltup = (IndexTuple) index_formtuple(tupDesc,
+                   (Datum *) &(v.spl_ldatum), isnull);
+    rtup = (IndexTuple) index_formtuple(tupDesc,
+                   (Datum *) &(v.spl_rdatum), isnull);
+    pfree(isnull);
+    
+    /* set pointers to new child pages in the internal index tuples */
+    ItemPointerSet(&(ltup->t_tid), lbknum, 1);
+    ItemPointerSet(&(rtup->t_tid), rbknum, 1);
+    
+    rtintinsert(r, stack, ltup, rtup, rtstate);
+    
+    pfree(ltup);
+    pfree(rtup);
+    
+    return (res);
+}
+
+/*
+ *  rtintinsert -- install the results of a page split in the parent.
+ *
+ *    ltup and rtup are index tuples pointing at the left and right pages
+ *    produced by a split.  With an empty stack the split page was the
+ *    root, so a new root holding both tuples is built.  Otherwise the
+ *    parent entry that pointed at the split page is overwritten with ltup
+ *    and rtup is added to the parent page -- splitting the parent in turn
+ *    if rtup does not fit.  Keys further up are adjusted via rttighten().
+ *
+ *    r        the index relation
+ *    stk      path to the parent page; rts_child is the entry to replace
+ *    ltup     tuple for the left page (must be the same size as the old entry)
+ *    rtup     tuple for the right page
+ *    rtstate  support procedures
+ */
+static void
+rtintinsert(Relation r,
+       RTSTACK *stk,
+       IndexTuple ltup,
+       IndexTuple rtup,
+       RTSTATE *rtstate)
+{
+    IndexTuple old;
+    Buffer b;
+    Page p;
+    char *ldatum, *rdatum, *newdatum;
+    InsertIndexResult res;
+    
+    if (stk == (RTSTACK *) NULL) {
+   rtnewroot(r, ltup, rtup);
+   return;
+    }
+    
+    b = ReadBuffer(r, stk->rts_blk);
+    p = BufferGetPage(b);
+    old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child));
+    
+    /*
+     *  This is a hack.  Right now, we force rtree keys to be constant size.
+     *  To fix this, need delete the old key and add both left and right
+     *  for the two new pages.  The insertion of left may force a split if
+     *  the new left key is bigger than the old key.
+     */
+    
+    if (IndexTupleSize(old) != IndexTupleSize(ltup))
+   elog(WARN, "Variable-length rtree keys are not supported.");
+    
+    /* install pointer to left child */
+    memmove(old, ltup,IndexTupleSize(ltup));
+    
+    if (nospace(p, rtup)) {
+   /* parent is full: adjust the grandparent's key for ltup, then split the parent around rtup */
+   newdatum = (((char *) ltup) + sizeof(IndexTupleData));
+   rttighten(r, stk->rts_parent, newdatum,
+         (IndexTupleSize(ltup) - sizeof(IndexTupleData)), rtstate);
+   res = dosplit(r, b, stk->rts_parent, rtup, rtstate);
+   WriteBuffer(b);  /* don't forget to release buffer!  - 01/31/94 */
+   pfree(res);
+    } else {
+   /*
+    * NOTE(review): rtup is added at offset PageGetMaxOffsetNumber(p),
+    * not at the offset following it -- confirm how PageAddItem()
+    * treats an offset that is already in use.
+    */
+   (void) PageAddItem(p, (Item) rtup, IndexTupleSize(rtup),
+              PageGetMaxOffsetNumber(p), LP_USED);
+   WriteBuffer(b);
+   /* the ancestors must now cover the union of both halves */
+   ldatum = (((char *) ltup) + sizeof(IndexTupleData));
+   rdatum = (((char *) rtup) + sizeof(IndexTupleData));
+   newdatum = (char *) (*rtstate->unionFn)(ldatum, rdatum);
+   
+   rttighten(r, stk->rts_parent, newdatum,
+         (IndexTupleSize(rtup) - sizeof(IndexTupleData)), rtstate);
+   
+   pfree(newdatum);
+    }
+}
+
+/*
+ *  rtnewroot() -- reinitialize the root page (P_ROOT) as an internal page
+ *  holding exactly two entries: lt in the first slot and rt in the second.
+ */
+static void
+rtnewroot(Relation r, IndexTuple lt, IndexTuple rt)
+{
+    Buffer rootbuf;
+    Page rootpage;
+    OffsetNumber slot;
+
+    rootbuf = ReadBuffer(r, P_ROOT);
+    RTInitBuffer(rootbuf, 0);
+    rootpage = BufferGetPage(rootbuf);
+
+    slot = FirstOffsetNumber;
+    (void) PageAddItem(rootpage, (Item) lt, IndexTupleSize(lt),
+                       slot, LP_USED);
+
+    slot = OffsetNumberNext(FirstOffsetNumber);
+    (void) PageAddItem(rootpage, (Item) rt, IndexTupleSize(rt),
+                       slot, LP_USED);
+
+    WriteBuffer(rootbuf);
+}
+
+/*
+ *  picksplit() -- decide how to distribute the entries of a full page,
+ *  plus the incoming tuple itup, over two pages.
+ *
+ *  First every pair of existing entries is examined and the pair whose
+ *  union wastes the most space (size of union minus size of intersection)
+ *  is chosen as the two seeds.  Then every entry, and finally itup (which
+ *  is given the offset maxoff + 1), is assigned to whichever seed group
+ *  needs the smaller enlargement to absorb it.
+ *
+ *  Results are returned in *v:
+ *    spl_left / spl_right    palloc'd arrays of offsets, in increasing
+ *                            order, each terminated by FirstOffsetNumber
+ *    spl_nleft / spl_nright  number of offsets in each array
+ *    spl_ldatum / spl_rdatum union key of each group, as returned by
+ *                            the index's union function
+ */
+static void
+picksplit(Relation r,
+          Page page,
+          SPLITVEC *v,
+          IndexTuple itup,
+          RTSTATE *rtstate)
+{
+    OffsetNumber maxoff;
+    OffsetNumber i, j;
+    IndexTuple item_1, item_2;
+    char *datum_alpha, *datum_beta;
+    char *datum_l, *datum_r;
+    char *union_d, *union_dl, *union_dr;
+    char *inter_d;
+    bool firsttime;
+    float size_alpha, size_beta, size_union, size_inter;
+    float size_waste, waste;
+    float size_l, size_r;
+    int nbytes;
+    OffsetNumber seed_1 = 0, seed_2 = 0;
+    OffsetNumber *left, *right;
+
+    maxoff = PageGetMaxOffsetNumber(page);
+
+    /* room for every existing entry, the new tuple, and the sentinel */
+    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+    v->spl_left = (OffsetNumber *) palloc(nbytes);
+    v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+    firsttime = true;
+    waste = 0.0;
+
+    /* pass 1: find the pair of entries that is worst to keep together */
+    for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
+        item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+        datum_alpha = ((char *) item_1) + sizeof(IndexTupleData);
+        for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) {
+            item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j));
+            datum_beta = ((char *) item_2) + sizeof(IndexTupleData);
+
+            /* compute the wasted space by unioning these guys */
+            union_d = (char *)(rtstate->unionFn)(datum_alpha, datum_beta);
+            (rtstate->sizeFn)(union_d, &size_union);
+            inter_d = (char *)(rtstate->interFn)(datum_alpha, datum_beta);
+            (rtstate->sizeFn)(inter_d, &size_inter);
+            size_waste = size_union - size_inter;
+
+            pfree(union_d);
+
+            if (inter_d != (char *) NULL)
+                pfree(inter_d);
+
+            /*
+             *  are these a more promising split than what we've
+             *  already seen?
+             */
+
+            if (size_waste > waste || firsttime) {
+                waste = size_waste;
+                seed_1 = i;
+                seed_2 = j;
+                firsttime = false;
+            }
+        }
+    }
+
+    left = v->spl_left;
+    v->spl_nleft = 0;
+    right = v->spl_right;
+    v->spl_nright = 0;
+
+    /*
+     *  Start each group's running union from its seed.  Unioning a datum
+     *  with itself yields a separately allocated copy that can be pfree'd
+     *  and replaced as the group grows.
+     */
+    item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1));
+    datum_alpha = ((char *) item_1) + sizeof(IndexTupleData);
+    datum_l = (char *)(*rtstate->unionFn)(datum_alpha, datum_alpha);
+    (*rtstate->sizeFn)(datum_l, &size_l);
+    item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2));
+    datum_beta = ((char *) item_2) + sizeof(IndexTupleData);
+    datum_r = (char *)(*rtstate->unionFn)(datum_beta, datum_beta);
+    (*rtstate->sizeFn)(datum_r, &size_r);
+
+    /*
+     *  Now split up the regions between the two seeds.  An important
+     *  property of this split algorithm is that the split vector v
+     *  has the indices of items to be split in order in its left and
+     *  right vectors.  We exploit this property by doing a merge in
+     *  the code that actually splits the page.
+     *
+     *  For efficiency, we also place the new index tuple in this loop.
+     *  This is handled at the very end, when we have placed all the
+     *  existing tuples and i == maxoff + 1.
+     */
+
+    maxoff = OffsetNumberNext(maxoff);
+    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+
+        /*
+         *  If we've already decided where to place this item, just
+         *  put it on the right list.  Otherwise, we need to figure
+         *  out which page needs the least enlargement in order to
+         *  store the item.
+         */
+
+        if (i == seed_1) {
+            *left++ = i;
+            v->spl_nleft++;
+            continue;
+        } else if (i == seed_2) {
+            *right++ = i;
+            v->spl_nright++;
+            continue;
+        }
+
+        /* okay, which page needs least enlargement? */
+        if (i == maxoff) {
+            item_1 = itup;
+        } else {
+            item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+        }
+
+        datum_alpha = ((char *) item_1) + sizeof(IndexTupleData);
+        union_dl = (char *)(*rtstate->unionFn)(datum_l, datum_alpha);
+        union_dr = (char *)(*rtstate->unionFn)(datum_r, datum_alpha);
+        (*rtstate->sizeFn)(union_dl, &size_alpha);
+        (*rtstate->sizeFn)(union_dr, &size_beta);
+
+        /* pick which page to add it to */
+        if (size_alpha - size_l < size_beta - size_r) {
+            pfree(datum_l);
+            pfree(union_dr);
+            datum_l = union_dl;
+            size_l = size_alpha;
+            *left++ = i;
+            v->spl_nleft++;
+        } else {
+            pfree(datum_r);
+            pfree(union_dl);
+            datum_r = union_dr;
+            /*
+             *  size_beta is the size of union_dr, which is now the right
+             *  group's key.  (This used to store size_alpha, the size of
+             *  the discarded left union, which skewed every later
+             *  enlargement comparison.)
+             */
+            size_r = size_beta;
+            *right++ = i;
+            v->spl_nright++;
+        }
+    }
+    *left = *right = FirstOffsetNumber;    /* sentinel value, see dosplit() */
+
+    v->spl_ldatum = datum_l;
+    v->spl_rdatum = datum_r;
+}
+
+/*
+ *  RTInitBuffer() -- zero the page held in buffer b, initialize it as an
+ *  empty page with an rtree special area, and store f in that area's flags.
+ */
+static void
+RTInitBuffer(Buffer b, uint32 f)
+{
+    Size nbytes;
+    Page pg;
+    RTreePageOpaque special;
+
+    nbytes = BufferGetPageSize(b);
+    pg = BufferGetPage(b);
+
+    /* start from a clean slate before laying out the page header */
+    memset(pg, 0, (int) nbytes);
+    PageInit(pg, nbytes, sizeof(RTreePageOpaqueData));
+
+    special = (RTreePageOpaque) PageGetSpecialPointer(pg);
+    special->flags = f;
+}
+
+/*
+ *  choose() -- pick the entry on page p whose key grows the least when
+ *  unioned with the key of tuple it, and return its offset.  The scan
+ *  stops early at the first entry that needs no growth at all.
+ */
+static OffsetNumber
+choose(Relation r, Page p, IndexTuple it, RTSTATE *rtstate)
+{
+    OffsetNumber last, cur, best;
+    char *newkey, *entrykey, *merged;
+    float mergedsize, entrysize;
+    float bestgrow;
+
+    newkey = ((char *) it) + sizeof(IndexTupleData);
+    last = PageGetMaxOffsetNumber(p);
+    bestgrow = -1.0;        /* negative means "nothing chosen yet" */
+    best = -1;
+
+    cur = FirstOffsetNumber;
+    while (cur <= last) {
+        /* the key follows the IndexTupleData header of each entry */
+        entrykey = (char *) PageGetItem(p, PageGetItemId(p, cur));
+        entrykey += sizeof(IndexTupleData);
+
+        (*rtstate->sizeFn)(entrykey, &entrysize);
+        merged = (char *) (*rtstate->unionFn)(entrykey, newkey);
+        (*rtstate->sizeFn)(merged, &mergedsize);
+        pfree(merged);
+
+        if (bestgrow < 0 || mergedsize - entrysize < bestgrow) {
+            best = cur;
+            bestgrow = mergedsize - entrysize;
+            if (bestgrow == 0)
+                break;
+        }
+
+        cur = OffsetNumberNext(cur);
+    }
+
+    return (best);
+}
+
+/*
+ *  nospace() -- nonzero iff the free space on page p is smaller than
+ *  the size of index tuple it.
+ */
+static int
+nospace(Page p, IndexTuple it)
+{
+    if (PageGetFreeSpace(p) < IndexTupleSize(it))
+        return 1;
+    return 0;
+}
+
+/*
+ *  freestack() -- pfree every entry of a parent stack, following the
+ *  rts_parent links from s until NULL.  A NULL stack is a no-op.
+ */
+void
+freestack(RTSTACK *s)
+{
+    RTSTACK *up;
+
+    for (; s != (RTSTACK *) NULL; s = up) {
+        /* grab the link before the entry is released */
+        up = s->rts_parent;
+        pfree(s);
+    }
+}
+
+/*
+ *  rtdelete() -- remove the index tuple addressed by tid from index r.
+ *
+ *  Takes the write lock on r, adjusts any registered scans for the
+ *  deletion, then deletes the tuple from its page.  Always returns NULL.
+ */
+char *
+rtdelete(Relation r, ItemPointer tid)
+{
+    BlockNumber victimblk;
+    OffsetNumber victimoff;
+    Buffer victimbuf;
+    Page victimpage;
+
+    /* deletion requires the write lock */
+    RelationSetLockForWrite(r);
+
+    victimblk = ItemPointerGetBlockNumber(tid);
+    victimoff = ItemPointerGetOffsetNumber(tid);
+
+    /* fix up scans positioned on this page before the tuple disappears */
+    rtadjscans(r, RTOP_DEL, victimblk, victimoff);
+
+    /* now take the tuple off its page */
+    victimbuf = ReadBuffer(r, victimblk);
+    victimpage = BufferGetPage(victimbuf);
+
+    PageIndexTupleDelete(victimpage, victimoff);
+
+    WriteBuffer(victimbuf);
+
+    /* XXX -- two-phase locking, don't release the write lock */
+    return ((char *) NULL);
+}
+
+/*
+ *  initRtstate() -- look up the union, size and intersection support
+ *  procedures of attribute 1 of the index and cache their function
+ *  pointers in *rtstate.
+ */
+static void initRtstate(RTSTATE *rtstate, Relation index)
+{
+    RegProcedure unionOid, sizeOid, interOid;
+    func_ptr fn;
+    int nargs;
+
+    unionOid = index_getprocid(index, 1, RT_UNION_PROC);
+    sizeOid = index_getprocid(index, 1, RT_SIZE_PROC);
+    interOid = index_getprocid(index, 1, RT_INTER_PROC);
+
+    /* nargs is filled in by fmgr_info but not needed here */
+    fmgr_info(unionOid, &fn, &nargs);
+    rtstate->unionFn = fn;
+
+    fmgr_info(sizeOid, &fn, &nargs);
+    rtstate->sizeFn = fn;
+
+    fmgr_info(interOid, &fn, &nargs);
+    rtstate->interFn = fn;
+}
+
+#define RTDEBUG
+#ifdef RTDEBUG
+#include "utils/geo-decls.h"
+
+/*
+ *  _rtdump() -- debugging aid: print every page of index r to stdout.
+ *
+ *  For each block a header line gives the block number, the highest
+ *  offset in use and whether the page is a leaf; each entry on a
+ *  non-empty page is then printed with its size, the block/offset stored
+ *  in its t_tid, and its key formatted by box_out().
+ */
+void
+_rtdump(Relation r)
+{
+    BlockNumber nblocks, blkno;
+    BlockNumber itblkno;
+    OffsetNumber offnum, maxoff;
+    OffsetNumber itoffno;
+    Buffer buf;
+    Page page;
+    RTreePageOpaque po;
+    IndexTuple itup;
+    char *datum;
+    char *itkey;
+
+    nblocks = RelationGetNumberOfBlocks(r);
+    for (blkno = 0; blkno < nblocks; blkno++) {
+        buf = ReadBuffer(r, blkno);
+        page = BufferGetPage(buf);
+        po = (RTreePageOpaque) PageGetSpecialPointer(page);
+        maxoff = PageGetMaxOffsetNumber(page);
+        printf("Page %d maxoff %d <%s>\n", blkno, maxoff,
+               (po->flags & F_LEAF ? "LEAF" : "INTERNAL"));
+
+        if (!PageIsEmpty(page)) {
+            offnum = FirstOffsetNumber;
+            while (offnum <= maxoff) {
+                itup = (IndexTuple) PageGetItem(page,
+                                                PageGetItemId(page, offnum));
+                itblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
+                itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid));
+
+                /* the key follows the IndexTupleData header */
+                datum = ((char *) itup) + sizeof(IndexTupleData);
+                itkey = (char *) box_out((BOX *) datum);
+                printf("\t[%d] size %d heap <%d,%d> key:%s\n",
+                       offnum, IndexTupleSize(itup), itblkno, itoffno, itkey);
+                pfree(itkey);
+
+                offnum = OffsetNumberNext(offnum);
+            }
+        }
+
+        ReleaseBuffer(buf);
+    }
+}
+#endif /* defined RTDEBUG */
+
diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c

new file mode 100644 (file)

index 0000000..aa68f0d
--- /dev/null
+++ b/src/backend/access/rtree/rtscan.c
@@ -0,0 +1,392 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtscan.c--
+ *    routines to manage scans on index relations
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/rtree.h"
+#include "access/rtstrat.h"
+
+/* routines defined and used here */
+static void rtregscan(IndexScanDesc s);
+static void rtdropscan(IndexScanDesc s);
+static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno,
+            OffsetNumber offnum);
+static void adjuststack(RTSTACK *stk, BlockNumber blkno,
+           OffsetNumber offnum);
+static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
+              int op, BlockNumber blkno, OffsetNumber offnum);
+
+/*
+ *  Whenever we start an rtree scan in a backend, we register it in private
+ *  space.  Then if the rtree index gets updated, we check all registered
+ *  scans and adjust them if the tuple they point at got moved by the
+ *  update.  We only need to do this in private space, because when we update
+ *  an rtree we have a write lock on the tree, so no other process can have
+ *  any locks at all on it.  A single transaction can have write and read
+ *  locks on the same object, so that's why we need to handle this case.
+ */
+
+typedef struct RTScanListData {
+    IndexScanDesc      rtsl_scan; /* a scan registered by rtregscan() */
+    struct RTScanListData  *rtsl_next; /* next list entry, NULL at the end */
+} RTScanListData;
+
+typedef RTScanListData *RTScanList;
+
+/* pointer to list of local scans on rtrees */
+static RTScanList RTScans = (RTScanList) NULL;
+     
+/*
+ *  rtbeginscan() -- take the read lock on r, build a scan descriptor for
+ *  it, and register the new scan in this backend's list of rtree scans.
+ */
+IndexScanDesc
+rtbeginscan(Relation r, bool fromEnd, uint16 nkeys, ScanKey key)
+{
+    IndexScanDesc scan;
+
+    RelationSetLockForRead(r);
+
+    scan = RelationGetIndexScan(r, fromEnd, nkeys, key);
+    rtregscan(scan);
+
+    return (scan);
+}
+
+/*
+ *  rtrescan() -- (re)start scan s, optionally from the end, using the
+ *  scan keys in key.
+ *
+ *  Resets all item pointers and flags, copies key into s->keyData, and
+ *  sets up the rtree-private opaque state.  On the first call the opaque
+ *  state is allocated and the internal-page scan keys are built with
+ *  RTMapOperator().  On later calls the existing state is reset and the
+ *  internal keys are given the arguments of the new scan keys.
+ */
+void
+rtrescan(IndexScanDesc s, bool fromEnd, ScanKey key)
+{
+    RTreeScanOpaque p;
+    RegProcedure internal_proc;
+    int i;
+
+    if (!IndexScanIsValid(s)) {
+        elog(WARN, "rtrescan: invalid scan.");
+        return;
+    }
+
+    /*
+     *  Clear all the pointers.
+     */
+
+    ItemPointerSetInvalid(&s->previousItemData);
+    ItemPointerSetInvalid(&s->currentItemData);
+    ItemPointerSetInvalid(&s->nextItemData);
+    ItemPointerSetInvalid(&s->previousMarkData);
+    ItemPointerSetInvalid(&s->currentMarkData);
+    ItemPointerSetInvalid(&s->nextMarkData);
+
+    /*
+     *  Set flags.
+     */
+    if (RelationGetNumberOfBlocks(s->relation) == 0) {
+        s->flags = ScanUnmarked;
+    } else if (fromEnd) {
+        s->flags = ScanUnmarked | ScanUncheckedPrevious;
+    } else {
+        s->flags = ScanUnmarked | ScanUncheckedNext;
+    }
+
+    s->scanFromEnd = fromEnd;
+
+    if (s->numberOfKeys > 0) {
+        memmove(s->keyData,
+                key,
+                s->numberOfKeys * sizeof(ScanKeyData));
+    }
+
+    p = (RTreeScanOpaque) s->opaque;
+    if (p != (RTreeScanOpaque) NULL) {
+        /* restarting an existing scan: drop any saved positions */
+        freestack(p->s_stack);
+        freestack(p->s_markstk);
+        p->s_stack = p->s_markstk = (RTSTACK *) NULL;
+        p->s_flags = 0x0;
+
+        /*
+         *  The caller may have supplied new key values.  The internal
+         *  keys keep their (already mapped) operators, but they must
+         *  pick up the new arguments; otherwise internal pages would
+         *  still be searched with the values from the previous scan.
+         */
+        for (i = 0; i < s->numberOfKeys; i++)
+            p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument;
+    } else {
+        /* initialize opaque data */
+        p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData));
+        p->s_internalKey =
+            (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys);
+        p->s_stack = p->s_markstk = (RTSTACK *) NULL;
+        p->s_internalNKey = s->numberOfKeys;
+        p->s_flags = 0x0;
+        for (i = 0; i < s->numberOfKeys; i++)
+            p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument;
+        s->opaque = p;
+        if (s->numberOfKeys > 0) {
+
+            /*
+             *  Scans on internal pages use different operators than they
+             *  do on leaf pages.  For example, if the user wants all boxes
+             *  that exactly match (x1,y1,x2,y2), then on internal pages
+             *  we need to find all boxes that contain (x1,y1,x2,y2).
+             */
+
+            for (i = 0; i < s->numberOfKeys; i++) {
+                internal_proc = RTMapOperator(s->relation,
+                                              s->keyData[i].sk_attno,
+                                              s->keyData[i].sk_procedure);
+                ScanKeyEntryInitialize(&(p->s_internalKey[i]),
+                                       s->keyData[i].sk_flags,
+                                       s->keyData[i].sk_attno,
+                                       internal_proc,
+                                       s->keyData[i].sk_argument);
+            }
+        }
+    }
+}
+
+/*
+ *  rtmarkpos() -- remember the scan's current position as its mark.
+ *
+ *  Copies the current item pointer and the CURBEFORE flag into their
+ *  mark counterparts and replaces the mark stack with a copy of the
+ *  current parent stack.  Each copied entry is pushed on the front of
+ *  the new list, so the copy is linked in the opposite order from
+ *  the stack it was made from.
+ */
+void
+rtmarkpos(IndexScanDesc s)
+{
+    RTreeScanOpaque so;
+    RTSTACK *src, *copy, *node;
+
+    so = (RTreeScanOpaque) s->opaque;
+    s->currentMarkData = s->currentItemData;
+
+    if (so->s_flags & RTS_CURBEFORE)
+        so->s_flags |= RTS_MRKBEFORE;
+    else
+        so->s_flags &= ~RTS_MRKBEFORE;
+
+    /* duplicate the parent stack of the current position */
+    copy = (RTSTACK *) NULL;
+    for (src = so->s_stack; src != (RTSTACK *) NULL; src = src->rts_parent) {
+        node = (RTSTACK *) palloc(sizeof(RTSTACK));
+        node->rts_child = src->rts_child;
+        node->rts_blk = src->rts_blk;
+        node->rts_parent = copy;
+        copy = node;
+    }
+
+    /* the previous mark stack is no longer needed */
+    freestack(so->s_markstk);
+    so->s_markstk = copy;
+}
+
+/*
+ *  rtrestrpos() -- move the scan back to its marked position.
+ *
+ *  Copies the marked item pointer and the MRKBEFORE flag into their
+ *  current counterparts and replaces the current parent stack with a
+ *  copy of the mark stack.  Each copied entry is pushed on the front of
+ *  the new list, so the copy is linked in the opposite order from
+ *  the stack it was made from.
+ */
+void
+rtrestrpos(IndexScanDesc s)
+{
+    RTreeScanOpaque so;
+    RTSTACK *src, *copy, *node;
+
+    so = (RTreeScanOpaque) s->opaque;
+    s->currentItemData = s->currentMarkData;
+
+    if (so->s_flags & RTS_MRKBEFORE)
+        so->s_flags |= RTS_CURBEFORE;
+    else
+        so->s_flags &= ~RTS_CURBEFORE;
+
+    /* duplicate the parent stack saved with the mark */
+    copy = (RTSTACK *) NULL;
+    for (src = so->s_markstk; src != (RTSTACK *) NULL; src = src->rts_parent) {
+        node = (RTSTACK *) palloc(sizeof(RTSTACK));
+        node->rts_child = src->rts_child;
+        node->rts_blk = src->rts_blk;
+        node->rts_parent = copy;
+        copy = node;
+    }
+
+    /* the stack of the abandoned position is no longer needed */
+    freestack(so->s_stack);
+    so->s_stack = copy;
+}
+
+/*
+ *  rtendscan() -- finish scan s: free its current and mark stacks (when
+ *  opaque state exists) and remove it from the list of registered scans.
+ */
+void
+rtendscan(IndexScanDesc s)
+{
+    RTreeScanOpaque so = (RTreeScanOpaque) s->opaque;
+
+    if (so != (RTreeScanOpaque) NULL) {
+        freestack(so->s_stack);
+        freestack(so->s_markstk);
+    }
+
+    rtdropscan(s);
+    /* XXX don't unset read lock -- two-phase locking */
+}
+
+/*
+ *  rtregscan() -- push scan s on the front of the RTScans list.
+ */
+static void
+rtregscan(IndexScanDesc s)
+{
+    RTScanList entry;
+
+    entry = (RTScanList) palloc(sizeof(RTScanListData));
+    entry->rtsl_next = RTScans;
+    entry->rtsl_scan = s;
+
+    RTScans = entry;
+}
+
+/*
+ *  rtdropscan() -- unlink scan s from the RTScans list and free its list
+ *  entry.  Reports an error through elog(WARN) if s is not on the list.
+ */
+static void
+rtdropscan(IndexScanDesc s)
+{
+    RTScanList cur;
+    RTScanList before;
+
+    /* walk the list, remembering the entry just before the match */
+    before = (RTScanList) NULL;
+    cur = RTScans;
+    while (cur != (RTScanList) NULL && cur->rtsl_scan != s) {
+        before = cur;
+        cur = cur->rtsl_next;
+    }
+
+    if (cur == (RTScanList) NULL)
+        elog(WARN, "rtree scan list corrupted -- cannot find 0x%lx", s);
+
+    if (before != (RTScanList) NULL)
+        before->rtsl_next = cur->rtsl_next;
+    else
+        RTScans = cur->rtsl_next;    /* the match was the list head */
+
+    pfree(cur);
+}
+
+/*
+ *  rtadjscans() -- tell every registered scan on relation r about an
+ *  update (op) that affected offset offnum of block blkno.
+ */
+void
+rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
+{
+    RTScanList cur;
+    Oid target;
+
+    target = r->rd_id;
+    for (cur = RTScans; cur != (RTScanList) NULL; cur = cur->rtsl_next) {
+        /* scans on other relations are unaffected */
+        if (cur->rtsl_scan->relation->rd_id != target)
+            continue;
+
+        rtadjone(cur->rtsl_scan, op, blkno, offnum);
+    }
+}
+
+/*
+ *  rtadjone() -- adjust one scan for update.
+ *
+ * By here, the scan passed in is on a modified relation.  Op tells
+ * us what the modification is, and blkno and offnum tell us what
+ * block and offset index were affected.  This routine checks the
+ * current and marked positions, and the current and marked stacks,
+ * to see if any stored location needs to be changed because of the
+ * update.  If so, we make the change here.
+ */
+static void
+rtadjone(IndexScanDesc s,
+         int op,
+         BlockNumber blkno,
+         OffsetNumber offnum)
+{
+    RTreeScanOpaque so;
+
+    /* the current and the marked position are adjusted for every op */
+    adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
+    adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);
+
+    /* only a split requires the parent stacks to be adjusted as well */
+    if (op != RTOP_SPLIT)
+        return;
+
+    so = (RTreeScanOpaque) s->opaque;
+    adjuststack(so->s_stack, blkno, offnum);
+    adjuststack(so->s_markstk, blkno, offnum);
+}
+
+/*
+ *  adjustiptr() -- adjust current and marked item pointers in the scan
+ *
+ * Depending on the type of update and the place it happened, we
+ * need to do nothing, to back up one record, or to start over on
+ * the same page.
+ */
+static void
+adjustiptr(IndexScanDesc s,
+           ItemPointer iptr,
+           int op,
+           BlockNumber blkno,
+           OffsetNumber offnum)
+{
+    OffsetNumber curoff;
+    RTreeScanOpaque so;
+    bool is_current;
+
+    /* nothing to do unless the pointer is set and sits on the page */
+    if (!ItemPointerIsValid(iptr))
+        return;
+    if (ItemPointerGetBlockNumber(iptr) != blkno)
+        return;
+
+    curoff = ItemPointerGetOffsetNumber(iptr);
+    so = (RTreeScanOpaque) s->opaque;
+
+    /* iptr is either the scan's current position or its mark */
+    is_current = (iptr == &(s->currentItemData));
+
+    if (op == RTOP_DEL) {
+        /* positions before the deleted tuple are unaffected */
+        if (curoff < offnum)
+            return;
+
+        if (curoff > FirstOffsetNumber) {
+            /* back up one: just adjust the item pointer */
+            ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
+        } else {
+            /* remember that we're before the current tuple */
+            ItemPointerSet(iptr, blkno, FirstOffsetNumber);
+            if (is_current)
+                so->s_flags |= RTS_CURBEFORE;
+            else
+                so->s_flags |= RTS_MRKBEFORE;
+        }
+    } else if (op == RTOP_SPLIT) {
+        /* back to start of page on split */
+        ItemPointerSet(iptr, blkno, FirstOffsetNumber);
+        if (is_current)
+            so->s_flags &= ~RTS_CURBEFORE;
+        else
+            so->s_flags &= ~RTS_MRKBEFORE;
+    } else {
+        elog(WARN, "Bad operation in rtree scan adjust: %d", op);
+    }
+}
+
+/*
+ *  adjuststack() -- adjust the supplied stack for a split on a page in
+ *          the index we're scanning.
+ *
+ * If a page on our parent stack has split, we need to back up to the
+ * beginning of the page and rescan it.  The reason for this is that
+ * the split algorithm for rtrees doesn't order tuples in any useful
+ * way on a single page.  This means that on a split, we may wind up
+ * looking at some heap tuples more than once.  This is handled in the
+ * access method update code for heaps; if we've modified the tuple we
+ * are looking at already in this transaction, we ignore the update
+ * request.
+ */
+/*ARGSUSED*/
+static void
+adjuststack(RTSTACK *stk,
+            BlockNumber blkno,
+            OffsetNumber offnum)
+{
+    RTSTACK *entry;
+
+    /* offnum is unused: a split sends us back to the first slot */
+    for (entry = stk; entry != (RTSTACK *) NULL; entry = entry->rts_parent) {
+        if (entry->rts_blk != blkno)
+            continue;
+
+        entry->rts_child = FirstOffsetNumber;
+    }
+}
diff --git a/src/backend/access/rtree/rtstrat.c b/src/backend/access/rtree/rtstrat.c

new file mode 100644 (file)

index 0000000..c5d934a
--- /dev/null
+++ b/src/backend/access/rtree/rtstrat.c
@@ -0,0 +1,239 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtstrat.c--
+ *    strategy map data for rtrees.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtstrat.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "utils/rel.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "access/istrat.h"
+#include "access/rtree.h"
+
+/*
+ *  Note:  negate, commute, and negatecommute all assume that operators are
+ *    ordered as follows in the strategy map:
+ *
+ * left, left-or-overlap, overlap, right-or-overlap, right, same,
+ * contains, contained-by
+ *
+ *  The negate, commute, and negatecommute arrays are used by the planner
+ *  to plan indexed scans over data that appears in the qualification in
+ *  a boolean negation, or whose operands appear in the wrong order.  For
+ *  example, if the operator "<%" means "contains", and the user says
+ *
+ * where not rel.box <% "(10,10,20,20)"::box
+ *
+ *  the planner can plan an index scan by noting that rtree indices have
+ *  an operator in their operator class for negating <%.
+ *
+ *  Similarly, if the user says something like
+ *
+ * where "(10,10,20,20)"::box <% rel.box
+ *
+ *  the planner can see that the rtree index on rel.box has an operator in
+ *  its opclass for commuting <%, and plan the scan using that operator.
+ *  This added complexity in the access methods makes the planner a lot easier
+ *  to write.
+ */
+
+/* if a op b, what operator tells us if (not a op b)? */
+static StrategyNumber  RTNegate[RTNStrategies] = {
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy
+    };
+
+/* if a op_1 b, what is the operator op_2 such that b op_2 a? */
+static StrategyNumber  RTCommute[RTNStrategies] = {
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy
+    };
+
+/* if a op_1 b, what is the operator op_2 such that (b !op_2 a)? */
+static StrategyNumber  RTNegateCommute[RTNStrategies] = {
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy,
+    InvalidStrategy
+    };
+
+/*
+ *  Now do the TermData arrays.  These exist in case the user doesn't give
+ *  us a full set of operators for a particular operator class.  The idea
+ *  is that by making multiple comparisons using any one of the supplied
+ *  operators, we can decide whether two n-dimensional polygons are equal.
+ *  For example, if a contains b and b contains a, we may conclude that
+ *  a and b are equal.
+ *
+ *  The presence of the TermData arrays in all this is a historical accident.
+ *  Early in the development of the POSTGRES access methods, it was believed
+ *  that writing functions was harder than writing arrays.  This is wrong;
+ *  TermData is hard to understand and hard to get right.  In general, when
+ *  someone populates a new operator class, they populate it completely.  If
+ *  Mike Hirohama had forced Cimarron Taylor to populate the strategy map
+ *  for btree int2_ops completely in 1988, you wouldn't have to deal with
+ *  all this now.  Too bad for you.
+ *
+ *  Since you can't necessarily do this in all cases (for example, you can't
+ *  do it given only "intersects" or "disjoint"), TermData arrays for some
+ *  operators don't appear below.
+ *
+ *  Note that if you DO supply all the operators required in a given opclass
+ *  by inserting them into the pg_opclass system catalog, you can get away
+ *  without doing all this TermData stuff.  Since the rtree code is intended
+ *  to be a reference for access method implementors, I'm doing TermData
+ *  correctly here.
+ *
+ *  Note on style:  these are all actually of type StrategyTermData, but
+ *  since those have variable-length data at the end of the struct we can't
+ *  properly initialize them if we declare them to be what they are.
+ */
+
+/* if you only have "contained-by", how do you determine equality? */
+static uint16 RTContainedByTermData[] = {
+    2,                 /* make two comparisons */
+    RTContainedByStrategyNumber,       /* use "a contained-by b" */
+    0x0,                   /* without any magic */
+    RTContainedByStrategyNumber,       /* then use contained-by, */
+    SK_COMMUTE             /* swapping a and b */
+    };
+
+/* if you only have "contains", how do you determine equality? */
+static uint16 RTContainsTermData[] = {
+    2,                 /* make two comparisons */
+    RTContainsStrategyNumber,      /* use "a contains b" */
+    0x0,                   /* without any magic */
+    RTContainsStrategyNumber,      /* then use contains again, */
+    SK_COMMUTE             /* swapping a and b */
+    };
+
+/* now put all that together in one place for the planner */
+static StrategyTerm RTEqualExpressionData[] = {
+    (StrategyTerm) RTContainedByTermData,
+    (StrategyTerm) RTContainsTermData,
+    NULL
+    };
+
+/*
+ *  If you were sufficiently attentive to detail, you would go through
+ *  the ExpressionData pain above for every one of the eight strategies
+ *  we defined.  I am not.  Now we declare the StrategyEvaluationData
+ *  structure that gets shipped around to help the planner and the access
+ *  method decide what sort of scan it should do, based on (a) what the
+ *  user asked for, (b) what operators are defined for a particular opclass,
+ *  and (c) the reams of information we supplied above.
+ *
+ *  The idea of all of this initialized data is to make life easier on the
+ *  user when he defines a new operator class to use this access method.
+ *  By filling in all the data, we let him get away with leaving holes in his
+ *  operator class, and still let him use the index.  The added complexity
+ *  in the access methods just isn't worth the trouble, though.
+ */
+
+static StrategyEvaluationData RTEvaluationData = {
+    RTNStrategies,             /* # of strategies */
+    (StrategyTransformMap) RTNegate,   /* how to do (not qual) */
+    (StrategyTransformMap) RTCommute,  /* how to swap operands */
+    (StrategyTransformMap) RTNegateCommute,    /* how to do both */
+    {
+   NULL,                   /* express left */
+   NULL,                   /* express overleft */
+   NULL,                   /* express over */
+   NULL,                   /* express overright */
+   NULL,                   /* express right */
+   (StrategyExpression) RTEqualExpressionData, /* express same */
+   NULL,                   /* express contains */
+   NULL,                   /* express contained-by */
+   NULL,
+   NULL,
+   NULL
+    }
+};
+
+/*
+ *  Okay, now something peculiar to rtrees that doesn't apply to most other
+ *  indexing structures:  When we're searching a tree for a given value, we
+ *  can't do the same sorts of comparisons on internal node entries as we
+ *  do at leaves.  The reason is that if we're looking for (say) all boxes
+ *  that are the same as (0,0,10,10), then we need to descend into every
+ *  internal entry whose box contains that region.  So internally we
+ *  search for "contains" (see the map below), and at
+ *  the leaf we search for equality.
+ *
+ *  This array maps leaf search operators to the internal search operators.
+ *  We assume the normal ordering on operators:
+ *
+ * left, left-or-overlap, overlap, right-or-overlap, right, same,
+ * contains, contained-by
+ */
+static StrategyNumber RTOperMap[RTNStrategies] = {
+    RTOverLeftStrategyNumber, /* leaf op: left */
+    RTOverLeftStrategyNumber, /* leaf op: left-or-overlap */
+    RTOverlapStrategyNumber, /* leaf op: overlap */
+    RTOverRightStrategyNumber, /* leaf op: right-or-overlap */
+    RTOverRightStrategyNumber, /* leaf op: right */
+    RTContainsStrategyNumber, /* leaf op: same */
+    RTContainsStrategyNumber, /* leaf op: contains */
+    RTOverlapStrategyNumber /* leaf op: contained-by */
+    };
+
+/*
+ *  RelationGetRTStrategy() -- strategy number of procedure proc for
+ *  attribute attnum of r, resolved using the rtree evaluation data.
+ */
+StrategyNumber
+RelationGetRTStrategy(Relation r,
+                      AttrNumber attnum,
+                      RegProcedure proc)
+{
+    StrategyNumber strat;
+
+    strat = RelationGetStrategy(r, attnum, &RTEvaluationData, proc);
+    return (strat);
+}
+
+/*
+ *  RelationInvokeRTStrategy() -- evaluate strategy s of attribute attnum
+ *  of r on (left, right), using the rtree evaluation data.
+ */
+bool
+RelationInvokeRTStrategy(Relation r,
+                         AttrNumber attnum,
+                         StrategyNumber s,
+                         Datum left,
+                         Datum right)
+{
+    bool holds;
+
+    holds = RelationInvokeStrategy(r, &RTEvaluationData, attnum, s,
+                                   left, right);
+    return (holds);
+}
+
+/*
+ *  RTMapOperator() -- given the procedure used to compare leaf entries
+ *  for attribute attnum of r, return the procedure to use on internal
+ *  pages, as dictated by RTOperMap.
+ */
+RegProcedure
+RTMapOperator(Relation r,
+              AttrNumber attnum,
+              RegProcedure proc)
+{
+    StrategyNumber leafstrat;
+    StrategyNumber internalstrat;
+    StrategyMap map;
+
+    leafstrat = RelationGetRTStrategy(r, attnum, proc);
+    map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(r),
+                                      RTNStrategies,
+                                      attnum);
+
+    /* strategy numbers are 1-based; both arrays are 0-based */
+    internalstrat = RTOperMap[leafstrat - 1];
+
+    return (map->entry[internalstrat - 1].sk_procedure);
+}
diff --git a/src/backend/access/rtscan.h b/src/backend/access/rtscan.h

new file mode 100644 (file)

index 0000000..a928303
--- /dev/null
+++ b/src/backend/access/rtscan.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtscan.h--
+ *    routines defined in access/rtree/rtscan.c
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: rtscan.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RTSCAN_H
+#define RTSCAN_H    /* without this the guard above never takes effect */
+
+/* adjust all registered scans on r for an update at (blkno, offnum) */
+void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum);
+
+#endif /* RTSCAN_H */
diff --git a/src/backend/access/rtstrat.h b/src/backend/access/rtstrat.h

new file mode 100644 (file)

index 0000000..5b439e7
--- /dev/null
+++ b/src/backend/access/rtstrat.h
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ *
+ * rtstrat.h--
+ *    routines defined in access/rtree/rtstrat.c
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: rtstrat.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RTSTRAT_H
+#define RTSTRAT_H  /* without this the #ifndef never guards anything */
+
+/* defined in access/rtree/rtstrat.c */
+extern RegProcedure RTMapOperator(Relation r,  AttrNumber attnum,
+                 RegProcedure proc);
+
+#endif /* RTSTRAT_H */
diff --git a/src/backend/access/sdir.h b/src/backend/access/sdir.h

new file mode 100644 (file)

index 0000000..030007d
--- /dev/null
+++ b/src/backend/access/sdir.h
@@ -0,0 +1,57 @@
+/*-------------------------------------------------------------------------
+ *
+ * sdir.h--
+ *    POSTGRES scan direction definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: sdir.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    SDIR_H
+#define SDIR_H
+
+#include "c.h"
+
+/*
+ * ScanDirection was an int8 for no apparent reason. I kept the original
+ * values because I'm not sure if I'll break anything otherwise.  -ay 2/95
+ */
+typedef enum ScanDirection {
+    BackwardScanDirection = -1,
+    NoMovementScanDirection = 0,
+    ForwardScanDirection = 1
+} ScanDirection;
+
+/*
+ * Each macro below parenthesizes its argument so that an expression
+ * argument (for example "cond ? a : b") is compared as a whole instead
+ * of being re-associated with the comparison operator.
+ */
+
+/*
+ * ScanDirectionIsValid --
+ * True iff scan direction is valid, i.e. lies between
+ * BackwardScanDirection and ForwardScanDirection inclusive.
+ * The argument is evaluated twice; avoid side effects in it.
+ */
+#define ScanDirectionIsValid(direction) \
+    ((bool) (BackwardScanDirection <= (direction) && \
+        (direction) <= ForwardScanDirection))
+
+/*
+ * ScanDirectionIsBackward --
+ * True iff scan direction is backward.
+ */
+#define ScanDirectionIsBackward(direction) \
+    ((bool) ((direction) == BackwardScanDirection))
+
+/*
+ * ScanDirectionIsNoMovement --
+ * True iff scan direction indicates no movement.
+ */
+#define ScanDirectionIsNoMovement(direction) \
+    ((bool) ((direction) == NoMovementScanDirection))
+
+/*
+ * ScanDirectionIsForward --
+ * True iff scan direction is forward.
+ */
+#define ScanDirectionIsForward(direction) \
+    ((bool) ((direction) == ForwardScanDirection))
+
+#endif /* SDIR_H */
diff --git a/src/backend/access/skey.h b/src/backend/access/skey.h

new file mode 100644 (file)

index 0000000..3cadf34
--- /dev/null
+++ b/src/backend/access/skey.h
@@ -0,0 +1,52 @@
+/*-------------------------------------------------------------------------
+ *
+ * skey.h--
+ *    POSTGRES scan key definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: skey.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *
+ * Note:
+ * Needs more accessor/assignment routines.
+ *-------------------------------------------------------------------------
+ */
+#ifndef    SKEY_H
+#define SKEY_H
+
+#include "postgres.h"
+#include "access/attnum.h"
+
+
+typedef struct ScanKeyData {
+    bits16     sk_flags;   /* flags */
+    AttrNumber     sk_attno;   /* domain number */
+    RegProcedure   sk_procedure;   /* procedure OID */
+    func_ptr            sk_func;   /* NOTE(review): presumably the callable form of sk_procedure -- confirm */
+    int32      sk_nargs;   /* NOTE(review): presumably the argument count of sk_func -- confirm */
+    Datum      sk_argument;    /* data to compare */
+} ScanKeyData;
+
+typedef ScanKeyData    *ScanKey;  /* pointer form used by the prototypes in this header */
+
+
+/*
+ * Single-bit values (0x1, 0x2, 0x4, 0x8), so they can be OR-ed together.
+ * NOTE(review): presumably these are the bits kept in
+ * ScanKeyData.sk_flags -- confirm against the users of sk_flags.
+ */
+#define    SK_ISNULL   0x1
+#define    SK_UNARY    0x2
+#define    SK_NEGATE   0x4
+#define    SK_COMMUTE  0x8
+
+/*
+ * A second set of single-bit values.  This header does not show where
+ * they are stored or tested.
+ */
+#define ScanUnmarked       0x01
+#define ScanUncheckedPrevious  0x02
+#define ScanUncheckedNext  0x04
+
+
+/*
+ * prototypes for functions in access/common/scankey.c
+ */
+extern void ScanKeyEntrySetIllegal(ScanKey entry);
+extern void ScanKeyEntryInitialize(ScanKey entry, bits16 flags,
+     AttrNumber attributeNumber, RegProcedure procedure, Datum argument);
+
+#endif /* SKEY_H */
diff --git a/src/backend/access/strat.h b/src/backend/access/strat.h

new file mode 100644 (file)

index 0000000..4ddb219
--- /dev/null
+++ b/src/backend/access/strat.h
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * strat.h--
+ *    index strategy type definitions
+ *    (separated out from original istrat.h to avoid circular refs)
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: strat.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STRAT_H
+#define STRAT_H
+
+#include "postgres.h"
+#include "access/attnum.h"
+#include "access/skey.h"
+
+typedef uint16 StrategyNumber;
+
+#define InvalidStrategy    0
+
+typedef struct StrategyTransformMapData {
+    StrategyNumber strategy[1];    /* VARIABLE LENGTH ARRAY */
+} StrategyTransformMapData;    /* VARIABLE LENGTH STRUCTURE */
+
+typedef StrategyTransformMapData   *StrategyTransformMap;
+
+/* one operator of a conjunctive term: a strategy number plus its flags */
+typedef struct StrategyOperatorData {
+    StrategyNumber strategy;
+    bits16     flags;      /* scan qualification flags, see access/skey.h */
+} StrategyOperatorData;
+
+typedef StrategyOperatorData   *StrategyOperator;
+
+typedef struct StrategyTermData {  /* conjunctive term */
+    uint16         degree;
+    StrategyOperatorData   operatorData[1];    /* VARIABLE LENGTH */
+} StrategyTermData;    /* VARIABLE LENGTH STRUCTURE */
+
+typedef StrategyTermData   *StrategyTerm;
+
+typedef struct StrategyExpressionData {    /* disjunctive normal form */
+    StrategyTerm   term[1];    /* VARIABLE LENGTH ARRAY */
+} StrategyExpressionData;  /* VARIABLE LENGTH STRUCTURE */
+
+typedef StrategyExpressionData *StrategyExpression;
+
+/*
+ * NOTE(review): expression[] is declared with a fixed 12 slots although
+ * the structure is marked variable length.  The same literal 12 is what
+ * AMStrategies() yields in this header when CorrectStrategies is not
+ * defined; presumably the two have to stay in step -- confirm.
+ */
+typedef struct StrategyEvaluationData {
+    StrategyNumber     maxStrategy;
+    StrategyTransformMap   negateTransform;
+    StrategyTransformMap   commuteTransform;
+    StrategyTransformMap   negateCommuteTransform;
+    StrategyExpression expression[12]; /* XXX VARIABLE LENGTH */
+} StrategyEvaluationData;  /* VARIABLE LENGTH STRUCTURE */
+
+typedef StrategyEvaluationData *StrategyEvaluation;
+
+/*
+ * StrategyTransformMapIsValid --
+ * Returns true iff strategy transformation map is valid.
+ */
+#define    StrategyTransformMapIsValid(transform) PointerIsValid(transform)
+
+
+/*
+ * AMStrategies --
+ * When CorrectStrategies is not defined the argument is ignored and the
+ * macro always yields 12; when it is defined the macro yields its
+ * argument unchanged.
+ */
+#ifndef    CorrectStrategies       /* XXX this should be removable */
+#define AMStrategies(foo)  12
+#else  /* !defined(CorrectStrategies) */
+#define AMStrategies(foo)  (foo)
+#endif /* !defined(CorrectStrategies) */
+
+typedef struct StrategyMapData {
+   ScanKeyData     entry[1];   /* VARIABLE LENGTH ARRAY */
+} StrategyMapData; /* VARIABLE LENGTH STRUCTURE */
+
+typedef StrategyMapData    *StrategyMap;
+
+typedef struct IndexStrategyData {
+   StrategyMapData strategyMapData[1]; /* VARIABLE LENGTH ARRAY */
+} IndexStrategyData;   /* VARIABLE LENGTH STRUCTURE */
+
+typedef IndexStrategyData  *IndexStrategy;
+
+#endif /*STRAT_H */
diff --git a/src/backend/access/transam.h b/src/backend/access/transam.h

new file mode 100644 (file)

index 0000000..0f5a972
--- /dev/null
+++ b/src/backend/access/transam.h
@@ -0,0 +1,213 @@
+/*-------------------------------------------------------------------------
+ *
+ * transam.h--
+ *    postgres transaction access method support code header
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: transam.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *   NOTES
+ * Transaction System Version 101 now support proper oid
+ * generation and recording in the variable relation.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TRANSAM_H
+#define TRANSAM_H
+
+/* ----------------
+ * transaction system version id
+ *
+ * this is stored on the first page of the log, time and variable
+ * relations on the first 4 bytes.  This is so that if we improve
+ * the format of the transaction log after postgres version 2, then
+ * people won't have to rebuild their databases.
+ *
+ * TRANS_SYSTEM_VERSION 100 means major version 1 minor version 0.
+ * Two databases with the same major version should be compatible,
+ * even if their minor versions differ.
+ * ----------------
+ */
+#define TRANS_SYSTEM_VERSION   101
+
+/* ----------------
+ * transaction id status values
+ *
+ * someday we will use "11" = 3 = XID_INVALID to mean the
+ * starting of run-length encoded log data.
+ * ----------------
+ */
+#define XID_COMMIT      2              /* transaction committed */
+#define XID_ABORT       1              /* transaction aborted */
+#define XID_INPROGRESS  0              /* transaction in progress */
+#define XID_INVALID     3              /* other */
+
+typedef unsigned char XidStatus;       /* (2 bits) */
+
+/* ----------------
+ *     BitIndexOf computes the index of the Nth xid on a given block
+ * ----------------
+ */
+#define BitIndexOf(N)   ((N) * 2)
+
+/* ----------------
+ * transaction page definitions
+ * ----------------
+ */
+#define TP_DataSize        BLCKSZ
+#define TP_NumXidStatusPerBlock    (TP_DataSize * 4)
+#define TP_NumTimePerBlock (TP_DataSize / 4)
+
+/* ----------------
+ * LogRelationContents structure
+ *
+ * This structure describes the storage of the data in the
+ * first 128 bytes of the log relation.  This storage is never
+ * used for transaction status because transaction id's begin
+ * their numbering at 512.
+ *
+ * The first 4 bytes of this relation store the version
+ * number of the transaction system.
+ * ----------------
+ */
+typedef struct LogRelationContentsData {
+    int            TransSystemVersion;
+} LogRelationContentsData;
+
+typedef LogRelationContentsData *LogRelationContents;
+
+/* ----------------
+ * TimeRelationContents structure
+ *
+ * This structure describes the storage of the data in the
+ * first 2048 bytes of the time relation.  This storage is never
+ * used for transaction commit times because transaction id's begin
+ * their numbering at 512.
+ *
+ * The first 4 bytes of this relation store the version
+ * number of the transaction system.
+ * ----------------
+ */
+typedef struct TimeRelationContentsData {
+    int            TransSystemVersion;
+} TimeRelationContentsData;
+
+typedef TimeRelationContentsData *TimeRelationContents;
+
+/* ----------------
+ * VariableRelationContents structure
+ *
+ * The variable relation is a special "relation" which
+ * is used to store various system "variables" persistently.
+ * Unlike other relations in the system, this relation
+ * is updated in place whenever the variables change.
+ *
+ * The first 4 bytes of this relation store the version
+ * number of the transaction system.
+ *
+ * Currently, the relation has only one page and the next
+ * available xid, the last committed xid and the next
+ * available oid are stored there.
+ * ----------------
+ */
+typedef struct VariableRelationContentsData {
+    int            TransSystemVersion;
+    TransactionId  nextXidData;
+    TransactionId  lastXidData;
+    Oid            nextOid;
+} VariableRelationContentsData;
+
+typedef VariableRelationContentsData *VariableRelationContents;
+
+/* ----------------
+ * extern declarations
+ * ----------------
+ */
+
+/*
+ * prototypes for functions in transam/transam.c
+ */
+extern int RecoveryCheckingEnabled();
+extern void SetRecoveryCheckingEnabled(bool state);
+extern bool TransactionLogTest(TransactionId transactionId, XidStatus status);
+extern void TransactionLogUpdate(TransactionId transactionId,
+                XidStatus status);
+extern AbsoluteTime TransactionIdGetCommitTime(TransactionId transactionId);
+extern void TransRecover(Relation logRelation);
+extern void InitializeTransactionLog();
+extern bool TransactionIdDidCommit(TransactionId transactionId);
+extern bool TransactionIdDidAbort(TransactionId transactionId);
+extern bool TransactionIdIsInProgress(TransactionId transactionId);
+extern void TransactionIdCommit(TransactionId transactionId);
+extern void TransactionIdAbort(TransactionId transactionId);
+extern void TransactionIdSetInProgress(TransactionId transactionId);
+
+/* in transam/transsup.c */
+extern void AmiTransactionOverride(bool flag);
+extern void TransComputeBlockNumber(Relation relation,
+   TransactionId transactionId, BlockNumber *blockNumberOutP);
+extern XidStatus TransBlockGetLastTransactionIdStatus(Block tblock,
+   TransactionId baseXid, TransactionId *returnXidP);
+extern XidStatus TransBlockGetXidStatus(Block tblock,
+                   TransactionId transactionId);
+extern void TransBlockSetXidStatus(Block tblock,
+   TransactionId transactionId, XidStatus xstatus);
+extern AbsoluteTime TransBlockGetCommitTime(Block tblock,
+   TransactionId transactionId);
+extern void TransBlockSetCommitTime(Block tblock,
+   TransactionId transactionId, AbsoluteTime commitTime);
+extern XidStatus TransBlockNumberGetXidStatus(Relation relation,
+   BlockNumber blockNumber, TransactionId xid, bool *failP);
+extern void TransBlockNumberSetXidStatus(Relation relation,
+   BlockNumber blockNumber, TransactionId xid, XidStatus xstatus,
+   bool *failP);
+extern AbsoluteTime TransBlockNumberGetCommitTime(Relation relation,
+   BlockNumber blockNumber, TransactionId xid, bool *failP);
+extern void TransBlockNumberSetCommitTime(Relation relation,
+   BlockNumber blockNumber, TransactionId xid, AbsoluteTime xtime,
+   bool *failP);
+extern void TransGetLastRecordedTransaction(Relation relation,
+   TransactionId xid, bool *failP);
+
+/* in transam/varsup.c */
+extern void VariableRelationGetNextXid(TransactionId *xidP);
+extern void VariableRelationGetLastXid(TransactionId *xidP);
+extern void VariableRelationPutNextXid(TransactionId xid);
+extern void VariableRelationPutLastXid(TransactionId xid);
+extern void VariableRelationGetNextOid(Oid *oid_return);
+extern void VariableRelationPutNextOid(Oid *oidP);
+extern void GetNewTransactionId(TransactionId *xid);
+extern void UpdateLastCommittedXid(TransactionId xid);
+extern void GetNewObjectIdBlock(Oid *oid_return, int oid_block_size);
+extern void GetNewObjectId(Oid *oid_return);
+
+/* ----------------
+ * global variable extern declarations
+ * ----------------
+ */
+
+/* in transam.c */
+extern Relation    LogRelation;
+extern Relation    TimeRelation;
+extern Relation    VariableRelation;
+
+extern TransactionId   cachedGetCommitTimeXid;
+extern AbsoluteTime    cachedGetCommitTime;
+extern TransactionId   cachedTestXid;
+extern XidStatus   cachedTestXidStatus;
+
+extern TransactionId NullTransactionId;
+extern TransactionId AmiTransactionId;
+extern TransactionId FirstTransactionId;
+
+extern int RecoveryCheckingEnableState;
+
+/* in transsup.c */
+extern bool AMI_OVERRIDE;  
+
+/* in varsup.c */
+extern int OidGenLockId;
+
+#endif /* TRANSAM_H */
diff --git a/src/backend/access/transam/Makefile.inc b/src/backend/access/transam/Makefile.inc

new file mode 100644 (file)

index 0000000..c4f5b95
--- /dev/null
+++ b/src/backend/access/transam/Makefile.inc
@@ -0,0 +1,14 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for access/transam
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+SUBSRCS+= transam.c transsup.c varsup.c xact.c xid.c 
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c

new file mode 100644 (file)

index 0000000..b3789a8
--- /dev/null
+++ b/src/backend/access/transam/transam.c
@@ -0,0 +1,675 @@
+/*-------------------------------------------------------------------------
+ *
+ * transam.c--
+ *    postgres transaction log/time interface routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ * NOTES
+ *    This file contains the high level access-method interface to the
+ *    transaction system.
+ * 
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "machine.h"       /* in port/ directory (needed for BLCKSZ) */
+
+#include "access/heapam.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+
+#include "utils/memutils.h"
+#include "utils/mcxt.h"
+#include "utils/rel.h"
+#include "utils/elog.h"
+
+#include "utils/nabstime.h"
+#include "catalog/catname.h"
+
+#include "access/transam.h"
+#include "access/xact.h"
+#include "commands/vacuum.h"   /* for VacuumRunning */
+
+/* ----------------
+ *    global variables holding pointers to relations used
+ *    by the transaction system.  These are initialized by
+ *    InitializeTransactionLog().
+ * ----------------
+ */
+
+Relation LogRelation     = (Relation) NULL;
+Relation TimeRelation    = (Relation) NULL;
+Relation VariableRelation = (Relation) NULL;
+
+/* ----------------
+ *     global variables holding cached transaction id's and statuses.
+ * ----------------
+ */
+TransactionId  cachedGetCommitTimeXid;
+AbsoluteTime   cachedGetCommitTime;
+TransactionId  cachedTestXid;
+XidStatus  cachedTestXidStatus;
+
+/* ----------------------------------------------------------------
+ * transaction system constants
+ *
+ * read the comments for GetNewTransactionId in order to
+ *      understand the initial values for AmiTransactionId and
+ *      FirstTransactionId. -cim 3/23/90
+ * ----------------------------------------------------------------
+ */
+TransactionId NullTransactionId = (TransactionId) 0;
+
+TransactionId AmiTransactionId = (TransactionId) 512;
+
+TransactionId FirstTransactionId = (TransactionId) 514;
+
+/* ----------------
+ * transaction recovery state variables
+ *
+ * When the transaction system is initialized, we may
+ * need to do recovery checking.  This decision is decided
+ * by the postmaster or the user by supplying the backend
+ * with a special flag.  In general, we want to do recovery
+ * checking whenever we are running without a postmaster
+ * or when the number of backends running under the postmaster
+ * goes from zero to one. -cim 3/21/90
+ * ----------------
+ */
+int RecoveryCheckingEnableState = 0;
+
+/* ------------------
+ * spinlock for oid generation
+ * -----------------
+ */
+extern int OidGenLockId;
+
+/* ----------------
+ * globals that must be reset at abort
+ * ----------------
+ */
+extern bool    BuildingBtree;
+
+
+/* ----------------
+ * recovery checking accessors
+ * ----------------
+ */
+/*
+ * RecoveryCheckingEnabled --
+ * Reports the current value of RecoveryCheckingEnableState.
+ */
+int
+RecoveryCheckingEnabled()
+{
+    int enabled = RecoveryCheckingEnableState;
+
+    return enabled;
+}
+
+/*
+ * SetRecoveryCheckingEnabled --
+ * Stores 1 in RecoveryCheckingEnableState when "state" compares equal
+ * to true, and 0 for any other value.
+ */
+void
+SetRecoveryCheckingEnabled(bool state)
+{
+    if (state == true)
+        RecoveryCheckingEnableState = 1;
+    else
+        RecoveryCheckingEnableState = 0;
+}
+
+/* ----------------------------------------------------------------
+ * postgres log/time access method interface
+ *
+ * TransactionLogTest
+ * TransactionLogUpdate
+ * ========
+ *    these functions do work for the interface
+ *    functions - they search/retrieve and append/update
+ *    information in the log and time relations.
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * TransactionLogTest
+ * --------------------------------
+ */
+
+bool   /* true/false: does transaction id have specified status? */
+TransactionLogTest(TransactionId transactionId,    /* transaction id to test */
+          XidStatus status)        /* transaction status */
+{
+    BlockNumber    logBlock;
+    XidStatus      recorded;       /* status read back from the log */
+    bool           lookupFailed = false;
+
+    /*
+     * While the log relation is not valid (initialization), every
+     * transaction is considered to have committed.
+     */
+    if (! RelationIsValid(LogRelation))
+        return (bool) (status == XID_COMMIT);
+
+    /*
+     * Single-item cache: if this is the xid we looked at (or updated)
+     * last, answer without going to the buffer manager.
+     */
+    if (TransactionIdEquals(transactionId, cachedTestXid))
+        return (bool) (status == cachedTestXidStatus);
+
+    /*
+     * Locate the block of the log relation holding this xid and read
+     * the recorded status from it.
+     */
+    TransComputeBlockNumber(LogRelation, transactionId, &logBlock);
+    recorded = TransBlockNumberGetXidStatus(LogRelation,
+                                            logBlock,
+                                            transactionId,
+                                            &lookupFailed);
+
+    if (lookupFailed) {
+        /* the block didn't contain the information we wanted */
+        elog(WARN, "TransactionLogTest: failed to get xidstatus");
+
+        /* so lint is happy... */
+        return(false);
+    }
+
+    /* remember the answer in case the same xid is asked about again */
+    TransactionIdStore(transactionId, &cachedTestXid);
+    cachedTestXidStatus = recorded;
+
+    return (bool) (status == recorded);
+}
+
+/* --------------------------------
+ * TransactionLogUpdate
+ *
+ * Records "status" for "transactionId" in the log relation and
+ * stores the pair in the single-item TransactionLogTest cache.
+ * When the status is XID_COMMIT it additionally writes the current
+ * system time to the time relation (if TimeRelation is valid) and
+ * calls UpdateLastCommittedXid() (if VariableRelation is valid).
+ * Returns without doing anything while LogRelation is not valid.
+ *
+ * NOTE(review): "fail" is handed to the block-level setters but is
+ * never examined afterwards, so the caches are updated whether or
+ * not the writes succeeded -- confirm this is intended.
+ * --------------------------------
+ */
+void
+TransactionLogUpdate(TransactionId transactionId, /* trans id to update */
+            XidStatus status) /* new trans status */
+{
+    BlockNumber        blockNumber;
+    bool       fail = false;       /* success/failure */
+    AbsoluteTime   currentTime;    /* time of this transaction */
+    
+    /* ----------------
+     *     during initialization we don't record any updates.
+     * ----------------
+     */
+    if (! RelationIsValid(LogRelation))
+   return;
+    
+    /* ----------------
+     *  get the transaction commit time
+     * ----------------
+     */
+    currentTime = getSystemTime();
+    
+    /* ----------------
+     *  update the log relation
+     * ----------------
+     */
+    TransComputeBlockNumber(LogRelation, transactionId, &blockNumber);
+    TransBlockNumberSetXidStatus(LogRelation,
+                blockNumber,
+                transactionId,
+                status,
+                &fail);
+    
+    /* ----------------
+     *  update (invalidate) our single item TransactionLogTest cache.
+     * ----------------
+     */
+    TransactionIdStore(transactionId, &cachedTestXid);
+    cachedTestXidStatus = status;
+    
+    /* ----------------
+     * now we update the time relation, if necessary
+     *  (we only record commit times)
+     * ----------------
+     */
+    if (RelationIsValid(TimeRelation) && status == XID_COMMIT) {
+   TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber);
+   TransBlockNumberSetCommitTime(TimeRelation,
+                     blockNumber,
+                     transactionId,
+                     currentTime,
+                     &fail);
+   /* ----------------
+    *   update (invalidate) our single item GetCommitTime cache.
+    * ----------------
+    */
+   TransactionIdStore(transactionId, &cachedGetCommitTimeXid);
+   cachedGetCommitTime = currentTime;
+    }
+    
+    /* ----------------
+     * now we update the "last committed transaction" field
+     *  in the variable relation if we are recording a commit.
+     * ----------------
+     */
+    if (RelationIsValid(VariableRelation) && status == XID_COMMIT)
+   UpdateLastCommittedXid(transactionId);
+}
+
+/* --------------------------------
+ * TransactionIdGetCommitTime
+ * --------------------------------
+ */
+
+AbsoluteTime  /* commit time of transaction id */
+TransactionIdGetCommitTime(TransactionId transactionId) /* transaction id to test */
+{
+    BlockNumber    timeBlock;
+    AbsoluteTime   when;           /* commit time read from the relation */
+    bool           lookupFailed = false;
+
+    /* nothing to consult until the time relation is valid */
+    if (! RelationIsValid(TimeRelation))
+        return INVALID_ABSTIME;
+
+    /*
+     * Single-item cache: reuse the commit time if this is the xid
+     * that was looked up (or recorded) last.
+     */
+    if (TransactionIdEquals(transactionId, cachedGetCommitTimeXid))
+        return cachedGetCommitTime;
+
+    /*
+     * Locate the block of the time relation holding this xid and
+     * read its commit time.
+     */
+    TransComputeBlockNumber(TimeRelation, transactionId, &timeBlock);
+    when = TransBlockNumberGetCommitTime(TimeRelation,
+                                         timeBlock,
+                                         transactionId,
+                                         &lookupFailed);
+
+    if (lookupFailed)
+        return INVALID_ABSTIME;
+
+    /* cache the result before handing it back */
+    TransactionIdStore(transactionId, &cachedGetCommitTimeXid);
+    cachedGetCommitTime = when;
+    return when;
+}
+
+/* ----------------------------------------------------------------
+ *          transaction recovery code
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * TransRecover
+ *
+ *     perform transaction recovery checking.
+ *
+ * Note: this should only be performed if no other backends
+ *       are running.  This is known by the postmaster and
+ *       conveyed by the postmaster passing a "do recovery checking"
+ *       flag to the backend.
+ *
+ * here we get the last recorded transaction from the log,
+ * get the "last" and "next" transactions from the variable relation
+ * and then perform some integrity tests:
+ *
+ *     1) No transaction may exist higher than the "next" available
+ *         transaction recorded in the variable relation.  If this is the
+ *         case then it means either the log or the variable relation
+ *         has become corrupted.
+ *
+ *      2) The last committed transaction may not be higher than the
+ *         next available transaction for the same reason.
+ *
+ *      3) The last recorded transaction may not be lower than the
+ *         last committed transaction.  (the reverse is ok - it means
+ *         that some transactions have aborted since the last commit)
+ *
+ * Here is what the proper situation looks like.  The line
+ * represents the data stored in the log.  'c' indicates the
+ *      transaction was recorded as committed, 'a' indicates an
+ *      aborted transaction and '.' represents information not
+ *      recorded.  These may correspond to in progress transactions.
+ *
+ *      c  c  a  c  .  .  a  .  .  .  .  .  .  .  .  .  .
+ *           |                 |
+ *          last          next
+ *
+ * "next" is only incremented by GetNewTransactionId(), which
+ *      is called when transactions are started.  Hence if there
+ *      are commits or aborts after "next", then it means we committed
+ *      or aborted BEFORE we started the transaction.  This is the
+ * rationale behind constraint (1).
+ *
+ *      Likewise, "last" should never be greater than "next" for essentially
+ *      the same reason - it would imply we committed before we started.
+ *      This is the reasoning for (2).
+ *
+ * (3) implies we may never have a situation such as:
+ *
+ *      c  c  a  c  .  .  a  c  .  .  .  .  .  .  .  .  .
+ *           |                 |
+ *          last          next
+ *
+ *      where there is a 'c' greater than "last".
+ *
+ *      Recovery checking is more difficult in the case where
+ *      several backends are executing concurrently because the
+ * transactions may be executing in the other backends.
+ *      So, we only do recovery stuff when the backend is explicitly
+ *      passed a flag on the command line.
+ * --------------------------------
+ */
+void
+TransRecover(Relation logRelation)
+{
+    /*
+     * NOTE: the whole body below is compiled out by "#if 0", so this
+     * function currently does nothing with logRelation.  The disabled
+     * code is unfinished: it refers to names (logLastXid, varLastXid,
+     * varNextXid, fail, var) that are not declared in this function,
+     * and integrity tests (2) and (3) have no code.
+     */
+#if 0    
+    /* ----------------
+     *    first get the last recorded transaction in the log.
+     * ----------------
+     */
+    TransGetLastRecordedTransaction(logRelation, logLastXid, &fail);
+    if (fail == true)
+   elog(WARN, "TransRecover: failed TransGetLastRecordedTransaction");
+    
+    /* ----------------
+     *    next get the "last" and "next" variables
+     * ----------------
+     */
+    VariableRelationGetLastXid(&varLastXid);
+    VariableRelationGetNextXid(&varNextXid);
+    
+    /* ----------------
+     *    integrity test (1)
+     * ----------------
+     */
+    if (TransactionIdIsLessThan(varNextXid, logLastXid))
+   elog(WARN, "TransRecover: varNextXid < logLastXid");
+    
+    /* ----------------
+     *    integrity test (2)
+     * ----------------
+     */
+    
+    /* ----------------
+     *    integrity test (3)
+     * ----------------
+     */
+    
+    /* ----------------
+     *  here we have a valid "
+     *
+     *     **** RESUME HERE ****
+     * ----------------
+     */
+    varNextXid = TransactionIdDup(varLastXid);
+    TransactionIdIncrement(&varNextXid);
+    
+    VarPut(var, VAR_PUT_LASTXID, varLastXid);
+    VarPut(var, VAR_PUT_NEXTXID, varNextXid);
+#endif
+}
+
+/* ----------------------------------------------------------------
+ *         Interface functions
+ *
+ * InitializeTransactionLog
+ * ========
+ *    this function (called near cinit) initializes
+ *    the transaction log, time and variable relations.
+ *
+ * TransactionId DidCommit
+ * TransactionId DidAbort
+ * TransactionId IsInProgress
+ * ========
+ *    these functions test the transaction status of
+ *    a specified transaction id.
+ *
+ * TransactionId Commit
+ * TransactionId Abort
+ * TransactionId SetInProgress
+ * ========
+ *    these functions set the transaction status
+ *    of the specified xid. TransactionIdCommit() also
+ *    records the current time in the time relation
+ *    and updates the variable relation counter.
+ *
+ * ----------------------------------------------------------------
+ */
+
+/*
+ * InitializeTransactionLog --
+ * Initializes transaction logging.
+ *
+ * Opens the log, time and variable relations and publishes them in
+ * the globals LogRelation, TimeRelation and VariableRelation.  If
+ * AmiTransactionId is not yet recorded as committed, it is committed
+ * and the next xid is set to FirstTransactionId; otherwise, when
+ * recovery checking is enabled, TransRecover() is called.  Returns
+ * immediately, doing nothing, when AMI_OVERRIDE is set.
+ *
+ * The work runs with the transaction system overridden, inside
+ * TopMemoryContext; the part that inspects and updates the relations
+ * is bracketed by SpinAcquire/SpinRelease on OidGenLockId.
+ */
+void
+InitializeTransactionLog()
+{
+    Relation     logRelation;
+    Relation     timeRelation;
+    MemoryContext oldContext;
+    
+    /* ----------------
+     *    don't do anything during bootstrapping
+     * ----------------
+     */
+    if (AMI_OVERRIDE)
+   return;
+    
+    /* ----------------
+     *  disable the transaction system so the access methods
+     *   don't interfere during initialization.
+     * ----------------
+     */
+    OverrideTransactionSystem(true);
+    
+    /* ----------------
+     * make sure allocations occur within the top memory context
+     *  so that our log management structures are protected from
+     *  garbage collection at the end of every transaction.
+     * ----------------
+     */
+    oldContext = MemoryContextSwitchTo(TopMemoryContext); 
+    
+    /* ----------------
+     *   first open the log and time relations
+     *   (these are created by amiint so they are guaranteed to exist)
+     *
+     *   the variable relation is opened here as well and is stored
+     *   directly in the global VariableRelation.
+     * ----------------
+     */
+    logRelation =  heap_openr(LogRelationName);
+    timeRelation =     heap_openr(TimeRelationName);
+    VariableRelation =     heap_openr(VariableRelationName);
+    /* ----------------
+     *   XXX TransactionLogUpdate requires that LogRelation
+     *  and TimeRelation are valid so we temporarily set
+     *  them so we can initialize things properly.
+     *  This could be done cleaner.
+     * ----------------
+     */
+    LogRelation =  logRelation;
+    TimeRelation = timeRelation;
+    
+    /* ----------------
+     *   if we have a virgin database, we initialize the log and time
+     *  relation by committing the AmiTransactionId (id 512) and we
+     *   initialize the variable relation by setting the next available
+     *   transaction id to FirstTransactionId (id 514).  OID initialization
+     *   happens as a side effect of bootstrapping in varsup.c.
+     * ----------------
+     */
+    SpinAcquire(OidGenLockId);
+    if (!TransactionIdDidCommit(AmiTransactionId)) {
+   
+   /* ----------------
+    *  SOMEDAY initialize the information stored in
+    *          the headers of the log/time/variable relations.
+    * ----------------
+    */
+   TransactionLogUpdate(AmiTransactionId, XID_COMMIT);
+   VariableRelationPutNextXid(FirstTransactionId);
+   
+    } else if (RecoveryCheckingEnabled()) {
+   /* ----------------
+    *  if we have a pre-initialized database and if the
+    *  perform recovery checking flag was passed then we
+    *  do our database integrity checking.
+    * ----------------
+    */
+   TransRecover(logRelation);
+    }
+    /* ----------------
+     * end of the temporary assignment made above: the globals are
+     *  cleared again here and only set for good once the transaction
+     *  system has been re-enabled.
+     * ----------------
+     */
+    LogRelation =  (Relation) NULL;
+    TimeRelation = (Relation) NULL;
+    SpinRelease(OidGenLockId);
+    
+    /* ----------------
+     * now re-enable the transaction system
+     * ----------------
+     */
+    OverrideTransactionSystem(false);
+    
+    /* ----------------
+     * instantiate the global variables
+     * ----------------
+     */
+    LogRelation =  logRelation;
+    TimeRelation =     timeRelation;
+    
+    /* ----------------
+     * restore the memory context to the previous context
+     *  before we return from initialization.
+     * ----------------
+     */
+    MemoryContextSwitchTo(oldContext);
+}
+
+/* --------------------------------
+ * TransactionId DidCommit
+ * TransactionId DidAbort
+ * TransactionId IsInProgress
+ * --------------------------------
+ */
+
+/*
+ * TransactionIdDidCommit --
+ * Reports whether the given transaction is recorded as committed.
+ *
+ * While AMI_OVERRIDE is set the answer is always true and the log is
+ * not consulted; otherwise the answer is whatever
+ * TransactionLogTest() reports for XID_COMMIT.
+ *
+ * Note:
+ * Assumes transaction identifier is valid.
+ */
+bool   /* true if given transaction committed */
+TransactionIdDidCommit(TransactionId transactionId)
+{
+    bool committed = true;
+
+    if (!AMI_OVERRIDE)
+        committed = TransactionLogTest(transactionId, XID_COMMIT);
+
+    return committed;
+}
+
+/*
+ * TransactionIdDidAbort --
+ * Reports whether the given transaction is recorded as aborted.
+ *
+ * While AMI_OVERRIDE is set the answer is always false and the log
+ * is not consulted; otherwise the answer is whatever
+ * TransactionLogTest() reports for XID_ABORT.
+ *
+ * Note:
+ * Assumes transaction identifier is valid.
+ * XXX Is this unneeded?
+ */
+bool   /* true if given transaction aborted */
+TransactionIdDidAbort(TransactionId transactionId)
+{
+    bool aborted = false;
+
+    if (!AMI_OVERRIDE)
+        aborted = TransactionLogTest(transactionId, XID_ABORT);
+
+    return aborted;
+}
+
+/*
+ * TransactionIdIsInProgress --
+ * Reports whether the given transaction is recorded as in progress.
+ *
+ * While AMI_OVERRIDE is set the answer is always false and the log
+ * is not consulted; otherwise the answer is whatever
+ * TransactionLogTest() reports for XID_INPROGRESS.
+ */
+bool   /* true if given transaction neither committed nor aborted */
+TransactionIdIsInProgress(TransactionId transactionId)
+{
+    bool inProgress = false;
+
+    if (!AMI_OVERRIDE)
+        inProgress = TransactionLogTest(transactionId, XID_INPROGRESS);
+
+    return inProgress;
+}
+
+/* --------------------------------
+ * TransactionId Commit
+ * TransactionId Abort
+ * TransactionId SetInProgress
+ * --------------------------------
+ */
+
+/*
+ * TransactionIdCommit --
+ * Records the given transaction as committed.
+ *
+ * Nothing is recorded while AMI_OVERRIDE is set.
+ *
+ * Note:
+ * Assumes transaction identifier is valid.
+ */
+void
+TransactionIdCommit(TransactionId transactionId)
+{
+    if (!AMI_OVERRIDE) {
+        /*
+         * TransactionLogUpdate calls UpdateLastCommited(), which
+         * assumes exclusive access to pg_variable, so the update is
+         * bracketed by the OidGenLockId spin lock.
+         * -mer 18 Aug 1992
+         */
+        SpinAcquire(OidGenLockId);
+        TransactionLogUpdate(transactionId, XID_COMMIT);
+        SpinRelease(OidGenLockId);
+    }
+}
+
+/*
+ * TransactionIdAbort --
+ * Records the given transaction as aborted.
+ *
+ * Before touching the log this always clears BuildingBtree and, if
+ * VacuumRunning is set, calls vc_abort().  The log update itself is
+ * skipped while AMI_OVERRIDE is set.
+ *
+ * Note:
+ * Assumes transaction identifier is valid.
+ */
+void
+TransactionIdAbort(TransactionId transactionId)
+{
+    /* these resets happen even during bootstrapping */
+    BuildingBtree = false;
+    if (VacuumRunning) {
+        vc_abort();
+    }
+
+    if (!AMI_OVERRIDE) {
+        TransactionLogUpdate(transactionId, XID_ABORT);
+    }
+}
+
+/*
+ * TransactionIdSetInProgress --
+ * Records the given transaction as in progress.
+ *
+ * Nothing is recorded while AMI_OVERRIDE is set.
+ */
+void
+TransactionIdSetInProgress(TransactionId transactionId)
+{
+    if (!AMI_OVERRIDE) {
+        TransactionLogUpdate(transactionId, XID_INPROGRESS);
+    }
+}
diff --git a/src/backend/access/transam/transsup.c b/src/backend/access/transam/transsup.c

new file mode 100644 (file)

index 0000000..a1e5b17
--- /dev/null
+++ b/src/backend/access/transam/transsup.c
@@ -0,0 +1,663 @@
+/*-------------------------------------------------------------------------
+ *
+ * transsup.c--
+ *    postgres transaction access method support code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ * NOTES
+ *    This file contains support functions for the high
+ *    level access method interface routines found in transam.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "machine.h"       /* in port/ directory (needed for BLCKSZ) */
+
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+
+#include "utils/rel.h"
+#include "utils/elog.h"
+#include "utils/memutils.h"
+#include "utils/nabstime.h"
+
+#include "catalog/heap.h"
+#include "access/transam.h"    /* where the declarations go */
+#include "access/xact.h"   /* where the declarations go */
+
+#include "storage/smgr.h"
+
+/* ----------------------------------------------------------------
+ *           general support routines
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * AmiTransactionOverride
+ *
+ * This function is used to manipulate the bootstrap flag.
+ * It simply stores `flag` in AMI_OVERRIDE; routines in this file's
+ * callers that test AMI_OVERRIDE see the new value from then on.
+ * --------------------------------
+ */
+void
+AmiTransactionOverride(bool flag)
+{
+    AMI_OVERRIDE = flag;
+}
+
+/* --------------------------------
+ * TransComputeBlockNumber
+ *
+ * Stores in *blockNumberOutP the number of the block of `relation`
+ * that holds the entry for transactionId: the transaction id divided
+ * by the number of entries kept per block for that relation.
+ *
+ * Only LogRelation and TimeRelation are recognized.  Any other
+ * relation is reported through elog(WARN) and *blockNumberOutP is
+ * left untouched.
+ * --------------------------------
+ */
+void
+TransComputeBlockNumber(Relation relation, /* relation to test */
+           TransactionId transactionId, /* transaction id to test */
+           BlockNumber *blockNumberOutP)
+{
+    long   itemsPerBlock;
+    
+    /* ----------------
+     *  we calculate the block number of our transaction
+     *  by dividing the transaction id by the number of
+     *  transaction things per block.  
+     * ----------------
+     */
+    if (relation == LogRelation)
+        itemsPerBlock = TP_NumXidStatusPerBlock;
+    else if (relation == TimeRelation)
+        itemsPerBlock = TP_NumTimePerBlock;
+    else {
+        elog(WARN, "TransComputeBlockNumber: unknown relation");
+        /*
+         * elog(WARN) presumably does not return here (TODO confirm);
+         * return explicitly anyway so that we can never divide by an
+         * uninitialized itemsPerBlock.
+         */
+        return;
+    }
+    
+    /* ----------------
+     * warning! if the transaction id's get too large
+     *  then a BlockNumber may not be large enough to hold the results
+     *  of our division.
+     *
+     * XXX  this will all vanish soon when we implement an improved
+     *       transaction id schema -cim 3/23/90
+     *
+     *  This has vanished now that xid's are 4 bytes (no longer 5).
+     *  -mer 5/24/92
+     * ----------------
+     */
+    (*blockNumberOutP) = transactionId / itemsPerBlock;
+}
+
+
+/* ----------------------------------------------------------------
+ *          trans block support routines
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * TransBlockGetLastTransactionIdStatus
+ *
+ * This returns the status and transaction id of the last
+ * transaction information recorded on the given TransBlock.
+ *
+ * The block is scanned from its highest slot down to slot 0 and the
+ * scan stops at the first slot whose two status bits are not
+ * XID_INPROGRESS.  That status is returned and, when returnXidP is
+ * not NULL, baseXid plus the slot number is stored through it.
+ *
+ * If every slot is still XID_INPROGRESS, XID_INPROGRESS is returned
+ * and baseXid itself is stored through returnXidP (when not NULL);
+ * callers can recognize this case from the returned status.
+ * --------------------------------
+ */
+
+XidStatus
+TransBlockGetLastTransactionIdStatus(Block tblock,
+                    TransactionId baseXid,
+                    TransactionId *returnXidP)
+{
+    Index         index;
+    int           slot;
+    bits8         bit1;
+    bits8         bit2;
+    BitIndex      offset;
+    XidStatus     xstatus = XID_INPROGRESS;
+    
+    /* ----------------
+     * sanity check
+     * ----------------
+     */
+    Assert((tblock != NULL));
+    
+    /* ----------------
+     * search downward from the top of the block data, looking
+     *  for the first Non-in progress transaction status.  Since we
+     *  are scanning backward, this will be last recorded transaction
+     *  status on the block.
+     *
+     *  The loop counter is deliberately a signed int: with a counter
+     *  of an unsigned type (Index is presumably unsigned -- verify)
+     *  the test "counter >= 0" can never fail, so a block holding only
+     *  in-progress entries would wrap around and read past the block.
+     * ----------------
+     */
+    for (slot = (int) TP_NumXidStatusPerBlock - 1; slot >= 0; slot--) {
+        index =   (Index) slot;
+        offset =  BitIndexOf(index);
+        bit1 =    ((bits8) BitArrayBitIsSet((BitArray) tblock, offset++)) << 1;
+        bit2 =    (bits8)  BitArrayBitIsSet((BitArray) tblock, offset);
+        
+        xstatus =  (bit1 | bit2) ;
+        
+        /* ----------------
+         *  here we have the status of some transaction, so test
+         *  if the status is recorded as something other than
+         *  "in progress".  If so, then we save the transaction id
+         *  in the place specified by the caller and stop scanning.
+         * ----------------
+         */
+        if (xstatus != XID_INPROGRESS) {
+            if (returnXidP != NULL) {
+                TransactionIdStore(baseXid, returnXidP);
+                TransactionIdAdd(returnXidP, index);
+            }
+            break;
+        }
+    }
+    
+    /* ----------------
+     * if the scan ran off the bottom of the block (slot < 0) we
+     *  couldn't find a non-inprogress transaction on the block.
+     *  For now we just return this info to the user.  They can check
+     *  if the return status is "in progress" to know this condition
+     *  has arisen.
+     * ----------------
+     */
+    if (slot < 0) {
+        if (returnXidP != NULL)
+            TransactionIdStore(baseXid, returnXidP);
+    }
+    
+    /* ----------------
+     * return the status to the user
+     * ----------------
+     */
+    return xstatus;
+}
+
+/* --------------------------------
+ * TransBlockGetXidStatus
+ *
+ * Looks up the two status bits kept for transactionId on the given
+ * block and returns them combined as an XidStatus.  A NULL block
+ * yields XID_INVALID.
+ * --------------------------------
+ */
+
+XidStatus
+TransBlockGetXidStatus(Block tblock,
+              TransactionId transactionId)
+{
+    Index      slot;
+    BitIndex   bitPos;
+    bits8      highBit;
+    bits8      lowBit;
+    
+    /* nothing to look at without a block */
+    if (tblock == NULL)
+        return XID_INVALID;
+    
+    /* ----------------
+     * the slot of a transaction within its block is the xid modulo
+     *  the number of status entries per block.
+     *
+     *  (the old 3/23/90 scheme was replaced by this one -mer 5/24/92)
+     * ----------------
+     */
+    slot = transactionId % TP_NumXidStatusPerBlock;
+    bitPos = BitIndexOf(slot);
+    
+    /* first bit of the pair is the high-order bit of the status */
+    highBit = ((bits8) BitArrayBitIsSet((BitArray) tblock, bitPos++)) << 1;
+    lowBit = (bits8) BitArrayBitIsSet((BitArray) tblock, bitPos);
+    
+    return (XidStatus) (highBit | lowBit);
+}
+
+/* --------------------------------
+ * TransBlockSetXidStatus
+ *
+ * Writes the two status bits kept for transactionId on the given
+ * block: XID_COMMIT is stored as 10, XID_ABORT as 01 and
+ * XID_INPROGRESS as 00.  Any other status is reported with
+ * elog(NOTICE) and the block is left unchanged.  A NULL block is
+ * silently ignored.
+ * --------------------------------
+ */
+void
+TransBlockSetXidStatus(Block tblock,
+              TransactionId transactionId, 
+              XidStatus xstatus)
+{
+    Index      slot;
+    BitIndex   firstBit;
+    
+    if (tblock == NULL)
+        return;
+    
+    /* ----------------
+     * the slot of a transaction within its block is the xid modulo
+     *  the number of status entries per block.
+     *
+     *  (the old 3/23/90 scheme was replaced by this one -mer 5/24/92)
+     * ----------------
+     */
+    slot = transactionId % TP_NumXidStatusPerBlock;
+    firstBit = BitIndexOf(slot);
+    
+    if (xstatus == XID_COMMIT) {
+        /* set 10 */
+        BitArraySetBit((BitArray) tblock, firstBit);
+        BitArrayClearBit((BitArray) tblock, firstBit + 1);
+    } else if (xstatus == XID_ABORT) {
+        /* set 01 */
+        BitArrayClearBit((BitArray) tblock, firstBit);
+        BitArraySetBit((BitArray) tblock, firstBit + 1);
+    } else if (xstatus == XID_INPROGRESS) {
+        /* set 00 */
+        BitArrayClearBit((BitArray) tblock, firstBit);
+        BitArrayClearBit((BitArray) tblock, firstBit + 1);
+    } else {
+        elog(NOTICE,
+             "TransBlockSetXidStatus: invalid status: %d (ignored)",
+             xstatus);
+    }
+}
+
+/* --------------------------------
+ * TransBlockGetCommitTime
+ *
+ * Treats the block as an array of AbsoluteTime and returns the entry
+ * kept for transactionId.  A NULL block yields INVALID_ABSTIME.
+ * --------------------------------
+ */
+AbsoluteTime
+TransBlockGetCommitTime(Block tblock,
+           TransactionId transactionId)
+{
+    Index      slot;
+    
+    if (tblock == NULL)
+        return INVALID_ABSTIME;
+    
+    /* ----------------
+     * the slot of a transaction within its block is the xid modulo
+     *  the number of commit times per block.
+     *
+     *  (the old 3/23/90 scheme was replaced by this one -mer 5/24/92)
+     * ----------------
+     */
+    slot = transactionId % TP_NumTimePerBlock;
+    
+    return (AbsoluteTime) ((AbsoluteTime *) tblock)[ slot ];
+}
+
+/* --------------------------------
+ * TransBlockSetCommitTime
+ *
+ * Treats the block as an array of AbsoluteTime and overwrites the
+ * entry kept for transactionId with commitTime.  A NULL block is
+ * silently ignored.
+ * --------------------------------
+ */
+void
+TransBlockSetCommitTime(Block tblock,
+           TransactionId transactionId,
+           AbsoluteTime commitTime)
+{
+    Index      slot;
+    
+    if (tblock == NULL)
+        return;
+    
+    /* ----------------
+     * the slot of a transaction within its block is the xid modulo
+     *  the number of commit times per block.
+     *
+     *  (the old 3/23/90 scheme was replaced by this one -mer 5/24/92)
+     * ----------------
+     */
+    slot = transactionId % TP_NumTimePerBlock;
+    
+    ((AbsoluteTime *) tblock)[ slot ] = commitTime;
+}
+
+/* ----------------------------------------------------------------
+ *            transam i/o support routines
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * TransBlockNumberGetXidStatus
+ *
+ * Reads block blockNumber of the relation under a read lock and
+ * returns the status recorded there for xid.  *failP, when failP is
+ * not NULL, is always set to false for now.
+ * --------------------------------
+ */
+XidStatus
+TransBlockNumberGetXidStatus(Relation relation,
+                BlockNumber blockNumber,
+                TransactionId xid,
+                bool *failP)
+{
+    bool       unusedFail;     /* stands in for *failP when failP is NULL */
+    Buffer     buf;
+    Block      page;
+    XidStatus  result;
+    
+    /* read-lock the relation (in place since 5 Aug 1991 -mer) */
+    RelationSetLockForRead(relation);
+    
+    /* fetch the page that carries the status bits */
+    buf = ReadBuffer(relation, blockNumber);
+    page = BufferGetBlock(buf);
+    
+    /* failure reporting is not implemented yet: always "no failure" */
+    if (failP == NULL)
+        failP = &unusedFail;
+    *failP = false;
+    
+    result = TransBlockGetXidStatus(page, xid);
+    
+    /* done with the page, then with the lock */
+    ReleaseBuffer(buf);
+    RelationUnsetLockForRead(relation);
+    
+    return result;
+}
+
+/* --------------------------------
+ * TransBlockNumberSetXidStatus
+ *
+ * Under a write lock, reads block blockNumber of the relation, stores
+ * xstatus for xid on it and hands the buffer to WriteBuffer().
+ * *failP, when failP is not NULL, is always set to false for now.
+ * --------------------------------
+ */
+void
+TransBlockNumberSetXidStatus(Relation relation,
+                BlockNumber blockNumber,
+                TransactionId xid,
+                XidStatus xstatus,
+                bool *failP)
+{
+    bool       unusedFail;     /* stands in for *failP when failP is NULL */
+    Buffer     buf;
+    Block      page;
+    
+    /* write-lock the relation (in place since 5 Aug 1991 -mer) */
+    RelationSetLockForWrite(relation);
+    
+    /* fetch the page that carries the status bits */
+    buf = ReadBuffer(relation, blockNumber);
+    page = BufferGetBlock(buf);
+    
+    /* failure reporting is not implemented yet: always "no failure" */
+    if (failP == NULL)
+        failP = &unusedFail;
+    *failP = false;
+    
+    TransBlockSetXidStatus(page, xid, xstatus);
+    
+    /* a failed update would only release the buffer; success writes it */
+    if (*failP)
+        ReleaseBuffer(buf);
+    else
+        WriteBuffer(buf);
+    
+    RelationUnsetLockForWrite(relation);
+}
+
+/* --------------------------------
+ * TransBlockNumberGetCommitTime
+ *
+ * Reads block blockNumber of the relation under a read lock and
+ * returns the commit time recorded there for xid.  *failP, when
+ * failP is not NULL, is always set to false for now; were it true,
+ * INVALID_ABSTIME would be returned instead.
+ * --------------------------------
+ */
+AbsoluteTime
+TransBlockNumberGetCommitTime(Relation relation,
+                 BlockNumber blockNumber,
+                 TransactionId xid,
+                 bool *failP)
+{
+    bool           unusedFail; /* stands in for *failP when failP is NULL */
+    Buffer         buf;
+    Block          page;
+    AbsoluteTime   commitTime;
+    
+    /* read-lock the relation (in place since 5 Aug. 1991 -mer) */
+    RelationSetLockForRead(relation);
+    
+    /* fetch the page that carries the commit time */
+    buf = ReadBuffer(relation, blockNumber);
+    page = BufferGetBlock(buf);
+    
+    /* failure reporting is not implemented yet: always "no failure" */
+    if (failP == NULL)
+        failP = &unusedFail;
+    *failP = false;
+    
+    commitTime = TransBlockGetCommitTime(page, xid);
+    
+    /* done with the page, then with the lock */
+    ReleaseBuffer(buf);
+    RelationUnsetLockForRead(relation);
+    
+    if (*failP)
+        return INVALID_ABSTIME;
+    return commitTime;
+}
+
+/* --------------------------------
+ * TransBlockNumberSetCommitTime
+ *
+ * Under a write lock, reads block blockNumber of the relation, stores
+ * xtime as the commit time of xid on it and hands the buffer to
+ * WriteBuffer().  *failP, when failP is not NULL, is always set to
+ * false for now.
+ * --------------------------------
+ */
+void
+TransBlockNumberSetCommitTime(Relation relation,
+                 BlockNumber blockNumber,
+                 TransactionId xid,
+                 AbsoluteTime xtime,
+                 bool *failP)
+{
+    bool       unusedFail;     /* stands in for *failP when failP is NULL */
+    Buffer     buf;
+    Block      page;
+    
+    /* write-lock the relation (in place since 5 Aug. 1991 -mer) */
+    RelationSetLockForWrite(relation);
+    
+    /* fetch the page that carries the commit time */
+    buf = ReadBuffer(relation, blockNumber);
+    page = BufferGetBlock(buf);
+    
+    /* failure reporting is not implemented yet: always "no failure" */
+    if (failP == NULL)
+        failP = &unusedFail;
+    *failP = false;
+    
+    TransBlockSetCommitTime(page, xid, xtime);
+    
+    /* a failed update would only release the buffer; success writes it */
+    if (*failP)
+        ReleaseBuffer(buf);
+    else
+        WriteBuffer(buf);
+    
+    RelationUnsetLockForWrite(relation);
+}
+
+/* --------------------------------
+ * TransGetLastRecordedTransaction
+ *
+ * Inspects the last block of the relation for the last recorded
+ * (non in-progress) transaction status.  *failP is set to true when
+ * the relation has no blocks and to false otherwise; failP may be
+ * NULL if the caller does not care.
+ *
+ * NOTE: xid is passed by value, so the transaction id found is
+ * written only to this function's local copy and never reaches the
+ * caller (see the XXX remark below).  Fixing that needs a signature
+ * change and is therefore not attempted here.
+ * --------------------------------
+ */
+void
+TransGetLastRecordedTransaction(Relation relation,
+               TransactionId xid, /* return: transaction id */
+               bool *failP)
+{
+    BlockNumber        blockNumber;    /* block number */
+    Buffer     buffer;     /* buffer associated with block */
+    Block      block;      /* block containing xid status */
+    BlockNumber        n;      /* number of blocks in the relation */
+    TransactionId  baseXid;
+    bool       localfail;  /* bool used if failP = NULL */
+    
+    /* tolerate a NULL failP, as the TransBlockNumber* routines do */
+    if (failP == NULL)
+        failP = &localfail;
+    (*failP) = false;
+    
+    /* ----------------
+     * SOMEDAY gain exclusive access to the log relation
+     *
+     *  That someday is today 5 Aug. 1991 -mer
+     *  It looks to me like we only need to set a read lock here, despite
+     *  the above comment about exclusive access.  The block is never 
+     *  actually written into, we only check status bits.
+     * ----------------
+     */
+    RelationSetLockForRead(relation);
+    
+    /* ----------------
+     * we assume the last block of the log contains the last
+     *  recorded transaction.  If the relation is empty we return
+     *  failure to the user -- after giving the read lock back, so
+     *  that this exit path does not leave it set.
+     * ----------------
+     */
+    n = RelationGetNumberOfBlocks(relation);
+    if (n == 0) {
+        (*failP) = true;
+        RelationUnsetLockForRead(relation);
+        return;
+    }
+    
+    /* ----------------
+     * get the block containing the transaction information
+     * ----------------
+     */
+    blockNumber =  n-1;
+    buffer =   ReadBuffer(relation, blockNumber);
+    block =    BufferGetBlock(buffer);
+    
+    /* ----------------
+     * get the last xid on the block
+     * ----------------
+     */
+    baseXid = blockNumber * TP_NumXidStatusPerBlock;
+
+/* XXX ???? xid won't get returned! - AY '94 */
+    (void) TransBlockGetLastTransactionIdStatus(block, baseXid, &xid);
+    
+    ReleaseBuffer(buffer);
+    
+    /* ----------------
+     * release our lock on the log relation
+     * ----------------
+     */
+    RelationUnsetLockForRead(relation);
+}
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c

new file mode 100644 (file)

index 0000000..a53cc7d
--- /dev/null
+++ b/src/backend/access/transam/varsup.c
@@ -0,0 +1,606 @@
+/*-------------------------------------------------------------------------
+ *
+ * varsup.c--
+ *    postgres variable relation support routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <math.h>
+#include "postgres.h"
+
+#include "machine.h"       /* in port/ directory (needed for BLCKSZ) */
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"   /* for OIDGENLOCKID */
+
+#include "utils/rel.h"
+#include "utils/elog.h"
+
+#include "access/heapam.h"
+#include "access/transam.h"    /* where the declarations go */
+#include "access/xact.h"   /* where the declarations go */
+
+#include "catalog/catname.h"
+
+/* ----------
+ *      note: we reserve the first 16384 object ids for internal use.
+ *      oid's less than this appear in the .bki files.  the choice of
+ *      16384 is completely arbitrary.
+ *
+ *      VariableRelationGetNextOid hands this value out when the oid
+ *      stored in the variable relation is not valid.
+ * ----------
+ */
+#define BootstrapObjectIdData 16384
+
+/* ---------------------
+ * spin lock for oid generation
+ *
+ * The VariableRelation* routines below assume their caller already
+ * holds this lock; they release it with SpinRelease() before
+ * reporting a failure through elog(WARN).
+ * ---------------------
+ */
+int OidGenLockId;
+
+/* ----------------------------------------------------------------
+ *       variable relation query/update routines
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * VariableRelationGetNextXid
+ *
+ * Copies the nextXidData field of block 0 of the variable relation
+ * into *xidP.  Does nothing while VariableRelation is not valid.
+ * The caller is assumed to hold the spin lock that guarantees
+ * exclusive access to the variable relation.
+ * --------------------------------
+ */
+void
+VariableRelationGetNextXid(TransactionId *xidP)
+{
+    Buffer                     varBuf;
+    VariableRelationContents   contents;
+    
+    /* only act once things are initialized */
+    if (RelationIsValid(VariableRelation)) {
+        varBuf = ReadBuffer(VariableRelation, 0);
+        
+        /* give the spin lock back before complaining */
+        if (! BufferIsValid(varBuf)) {
+            SpinRelease(OidGenLockId);
+            elog(WARN, "VariableRelationGetNextXid: ReadBuffer failed");
+        }
+        
+        contents = (VariableRelationContents) BufferGetBlock(varBuf);
+        TransactionIdStore(contents->nextXidData, xidP);
+        
+        /* the page was only read, so a plain release is enough */
+        ReleaseBuffer(varBuf);
+    }
+}
+
+/* --------------------------------
+ * VariableRelationGetLastXid
+ *
+ * Copies the lastXidData field of block 0 of the variable relation
+ * into *xidP.  Does nothing while VariableRelation is not valid.
+ * --------------------------------
+ */
+void
+VariableRelationGetLastXid(TransactionId *xidP)
+{
+    Buffer buf;
+    VariableRelationContents var;
+    
+    /* ----------------
+     * We assume that a spinlock has been acquired to guarantee
+     * exclusive access to the variable relation.
+     * ----------------
+     */
+    
+    /* ----------------
+     * do nothing before things are initialized
+     * ----------------
+     */
+    if (! RelationIsValid(VariableRelation))
+        return;
+    
+    /* ----------------
+     * read the variable page, get the lastXid field and
+     *  release the buffer
+     * ----------------
+     */
+    buf = ReadBuffer(VariableRelation, 0);
+    
+    if (! BufferIsValid(buf))
+    {
+        /* give the spin lock back before complaining */
+        SpinRelease(OidGenLockId);
+        /* message now names this routine, not GetNextXid */
+        elog(WARN, "VariableRelationGetLastXid: ReadBuffer failed");
+    }
+    
+    var = (VariableRelationContents) BufferGetBlock(buf);
+    
+    TransactionIdStore(var->lastXidData, xidP);
+    
+    ReleaseBuffer(buf);
+}
+
+/* --------------------------------
+ * VariableRelationPutNextXid
+ *
+ * Stores xid in the nextXidData field of block 0 of the variable
+ * relation and hands the buffer to WriteBuffer().  Does nothing
+ * while VariableRelation is not valid.  The caller is assumed to
+ * hold the spin lock that guarantees exclusive access to the
+ * variable relation.
+ * --------------------------------
+ */
+void
+VariableRelationPutNextXid(TransactionId xid)
+{
+    Buffer                     varBuf;
+    VariableRelationContents   contents;
+    
+    /* only act once things are initialized */
+    if (RelationIsValid(VariableRelation)) {
+        varBuf = ReadBuffer(VariableRelation, 0);
+        
+        /* give the spin lock back before complaining */
+        if (! BufferIsValid(varBuf)) {
+            SpinRelease(OidGenLockId);
+            elog(WARN, "VariableRelationPutNextXid: ReadBuffer failed");
+        }
+        
+        contents = (VariableRelationContents) BufferGetBlock(varBuf);
+        TransactionIdStore(xid, &(contents->nextXidData));
+        
+        /* the page was modified, so write it rather than release it */
+        WriteBuffer(varBuf);
+    }
+}
+
+/* --------------------------------
+ * VariableRelationPutLastXid
+ *
+ * Stores xid in the lastXidData field of block 0 of the variable
+ * relation and hands the buffer to WriteBuffer().  Does nothing
+ * while VariableRelation is not valid.  The caller is assumed to
+ * hold the spin lock that guarantees exclusive access to the
+ * variable relation.
+ * --------------------------------
+ */
+void
+VariableRelationPutLastXid(TransactionId xid)
+{
+    Buffer                     varBuf;
+    VariableRelationContents   contents;
+    
+    /* only act once things are initialized */
+    if (RelationIsValid(VariableRelation)) {
+        varBuf = ReadBuffer(VariableRelation, 0);
+        
+        /* give the spin lock back before complaining */
+        if (! BufferIsValid(varBuf)) {
+            SpinRelease(OidGenLockId);
+            elog(WARN, "VariableRelationPutLastXid: ReadBuffer failed");
+        }
+        
+        contents = (VariableRelationContents) BufferGetBlock(varBuf);
+        TransactionIdStore(xid, &(contents->lastXidData));
+        
+        /* the page was modified, so write it rather than release it */
+        WriteBuffer(varBuf);
+    }
+}
+
+/* --------------------------------
+ * VariableRelationGetNextOid
+ *
+ * Returns through *oid_return the nextOid field of block 0 of the
+ * variable relation, or BootstrapObjectIdData when that stored oid is
+ * not valid.  While VariableRelation is not valid, InvalidOid is
+ * returned instead.  A NULL oid_return is tolerated; nothing is
+ * stored in that case.
+ * --------------------------------
+ */
+void
+VariableRelationGetNextOid(Oid *oid_return)
+{
+    Buffer buf;
+    VariableRelationContents var;
+    
+    /* ----------------
+     * We assume that a spinlock has been acquired to guarantee
+     * exclusive access to the variable relation.
+     * ----------------
+     */
+    
+    /* ----------------
+     * if the variable relation is not initialized, then we
+     *  assume we are running at bootstrap time and so we return
+     *  an invalid object id -- during this time GetNextBootstrapObjectId
+     *  should be called instead..
+     * ----------------
+     */
+    if (! RelationIsValid(VariableRelation)) {
+        if (PointerIsValid(oid_return))
+            (*oid_return) = InvalidOid;
+        return;
+    }
+    
+    /* ----------------
+     * read the variable page, get the nextOid field and
+     *  release the buffer
+     * ----------------
+     */
+    buf = ReadBuffer(VariableRelation, 0);
+    
+    if (! BufferIsValid(buf))
+    {
+        /* give the spin lock back before complaining */
+        SpinRelease(OidGenLockId);
+        /* message now names this routine, not GetNextXid */
+        elog(WARN, "VariableRelationGetNextOid: ReadBuffer failed");
+    }
+    
+    var = (VariableRelationContents) BufferGetBlock(buf);
+    
+    if (PointerIsValid(oid_return)) {
+        
+        /* ----------------
+         * nothing up my sleeve...  what's going on here is that this code
+         * is guaranteed never to be called until all files in data/base/
+         * are created, and the template database exists.  at that point,
+         * we want to append a pg_database tuple.  the first time we do
+         * this, the oid stored in pg_variable will be bogus, so we use
+         * a bootstrap value defined at the top of this file.
+         *
+         * this comment no longer holds true.  This code is called before
+         * all of the files in data/base are created and you can't rely
+         * on system oid's to be less than BootstrapObjectIdData. mer 9/18/91
+         * ----------------
+         */
+        if (OidIsValid(var->nextOid))
+            (*oid_return) = var->nextOid;
+        else
+            (*oid_return) = BootstrapObjectIdData;
+    }
+    
+    ReleaseBuffer(buf);
+}
+
+/* --------------------------------
+ * VariableRelationPutNextOid
+ *
+ * Stores *oidP in the nextOid field of block 0 of the variable
+ * relation and hands the buffer to WriteBuffer().  Does nothing
+ * while VariableRelation is not valid.  An invalid oidP pointer is
+ * reported through elog(WARN).
+ * --------------------------------
+ */
+void
+VariableRelationPutNextOid(Oid *oidP)
+{
+    Buffer buf;
+    VariableRelationContents var;
+    
+    /* ----------------
+     * We assume that a spinlock has been acquired to guarantee
+     * exclusive access to the variable relation.
+     * ----------------
+     */
+    
+    /* ----------------
+     * do nothing before things are initialized
+     * ----------------
+     */
+    if (! RelationIsValid(VariableRelation))
+        return;
+    
+    /* ----------------
+     * sanity check
+     * ----------------
+     */
+    if (! PointerIsValid(oidP))
+    {
+        /* give the spin lock back before complaining */
+        SpinRelease(OidGenLockId);
+        elog(WARN, "VariableRelationPutNextOid: invalid oid pointer");
+    }
+    
+    /* ----------------
+     * read the variable page, update the nextOid field and
+     *  write the page back out to disk.
+     * ----------------
+     */
+    buf = ReadBuffer(VariableRelation, 0);
+    
+    if (! BufferIsValid(buf))
+    {
+        /* give the spin lock back before complaining */
+        SpinRelease(OidGenLockId);
+        /* message now names this routine, not PutNextXid */
+        elog(WARN, "VariableRelationPutNextOid: ReadBuffer failed");
+    }
+    
+    var = (VariableRelationContents) BufferGetBlock(buf);
+    
+    var->nextOid = (*oidP);
+    
+    WriteBuffer(buf);
+}
+
+/* ----------------------------------------------------------------
+ *     transaction id generation support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * GetNewTransactionId
+ *
+ * In the version 2 transaction system, transaction id's are
+ * restricted in several ways.
+ *
+ * First, all transaction id's are even numbers (4, 88, 121342, etc).
+ * This means the binary representation of the number will never
+ * have the least significant bit set.  This bit is reserved to
+ * indicate that the transaction id does not in fact hold an XID,
+ * but rather a commit time.  This makes it possible for the
+ * vacuum daemon to discard information from the log and time
+ * relations for committed tuples.  This is important when archiving
+ * tuples to an optical disk because tuples with commit times
+ * stored in their xid fields will not need to consult the log
+ * and time relations.
+ *
+ * Second, since we may someday perform compression of the data
+ * in the log and time relations, we cause the numbering of the
+ * transaction ids to begin at 512.  This means that some space
+ * on the page of the log and time relations corresponding to
+ * transaction id's 0 - 510 will never be used.  This space is
+ * in fact used to store the version number of the postgres
+ * transaction log and will someday store compression information
+ * about the log.
+ *
+ * Lastly, rather than access the variable relation each time
+ * a backend requests a new transaction id, we "prefetch" 32
+ * transaction id's by incrementing the nextXid stored in the
+ * var relation by 32 (when only even xid's were legal the step
+ * was 64; both even and odd xid's are handed out now) and then
+ * returning these id's one at a time until they are exhausted.
+ *     This means we reduce the number of accesses to the variable
+ * relation by a factor of 32 for each backend.
+ *
+ *     Note:  32 has no special significance.  We don't want the
+ *        number to be too large because when the backend
+ *        terminates, we lose the xid's we cached.
+ *
+ * ----------------
+ */
+
+#define VAR_XID_PREFETCH   32
+
+/* number of prefetched xids still available to this backend */
+static int prefetched_xid_count = 0;
+/* the xid that will be handed out by the next call */
+static TransactionId next_prefetched_xid;
+
+/*
+ * GetNewTransactionId
+ *
+ * Store a fresh transaction id in *xid.
+ *
+ * While AMI_OVERRIDE is set, AmiTransactionId is stored and nothing
+ * else happens.  Otherwise ids are served from a locally cached block
+ * of VAR_XID_PREFETCH ids; when the block is empty a new one is
+ * reserved from the variable relation while holding OidGenLockId.
+ */
+void
+GetNewTransactionId(TransactionId *xid)
+{
+    /* bootstrap mode: always hand back the special bootstrap xid */
+    if (AMI_OVERRIDE) {
+        TransactionIdStore(AmiTransactionId, xid);
+        return;
+    }
+
+    if (prefetched_xid_count == 0) {
+        TransactionId fetched;
+
+        /*
+         * The local block is used up.  With exclusive access to the
+         * variable relation page, read its "next" xid; that value
+         * becomes the first id of our new block.
+         */
+        SpinAcquire(OidGenLockId);
+        VariableRelationGetNextXid(&fetched);
+        TransactionIdStore(fetched, &next_prefetched_xid);
+
+        /*
+         * Advance the stored "next" xid by the size of the block we
+         * just reserved and write it back before dropping the lock.
+         */
+        prefetched_xid_count = VAR_XID_PREFETCH;
+        TransactionIdAdd(&fetched, prefetched_xid_count);
+        VariableRelationPutNextXid(fetched);
+        SpinRelease(OidGenLockId);
+    }
+
+    /*
+     * Hand out the next cached xid and step the cache by one.
+     * (Xids used to be even-only, with the low bit marking commit
+     * status; that is no longer the case, so both even and odd
+     * values are used. -mer 5/26/92)
+     */
+    TransactionIdStore(next_prefetched_xid, xid);
+    TransactionIdAdd(&next_prefetched_xid, 1);
+    prefetched_xid_count--;
+}
+
+/* ----------------
+ * UpdateLastCommittedXid
+ *
+ * Record xid as the "last committed" transaction id in the variable
+ * relation, but only when the id currently recorded there is less
+ * than xid.
+ *
+ * The spinlock OidGenLockId is assumed to have been acquired
+ * before this function is entered.
+ * ----------------
+ */
+
+void
+UpdateLastCommittedXid(TransactionId xid)
+{
+    TransactionId recorded;
+
+    /* fetch the id currently stored on the variable relation page */
+    VariableRelationGetLastXid(&recorded);
+
+    /* nothing to do unless xid is newer than what is recorded */
+    if (! TransactionIdIsLessThan(recorded, xid))
+        return;
+
+    VariableRelationPutLastXid(xid);
+}
+
+/* ----------------------------------------------------------------
+ *         object id generation support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * GetNewObjectIdBlock
+ *
+ * Reserve a block of oid_block_size consecutive object ids.
+ * The first id of the block is stored in *oid_return (when that
+ * pointer is valid) and the variable relation's next oid is moved
+ * past the block.  Applications wishing to do their own object id
+ * assignments should use this.
+ *
+ * The whole read-modify-write runs under the OidGenLockId spinlock
+ * (-mer 1992).
+ * ----------------
+ */
+void
+GetNewObjectIdBlock(Oid *oid_return, /* place to return the new object id */
+                    int oid_block_size) /* number of oids desired */
+{
+    Oid block_start;
+    Oid block_end;
+
+    /* obtain exclusive access to the variable relation page */
+    SpinAcquire(OidGenLockId);
+
+    /* the current "next" oid is the first oid of the caller's block */
+    VariableRelationGetNextOid(&block_start);
+    if (PointerIsValid(oid_return))
+        *oid_return = block_start;
+
+    /* the stored "next" oid now has to lie past the reserved block */
+    block_end = block_start + oid_block_size;
+    VariableRelationPutNextOid(&block_end);
+
+    /* relinquish our lock on the variable relation page */
+    SpinRelease(OidGenLockId);
+}
+
+/* ----------------
+ * GetNewObjectId
+ *
+ * This function allocates and parses out object ids.  Like
+ * GetNewTransactionId(), it "prefetches" 32 object ids by
+ * incrementing the nextOid stored in the var relation by 32 and then
+ * returning these id's one at a time until they are exhausted.
+ *     This means we reduce the number of accesses to the variable
+ * relation by 32 for each backend.
+ *
+ *     Note:  32 has no special significance.  We don't want the
+ *        number to be too large because when the backend
+ *        terminates, we lose the oids we cached.
+ *
+ * ----------------
+ */
+
+#define VAR_OID_PREFETCH   32
+
+/* number of prefetched oids still available to this backend */
+static int prefetched_oid_count = 0;
+/* the oid that will be handed out by the next call */
+static Oid next_prefetched_oid;
+
+/*
+ * GetNewObjectId
+ *
+ * Store the next cached object id in *oid_return (when that pointer
+ * is valid) and consume it.  When the cache is empty, a new block of
+ * VAR_OID_PREFETCH oids is reserved through GetNewObjectIdBlock().
+ */
+void
+GetNewObjectId(Oid *oid_return)    /* place to return the new object id */
+{
+    if (prefetched_oid_count == 0) {
+        /*
+         * The variable relation has to be open before a block can be
+         * reserved from it; open it here if nobody has done so yet.
+         */
+        if (! RelationIsValid(VariableRelation))
+            VariableRelation = heap_openr(VariableRelationName);
+
+        /* refill the cache with a fresh block of object ids */
+        GetNewObjectIdBlock(&next_prefetched_oid, VAR_OID_PREFETCH);
+        prefetched_oid_count = VAR_OID_PREFETCH;
+    }
+
+    /* hand out the next cached oid ... */
+    if (PointerIsValid(oid_return))
+        *oid_return = next_prefetched_oid;
+
+    /* ... and consume it, whether or not the caller wanted the value */
+    next_prefetched_oid++;
+    prefetched_oid_count--;
+}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c

new file mode 100644 (file)

index 0000000..1798d09
--- /dev/null
+++ b/src/backend/access/transam/xact.c
@@ -0,0 +1,1314 @@
+/*-------------------------------------------------------------------------
+ *
+ * xact.c--
+ *    top level transaction system support routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $
+ * 
+ * NOTES
+ * Transaction aborts can now occur two ways:
+ *
+ * 1)  system dies from some internal cause  (Assert, etc..)
+ * 2)  user types abort
+ *
+ * These two cases used to be treated identically, but now
+ * we need to distinguish them.  Why?  consider the following
+ * two situations:
+ *
+ *     case 1              case 2
+ *     ------              ------
+ * 1) user types BEGIN     1) user types BEGIN
+ * 2) user does something      2) user does something
+ * 3) user does not like what  3) system aborts for some reason
+ *    she sees and types ABORT
+ *
+ * In case 1, we want to abort the transaction and return to the
+ * default state.  In case 2, there may be more commands coming
+ * our way which are part of the same transaction block and we have
+ * to ignore these commands until we see an END transaction.
+ *
+ * Internal aborts are now handled by AbortTransactionBlock(), just as
+ * they always have been, and user aborts are now handled by
+ * UserAbortTransactionBlock().  Both of them rely on AbortTransaction()
+ * to do all the real work.  The only difference is what state we
+ * enter after AbortTransaction() does its work:
+ * 
+ * * AbortTransactionBlock() leaves us in TBLOCK_ABORT and
+ * * UserAbortTransactionBlock() leaves us in TBLOCK_ENDABORT
+ * 
+ *   NOTES
+ * This file is an attempt at a redesign of the upper layer
+ * of the V1 transaction system which was too poorly thought
+ * out to describe.  This new system hopes to be both simpler
+ * in design, simpler to extend and needs to contain added
+ * functionality to solve problems beyond the scope of the V1
+ * system.  (In particular, communication of transaction
+ * information between parallel backends has to be supported)
+ *
+ * The essential aspects of the transaction system are:
+ *
+ *     o  transaction id generation
+ *     o  transaction log updating
+ *     o  memory cleanup
+ *     o  cache invalidation
+ *     o  lock cleanup
+ *
+ * Hence, the functional division of the transaction code is
+ * based on what of the above things need to be done during
+ * a start/commit/abort transaction.  For instance, the
+ * routine AtCommit_Memory() takes care of all the memory
+ * cleanup stuff done at commit time.
+ *
+ * The code is layered as follows:
+ *
+ *     StartTransaction
+ *     CommitTransaction
+ *     AbortTransaction
+ *     UserAbortTransaction
+ *
+ * are provided to do the lower level work like recording
+ * the transaction status in the log and doing memory cleanup.
+ * above these routines are another set of functions:
+ *
+ *     StartTransactionCommand
+ *     CommitTransactionCommand
+ *     AbortCurrentTransaction
+ *
+ * These are the routines used in the postgres main processing
+ * loop.  They are sensitive to the current transaction block state
+ * and make calls to the lower level routines appropriately.
+ *
+ * Support for transaction blocks is provided via the functions:
+ *
+ *     StartTransactionBlock
+ *     CommitTransactionBlock
+ *     AbortTransactionBlock
+ *
+ * These are invoked only in response to a user "BEGIN", "END",
+ * or "ABORT" command.  The tricky part about these functions
+ * is that they are called within the postgres main loop, in between
+ * the StartTransactionCommand() and CommitTransactionCommand().
+ *
+ * For example, consider the following sequence of user commands:
+ *
+ * 1)  begin
+ * 2)  retrieve (foo.all)
+ * 3)  append foo (bar = baz)
+ * 4)  end
+ *
+ * in the main processing loop, this results in the following
+ * transaction sequence:
+ *
+ *     /   StartTransactionCommand();
+ * 1) /    ProcessUtility();       << begin
+ *    \        StartTransactionBlock();
+ *     \   CommitTransactionCommand();
+ *
+ *     /   StartTransactionCommand();
+ * 2) <    ProcessQuery();         << retrieve (foo.all)
+ *     \   CommitTransactionCommand();
+ *
+ *     /   StartTransactionCommand();
+ * 3) <    ProcessQuery();         << append foo (bar = baz)
+ *     \   CommitTransactionCommand();
+ *
+ *     /   StartTransactionCommand();
+ * 4) /    ProcessUtility();       << end
+ *    \        CommitTransactionBlock();
+ *     \   CommitTransactionCommand();
+ *
+ * The point of this example is to demonstrate the need for
+ * StartTransactionCommand() and CommitTransactionCommand() to
+ * be state smart -- they should do nothing in between the calls
+ * to StartTransactionBlock() and EndTransactionBlock() and
+ *      outside these calls they need to do normal start/commit
+ * processing.
+ *
+ * Furthermore, suppose the "retrieve (foo.all)" caused an abort
+ * condition.  We would then want to abort the transaction and
+ * ignore all subsequent commands up to the "end".
+ * -cim 3/23/90
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "access/xact.h"
+#include "commands/async.h"
+#include "storage/bufmgr.h"
+#include "storage/block.h"
+#include "storage/proc.h"
+#include "utils/inval.h"
+#include "utils/relcache.h"
+#include "access/transam.h"
+#include "catalog/heap.h"
+
+/* ----------------
+ * global variables holding the current transaction state.
+ *
+ *      CurrentTransactionStateData is the statically initialized
+ *      state record; CurrentTransactionState starts out pointing at
+ *      it and is what most routines in this file read through.
+ *
+ *      Note: when we are running several slave processes, the
+ *            current transaction state data is copied into shared memory
+ *       and the CurrentTransactionState pointer changed to
+ *       point to the shared copy.  All this occurs in slaves.c
+ * ----------------
+ */
+TransactionStateData CurrentTransactionStateData = {
+    0,             /* transaction id */
+    FirstCommandId,        /* command id */
+    0x0,           /* start time */
+    TRANS_DEFAULT,     /* transaction state */
+    TBLOCK_DEFAULT     /* transaction block state */
+    };
+
+TransactionState CurrentTransactionState =
+    &CurrentTransactionStateData;
+
+/* ----------------
+ * info returned when the system is disabled
+ *
+ * These are the values the GetCurrent* accessors in this file
+ * return while the transaction state is TRANS_DISABLED.
+ *
+ * Note:  I have no idea what the significance of the
+ *        1073741823 in DisabledStartTime.. I just carried
+ *        this over when converting things from the old
+ *        V1 transaction system.  -cim 3/18/90
+ *        (1073741823 is 2^30 - 1, i.e. 0x3FFFFFFF.)
+ * ----------------
+ */
+TransactionId DisabledTransactionId = (TransactionId)-1;
+     
+CommandId DisabledCommandId = (CommandId) -1;
+     
+AbsoluteTime DisabledStartTime = (AbsoluteTime) 1073741823;
+     
+/* ----------------
+ * overflow flag
+ *
+ * Set to true by CommandCounterIncrement() when the command id
+ * comes back around to FirstCommandId; cleared by
+ * ClearCommandIdCounterOverflowFlag().
+ * ----------------
+ */
+bool CommandIdCounterOverflowFlag;
+     
+/* ----------------
+ * catalog creation transaction bootstrapping flag.
+ * This should be eliminated and added to the transaction
+ * state stuff.  -cim 3/19/90
+ *
+ * While this is true, TransactionIdIsCurrentTransactionId() and
+ * CommandIdIsCurrentCommandId() in this file always answer false.
+ * ----------------
+ */
+bool AMI_OVERRIDE = false;
+     
+/* ----------------------------------------------------------------
+ *          transaction state accessors
+ * ----------------------------------------------------------------
+ */
+     
+/* --------------------------------
+ * TransactionFlushEnabled()
+ * SetTransactionFlushEnabled()
+ *
+ * These are used to test and set the "TransactionFlushState"
+ * variable.  If this variable is true (the default), then
+ * the system will flush all dirty buffers to disk at the end
+ * of each transaction.   If false then we are assuming the
+ * buffer pool resides in stable main memory, in which case we
+ * only do writes as necessary.
+ * --------------------------------
+ */
+static int TransactionFlushState = 1;
+
+/* report the current flush setting (1 = flush enabled, 0 = not) */
+int
+TransactionFlushEnabled()
+{
+    return TransactionFlushState;
+}
+
+/* set the flush setting: 1 when state equals true, otherwise 0 */
+void
+SetTransactionFlushEnabled(bool state)
+{
+    if (state == true)
+        TransactionFlushState = 1;
+    else
+        TransactionFlushState = 0;
+}
+
+/* --------------------------------
+ * IsTransactionState
+ *
+ * Returns true while the current transaction state is one of
+ * TRANS_START, TRANS_INPROGRESS, TRANS_COMMIT or TRANS_ABORT,
+ * i.e. we are running a query within an executing transaction.
+ * Returns false for TRANS_DEFAULT, TRANS_DISABLED and for any
+ * value not listed here.
+ * --------------------------------
+ */
+bool
+IsTransactionState()
+{
+    switch (CurrentTransactionState->state) {
+    case TRANS_START:
+    case TRANS_INPROGRESS:
+    case TRANS_COMMIT:
+    case TRANS_ABORT:
+        return true;
+
+    case TRANS_DEFAULT:
+    case TRANS_DISABLED:
+        return false;
+    }
+
+    /*
+     * Only reached for a state value not listed above; the explicit
+     * return also keeps lint happy.
+     */
+    return(false);
+}
+
+/* --------------------------------
+ * IsAbortedTransactionBlockState
+ *
+ * Returns true when the current transaction block state is
+ * TBLOCK_ABORT, i.e. we are running a query within an aborted
+ * transaction block; false otherwise.
+ * --------------------------------
+ */
+bool
+IsAbortedTransactionBlockState()
+{
+    return (CurrentTransactionState->blockState == TBLOCK_ABORT)
+        ? true
+        : false;
+}
+
+/* --------------------------------
+ * OverrideTransactionSystem
+ *
+ * This is used to temporarily disable the transaction
+ * processing system in order to do initialization of
+ * the transaction system data structures and relations
+ * themselves.
+ *
+ * flag == true:  remember the current state in
+ *                SavedTransactionState and switch to
+ *                TRANS_DISABLED (no-op if already disabled).
+ * otherwise:     put the remembered state back (no-op unless
+ *                currently disabled).
+ * --------------------------------
+ */
+int SavedTransactionState;
+
+void
+OverrideTransactionSystem(bool flag)
+{
+    TransactionState xact = CurrentTransactionState;
+    int is_disabled = (xact->state == TRANS_DISABLED);
+
+    if (flag == true) {
+        /* disabling: only act when not disabled already */
+        if (! is_disabled) {
+            SavedTransactionState = xact->state;
+            xact->state = TRANS_DISABLED;
+        }
+    } else if (is_disabled) {
+        /* re-enabling: restore what we saved when disabling */
+        xact->state = SavedTransactionState;
+    }
+}
+
+/* --------------------------------
+ * GetCurrentTransactionId
+ *
+ * Returns the transaction id held in the current transaction
+ * state, or DisabledTransactionId while the transaction system
+ * is in the TRANS_DISABLED state.
+ * --------------------------------
+ */
+TransactionId
+GetCurrentTransactionId()
+{
+    TransactionState xact = CurrentTransactionState;
+
+    return (xact->state == TRANS_DISABLED)
+        ? (TransactionId) DisabledTransactionId
+        : (TransactionId) xact->transactionIdData;
+}
+
+
+/* --------------------------------
+ * GetCurrentCommandId
+ *
+ * Returns the command id held in the current transaction state,
+ * or DisabledCommandId while the transaction system is in the
+ * TRANS_DISABLED state.
+ * --------------------------------
+ */
+CommandId
+GetCurrentCommandId()
+{
+    TransactionState xact = CurrentTransactionState;
+
+    if (xact->state != TRANS_DISABLED)
+        return xact->commandId;
+
+    /* transaction system disabled: special "disabled" command id */
+    return (CommandId) DisabledCommandId;
+}
+
+
+/* --------------------------------
+ * GetCurrentTransactionStartTime
+ *
+ * Returns the start time held in the current transaction state,
+ * or DisabledStartTime while the transaction system is in the
+ * TRANS_DISABLED state.
+ * --------------------------------
+ */
+AbsoluteTime
+GetCurrentTransactionStartTime()
+{
+    TransactionState xact = CurrentTransactionState;
+
+    if (xact->state != TRANS_DISABLED)
+        return xact->startTime;
+
+    /* transaction system disabled: special "disabled" start time */
+    return (AbsoluteTime) DisabledStartTime;
+}
+
+
+/* --------------------------------
+ * TransactionIdIsCurrentTransactionId
+ *
+ * Returns the result of TransactionIdEquals() between xid and the
+ * transaction id of the current transaction state.  Always
+ * returns false while AMI_OVERRIDE is set.
+ * --------------------------------
+ */
+bool
+TransactionIdIsCurrentTransactionId(TransactionId xid)
+{
+    TransactionState xact = CurrentTransactionState;
+    bool matches;
+
+    if (AMI_OVERRIDE)
+        return false;
+
+    matches = (bool) TransactionIdEquals(xid, xact->transactionIdData);
+    return matches;
+}
+
+
+/* --------------------------------
+ * CommandIdIsCurrentCommandId
+ *
+ * Returns true when cid equals the command id of the current
+ * transaction state.  Always returns false while AMI_OVERRIDE
+ * is set.
+ * --------------------------------
+ */
+bool
+CommandIdIsCurrentCommandId(CommandId cid)
+{
+    TransactionState xact = CurrentTransactionState;
+
+    if (AMI_OVERRIDE)
+        return false;
+
+    if (cid == xact->commandId)
+        return true;
+
+    return false;
+}
+
+
+/* --------------------------------
+ * ClearCommandIdCounterOverflowFlag
+ *
+ * Resets CommandIdCounterOverflowFlag to false.
+ * --------------------------------
+ */
+void
+ClearCommandIdCounterOverflowFlag()
+{
+    /* forget any overflow noted by CommandCounterIncrement() */
+    CommandIdCounterOverflowFlag = false;
+}
+
+
+/* --------------------------------
+ * CommandCounterIncrement
+ *
+ * Advance the command id of the current transaction by one.
+ * If the counter comes back around to FirstCommandId, set
+ * CommandIdCounterOverflowFlag and report the problem via
+ * elog(WARN).  Afterwards AtCommit_Cache() and AtStart_Cache()
+ * are run so that cache changes become visible to this backend.
+ *
+ * The state is reached through CurrentTransactionState, like
+ * every accessor in this file (GetCurrentCommandId() etc.), so
+ * the increment lands in the same record those accessors read
+ * even if the pointer has been redirected away from
+ * CurrentTransactionStateData.
+ * --------------------------------
+ */
+void
+CommandCounterIncrement()
+{
+    TransactionState s = CurrentTransactionState;
+
+    s->commandId += 1;
+    if (s->commandId == FirstCommandId) {
+        /* the counter wrapped around */
+        CommandIdCounterOverflowFlag = true;
+        elog(WARN, "You may only have 65535 commands per transaction");
+    }
+
+    /* make cache changes visible to me */
+    AtCommit_Cache();
+    AtStart_Cache();
+}
+
+/* ----------------------------------------------------------------
+ *             initialization stuff
+ * ----------------------------------------------------------------
+ */
+/*
+ * InitializeTransactionSystem
+ *
+ * Start-up entry point for the transaction system; all of the work
+ * is delegated to InitializeTransactionLog().
+ */
+void
+InitializeTransactionSystem()
+{
+    InitializeTransactionLog();
+}
+
+/* ----------------------------------------------------------------
+ *             StartTransaction stuff
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * AtStart_Cache
+ *
+ * Cache work done at transaction start (also re-run by
+ * CommandCounterIncrement()): calls DiscardInvalid().
+ * NOTE(review): presumably this processes pending cache
+ * invalidations -- confirm against utils/inval.
+ * --------------------------------
+ */
+void
+AtStart_Cache()
+{
+    DiscardInvalid();
+}
+
+/* --------------------------------
+ * AtStart_Locks
+ *
+ * Lock-related work at transaction start.  Intentionally
+ * empty; it exists so that StartTransaction() has a hook per
+ * subsystem (cache, locks, memory).
+ * --------------------------------
+ */
+void
+AtStart_Locks()    
+{
+    /*
+     * at present, it is unknown to me what belongs here -cim 3/18/90
+     *
+     * There isn't anything to do at the start of a xact for locks.
+     *  -mer 5/24/92
+     */
+}
+
+/* --------------------------------
+ * AtStart_Memory
+ *
+ * Memory set-up at transaction start: make the heap memory
+ * context of the blank portal (the one GetPortalByName(NULL)
+ * returns) the current context and start the default portal
+ * allocation mode.
+ * --------------------------------
+ */
+void
+AtStart_Memory()
+{
+    Portal blank_portal;
+
+    /* look up the blank portal */
+    blank_portal = GetPortalByName(NULL);
+
+    /* from here on, allocate in the blank portal's heap context */
+    (void) MemoryContextSwitchTo(
+        (MemoryContext) PortalGetHeapMemory(blank_portal));
+    StartPortalAllocMode(DefaultAllocMode, 0);
+}
+
+
+/* ----------------------------------------------------------------
+ *             CommitTransaction stuff
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * RecordTransactionCommit
+ *
+ * Record the commit of the current transaction.  The sequence is:
+ * flush the buffer pool, mark the transaction id committed via
+ * TransactionIdCommit(), flush the buffer pool again.
+ *
+ * Note: the two calls to FlushBufferPool() exist to ensure
+ *       that data pages are written before log pages.  These
+ *       explicit calls should be replaced by a more efficient
+ *       ordered page write scheme in the buffer manager
+ *       -cim 3/18/90
+ * --------------------------------
+ */
+void
+RecordTransactionCommit()
+{
+    TransactionId committing = GetCurrentTransactionId();
+    int leaked;
+
+    /*
+     * First flush.  Note: if we have stable main memory, dirty
+     * shared buffers are not flushed (plai 8/7/90).  The leak check
+     * is taken before the flush; the pool is reset afterwards when
+     * the check reported something.
+     */
+    leaked = BufferPoolCheckLeak();
+    FlushBufferPool(!TransactionFlushEnabled());
+    if (leaked)
+        ResetBufferPool();
+
+    /*
+     * Have the transaction access methods record the status of
+     * this transaction id in the pg_log / pg_time relations.
+     */
+    TransactionIdCommit(committing);
+
+    /* second flush: now write the log/time info to the disk too */
+    leaked = BufferPoolCheckLeak();
+    FlushBufferPool(!TransactionFlushEnabled());
+    if (leaked)
+        ResetBufferPool();
+}
+
+
+/* --------------------------------
+ * AtCommit_Cache
+ *
+ * Cache work at commit time (also run between commands by
+ * CommandCounterIncrement()): calls RegisterInvalid(true).
+ * --------------------------------
+ */
+void
+AtCommit_Cache()
+{
+    /*
+     * Make catalog changes visible to me for the next command.
+     * Other backends will not process my invalidation messages
+     * until after I commit and free my locks -- though they will
+     * do unnecessary work if I abort.
+     */
+    RegisterInvalid(true);
+}
+
+/* --------------------------------
+ * AtCommit_Locks
+ *
+ * Lock work at commit time: calls ProcReleaseLocks().
+ * --------------------------------
+ */
+void
+AtCommit_Locks()
+{
+    /*
+     * XXX What if ProcReleaseLocks fails?  (race condition?)
+     *
+     *  Then you're up a creek! -mer 5/24/92
+     */
+    ProcReleaseLocks();
+}
+
+/* --------------------------------
+ * AtCommit_Memory
+ *
+ * Memory work at commit time: end the portal allocation mode
+ * and make TopMemoryContext the current memory context.
+ * --------------------------------
+ */
+void
+AtCommit_Memory()
+{
+    /*
+     * We are now "out" of a transaction, so allocations should go
+     * to the top memory context rather than the blank portal
+     * memory context.
+     */
+    EndPortalAllocMode();
+    (void) MemoryContextSwitchTo(TopMemoryContext);
+}
+
+/* ----------------------------------------------------------------
+ *             AbortTransaction stuff
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * RecordTransactionAbort
+ *
+ * Record the abort of the current transaction: mark its id
+ * aborted via TransactionIdAbort(), then reset the buffer pool.
+ * --------------------------------
+ */
+void
+RecordTransactionAbort()
+{
+    TransactionId aborting = GetCurrentTransactionId();
+
+    /*
+     * Have the transaction access methods record the status of
+     * this transaction id in the pg_log / pg_time relations.
+     */
+    TransactionIdAbort(aborting);
+
+    /* unlike the commit path, nothing is flushed; the pool is reset */
+    ResetBufferPool();
+}
+
+/* --------------------------------
+ * AtAbort_Cache
+ *
+ * Cache work at abort time: calls RegisterInvalid(false), the
+ * counterpart of the RegisterInvalid(true) done at commit.
+ * --------------------------------
+ */
+void
+AtAbort_Cache()
+{
+    RegisterInvalid(false);
+}
+
+/* --------------------------------
+ * AtAbort_Locks
+ *
+ * Lock work at abort time: calls ProcReleaseLocks().
+ * --------------------------------
+ */
+void
+AtAbort_Locks()
+{
+    /*
+     * XXX What if ProcReleaseLocks() fails?  (race condition?)
+     *
+     *  Then you're up a creek without a paddle! -mer
+     */
+    ProcReleaseLocks();
+}
+
+
+/* --------------------------------
+ * AtAbort_Memory
+ *
+ * Memory work at abort time: make TopMemoryContext the current
+ * memory context.
+ * --------------------------------
+ */
+void
+AtAbort_Memory()
+{
+    /*
+     * After an abort, make certain the system uses the top memory
+     * context rather than the portal memory context (until the
+     * next transaction).
+     */
+    (void) MemoryContextSwitchTo(TopMemoryContext);
+}
+
+/* ----------------------------------------------------------------
+ *         interface routines
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * StartTransaction
+ *
+ * Begin a new transaction: obtain a new transaction id, reset
+ * the command id and start time, run the AtStart_* hooks and
+ * initialize the temporary relation list.  The state passes
+ * through TRANS_START and ends up as TRANS_INPROGRESS.
+ *
+ * Does nothing when the transaction system is disabled or a
+ * transaction is already in progress.
+ * --------------------------------
+ */
+void
+StartTransaction()
+{
+    TransactionState xact = CurrentTransactionState;
+
+    /*
+     * If the transaction system is switched off, or if we're
+     * already in a transaction, do nothing.  We're already in a
+     * transaction when the monitor sends a null command to the
+     * backend to flush the comm channel.  This is a hacky fix to a
+     * communications problem, and we keep having to deal with it
+     * here.  We should fix the comm channel code.  mao 080891
+     */
+    switch (xact->state) {
+    case TRANS_DISABLED:
+    case TRANS_INPROGRESS:
+        return;
+    default:
+        break;
+    }
+
+    /* mark that start processing is under way */
+    xact->state = TRANS_START;
+
+    /* generate a new transaction id */
+    GetNewTransactionId(&(xact->transactionIdData));
+
+    /* initialize the remaining per-transaction fields */
+    xact->commandId = FirstCommandId;
+    xact->startTime = GetCurrentAbsoluteTime();
+
+    /* initialize the various transaction subsystems */
+    AtStart_Cache();
+    AtStart_Locks();
+    AtStart_Memory();
+
+    /*
+     * Initialize the temporary relations list: the relations
+     * created in the course of the transaction that need to be
+     * destroyed properly when it ends.
+     */
+    InitTempRelList();
+
+    /* start processing is complete; we are now "in progress" */
+    xact->state = TRANS_INPROGRESS;
+}
+
+/* ---------------
+ * CurrentXactInProgress
+ *
+ * Returns true when the current transaction state is
+ * TRANS_INPROGRESS, false otherwise.
+ * ---------------
+ */
+bool
+CurrentXactInProgress()
+{
+    if (CurrentTransactionState->state == TRANS_INPROGRESS)
+        return true;
+
+    return false;
+}
+
+/* --------------------------------
+ * CommitTransaction
+ *
+ * Commit the current transaction: destroy temporary relations,
+ * run end-of-transaction portal processing, record the commit,
+ * purge local relations and run the AtCommit_* hooks.  The state
+ * passes through TRANS_COMMIT and ends up as TRANS_DEFAULT;
+ * afterwards Async_NotifyAtCommit() is called when in normal
+ * processing mode.
+ *
+ * Does nothing when the transaction system is disabled.  When
+ * called in any state other than TRANS_INPROGRESS a NOTICE is
+ * logged and commit processing goes ahead regardless.
+ * --------------------------------
+ */
+void
+CommitTransaction()
+{
+    TransactionState xact = CurrentTransactionState;
+
+    /* check the current transaction state */
+    if (xact->state == TRANS_DISABLED)
+        return;
+
+    if (xact->state != TRANS_INPROGRESS)
+        elog(NOTICE, "CommitTransaction and not in in-progress state ");
+
+    /* mark that commit processing is under way */
+    xact->state = TRANS_COMMIT;
+
+    /* do commit processing; the order of these calls is significant */
+    DestroyTempRels();
+    AtEOXact_portals();
+    RecordTransactionCommit();
+    RelationPurgeLocalRelation(true);
+    AtCommit_Cache();
+    AtCommit_Locks();
+    AtCommit_Memory();
+
+    /* commit processing is complete; back to the default state */
+    xact->state = TRANS_DEFAULT;
+
+    /* want this after commit */
+    if (IsNormalProcessingMode())
+        Async_NotifyAtCommit();
+}
+
+/* --------------------------------
+ * AbortTransaction
+ *
+ * Abort the current transaction.  Does nothing when the
+ * transaction system is disabled (TRANS_DISABLED).  When the
+ * state is not TRANS_INPROGRESS only a NOTICE is emitted; no
+ * early return follows it.  The state is set to TRANS_ABORT
+ * while the abort-time cleanup routines run and is reset to
+ * TRANS_DEFAULT afterwards.  Finally, in normal processing
+ * mode only, Async_NotifyAtAbort() is called.
+ * --------------------------------
+ */
+void
+AbortTransaction()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    /* ----------------
+     * check the current transaction state
+     * ----------------
+     */
+    if (s->state == TRANS_DISABLED)
+   return;
+    
+    if (s->state != TRANS_INPROGRESS)
+   elog(NOTICE, "AbortTransaction and not in in-progress state ");
+    
+    /* ----------------
+     * set the current transaction state information
+     *  appropriately during the abort processing
+     * ----------------
+     */
+    s->state = TRANS_ABORT;
+    
+    /* ----------------
+     * do abort processing
+     *
+     *  note: unlike CommitTransaction(), temporary relations
+     *  are destroyed after the abort has been recorded and the
+     *  local relations purged.
+     * ----------------
+     */
+    AtEOXact_portals();
+    RecordTransactionAbort();
+    RelationPurgeLocalRelation(false);
+    DestroyTempRels();
+    AtAbort_Cache();
+    AtAbort_Locks();
+    AtAbort_Memory();
+    
+    /* ----------------
+     * done with abort processing, set current transaction
+     *  state back to default
+     * ----------------
+     */
+    s->state = TRANS_DEFAULT;
+    {
+   /* We need to do this in case another process notified us while
+      we are in the middle of an aborted transaction.  We need to
+      notify our frontend after we finish the current transaction.
+      -- jw, 1/3/94
+      */
+   if (IsNormalProcessingMode())
+       Async_NotifyAtAbort();
+    }    
+}
+
+/* --------------------------------
+ * StartTransactionCommand
+ *
+ * Dispatches on the current transaction block state
+ * (s->blockState) to decide whether a transaction has to be
+ * started before the upcoming command.  Only TBLOCK_DEFAULT
+ * and the unexpected TBLOCK_END case call StartTransaction();
+ * all other states leave the transaction itself untouched.
+ * --------------------------------
+ */
+void
+StartTransactionCommand()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    switch(s->blockState) {
+   /* ----------------
+    *  if we aren't in a transaction block, we
+    *  just do our usual start transaction.
+    * ----------------
+    */
+    case TBLOCK_DEFAULT:
+   StartTransaction();
+   break;
+   
+   /* ----------------
+    *  We should never experience this -- if we do it
+    *  means the BEGIN state was not changed in the previous
+    *  CommitTransactionCommand().  If we get it, we print
+    *  a warning and change to the in-progress state.
+    * ----------------
+    */
+    case TBLOCK_BEGIN:
+   elog(NOTICE, "StartTransactionCommand: unexpected TBLOCK_BEGIN");
+   s->blockState = TBLOCK_INPROGRESS;
+   break;
+   
+   /* ----------------
+    *  This is the case when we are somewhere in a transaction
+    *  block and about to start a new command.  For now we
+    *  do nothing but someday we may do command-local resource
+    *  initialization.
+    * ----------------
+    */
+    case TBLOCK_INPROGRESS:
+   break;
+   
+   /* ----------------
+    *  As with BEGIN, we should never experience this --
+    *  if we do it means the END state was not changed in the
+    *  previous CommitTransactionCommand().  If we get it, we
+    *  print a warning, commit the transaction, start a new
+    *  transaction and change to the default state.
+    * ----------------
+    */
+    case TBLOCK_END:
+   elog(NOTICE, "StartTransactionCommand: unexpected TBLOCK_END");
+   s->blockState = TBLOCK_DEFAULT;
+   CommitTransaction();
+   StartTransaction();
+   break;
+   
+   /* ----------------
+    *  Here we are in the middle of a transaction block but
+    *  one of the commands caused an abort so we do nothing
+    *  but remain in the abort state.  Eventually we will get
+    *  to the "END TRANSACTION" which will set things straight.
+    * ----------------
+    */
+    case TBLOCK_ABORT:
+   break;  
+   
+   /* ----------------
+    *  This means we somehow aborted and the last call to
+    *  CommitTransactionCommand() didn't clear the state so
+    *  we remain in the ENDABORT state and maybe next time
+    *  we get to CommitTransactionCommand() the state will
+    *  get reset to default.
+    * ----------------
+    */
+    case TBLOCK_ENDABORT:
+   elog(NOTICE, "StartTransactionCommand: unexpected TBLOCK_ENDABORT");
+   break;  
+    }  
+}
+/* --------------------------------
+ * CommitTransactionCommand
+ *
+ * Dispatches on the current transaction block state
+ * (s->blockState) once a command has finished.  The
+ * transaction is committed only in the TBLOCK_DEFAULT and
+ * TBLOCK_END states; inside a block (TBLOCK_INPROGRESS) just
+ * the command counter is advanced.  TBLOCK_END and
+ * TBLOCK_ENDABORT both return the block state to
+ * TBLOCK_DEFAULT.
+ * --------------------------------
+ */
+void
+CommitTransactionCommand()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    switch(s->blockState) {
+   /* ----------------
+    *  if we aren't in a transaction block, we
+    *  just do our usual transaction commit
+    * ----------------
+    */
+    case TBLOCK_DEFAULT:
+   CommitTransaction();
+   break;
+   
+   /* ----------------
+    *  This is the case right after we get a "BEGIN TRANSACTION"
+    *  command, but the user hasn't done anything else yet, so
+    *  we change to the "transaction block in progress" state
+    *  and return.
+    * ----------------
+    */
+    case TBLOCK_BEGIN:
+   s->blockState = TBLOCK_INPROGRESS;
+   break;
+   
+   /* ----------------
+    *  This is the case when we have finished executing a command
+    *  someplace within a transaction block.  We increment the
+    *  command counter and return.  Someday we may free resources
+    *  local to the command.
+    * ----------------
+    */
+    case TBLOCK_INPROGRESS:
+   CommandCounterIncrement();
+   break;
+   
+   /* ----------------
+    *  This is the case when we just got the "END TRANSACTION"
+    *  statement, so we go back to the default state and
+    *  commit the transaction.
+    * ----------------
+    */
+    case TBLOCK_END:
+   s->blockState = TBLOCK_DEFAULT;
+   CommitTransaction();
+   break;
+   
+   /* ----------------
+    *  Here we are in the middle of a transaction block but
+    *  one of the commands caused an abort so we do nothing
+    *  but remain in the abort state.  Eventually we will get
+    *  to the "END TRANSACTION" which will set things straight.
+    * ----------------
+    */
+    case TBLOCK_ABORT:
+   break;
+   
+   /* ----------------
+    *  Here we were in an aborted transaction block which
+    *  just processed the "END TRANSACTION" command from the
+    *  user, so now we return to the default state.
+    * ----------------
+    */
+    case TBLOCK_ENDABORT:
+   s->blockState = TBLOCK_DEFAULT;  
+   break;
+    }    
+}
+
+/* --------------------------------
+ * AbortCurrentTransaction
+ *
+ * Dispatches on the current transaction block state
+ * (s->blockState) when the current command has to be aborted.
+ * AbortTransaction() is called in the DEFAULT, BEGIN,
+ * INPROGRESS and END states; the block state is always
+ * updated before that call.  In the ABORT and ENDABORT states
+ * no abort processing is repeated.
+ * --------------------------------
+ */
+void
+AbortCurrentTransaction()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    switch(s->blockState) {
+   /* ----------------
+    *  if we aren't in a transaction block, we
+    *  just do our usual abort transaction.
+    * ----------------
+    */
+    case TBLOCK_DEFAULT:
+   AbortTransaction();
+   break;
+   
+   /* ----------------
+    *  If we are in the TBLOCK_BEGIN it means something
+    *  screwed up right after reading "BEGIN TRANSACTION"
+    *  so we enter the abort state.  Eventually an "END
+    *  TRANSACTION" will fix things.
+    * ----------------
+    */
+    case TBLOCK_BEGIN:
+   s->blockState = TBLOCK_ABORT;
+   AbortTransaction();
+   break;
+   
+   /* ----------------
+    *  This is the case when we are somewhere in a transaction
+    *  block which aborted so we abort the transaction and
+    *  set the ABORT state.  Eventually an "END TRANSACTION"
+    *  will fix things and restore us to a normal state.
+    * ----------------
+    */
+    case TBLOCK_INPROGRESS:
+   s->blockState = TBLOCK_ABORT;
+   AbortTransaction();
+   break;
+   
+   /* ----------------
+    *  Here, the system was fouled up just after the
+    *  user wanted to end the transaction block so we
+    *  abort the transaction and put us back into the
+    *  default state.
+    * ----------------
+    */
+    case TBLOCK_END:
+   s->blockState = TBLOCK_DEFAULT;
+   AbortTransaction();
+   break;
+   
+   /* ----------------
+    *  Here, we are already in an aborted transaction
+    *  state and are waiting for an "END TRANSACTION" to
+    *  come along and lo and behold, we abort again!
+    *  So we just remain in the abort state.
+    * ----------------
+    */
+    case TBLOCK_ABORT:
+   break;
+   
+   /* ----------------
+    *  Here we were in an aborted transaction block which
+    *  just processed the "END TRANSACTION" command but somehow
+    *  aborted again.. since we must have done the abort
+    *  processing, we return to the default state.
+    * ----------------
+    */
+    case TBLOCK_ENDABORT:
+   s->blockState = TBLOCK_DEFAULT;  
+   break;
+    }
+}
+
+/* ----------------------------------------------------------------
+ *            transaction block support
+ * ----------------------------------------------------------------
+ */
+/* --------------------------------
+ * BeginTransactionBlock
+ *
+ * Enter a transaction block.  Does nothing when the
+ * transaction system is disabled.  A NOTICE is emitted when
+ * the block state is not TBLOCK_DEFAULT, but the state is
+ * changed regardless.  On return the block state is
+ * TBLOCK_INPROGRESS; TBLOCK_BEGIN is only held transiently
+ * inside this function.
+ * --------------------------------
+ */
+void
+BeginTransactionBlock()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    /* ----------------
+     * check the current transaction state
+     * ----------------
+     */
+    if (s->state == TRANS_DISABLED)
+   return;
+    
+    if (s->blockState != TBLOCK_DEFAULT)
+   elog(NOTICE, "BeginTransactionBlock and not in default state ");
+    
+    /* ----------------
+     * set the current transaction block state information
+     *  appropriately during begin processing
+     * ----------------
+     */
+    s->blockState = TBLOCK_BEGIN;
+    
+    /* ----------------
+     * do begin processing
+     *  (currently there is nothing to do here)
+     * ----------------
+     */
+    
+    /* ----------------
+     * done with begin processing, set block state to inprogress
+     * ----------------
+     */
+    s->blockState = TBLOCK_INPROGRESS;    
+}
+
+/* --------------------------------
+ * EndTransactionBlock
+ *
+ * Handle the end of a transaction block.  Does nothing when
+ * the transaction system is disabled.  This routine only
+ * changes the block state; it does not itself commit or abort
+ * anything:
+ *   TBLOCK_INPROGRESS -> TBLOCK_END
+ *   TBLOCK_ABORT      -> TBLOCK_ENDABORT
+ *   anything else     -> NOTICE, then TBLOCK_ENDABORT
+ * --------------------------------
+ */
+void
+EndTransactionBlock()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    /* ----------------
+     * check the current transaction state
+     * ----------------
+     */
+    if (s->state == TRANS_DISABLED)
+   return;
+    
+    if (s->blockState == TBLOCK_INPROGRESS) {
+   /* ----------------
+    *  here we are in a transaction block which should commit
+    *  when we get to the upcoming CommitTransactionCommand()
+    *  so we set the state to "END".  CommitTransactionCommand()
+    *  will recognize this and commit the transaction and return
+    *  us to the default state
+    * ----------------
+    */
+   s->blockState = TBLOCK_END;
+   return;
+    }
+    
+    if (s->blockState == TBLOCK_ABORT) {
+   /* ----------------
+    *  here, we are in a transaction block which aborted
+    *  and since the AbortTransaction() was already done,
+    *  we do whatever is needed and change to the special
+    *  "END ABORT" state.  The upcoming CommitTransactionCommand()
+    *  will recognise this and then put us back in the default
+    *  state.
+    * ----------------
+    */
+   s->blockState = TBLOCK_ENDABORT;
+   return;
+    }
+    
+    /* ----------------
+     * We should not get here, but if we do, we go to the ENDABORT
+     *  state after printing a warning.  The upcoming call to
+     *  CommitTransactionCommand() will then put us back into the
+     *  default state.
+     * ----------------
+     */
+    elog(NOTICE, "EndTransactionBlock and not inprogress/abort state ");
+    s->blockState = TBLOCK_ENDABORT;
+}
+
+/* --------------------------------
+ * AbortTransactionBlock
+ *
+ * Abort the transaction of the current transaction block.
+ * Does nothing when the transaction system is disabled.  From
+ * TBLOCK_INPROGRESS the block state becomes TBLOCK_ABORT and
+ * stays there after AbortTransaction(); contrast with
+ * UserAbortTransactionBlock(), which moves on to
+ * TBLOCK_ENDABORT.  From any other block state a NOTICE is
+ * emitted, AbortTransaction() is called and the block state
+ * becomes TBLOCK_ENDABORT.
+ * --------------------------------
+ */
+void
+AbortTransactionBlock()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    /* ----------------
+     * check the current transaction state
+     * ----------------
+     */
+    if (s->state == TRANS_DISABLED)
+   return;
+    
+    if (s->blockState == TBLOCK_INPROGRESS) {
+   /* ----------------
+    *  here we were inside a transaction block and something
+    *  screwed up inside the system so we enter the abort state,
+    *  do the abort processing and then return.
+    *  We remain in the abort state until we see the upcoming
+    *  END TRANSACTION command.
+    * ----------------
+    */
+   s->blockState = TBLOCK_ABORT;
+   
+   /* ----------------
+    *  do abort processing and return
+    * ----------------
+    */
+   AbortTransaction();
+   return;
+    }
+    
+    /* ----------------
+     * this case should not be possible, because it would mean
+     *  the user entered an "abort" from outside a transaction block.
+     *  So we emit a NOTICE, abort the transaction and
+     *  enter the "ENDABORT" state so we will end up in the default
+     *  state after the upcoming CommitTransactionCommand().
+     * ----------------
+     */
+    elog(NOTICE, "AbortTransactionBlock and not inprogress state");
+    AbortTransaction();
+    s->blockState = TBLOCK_ENDABORT;
+}
+
+/* --------------------------------
+ * UserAbortTransactionBlock
+ *
+ * Abort the current transaction block on behalf of an
+ * explicit user abort command.  Does nothing when the
+ * transaction system is disabled.  In every other case
+ * AbortTransaction() is called and the block state ends up as
+ * TBLOCK_ENDABORT; when the block state was not
+ * TBLOCK_INPROGRESS a NOTICE is emitted first.
+ * --------------------------------
+ */
+void
+UserAbortTransactionBlock()
+{
+    TransactionState s = CurrentTransactionState;
+    
+    /* ----------------
+     * check the current transaction state
+     * ----------------
+     */
+    if (s->state == TRANS_DISABLED)
+   return;
+    
+    if (s->blockState == TBLOCK_INPROGRESS) {
+   /* ----------------
+    *  here we were inside a transaction block and we
+    *  got an abort command from the user, so we move to
+    *  the abort state, do the abort processing and
+    *  then change to the ENDABORT state so we will end up
+    *  in the default state after the upcoming
+    *  CommitTransactionCommand().
+    * ----------------
+    */
+   s->blockState = TBLOCK_ABORT;
+   
+   /* ----------------
+    *  do abort processing
+    * ----------------
+    */
+   AbortTransaction();
+   
+   /* ----------------
+    *  change to the end abort state and return
+    * ----------------
+    */
+   s->blockState = TBLOCK_ENDABORT;
+   return;
+    }
+    
+    /* ----------------
+     * this case should not be possible, because it would mean
+     *  the user entered an "abort" from outside a transaction block.
+     *  So we emit a NOTICE, abort the transaction and
+     *  enter the "ENDABORT" state so we will end up in the default
+     *  state after the upcoming CommitTransactionCommand().
+     * ----------------
+     */
+    elog(NOTICE, "UserAbortTransactionBlock and not inprogress state");
+    AbortTransaction();
+    s->blockState = TBLOCK_ENDABORT;
+}
+
+/* --------------------------------
+ * IsTransactionBlock
+ *
+ * Returns true when the block state is TBLOCK_INPROGRESS or
+ * TBLOCK_ENDABORT, false for every other block state.
+ * --------------------------------
+ */
+bool
+IsTransactionBlock()
+{
+    int blockState = CurrentTransactionState->blockState;
+    
+    switch (blockState) {
+    case TBLOCK_INPROGRESS:
+    case TBLOCK_ENDABORT:
+        return (true);
+    default:
+        return (false);
+    }
+}
diff --git a/src/backend/access/transam/xid.c b/src/backend/access/transam/xid.c

new file mode 100644 (file)

index 0000000..faeeb62
--- /dev/null
+++ b/src/backend/access/transam/xid.c
@@ -0,0 +1,156 @@
+/*-------------------------------------------------------------------------
+ *
+ * xid.c--
+ *    POSTGRES transaction identifier code.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/xid.c,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+ *
+ * OLD COMMENTS
+ * XXX WARNING
+ * Much of this file will change when we change our representation
+ * of transaction ids -cim 3/23/90
+ *
+ * It is time to make the switch from 5 byte to 4 byte transaction ids
+ * This file was totally reworked. -mer 5/22/92
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "postgres.h"
+#include "utils/palloc.h"
+#include "utils/elog.h"
+#include "utils/memutils.h"
+#include "utils/nabstime.h"
+
+extern TransactionId NullTransactionId;
+extern TransactionId DisabledTransactionId;
+extern TransactionId AmiTransactionId;
+extern TransactionId FirstTransactionId;
+
+/* ----------------------------------------------------------------
+ *     TransactionIdIsValid
+ *
+ * Returns true unless transactionId equals NullTransactionId.
+ * (Candidate for conversion to a macro.)
+ * ----------------------------------------------------------------
+ */
+bool
+TransactionIdIsValid(TransactionId transactionId)
+{
+    if (transactionId == NullTransactionId)
+        return ((bool) 0);
+    return ((bool) 1);
+}
+
+/* XXX char16 name for catalogs */
+/*
+ * xidin - convert the external decimal text form of a transaction id
+ *         into its internal representation.
+ *
+ * strtoul() is used instead of atol(): xidout() prints ids with "%u",
+ * so values above LONG_MAX (possible where long is 32 bits) must parse
+ * back without overflowing a signed long.  As before, leading white
+ * space and an optional sign are accepted, parsing is base 10, and
+ * malformed input is not reported (it yields 0).
+ */
+TransactionId
+xidin(char *representation)
+{
+    return ((TransactionId) strtoul(representation, (char **) NULL, 10));
+}
+
+/* XXX char16 name for catalogs */
+/*
+ * xidout - produce the external text form of a transaction id.
+ *
+ * The result is a palloc'd, NUL-terminated string holding the id
+ * formatted with "%u".  (An older implementation returned
+ * TransactionIdFormString(transactionId) instead.)
+ */
+char*
+xidout(TransactionId transactionId)
+{
+    /* 10 digits for the largest 32 bit unsigned value, plus the NUL */
+    char *buf = palloc(11);
+    
+    (void) sprintf(buf, "%u", transactionId);
+    return (buf);
+}
+
+/* ----------------------------------------------------------------
+ * StoreInvalidTransactionId
+ *
+ * Overwrites *destination with NullTransactionId.
+ * (Candidate for conversion to a macro; the pointer-based
+ * interface may go away.)
+ * ----------------------------------------------------------------
+ */
+void
+StoreInvalidTransactionId(TransactionId *destination)
+{
+    TransactionId invalid = NullTransactionId;
+    
+    *destination = invalid;
+}
+
+/* ----------------------------------------------------------------
+ * TransactionIdStore
+ *
+ * Copies transactionId into *destination.
+ * (Candidate for conversion to a macro.)
+ * ----------------------------------------------------------------
+ */
+void
+TransactionIdStore(TransactionId transactionId,
+          TransactionId *destination)
+{
+    TransactionId *target = destination;
+    
+    *target = transactionId;
+}
+
+/* ----------------------------------------------------------------
+ * TransactionIdEquals
+ *
+ * Returns true iff the two transaction ids are the same value.
+ * ----------------------------------------------------------------
+ */
+bool
+TransactionIdEquals(TransactionId id1, TransactionId id2)
+{
+    if (id1 == id2)
+        return ((bool) 1);
+    return ((bool) 0);
+}
+
+/* ----------------------------------------------------------------
+ * TransactionIdIsLessThan
+ *
+ * Returns true iff id1 compares numerically below id2.
+ * ----------------------------------------------------------------
+ */
+bool
+TransactionIdIsLessThan(TransactionId id1, TransactionId id2)
+{
+    if (id1 < id2)
+        return ((bool) 1);
+    return ((bool) 0);
+}
+
+/* ----------------------------------------------------------------
+ * xideq
+ *
+ * Equality test on two transaction ids: the result is 1 when
+ * xid1 == xid2 and 0 otherwise.
+ * ----------------------------------------------------------------
+ */
+bool
+xideq(TransactionId xid1, TransactionId xid2)
+{
+    bool same = (bool) (xid1 == xid2);
+    
+    return (same);
+}
+
+
+
+/* ----------------------------------------------------------------
+ * TransactionIdIncrement
+ *
+ * Advances *transactionId by one.  If the new value equals
+ * DisabledTransactionId, elog(FATAL) is raised because the
+ * supply of transaction ids is exhausted.
+ * ----------------------------------------------------------------
+ */
+void
+TransactionIdIncrement(TransactionId *transactionId)
+{
+    *transactionId += 1;
+    
+    if (*transactionId != DisabledTransactionId)
+        return;
+    
+    elog(FATAL, "TransactionIdIncrement: exhausted XID's");
+}
+
+/* ----------------------------------------------------------------
+ * TransactionIdAdd
+ *
+ * Adds value to *xid in place.  No overflow or validity check
+ * is made.
+ * ----------------------------------------------------------------
+ */
+void
+TransactionIdAdd(TransactionId *xid, int value)
+{
+    *xid = *xid + value;
+}
+
diff --git a/src/backend/access/tupdesc.h b/src/backend/access/tupdesc.h

new file mode 100644 (file)

index 0000000..a26bbc7
--- /dev/null
+++ b/src/backend/access/tupdesc.h
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * tupdesc.h--
+ *    POSTGRES tuple descriptor definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: tupdesc.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    TUPDESC_H
+#define TUPDESC_H
+
+#include "postgres.h"
+#include "access/attnum.h"
+#include "nodes/pg_list.h" /* for List */
+#include "catalog/pg_attribute.h"
+
+/*
+ * Historical definition, no longer in effect (see the commented-out
+ * typedef below): a TupleDesc used to be an array of AttributeTupleForms,
+ * each of which is a pointer to an attribute form.
+ */
+/* typedef AttributeTupleForm      *TupleDesc; */
+
+/* a TupleDesc is a pointer to a structure which includes an array of */
+/* AttributeTupleForms, i.e. pg_attribute information, and the size of */
+/* the array, i.e. the number of attributes */
+/* in short, a TupleDesc completely captures the attribute information */
+/* for a tuple */
+
+typedef struct tupleDesc {
+    int  natts;                 /* number of attributes, i.e. size of attrs */
+    AttributeTupleForm *attrs;  /* array of pg_attribute information */
+} *TupleDesc;
+
+extern TupleDesc CreateTemplateTupleDesc(int natts);
+
+extern TupleDesc CreateTupleDesc(int natts, AttributeTupleForm *attrs);
+
+extern TupleDesc CreateTupleDescCopy(TupleDesc tupdesc);
+
+extern bool TupleDescInitEntry(TupleDesc desc,
+                  AttrNumber attributeNumber,
+                  char *attributeName, 
+                  char *typeName, 
+                  int attdim, 
+                  bool attisset);
+
+extern TupleDesc BuildDescForRelation(List *schema, char *relname);
+
+#endif /* TUPDESC_H */
diff --git a/src/backend/access/tupmacs.h b/src/backend/access/tupmacs.h

new file mode 100644 (file)

index 0000000..9a9bcce
--- /dev/null
+++ b/src/backend/access/tupmacs.h
@@ -0,0 +1,43 @@
+/*-------------------------------------------------------------------------
+ *
+ * tupmacs.h--
+ *    Tuple macros used by both index tuples and heap tuples.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: tupmacs.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TUPMACS_H
+#define TUPMACS_H
+
+/*
+ * att_isnull: test the ATT'th bit (zero-based, least significant bit
+ * first within each byte) of BITS, an array of 8-bit bytes.
+ *
+ * The result is true when that bit is CLEAR and false when it is set,
+ * i.e. a zero bit is what marks the attribute as null.
+ */
+#define att_isnull(ATT, BITS) (!((BITS)[(ATT) >> 3] & (1 << ((ATT) & 0x07))))
+
+/*
+ * fetchatt(A, T): given A, a pointer to an AttributeTupleForm, and T,
+ * a pointer into a tuple's data area, return the correct value or
+ * pointer, always typed as (char *).
+ *
+ * When attbyval is false the result is simply T cast to (char *).
+ * When attbyval is true the value stored at T is loaded according to
+ * attlen: as an int32 when attlen > sizeof(int16), as a char when
+ * attlen < sizeof(int16), and as an int16 otherwise.
+ *
+ * note that T must already be properly LONGALIGN/SHORTALIGN'd for
+ * this to work correctly.
+ *
+ * the double-cast is to stop gcc from (correctly) complaining about 
+ * casting integer types with size < sizeof(char *) to (char *).
+ * sign-extension may get weird if you use an integer type that
+ * isn't the same size as (char *) for the first cast.  (on the other
+ * hand, it's safe to use another type for the (foo *)(T).)
+ */
+#define fetchatt(A, T) \
+ ((*(A))->attbyval \
+  ? ((*(A))->attlen > sizeof(int16) \
+     ? (char *) (long) *((int32 *)(T)) \
+     : ((*(A))->attlen < sizeof(int16) \
+        ? (char *) (long) *((char *)(T)) \
+        : (char *) (long) *((int16 *)(T)))) \
+  : (char *) (T))
+   
+#endif
diff --git a/src/backend/access/valid.h b/src/backend/access/valid.h

new file mode 100644 (file)

index 0000000..1c5cf8c
--- /dev/null
+++ b/src/backend/access/valid.h
@@ -0,0 +1,37 @@
+/*-------------------------------------------------------------------------
+ *
+ * valid.h--
+ *    POSTGRES tuple qualification validity definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: valid.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    VALID_H
+#define VALID_H
+
+#include "c.h"
+#include "access/skey.h"
+#include "storage/buf.h"
+#include "utils/tqual.h"
+#include "access/tupdesc.h"
+#include "utils/rel.h"
+#include "storage/bufpage.h"
+
+/* ----------------
+ * extern decl's
+ * ----------------
+ */
+
+extern bool heap_keytest(HeapTuple t, TupleDesc tupdesc,
+            int nkeys, ScanKey keys);
+
+extern HeapTuple heap_tuple_satisfies(ItemId itemId, Relation relation,
+       PageHeader disk_page, TimeQual qual, int nKeys, ScanKey key);
+
+extern bool TupleUpdatedByCurXactAndCmd(HeapTuple t);
+
+#endif /* VALID_H */
diff --git a/src/backend/access/xact.h b/src/backend/access/xact.h

new file mode 100644 (file)

index 0000000..15f376e
--- /dev/null
+++ b/src/backend/access/xact.h
@@ -0,0 +1,115 @@
+/*-------------------------------------------------------------------------
+ *
+ * xact.h--
+ *    postgres transaction system header
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: xact.h,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef XACT_H
+#define XACT_H
+
+#include <signal.h>
+
+#include "storage/ipc.h"
+#include "miscadmin.h"
+#include "utils/portal.h"
+#include "utils/elog.h"
+#include "utils/mcxt.h"
+#include "utils/nabstime.h"
+
+/* ----------------
+ * transaction state structure
+ *
+ * transactionIdData  id of the current transaction
+ * commandId          current command id within the transaction
+ * startTime          time recorded when the transaction started
+ * state              one of the TRANS_* values defined below
+ * blockState         one of the TBLOCK_* values defined below
+ * ----------------
+ */
+typedef struct TransactionStateData {
+    TransactionId  transactionIdData;
+    CommandId      commandId;
+    AbsoluteTime       startTime;
+    int            state;
+    int            blockState;
+} TransactionStateData;
+
+/* ----------------
+ * transaction states
+ * ----------------
+ */
+#define TRANS_DEFAULT      0
+#define TRANS_START        1
+#define TRANS_INPROGRESS   2
+#define TRANS_COMMIT       3
+#define TRANS_ABORT        4
+#define TRANS_DISABLED     5
+
+/* ----------------
+ * transaction block states
+ * ----------------
+ */
+#define TBLOCK_DEFAULT     0
+#define TBLOCK_BEGIN       1
+#define TBLOCK_INPROGRESS  2
+#define TBLOCK_END     3
+#define TBLOCK_ABORT       4
+#define TBLOCK_ENDABORT        5
+
+typedef TransactionStateData *TransactionState;
+
+/* ----------------
+ * extern definitions
+ *
+ * Every parameterless routine is declared with an explicit
+ * (void) so the compiler checks calls against a real prototype;
+ * an empty () would leave the argument list unspecified.
+ * ----------------
+ */
+extern int TransactionFlushEnabled(void);
+extern void SetTransactionFlushEnabled(bool state);
+
+extern bool IsTransactionState(void);
+extern bool IsAbortedTransactionBlockState(void);
+extern void OverrideTransactionSystem(bool flag);
+extern TransactionId GetCurrentTransactionId(void);
+extern CommandId GetCurrentCommandId(void);
+extern AbsoluteTime GetCurrentTransactionStartTime(void);
+extern bool TransactionIdIsCurrentTransactionId(TransactionId xid);
+extern bool CommandIdIsCurrentCommandId(CommandId cid);
+extern void ClearCommandIdCounterOverflowFlag(void);
+extern void CommandCounterIncrement(void);
+extern void InitializeTransactionSystem(void);
+extern void AtStart_Cache(void);
+extern void AtStart_Locks(void);
+extern void AtStart_Memory(void);
+extern void RecordTransactionCommit(void);
+extern void AtCommit_Cache(void);
+extern void AtCommit_Locks(void);
+extern void AtCommit_Memory(void);
+extern void RecordTransactionAbort(void);
+extern void AtAbort_Cache(void);
+extern void AtAbort_Locks(void);
+extern void AtAbort_Memory(void);
+extern void StartTransaction(void);
+extern bool CurrentXactInProgress(void);
+extern void CommitTransaction(void);
+extern void AbortTransaction(void);
+extern void StartTransactionCommand(void);
+extern void CommitTransactionCommand(void);
+extern void AbortCurrentTransaction(void);
+extern void BeginTransactionBlock(void);
+extern void EndTransactionBlock(void);
+extern void AbortTransactionBlock(void);
+extern bool IsTransactionBlock(void);
+extern void UserAbortTransactionBlock(void);
+
+extern TransactionId DisabledTransactionId;
+
+/* defined in xid.c */
+extern bool TransactionIdIsValid(TransactionId transactionId);
+extern void StoreInvalidTransactionId(TransactionId *destination);
+extern void TransactionIdStore(TransactionId transactionId,
+                  TransactionId *destination);
+extern bool TransactionIdEquals(TransactionId id1, TransactionId id2);
+extern bool TransactionIdIsLessThan(TransactionId id1, TransactionId id2);
+extern void TransactionIdIncrement(TransactionId *transactionId);
+extern void TransactionIdAdd(TransactionId *xid, int value);
+
+#endif /* XACT_H */
diff --git a/src/backend/bootstrap/Makefile.inc b/src/backend/bootstrap/Makefile.inc

new file mode 100644 (file)

index 0000000..7287134
--- /dev/null
+++ b/src/backend/bootstrap/Makefile.inc
@@ -0,0 +1,63 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for the bootstrap module
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/bootstrap/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+#
+#
+# Another kinda weird Makefile.inc cause we need two
+#  scanner/parsers in the backend and most yaccs and lexs
+#  don't have the prefix option.
+#
+#  sed files are HACK CITY! - redo...
+#
+#-------------------------------------------------------------------------
+
+bootdir= $(CURDIR)/bootstrap
+VPATH:= $(VPATH):$(bootdir)
+
+#BOOTYACCS= bootstrap_tokens.h bootparse.c
+BOOTYACCS= bootparse.c
+
+SRCS_BOOTSTRAP= bootparse.c bootscanner.c bootstrap.c 
+
+# Generate bootparse.c and bootstrap_tokens.h from bootparse.y inside
+# $(objdir).  boot.sed rewrites the yy* symbols to Int_yy* so that this
+# parser does not clash with the normal backend SQL parser.  The steps
+# are chained with && so that a failing cd, yacc or sed fails the rule
+# instead of being masked by the exit status of the final rm -f.
+$(BOOTYACCS): bootparse.y
+	cd $(objdir) && \
+	$(YACC) $(YFLAGS) $< && \
+	sed -f $(bootdir)/boot.sed < y.tab.c > bootparse.c && \
+	mv y.tab.h bootstrap_tokens.h && \
+	rm -f y.tab.c
+
+$(objdir)/bootparse.o: bootparse.c
+	$(cc_inobjdir)
+
+
+# Generate bootscanner.c from bootscanner.l inside $(objdir), applying
+# the same boot.sed symbol renaming as for the parser.  The steps are
+# chained with && so that a failing cd, lex or sed fails the rule
+# instead of being masked by the exit status of the final rm -f.
+bootscanner.c: bootscanner.l
+	cd $(objdir) && \
+	$(LEX) $< && \
+	sed -f $(bootdir)/boot.sed < lex.yy.c > bootscanner.c && \
+	rm -f lex.yy.c
+
+$(objdir)/bootscanner.o: bootscanner.c
+	$(cc_inobjdir)
+
+
+
+#
+# The following ensures that bootstrap_tokens.h (the renamed y.tab.h)
+# gets made, as bootstrap.c includes it
+#
+bootstrap.o: $(BOOTYACCS)
+
+POSTGRES_DEPEND+= $(BOOTYACCS) bootscanner.c
+
+
+CLEANFILES+= bootscanner.c $(BOOTYACCS) y.tab.h y.output
+
+HEADERS+= bootstrap.h
+
diff --git a/src/backend/bootstrap/boot.sed b/src/backend/bootstrap/boot.sed

new file mode 100644 (file)

index 0000000..8ec7102
--- /dev/null
+++ b/src/backend/bootstrap/boot.sed
@@ -0,0 +1,9 @@
+#
+# boot.sed - sed rules to remove conflicts between the 
+#               bootstrap backend interface LEX scanner and the
+#               normal backend SQL LEX scanner
+#
+# $Header: /cvsroot/pgsql/src/backend/bootstrap/Attic/boot.sed,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+#
+s/^yy/Int_yy/g
+s/\([^a-zA-Z0-9_]\)yy/\1Int_yy/g
diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y

new file mode 100644 (file)

index 0000000..0362b30
--- /dev/null
+++ b/src/backend/bootstrap/bootparse.y
@@ -0,0 +1,293 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * bootparse.y--
+ *    yacc parser grammar for the "backend" initialization program.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/bootstrap/bootparse.y,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "access/heapam.h"
+#include "access/tupdesc.h"
+#include "bootstrap/bootstrap.h"
+#include "utils/portal.h" 
+#include "storage/smgr.h" 
+#include "nodes/pg_list.h"
+#include "catalog/catalog.h"
+#include "catalog/catname.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "commands/rename.h"
+#include "commands/defrem.h"
+#include "access/transam.h"
+#include "access/xact.h"
+
+/*
+ * DO_START / DO_END bracket the work done by one bootstrap command:
+ * DO_START opens a transaction; DO_END commits it, emits the prompt
+ * unless Quiet is set, and flushes stdout.
+ */
+#define DO_START { StartTransactionCommand();\
+        }
+
+#define DO_END   { CommitTransactionCommand();\
+          if (!Quiet) { EMITPROMPT; }\
+          fflush(stdout); \
+        }
+
+/* number of values consumed so far for the tuple being inserted */
+int num_tuples_read = 0;
+/* OID used for the tuple being inserted (assigned in InsertStmt) */
+static Oid objectid;
+
+%}
+
+/*
+ * Semantic value types.  ival carries either a plain integer (optbootstrap,
+ * optoideq) or an index into the string table that the actions turn back
+ * into a char * with LexIDStr().
+ */
+%union {
+    List     *list;
+    IndexElem  *ielem;
+    char     *str;
+    int        ival;
+}
+
+%type <list>  arg_list
+%type <ielem> index_params index_on
+%type <ival> const ident
+/* tuple and tuplelist are typed here but their actions never assign $$ */
+%type <ival> optbootstrap optoideq tuple tuplelist
+
+%token <ival> CONST ID
+%token OPEN XCLOSE XCREATE INSERT_TUPLE
+/* STRING and XDEFINE are not referenced by any rule in this grammar */
+%token STRING XDEFINE 
+%token XDECLARE INDEX ON USING XBUILD INDICES
+%token COMMA EQUALS LPAREN RPAREN
+%token OBJ_ID XBOOTSTRAP NULLVAL
+%start TopLevel
+
+/* pseudo-tokens used only with %prec in CloseStmt */
+%nonassoc low
+%nonassoc high
+
+%%
+
+/* The whole input: an optional sequence of bootstrap commands. */
+TopLevel:
+     Queries
+   |
+   ;
+
+Queries:
+     Query
+   | Queries Query
+   ;
+
+/* One bootstrap command. */
+Query :
+     OpenStmt
+   | CloseStmt 
+   | CreateStmt
+   | InsertStmt 
+   | DeclareIndexStmt
+   | BuildIndsStmt
+   ;
+
+/* "open <relname>": open the named relation inside its own transaction. */
+OpenStmt: 
+     OPEN ident
+       { 
+           DO_START;
+           boot_openrel(LexIDStr($2));
+           DO_END; 
+       }   
+   ;
+
+/*
+ * "close <relname>" or a bare "close".  The named form passes the name to
+ * closerel(), the bare form passes NULL.  The %prec annotations attach the
+ * precedences "low" and "high" to the two alternatives.
+ */
+CloseStmt:
+     XCLOSE ident %prec low
+       {
+           DO_START;
+           closerel(LexIDStr($2));
+           DO_END;
+       }
+   | XCLOSE %prec high
+       {
+           DO_START;
+           closerel(NULL);
+           DO_END;
+       }
+   ;
+
+/*
+ * "create [bootstrap] <relname> ( <attr> = <type> , ... )"
+ *
+ * Three actions run in order:
+ *   1. after "(" : start a transaction and reset numattr;
+ *   2. after the attribute list (each typething has called DefineAttr and
+ *      bumped numattr): commit that transaction;
+ *   3. after ")" : start a new transaction and create the relation.
+ * The two mid-rule actions occupy positions $5 and $7, so $2 and $3 in the
+ * final action still refer to optbootstrap and the relation name.
+ */
+CreateStmt:
+     XCREATE optbootstrap ident LPAREN 
+       { 
+           DO_START; 
+           numattr=(int)0;
+       }
+     typelist 
+       { 
+           if (!Quiet) putchar('\n');
+           DO_END;
+       }
+     RPAREN 
+       { 
+           DO_START; 
+
+           /* "bootstrap" form: create with heap_creatr and keep the
+            * result open as the current relation (reldesc) */
+           if ($2) {
+           extern Relation reldesc;
+           TupleDesc tupdesc;
+
+           if (reldesc) {
+               puts("create bootstrap: Warning, open relation");
+               puts("exists, closing first");
+               closerel(NULL);
+           }
+           if (DebugMode)
+               puts("creating bootstrap relation");
+           tupdesc = CreateTupleDesc(numattr,attrtypes);
+           reldesc = heap_creatr(LexIDStr($3),
+                         DEFAULT_SMGR,
+                         tupdesc);
+           if (DebugMode)
+               puts("bootstrap relation created ok");
+           } else {
+           /* plain form: create with heap_create and report the OID;
+            * reldesc is not touched */
+           Oid id;
+           TupleDesc tupdesc;
+           /* extern Oid heap_create();*/
+
+           tupdesc = CreateTupleDesc(numattr,attrtypes);
+           id = heap_create(LexIDStr($3),
+                    NULL,
+                    'n',
+                    DEFAULT_SMGR,
+                    tupdesc);
+           if (!Quiet)
+               printf("CREATED relation %s with OID %d\n",
+                  LexIDStr($3), id);
+           }
+           DO_END;
+           if (DebugMode)
+           puts("Commit End");
+       }
+   ;
+
+/*
+ * "insert [OID = <oid>] ( <value> ... )"
+ *
+ * The mid-rule action runs before any value is parsed; the final action
+ * runs after the closing parenthesis and forms/inserts the tuple.
+ */
+InsertStmt:
+     INSERT_TUPLE optoideq
+       {
+           DO_START;
+           /*
+            * Refuse the insert before any value is processed.  The
+            * tuple actions call InsertOneValue(), which dereferences
+            * reldesc / attrtypes[]; checking only after the value list
+            * (as was done before) was too late to protect them.
+            */
+           if (reldesc == (Relation)NULL) {
+               elog(WARN,"must OPEN RELATION before INSERT\n");
+               err();
+           }
+           /*
+            * Record the OID now so that the tuple actions pass the OID
+            * of this tuple, not a value left over from the previous one.
+            */
+           objectid = $2;
+           if (DebugMode)
+               printf("tuple %d<", $2);
+           num_tuples_read = 0;
+       }
+     LPAREN  tuplelist RPAREN
+       {
+           if (num_tuples_read != numattr)
+               elog(WARN,"incorrect number of values for tuple");
+           if (DebugMode)
+               puts("Insert Begin");
+           InsertOneTuple(objectid);
+           if (DebugMode)
+               puts("Insert End");
+           if (!Quiet) { putchar('\n'); }
+           DO_END;
+           if (DebugMode)
+               puts("Transaction End");
+       }
+   ;
+
+/*
+ * "declare index <name> on <name> using <name> ( <index_params> )"
+ *
+ * DefineIndex() receives, in this order: the name after ON ($5), the
+ * index name ($3), the name after USING ($7), and a one-element list
+ * holding the IndexElem built by index_params.
+ */
+DeclareIndexStmt:
+     XDECLARE INDEX ident ON ident USING ident LPAREN index_params RPAREN
+       {
+         List *params;
+
+         DO_START;
+
+         params = lappend(NIL, (List*)$9);
+         DefineIndex(LexIDStr($5), 
+                 LexIDStr($3), 
+                 LexIDStr($7),
+                 params, NIL, 0, NIL);
+         DO_END;
+       }
+   ;
+
+/* "build indices": calls build_indices(); no DO_START/DO_END here. */
+BuildIndsStmt:
+     XBUILD INDICES    { build_indices(); }
+
+/* index_on followed by an identifier that is stored in the "class" field */
+index_params:
+   index_on ident
+       {
+           IndexElem *n = (IndexElem*)$1;
+           n->class = LexIDStr($2);
+           $$ = n;
+       }
+
+/*
+ * Either a bare name, or "name ( arg , ... )" in which case the argument
+ * list is stored in the "args" field.
+ */
+index_on:
+     ident
+       {
+           IndexElem *n = makeNode(IndexElem);
+           n->name = LexIDStr($1);
+           $$ = n;
+       }
+   | ident LPAREN arg_list RPAREN
+       {
+           IndexElem *n = makeNode(IndexElem);
+           n->name = LexIDStr($1);
+           n->args = (List*)$3;
+           $$ = n;
+       }
+
+/* comma-separated identifiers, collected as a List of makeString() nodes */
+arg_list:
+     ident
+       {
+         $$ = lappend(NIL, makeString(LexIDStr($1)));
+       }
+   | arg_list COMMA ident
+       {
+         $$ = lappend((List*)$1, makeString(LexIDStr($3)));
+       }
+    
+/* 1 when the "bootstrap" keyword is present, 0 otherwise */
+optbootstrap:
+       XBOOTSTRAP  { $$ = 1; }
+   |       { $$ = 0; }
+   ;
+
+/* comma-separated attribute definitions of a create command */
+typelist:
+     typething
+   | typelist COMMA typething
+   ;
+
+/*
+ * "<attrname> = <typename>": bumps numattr (fatal above MAXATTR) and
+ * registers the attribute in slot numattr-1 via DefineAttr().
+ */
+typething:
+     ident EQUALS ident
+       { 
+          if(++numattr > MAXATTR)
+           elog(FATAL,"Too many attributes\n");
+          DefineAttr(LexIDStr($1),LexIDStr($3),numattr-1);
+          if (DebugMode)
+              printf("\n");
+       }
+   ;
+
+/*
+ * Optional "OID = <n>": the explicit value is parsed with atol(); when
+ * the clause is absent newoid() supplies the value.
+ */
+optoideq:
+       OBJ_ID EQUALS ident { $$ = atol(LexIDStr($3));      }
+   |           { extern Oid newoid(); $$ = newoid();   }
+   ;
+
+/* values of one tuple; the separating comma is optional */
+tuplelist:
+      tuple
+   |  tuplelist tuple
+   |  tuplelist COMMA tuple
+   ;
+
+/*
+ * One value: an identifier or constant is converted by InsertOneValue(),
+ * _null_ is recorded by InsertOneNull().  Each alternative consumes the
+ * next slot, num_tuples_read.  objectid is the file-level static declared
+ * in the prologue.
+ */
+tuple:
+     ident {InsertOneValue(objectid, LexIDStr($1), num_tuples_read++); }
+        | const {InsertOneValue(objectid, LexIDStr($1), num_tuples_read++); }
+   | NULLVAL
+       { InsertOneNull(num_tuples_read++); }
+   ;
+  
+/*
+ * const / ident: pass on the string-table index the scanner stored for
+ * the token.  $1 is the value saved on the parser stack when the token
+ * was shifted; reading the global yylval here instead is only correct
+ * as long as the parser has not already fetched a lookahead token.
+ */
+const :
+     CONST { $$ = $1; }
+   ;
+
+ident :
+     ID    { $$ = $1; }
+   ;
+%%
+
+
diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l

new file mode 100644 (file)

index 0000000..9dbd92c
--- /dev/null
+++ b/src/backend/bootstrap/bootscanner.l
@@ -0,0 +1,108 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * bootscanner.lex--
+ *    a lexical scanner for the bootstrap parser
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/bootstrap/bootscanner.l,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "bootstrap/bootstrap.h"
+#include "utils/portal.h" 
+#include "access/xact.h"
+#include "parser/scansup.h"
+
+#include "bootstrap_tokens.h"
+
+/* some versions of lex define this as a macro */
+#if defined(yywrap)
+#undef yywrap
+#endif /* yywrap */
+
+/* semantic value handed to the parser; the rules below fill in .ival */
+YYSTYPE    yylval;
+/* incremented on every newline; has no initializer, so counting starts at 0 */
+int    yyline;  /* keep track of the line number for error reporting */
+
+%}
+
+   /*
+    * Named patterns:
+    *   D        one decimal digit
+    *   oct      a backslash followed by exactly three digits
+    *   Exp      exponent part of a floating point constant
+    *   id       one or more of: letter, digit, '_', '-', or an oct escape
+    *   sid      text in double quotes that contains no double quote
+    *   arrayid  a name followed by '[', optional digits and ']'
+    */
+D  [0-9]
+oct     \\{D}{D}{D}
+Exp    [Ee][-+]?{D}+
+id      ([A-Za-z0-9_]|{oct}|\-)+
+sid     \"([^\"])*\"
+arrayid    [A-Za-z0-9_]+\[{D}*\]
+
+%%
+
+open           { return(OPEN); }
+
+close      { return(XCLOSE); }
+
+create     { return(XCREATE); }
+
+OID             { return(OBJ_ID); }
+bootstrap  { return(XBOOTSTRAP); }
+_null_     { return(NULLVAL); }
+
+insert     { return(INSERT_TUPLE); }
+
+","        { return(COMMA); }
+"="        { return(EQUALS); }
+"("        { return(LPAREN); }
+")"        { return(RPAREN); }
+
+[\n]       { yyline++; /* count lines for the error messages */ }
+[\t]       ;
+" "        ; 
+
+^\#[^\n]* ; /* drop a comment line: "#" in column one through end of line */
+
+
+"declare"  { return(XDECLARE); }
+"build"        { return(XBUILD); }
+"indices"  { return(INDICES); }
+"index"        { return(INDEX); }
+"on"       { return(ON); }
+"using"        { return(USING); }
+{arrayid}  {
+           /* array type name: rewritten by MapArrayTypeName()
+            * before it is entered in the string table */
+           yylval.ival = EnterString(MapArrayTypeName((char*)yytext));
+           return(ID);
+       }
+{id}       { 
+           /* any other word; the keyword rules above win when they
+            * match the same text because they are listed first */
+           yylval.ival = EnterString(scanstr((char*)yytext));
+           return(ID);
+       }
+{sid}      {
+           /* double-quoted text is returned as an ID as well */
+           yylval.ival = EnterString(scanstr((char*)yytext));
+           return(ID);
+       }
+
+(-)?{D}+"."{D}*({Exp})?    |
+(-)?{D}*"."{D}+({Exp})?    |
+(-)?{D}+{Exp}      {
+               /* floating point text; stored unmodified */
+               yylval.ival = EnterString((char*)yytext);
+               return(CONST);
+           }
+
+.      {
+           /* unmatched character: report it on stdout and keep scanning */
+           printf("syntax error %d : -> %s\n", yyline, yytext);
+       }
+
+
+
+%%
+
+/*
+ * yywrap - called by the scanner at end of input.
+ * Returning 1 tells it there is no further input to scan.
+ */
+int
+yywrap()
+{
+    return 1;
+}
+
+/*
+ * yyerror - report a parser error on stderr, prefixed with the scanner's
+ * current line counter.  The return type is spelled out (it used to be an
+ * implicit int with no return statement) and the message now ends with a
+ * newline, like the scanner's own "syntax error" message.
+ */
+int
+yyerror(str)
+    char *str;
+{
+    fprintf(stderr,"\tsyntax error %d : %s\n",yyline, str);
+    return 0;
+}
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c

new file mode 100644 (file)

index 0000000..e2df755
--- /dev/null
+++ b/src/backend/bootstrap/bootstrap.c
@@ -0,0 +1,1049 @@
+/*-------------------------------------------------------------------------
+ *
+ * bootstrap.c--
+ *    routines to support running postgres in 'bootstrap' mode
+ *  bootstrap mode is used to create the initial template database
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+#include <unistd.h>
+#include "libpq/pqsignal.h"    /* substitute for <signal.h> */
+#if defined(PORTNAME_linux)
+#ifndef __USE_POSIX
+#define __USE_POSIX
+#endif
+#endif /* defined(PORTNAME_linux) */
+#include <setjmp.h>
+
+#define BOOTSTRAP_INCLUDE  /* mask out stuff in tcop/tcopprot.h */
+
+#include "bootstrap/bootstrap.h"
+#include "postgres.h"
+#include "miscadmin.h"
+#include "tcop/tcopprot.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/tupdesc.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+#include "utils/lsyscache.h"
+#include "access/xact.h"
+#include "utils/exc.h" /* for ExcAbort and <setjmp.h> */
+#include "fmgr.h"
+#include "utils/palloc.h"
+#include "utils/mcxt.h"
+#include "storage/smgr.h"
+#include "commands/defrem.h"
+
+#include "catalog/pg_type.h"
+#include "catalog/catname.h"
+#include "catalog/indexing.h"
+#include "catalog/index.h"
+
+/* allocate c zero-filled objects of type t with calloc() */
+#define ALLOC(t, c)    (t *)calloc((unsigned)(c), sizeof(t))
+#define FIRST_TYPE_OID 16  /* OID of the first type */
+
+/* ----------------
+ * global variables
+ * ----------------
+ */
+/*
+ * In the lexical analyzer, we need to get the reference number quickly from
+ * the string, and the string from the reference number.  Thus we have
+ * as our data structure a hash table, where the hashing key is taken from
+ * the particular string.  The hash table is chained.  One of the fields
+ * of the hash table node is an index into the array of character pointers.
+ * The unique index number that every string is assigned is simply the
+ * position of its string pointer in the array of string pointers.
+ */
+
+#define STRTABLESIZE   10000
+#define HASHTABLESIZE  503
+
+/* Hash function numbers */
+#define NUM    23
+#define    NUMSQR  529
+#define    NUMCUBE 12167
+
+/* strtable[n] is the string with id n; hashtable[] holds the chain heads */
+char            *strtable [STRTABLESIZE]; 
+hashnode   *hashtable [HASHTABLESIZE];
+
+static int strtable_end = -1;    /* Tells us last occupied string space */
+
+/*-
+ * Basic information associated with each type.  This is used before
+ * pg_type is created.
+ *
+ * XXX several of these input/output functions do catalog scans
+ *     (e.g., F_REGPROCIN scans pg_proc).  this obviously creates some
+ *     order dependencies in the catalog creation process.
+ */
+struct typinfo {
+    char   name[NAMEDATALEN];  /* type name as written in the input */
+    Oid        oid;            /* type OID */
+    Oid        elem;           /* passed to the in/out functions after the value */
+    int16  len;                /* copied into attlen by DefineAttr */
+    Oid        inproc;         /* input function, called through fmgr() */
+    Oid        outproc;        /* output function, called through fmgr() */
+};
+
+/*
+ * Built-in type table.  The entries for OIDs 16 through 30 are consecutive;
+ * the last three (210, 1007, 1034) are not.
+ */
+static struct typinfo Procid[] = {
+    { "bool",       16,    0,  1, F_BOOLIN,     F_BOOLOUT },
+    { "bytea",      17,    0, -1, F_BYTEAIN,    F_BYTEAOUT },
+    { "char",       18,    0,  1, F_CHARIN,     F_CHAROUT },
+    { "name",       19,    0, NAMEDATALEN, F_NAMEIN,   F_NAMEOUT },
+    { "char16",     20,    0,  16, F_CHAR16IN, F_CHAR16OUT}, 
+/*    { "dt",         20,    0,  4, F_DTIN,        F_DTOUT}, */
+    { "int2",       21,    0,  2, F_INT2IN,     F_INT2OUT },
+    { "int28",      22,    0, 16, F_INT28IN,    F_INT28OUT },
+    { "int4",       23,    0,  4, F_INT4IN,     F_INT4OUT },
+    { "regproc",    24,    0,  4, F_REGPROCIN,  F_REGPROCOUT },
+    { "text",       25,    0, -1, F_TEXTIN,     F_TEXTOUT },
+    { "oid",        26,    0,  4, F_INT4IN,     F_INT4OUT },
+    { "tid",        27,    0,  6, F_TIDIN,      F_TIDOUT },
+    { "xid",        28,    0,  5, F_XIDIN,      F_XIDOUT },
+    { "iid",        29,    0,  1, F_CIDIN,      F_CIDOUT },
+    { "oid8",       30,    0, 32, F_OID8IN,     F_OID8OUT },
+    { "smgr",      210,    0,  2, F_SMGRIN,     F_SMGROUT },
+    { "_int4",    1007,   23, -1, F_ARRAY_IN,   F_ARRAY_OUT },
+    { "_aclitem", 1034, 1033, -1, F_ARRAY_IN,   F_ARRAY_OUT }
+};
+
+/* number of entries in Procid[] */
+static int n_types = sizeof(Procid) / sizeof(struct typinfo);
+
+/* one pg_type row read from the catalog: its OID plus a copy of the tuple data */
+struct typmap {            /* a hack */
+    Oid    am_oid;
+    TypeTupleFormData  am_typ;
+};
+
+/* NULL-terminated array of pg_type rows; NULL until loaded from the catalog */
+static struct  typmap  **Typ = (struct typmap **)NULL;
+/* entry found by the most recent successful gettype() lookup in Typ */
+static struct  typmap  *Ap = (struct typmap *)NULL;
+     
+static int     Warnings = 0;
+/* per-attribute null markers handed to heap_formtuple: ' ' = value, 'n' = null */
+static char        Blanks[MAXATTR];
+     
+Relation   reldesc;        /* current relation descriptor */
+/* NOTE(review): no assignment to relname is visible in this part of the file */
+static char *relname;                   /* current relation name */
+
+AttributeTupleForm attrtypes[MAXATTR];  /* points to attribute info */
+static char    *values[MAXATTR];   /* corresponding attribute values */
+int        numattr;        /* number of attributes for cur. rel */
+
+/* jump buffer used to resume after a warning; see BootstrapMain */
+#if defined(WIN32) || defined(PORTNAME_next)
+static jmp_buf    Warn_restart;
+#define sigsetjmp(x,y)  setjmp(x)
+#define siglongjmp longjmp
+#else
+static sigjmp_buf Warn_restart;
+#endif
+
+int        DebugMode;
+static GlobalMemory nogc = (GlobalMemory) NULL;    /* special no-gc mem context */
+
+extern int optind;
+extern char    *optarg;
+     
+/*
+ *  At bootstrap time, we first declare all the indices to be built, and
+ *  then build them.  The IndexList structure stores enough information
+ *  to allow us to build the indices after they've been declared.
+ */
+
+/* one declared-but-not-yet-built index; entries are linked through il_next */
+typedef struct _IndexList {
+    char*      il_heap;
+    char*      il_ind;
+    int            il_natts;
+    AttrNumber     *il_attnos;
+    uint16         il_nparams;
+    Datum *        il_params;
+    FuncIndexInfo  *il_finfo;
+    PredInfo       *il_predInfo;
+    struct _IndexList  *il_next;
+} IndexList;
+
+/* head of the list of declared indices */
+static IndexList *ILHead = (IndexList *) NULL;
+     
+/* signal handler type used when installing handlers with signal() */
+typedef void (*sig_func)();
+
+
+\f
+/* ----------------------------------------------------------------
+ *         misc functions
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * err
+ *
+ * Count one more warning and shut down through cleanup().
+ * Not compiled when PORTNAME_bsdi is defined.
+ * ----------------
+ */
+#if !defined(PORTNAME_bsdi)
+void
+err()
+{
+    ++Warnings;
+    cleanup();
+}
+#endif
+
+/*
+ * usage
+ *    print usage help for the bootstrap backend on stderr and exit
+ *    with status 1.  Every option accepted by BootstrapMain is listed,
+ *    including -Q, which used to appear in the synopsis only.
+ */
+static void
+usage()
+{
+    fprintf(stderr,"Usage: postgres -boot [-d] [-C] [-O] [-Q] [-P portno] [dbName]\n");
+    fprintf(stderr,"     d: debug mode\n");
+    fprintf(stderr,"     C: disable version checking\n");
+    fprintf(stderr,"     O: set BootstrapProcessing mode\n");
+    fprintf(stderr,"     Q: quiet mode\n");
+    fprintf(stderr,"     P portno: specify port number\n");
+
+    exitpg(1);
+}
+
+/* ----------------------------------------------------------------
+ * BootstrapMain
+ *         the main loop for handling the backend in bootstrap mode
+ *   the bootstrap mode is used to initialize the template database
+ *   the bootstrap backend doesn't speak SQL, but instead expects
+ *   commands in a special bootstrap language.
+ *   they are a special bootstrap language.
+ *
+ *  the arguments passed in to BootstrapMain are the run-time arguments
+ * without the argument '-boot', the caller is required to have
+ * removed -boot from the run-time args
+ * ----------------------------------------------------------------
+ */
+int
+BootstrapMain(int argc, char *argv[])
+{
+    int      i;
+    int      portFd = -1;
+    char  *dbName;
+    int   flag;
+    int   override = 1;  /* use BootstrapProcessing or InitProcessing mode */
+    
+    extern int   optind;
+    extern char      *optarg;
+
+    /* ----------------
+     * initialize signal handlers
+     * ----------------
+     */
+    signal(SIGINT, (sig_func) die);
+#ifndef WIN32
+    signal(SIGHUP, (sig_func) die); 
+    signal(SIGTERM, (sig_func) die);
+#endif /* WIN32 */    
+
+    /* --------------------
+     * initialize globals 
+     * -------------------
+     */
+    
+    InitGlobals();
+
+    /* ----------------
+     * process command arguments
+     * ----------------
+     */
+    Quiet = 0;
+    Noversion = 0;
+    dbName = NULL;
+    
+    while ((flag = getopt(argc, argv, "dCOQP")) != EOF) {
+   switch (flag) {
+   case 'd':
+       DebugMode = 1; /* print out debuggin info while parsing */
+       break;
+   case 'C':
+       Noversion = 1; 
+       break;
+   case 'O':
+       override = true;
+       break;
+   case 'Q':
+       Quiet = 1;
+       break;
+   case 'P':/* specify port */
+       portFd = atoi(optarg);
+       break; 
+   default:
+       usage();
+       break;
+   }
+    } /* while */
+
+    if (argc - optind > 1) {
+   usage();
+    } else
+    if (argc - optind == 1) {
+   dbName = argv[optind];
+    } 
+
+    if (dbName == NULL) {
+   dbName = getenv("USER");
+   if (dbName == NULL) {
+       fputs("bootstrap backend: failed, no db name specified\n", stderr);
+       fputs("          and no USER enviroment variable\n", stderr);
+       exitpg(1);
+   }
+    }
+
+    /* ----------------
+     * initialize input fd
+     * ----------------
+     */
+    if (IsUnderPostmaster == true && portFd < 0) {
+   fputs("backend: failed, no -P option with -postmaster opt.\n", stderr);
+   exitpg(1);
+    }
+    
+#ifdef WIN32
+    _nt_init();
+    _nt_attach();
+#endif /* WIN32 */
+
+
+    /* ----------------
+     * backend initialization
+     * ----------------
+     */
+    SetProcessingMode((override) ? BootstrapProcessing : InitProcessing);
+    InitPostgres(dbName);
+    LockDisable(true);
+    
+    for (i = 0 ; i < MAXATTR; i++) {
+   attrtypes[i]=(AttributeTupleForm )NULL;
+   Blanks[i] = ' ';
+    }
+    for(i = 0; i < STRTABLESIZE; ++i)
+   strtable[i] = NULL;                    
+    for(i = 0; i < HASHTABLESIZE; ++i)
+   hashtable[i] = NULL;                   
+    
+    /* ----------------
+     * abort processing resumes here  - What to do in WIN32?
+     * ----------------
+     */
+#ifndef WIN32    
+    signal(SIGHUP, handle_warn);
+
+    if (sigsetjmp(Warn_restart, 1) != 0) {
+#else
+    if (setjmp(Warn_restart) != 0) {
+#endif /* WIN32 */
+   Warnings++;
+   AbortCurrentTransaction();
+    }
+    
+    /* ----------------
+     * process input.
+     * ----------------
+     */
+
+    /* the sed script boot.sed renamed yyparse to Int_yyparse
+       for the bootstrap parser to avoid conflicts with the normal SQL
+       parser */
+    Int_yyparse();
+
+    /* clean up processing */
+    StartTransactionCommand();
+    cleanup();
+ 
+    /* not reached, here to make compiler happy */
+    return 0;
+
+}
+
+/* ----------------------------------------------------------------
+ *     MANUAL BACKEND INTERACTIVE INTERFACE COMMANDS
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * boot_openrel
+ *
+ * Open the relation called relname and make it the current relation:
+ * reldesc, numattr and attrtypes[] are set from it.  On the first call
+ * (Typ still NULL) the pg_type relation is scanned twice - once to count
+ * its rows, once to copy them - to build the Typ array.  A relation that
+ * is still open is closed first.
+ *
+ * The parameter relname shadows the file-level static of the same name;
+ * the static is not assigned here.  The caller's string is truncated in
+ * place when it is longer than 15 characters.
+ * ----------------
+ */
+void
+boot_openrel(char *relname)
+{
+    int        i;
+    struct typmap  **app;
+    Relation   rdesc;
+    HeapScanDesc   sdesc;
+    HeapTuple  tup;
+    
+    /* NOTE(review): 15 presumably is NAMEDATALEN-1 - confirm */
+    if (strlen(relname) > 15) 
+   relname[15] ='\000';
+    
+    if (Typ == (struct typmap **)NULL) {
+   StartPortalAllocMode(DefaultAllocMode, 0);
+   rdesc = heap_openr(TypeRelationName);
+   /* first pass: count the rows */
+   sdesc = heap_beginscan(rdesc, 0, NowTimeQual, 0, (ScanKey)NULL);
+   for (i=0; PointerIsValid(tup=heap_getnext(sdesc,0,(Buffer *)NULL)); ++i);
+   heap_endscan(sdesc);
+   /* one slot per row plus a terminating NULL pointer */
+   app = Typ = ALLOC(struct typmap *, i + 1);
+   while (i-- > 0)
+       *app++ = ALLOC(struct typmap, 1);
+   *app = (struct typmap *)NULL;
+   /* second pass: copy each row's OID and tuple data */
+   sdesc = heap_beginscan(rdesc, 0, NowTimeQual, 0, (ScanKey)NULL);
+   app = Typ;
+   while (PointerIsValid(tup = heap_getnext(sdesc, 0, (Buffer *)NULL))) {
+       (*app)->am_oid = tup->t_oid;
+       /* app advances here; the sizeof operand is not evaluated */
+       memmove((char *)&(*app++)->am_typ, 
+           (char *)GETSTRUCT(tup), 
+           sizeof ((*app)->am_typ));
+   }
+   heap_endscan(sdesc);
+   heap_close(rdesc);
+   EndPortalAllocMode();
+    }
+    
+    if (reldesc != NULL) {
+   closerel(NULL);
+    }
+    
+    if (!Quiet)
+       printf("Amopen: relation %s. attrsize %d\n", relname,
+          ATTRIBUTE_TUPLE_SIZE);
+    
+    reldesc = heap_openr(relname);
+    Assert(reldesc);
+    numattr = reldesc->rd_rel->relnatts;
+    /* copy every attribute descriptor of the relation into attrtypes[] */
+    for (i = 0; i < numattr; i++) {
+   if (attrtypes[i] == NULL) {
+       attrtypes[i] = AllocateAttribute();
+   }
+   memmove((char *)attrtypes[i],
+       (char *)reldesc->rd_att->attrs[i], 
+       ATTRIBUTE_TUPLE_SIZE);
+   
+   /* Some old pg_attribute tuples might not have attisset. */
+   /* If the attname is attisset, don't look for it - it may
+      not be defined yet.
+      */
+   /* NOTE(review): the test below calls get_attisset() only when the
+      attribute IS named "attisset", which reads as the opposite of the
+      comment above - confirm which one is intended. */
+   if (namestrcmp(&attrtypes[i]->attname, "attisset") == 0)
+       attrtypes[i]->attisset = get_attisset(reldesc->rd_id,
+                         attrtypes[i]->attname.data);
+   else
+       attrtypes[i]->attisset = false;
+   
+   if (DebugMode) {
+       AttributeTupleForm at = attrtypes[i];
+       printf("create attribute %d name %.*s len %d num %d type %d\n",
+          i, NAMEDATALEN, at->attname.data, at->attlen, at->attnum, 
+          at->atttypid
+          );
+       fflush(stdout);
+   }
+    }
+}
+
+/* ----------------
+ * closerel
+ *
+ * Close the current relation (reldesc).  When name is not NULL it must
+ * match the name of the open relation; a mismatch, or a close while no
+ * relation is open, is reported with elog(WARN).
+ *
+ * The messages print the file-level relname.  Nothing visible in this
+ * file assigns that variable, so it may still be NULL; it is replaced
+ * by "(null)" rather than being handed to a %s conversion as NULL.
+ * ----------------
+ */
+void
+closerel(char *name)
+{
+    char *curname = (relname != NULL) ? relname : "(null)";
+
+    if (name) {
+        if (reldesc) {
+            if (namestrcmp(RelationGetRelationName(reldesc), name) != 0)
+                elog(WARN,"closerel: close of '%s' when '%s' was expected",
+                     name, curname);
+        } else
+            elog(WARN,"closerel: close of '%s' before any relation was opened",
+                 name);
+
+    }
+
+    if (reldesc == NULL) {
+        elog(WARN,"Warning: no opened relation to close.\n");
+    } else {
+        if (!Quiet) printf("Amclose: relation %s.\n", curname);
+        heap_close(reldesc);
+        reldesc = (Relation)NULL;
+    }
+}
+
+\f
+/* ----------------
+ * DefineAttr
+ *
+ * Record one <field,type> pair of a relation that is about to be
+ * created; called once per field.  Slot attnum of attrtypes[] is
+ * allocated on first use and filled in with the attribute name, its
+ * 1-based number and the type's OID, length and by-value flag.  The
+ * type data comes from the catalog entry found by gettype() (Ap) once
+ * Typ has been loaded, and from the built-in Procid[] table before.
+ * ----------------
+ */
+void
+DefineAttr(char *name, char *type, int attnum)
+{
+    AttributeTupleForm  att;
+    int                 typidx;
+    int                 len;
+
+    if (reldesc != NULL) {
+        fputs("Warning: no open relations allowed with 't' command.\n",stderr);
+        closerel(relname);
+    }
+
+    typidx = gettype(type);
+
+    if (attrtypes[attnum] == (AttributeTupleForm )NULL)
+        attrtypes[attnum] = AllocateAttribute();
+    att = attrtypes[attnum];
+
+    /* name, progress output and attribute number do not depend on the type source */
+    namestrcpy(&att->attname, name);
+    if (!Quiet)
+        printf("<%.*s %s> ", NAMEDATALEN, att->attname.data, type);
+    att->attnum = 1 + attnum; /* fillatt */
+
+    if (Typ == (struct typmap **)NULL) {
+        /* pg_type not loaded yet: typidx indexes the built-in table */
+        len = Procid[typidx].len;
+        att->atttypid = Procid[typidx].oid;
+        att->attlen = len;
+        att->attbyval = (len==1) || (len==2)||(len==4);
+    } else {
+        /* gettype() left the matching catalog entry in Ap */
+        att->atttypid = Ap->am_oid;
+        att->attlen = Ap->am_typ.typlen;
+        att->attbyval = Ap->am_typ.typbyval;
+    }
+}
+
+
+/* ----------------
+ * InsertOneTuple
+ *
+ * Form a tuple from values[] / Blanks[] using the current attribute
+ * descriptions and insert it into the current relation.  A non-zero
+ * objectid is stored as the tuple's OID.  Afterwards the null markers
+ * of all numattr attributes are reset to ' ' for the next tuple.
+ * ----------------
+ */
+void
+InsertOneTuple(Oid objectid)
+{
+    TupleDesc   desc;
+    HeapTuple   newtup;
+    int         attno;
+
+    if (DebugMode) {
+        printf("InsertOneTuple oid %d, %d attrs\n", objectid, numattr);
+        fflush(stdout);
+    }
+
+    desc = CreateTupleDesc(numattr,attrtypes);
+    newtup = heap_formtuple(desc,(Datum*)values,Blanks);
+    pfree(desc); /* releases the descriptor only, not the attrtypes */
+
+    if (objectid != (Oid)0)
+        newtup->t_oid = objectid;
+
+    heap_insert(reldesc, newtup);
+    pfree(newtup);
+
+    if (DebugMode) {
+        printf("End InsertOneTuple, objectid=%d\n", objectid);
+        fflush(stdout);
+    }
+
+    /* mark every attribute as "has a value" again */
+    for (attno = numattr - 1; attno >= 0; attno--)
+        Blanks[attno] = ' ';
+}
+
+/* ----------------
+ * InsertOneValue
+ *
+ * Convert the text 'value' with the input function of attribute i's type
+ * and store the result in values[i]; the converted value is printed back
+ * through the type's output function unless Quiet is set.
+ *
+ * Once pg_type has been loaded (Typ != NULL) the functions come from the
+ * catalog entry whose OID matches the open relation's attribute type.
+ * Before that they come from the built-in Procid[] table, which is now
+ * searched by OID.  It used to be indexed by (atttypid - FIRST_TYPE_OID),
+ * which is only valid for the consecutive OIDs 16..30 and ran far past
+ * the end of the table for smgr (210), _int4 (1007) and _aclitem (1034).
+ *
+ * objectid is accepted for interface compatibility and is not used.
+ * ----------------
+ */
+void
+InsertOneValue(Oid objectid, char *value, int i)
+{
+    int             typeindex;
+    char            *prt;
+    struct typmap   **app;
+
+    if (DebugMode)
+        printf("Inserting value: '%s'\n", value);
+    if (i < 0 || i >= MAXATTR) {
+        printf("i out of range: %d\n", i);
+        Assert(0);
+    }
+
+    if (Typ != (struct typmap **)NULL) {
+        struct typmap *ap;
+
+        if (DebugMode)
+            puts("Typ != NULL");
+        /* find the catalog entry for this attribute's type */
+        app = Typ;
+        while (*app && (*app)->am_oid != reldesc->rd_att->attrs[i]->atttypid)
+            ++app;
+        ap = *app;
+        if (ap == NULL) {
+            printf("Unable to find atttypid in Typ list! %d\n",
+                   reldesc->rd_att->attrs[i]->atttypid
+                   );
+            Assert(0);
+        }
+        values[i] = fmgr(ap->am_typ.typinput,
+                         value,
+                         ap->am_typ.typelem,
+                         -1); /* shouldn't have char() or varchar() types
+                                 during bootstrapping but just to be safe */
+        prt = fmgr(ap->am_typ.typoutput, values[i],
+                   ap->am_typ.typelem);
+        if (!Quiet) printf("%s ", prt);
+        pfree(prt);
+    } else {
+        /* look the type up by OID; never trust OID arithmetic as an index */
+        for (typeindex = 0; typeindex < n_types; typeindex++) {
+            if (Procid[typeindex].oid == attrtypes[i]->atttypid)
+                break;
+        }
+        if (typeindex >= n_types) {
+            elog(WARN, "InsertOneValue: type %d is not a built-in bootstrap type",
+                 attrtypes[i]->atttypid);
+            err();
+            return;
+        }
+        if (DebugMode)
+            printf("Typ == NULL, typeindex = %d idx = %d\n", typeindex, i);
+        values[i] = fmgr(Procid[typeindex].inproc, value,
+                         Procid[typeindex].elem, -1);
+        prt = fmgr(Procid[typeindex].outproc, values[i],
+                   Procid[typeindex].elem);
+        if (!Quiet) printf("%s ", prt);
+        pfree(prt);
+    }
+    if (DebugMode) {
+        puts("End InsertValue");
+        fflush(stdout);
+    }
+}
+
+/* ----------------
+ * InsertOneNull
+ *
+ * Record a null for attribute i of the tuple being built: the null
+ * marker becomes 'n' and the value pointer is cleared.  An index
+ * outside 0..MAXATTR-1 is reported with elog(FATAL).
+ * ----------------
+ */
+void
+InsertOneNull(int i)
+{
+    if (DebugMode)
+        printf("Inserting null\n");
+
+    if (!(i >= 0 && i < MAXATTR))
+        elog(FATAL, "i out of range (too many attrs): %d\n", i);
+
+    Blanks[i] = 'n';
+    values[i] = (char *)NULL;
+}
+
+#define MORE_THAN_THE_NUMBER_OF_CATALOGS 256
+
+/*
+ * BootstrapAlreadySeen
+ *
+ * Returns true when id has been passed to this function before.
+ * Otherwise id is remembered and false is returned.
+ *
+ * The ids live in a fixed-size static array.  Remembering more than
+ * MORE_THAN_THE_NUMBER_OF_CATALOGS distinct ids is reported with
+ * elog(FATAL) instead of writing past the end of that array.
+ */
+bool
+BootstrapAlreadySeen(Oid id)
+{
+    static Oid seenArray[MORE_THAN_THE_NUMBER_OF_CATALOGS];
+    static int nseen = 0;
+    int i;
+
+    for (i = 0; i < nseen; i++) {
+        if (seenArray[i] == id)
+            return (true);
+    }
+
+    if (nseen >= MORE_THAN_THE_NUMBER_OF_CATALOGS) {
+        elog(FATAL, "BootstrapAlreadySeen: more than %d distinct ids",
+             MORE_THAN_THE_NUMBER_OF_CATALOGS);
+        return (false);   /* never store past the end of the array */
+    }
+
+    seenArray[nseen] = id;
+    nseen++;
+    return (false);
+}
+
+/* ----------------
+ * cleanup
+ *
+ * Final shutdown: close the current relation if one is open, commit the
+ * current transaction and exit with the number of warnings as status.
+ * A second call is reported as a fatal error.
+ * ----------------
+ */
+void
+cleanup()
+{
+    static int beenhere = 0;
+
+    if (!beenhere)
+        beenhere = 1;
+    else {
+        /* the message has no conversions; the stray "stderr" argument
+           that used to follow it has been dropped */
+        elog(FATAL,"Memory manager fault: cleanup called twice.\n");
+        exitpg(1);
+    }
+    if (reldesc != (Relation)NULL) {
+        heap_close(reldesc);
+    }
+    CommitTransactionCommand();
+    exitpg(Warnings);
+}
+
+/* ----------------
+ * gettype
+ *
+ * Look up a type by name.
+ *
+ * - When the pg_type rows have been loaded (Typ != NULL) they are
+ *   searched; on a match Ap is set to the entry and its OID is returned.
+ * - Otherwise the built-in Procid[] table is searched and the *index*
+ *   of the matching entry is returned.  When the name is not built in,
+ *   pg_type is scanned to build Typ and the lookup is repeated.
+ * - An unknown name is reported with elog(WARN) followed by err().
+ *
+ * The built-in search now stops at n_types; the former "<=" bound
+ * compared against one element past the end of Procid[].
+ * ----------------
+ */
+int
+gettype(char *type)
+{
+    int             i;
+    Relation        rdesc;
+    HeapScanDesc    sdesc;
+    HeapTuple       tup;
+    struct typmap   **app;
+
+    if (Typ != (struct typmap **)NULL) {
+        for (app = Typ; *app != (struct typmap *)NULL; app++) {
+            if (strncmp((*app)->am_typ.typname.data, type, NAMEDATALEN) == 0) {
+                Ap = *app;
+                return((*app)->am_oid);
+            }
+        }
+    } else {
+        for (i = 0; i < n_types; i++) {
+            if (strncmp(type, Procid[i].name, NAMEDATALEN) == 0) {
+                return(i);
+            }
+        }
+        if (DebugMode)
+            printf("bootstrap.c: External Type: %.*s\n", NAMEDATALEN, type);
+
+        /* first pass over pg_type: count the rows */
+        rdesc = heap_openr(TypeRelationName);
+        sdesc = heap_beginscan(rdesc, 0, NowTimeQual, 0, (ScanKey)NULL);
+        i = 0;
+        while (PointerIsValid(tup = heap_getnext(sdesc, 0, (Buffer *)NULL)))
+            ++i;
+        heap_endscan(sdesc);
+
+        /* one slot per row plus a terminating NULL pointer */
+        app = Typ = ALLOC(struct typmap *, i + 1);
+        while (i-- > 0)
+            *app++ = ALLOC(struct typmap, 1);
+        *app = (struct typmap *)NULL;
+
+        /* second pass: copy each row's OID and tuple data */
+        sdesc = heap_beginscan(rdesc, 0, NowTimeQual, 0, (ScanKey)NULL);
+        app = Typ;
+        while (PointerIsValid(tup = heap_getnext(sdesc, 0, (Buffer *)NULL))) {
+            (*app)->am_oid = tup->t_oid;
+            /* app advances here; the sizeof operand is not evaluated */
+            memmove((char *)&(*app++)->am_typ,
+                    (char *)GETSTRUCT(tup),
+                    sizeof ((*app)->am_typ));
+        }
+        heap_endscan(sdesc);
+        heap_close(rdesc);
+
+        /* Typ is set now, so this takes the first branch */
+        return(gettype(type));
+    }
+    elog(WARN, "Error: unknown type '%s'.\n", type);
+    err();
+    /* not reached, here to make compiler happy */
+    return 0;
+}
+
+/* ----------------
+ * AllocateAttribute
+ *
+ * Return a zero-filled attribute descriptor of ATTRIBUTE_TUPLE_SIZE
+ * bytes obtained with malloc(); an allocation failure is reported
+ * with elog(FATAL).
+ * ----------------
+ */
+AttributeTupleForm  /* XXX */
+AllocateAttribute()
+{
+    AttributeTupleForm att;
+
+    att = (AttributeTupleForm)malloc(ATTRIBUTE_TUPLE_SIZE);
+    if (!PointerIsValid(att))
+        elog(FATAL, "AllocateAttribute: malloc failed");
+
+    memset(att, 0, ATTRIBUTE_TUPLE_SIZE);
+    return (att);
+}
+
+/* ----------------
+ * MapArrayTypeName
+ * XXX arrays of "basetype" are always "_basetype".
+ *     this is an evil hack inherited from rel. 3.1.
+ * XXX array dimension is thrown away because we
+ *     don't support fixed-dimension arrays.  again,
+ *     sickness from 3.1.
+ *
+ * The input is expected to contain a '[' character; everything before
+ * it is copied behind a leading '_'.  Copying also stops at the end of
+ * the string, and the result is truncated so that it always fits in
+ * NAMEDATALEN bytes including the terminating NUL.  (Previously a base
+ * name of NAMEDATALEN-1 characters stored the NUL one byte past the
+ * buffer, and a string without '[' was read beyond its end.)
+ *
+ * A NULL or empty argument is returned unchanged.
+ *
+ * the string returned is a pointer to static storage and should NOT
+ * be freed by the CALLER.
+ * ----------------
+ */
+char*
+MapArrayTypeName(char *s)
+{
+    int i, j;
+    static char newStr[NAMEDATALEN]; /* array type names < NAMEDATALEN long */
+
+    if (s == NULL || s[0] == '\0')
+        return s;
+
+    newStr[0] = '_';
+    /* j is the next free slot; the last slot is reserved for the NUL */
+    for (i = 0, j = 1;
+         j < NAMEDATALEN - 1 && s[i] != '\0' && s[i] != '[';
+         i++, j++)
+        newStr[j] = s[i];
+
+    newStr[j] = '\0';
+
+    return newStr;
+}
+
+/* ----------------
+ * EnterString
+ *
+ * Return the string-table position of str.  A string that is not in
+ * the table yet is added first.
+ * ----------------
+ */
+int
+EnterString (char *str)
+{
+    int         slen = strlen(str);
+    hashnode    *entry = FindStr(str, slen, 0);
+
+    /* unknown so far: create the entry */
+    if (!entry)
+        entry = AddStr(str, slen, 0);
+
+    return (entry->strnum);
+}
+
+/* ----------------
+ * LexIDStr
+ *
+ * Map a string-table position (as returned by EnterString) back to
+ * the string stored there.  The index is not range-checked.
+ * ----------------
+ */
+char *
+LexIDStr(int ident_num)
+{
+    char *stored = strtable[ident_num];
+
+    return (stored);
+}
+
+
+/* ----------------
+ * CompHash
+ *
+ *     Compute a hash function for a given string.  We look at the first,
+ *     the last, and the middle character of a string to spread the
+ *     strings out.  The function is rather arbitrary, except that we
+ *     are mod'ing by a prime number.
+ *
+ *     The characters are read as unsigned char so that the result is
+ *     never negative (it is used directly as a hashtable[] index), and
+ *     an empty string hashes to bucket 0 instead of reading str[-1].
+ *     Strings made of 7-bit characters hash exactly as before.
+ * ----------------
+ */
+int
+CompHash(char *str, int len)
+{
+    const unsigned char *s = (const unsigned char *) str;
+    int result;
+
+    if (len <= 0)
+        return 0;
+
+    result = (NUM * s[0] + NUMSQR * s[len-1] + NUMCUBE * s[(len-1)/2]);
+
+    return (result % HASHTABLESIZE);
+
+}
+
+/* ----------------
+ * FindStr
+ *
+ *     Look for the specified string in the hash table.  Returns a
+ *     pointer to the hash node found, or NULL if the string is not
+ *     in the table.
+ *
+ *     str    - string to look up
+ *     length - its length, used only to compute the hash bucket
+ *     mderef - not used by this routine
+ * ----------------
+ */
+hashnode *
+FindStr(char *str, int length, hashnode *mderef)
+{
+    hashnode   *cur;
+
+    /*
+     * Walk the chain of the bucket the string hashes to.  A node
+     * matches when the string table entry it refers to compares
+     * equal to str; no further distinction is made between string
+     * constants and identifiers with the same spelling.
+     */
+    for (cur = hashtable [CompHash (str, length)]; cur != NULL; cur = cur->next) {
+        if (strcmp(str, strtable[cur->strnum]) == 0)
+            return(cur);
+    }
+
+    /* Couldn't find it in the list */
+    return (NULL);
+}
+
+/* ----------------
+ * AddStr
+ *
+ *     This function adds the specified string to the string table and
+ *     appends a node for it to the matching hash table chain.  We return
+ *     the node so that the calling routine can find out the unique id
+ *     (strnum) that AddStr has assigned to this string.
+ *
+ *     str       - string to add (copied into freshly malloc'ed storage)
+ *     strlength - its length, not counting the terminating NUL
+ *     mderef    - not used by this routine
+ *
+ *     Reports elog(FATAL) when the string table is full or when memory
+ *     cannot be allocated.
+ * ----------------
+ */
+hashnode *
+AddStr(char *str, int strlength, int mderef)
+{
+    hashnode   *trail, *newnode;
+    int        hashresult;
+    int        len;
+
+    if (++strtable_end == STRTABLESIZE) {
+        /* Error, string table overflow, so we Punt */
+        elog(FATAL,
+             "There are too many string constants and identifiers for the compiler to handle.");
+    }
+
+    /*
+     *  Some of the utilities (eg, define type, create relation) assume
+     *  that the string they're passed is a NAMEDATALEN.  We get array bound
+     *  read violations from purify if we don't allocate at least NAMEDATALEN
+     *  bytes for strings of this sort.  Because we're lazy, we allocate
+     *  at least NAMEDATALEN bytes all the time.
+     */
+    if ((len = strlength + 1) < NAMEDATALEN)
+        len = NAMEDATALEN;
+
+    strtable [strtable_end] = malloc((unsigned) len);
+    if (strtable [strtable_end] == NULL)
+        elog(FATAL, "AddStr: malloc failed");
+    strcpy (strtable[strtable_end], str);
+
+    /* Now put a node in the hash table */
+    newnode = (hashnode*)malloc(sizeof(hashnode)*1);
+    if (newnode == NULL)
+        elog(FATAL, "AddStr: malloc failed");
+    newnode->strnum = strtable_end;
+    newnode->next = NULL;
+
+    /* Find out where it goes: at the tail of its bucket's chain */
+    hashresult = CompHash (str, strlength);
+    if (hashtable [hashresult] == NULL) {
+        hashtable [hashresult] = newnode;
+    } else {           /* There is something in the list */
+        trail = hashtable [hashresult];
+        while (trail->next != NULL)
+            trail = trail->next;
+        trail->next = newnode;
+    }
+    return (newnode);
+}
+
+
+
+/*
+ *  index_register() -- record an index that has been set up for building
+ *         later.
+ *
+ * At bootstrap time, we define a bunch of indices on system catalogs.
+ * We postpone actually building the indices until just before we're
+ * finished with initialization, however.  This is because more classes
+ * and indices may be defined, and we want to be sure that all of them
+ * are present in the index.
+ *
+ * The arguments are copied into an IndexList entry that is pushed onto
+ * the front of the ILHead list; build_indices() later walks that list.
+ * The heap and index names, the attribute numbers, the parameter
+ * strings and the FuncIndexInfo are duplicated; for predInfo only the
+ * pred and oldPred members are copied (as-is) into a new PredInfo.
+ */
+void
+index_register(char *heap,
+          char *ind,
+          int natts,
+          AttrNumber *attnos,
+          uint16 nparams,
+          Datum *params,
+          FuncIndexInfo *finfo,
+          PredInfo *predInfo)
+{
+    IndexList *entry;
+    MemoryContext savedcxt;
+    int attbytes;
+
+    /*
+     *  XXX mao 10/31/92 -- don't gc index reldescs, associated info
+     *  at bootstrap time.  we'll declare the indices now, but want to
+     *  create them later.
+     */
+    if (nogc == (GlobalMemory) NULL)
+        nogc = CreateGlobalMemory("BootstrapNoGC");
+
+    /* everything below is allocated while the nogc context is current */
+    savedcxt = MemoryContextSwitchTo((MemoryContext) nogc);
+
+    entry = (IndexList *) palloc(sizeof(IndexList));
+    entry->il_heap = pstrdup(heap);
+    entry->il_ind = pstrdup(ind);
+    entry->il_natts = natts;
+
+    /* with a FuncIndexInfo, its argument count sizes the attnos array */
+    if (!PointerIsValid(finfo))
+        attbytes = natts * sizeof(AttrNumber);
+    else
+        attbytes = FIgetnArgs(finfo) * sizeof(AttrNumber);
+
+    entry->il_attnos = (AttrNumber *) palloc(attbytes);
+    memmove(entry->il_attnos, attnos, attbytes);
+
+    /* params holds 2 * nparams strings; copy each one */
+    entry->il_nparams = nparams;
+    if (entry->il_nparams > 0) {
+        Datum *dst = (Datum *) palloc(2 * nparams * sizeof(Datum));
+        uint16 left;
+
+        entry->il_params = dst;
+        for (left = nparams * 2; left > 0; left--) {
+            char *src = (char *) *params++;
+
+            *dst = (Datum) palloc(strlen(src) + 1);
+            strcpy((char *) *dst++, src);
+        }
+    } else {
+        entry->il_params = (Datum *) NULL;
+    }
+
+    if (finfo == (FuncIndexInfo *) NULL) {
+        entry->il_finfo = (FuncIndexInfo *) NULL;
+    } else {
+        entry->il_finfo = (FuncIndexInfo *) palloc(sizeof(FuncIndexInfo));
+        memmove(entry->il_finfo, finfo, sizeof(FuncIndexInfo));
+    }
+
+    if (predInfo == NULL) {
+        entry->il_predInfo = NULL;
+    } else {
+        entry->il_predInfo = (PredInfo*)palloc(sizeof(PredInfo));
+        entry->il_predInfo->pred = predInfo->pred;
+        entry->il_predInfo->oldPred = predInfo->oldPred;
+    }
+
+    /* push the new entry onto the front of the pending-index list */
+    entry->il_next = ILHead;
+    ILHead = entry;
+
+    (void) MemoryContextSwitchTo(savedcxt);
+}
+
+/*
+ *  build_indices() -- build every index recorded by index_register().
+ *
+ * Walks the ILHead list, consuming it: ILHead is advanced past each
+ * entry once that entry has been processed.
+ */
+void
+build_indices()
+{
+    while (ILHead != (IndexList *) NULL) {
+        Relation heap = heap_openr(ILHead->il_heap);
+        Relation ind = index_openr(ILHead->il_ind);
+
+        index_build(heap, ind, ILHead->il_natts, ILHead->il_attnos,
+                    ILHead->il_nparams, ILHead->il_params, ILHead->il_finfo,
+                    ILHead->il_predInfo);
+
+        /*
+         * All of the rest of this routine is needed only because in
+         * bootstrap processing we don't increment xact id's.  The normal
+         * DefineIndex code replaces a pg_class tuple with updated info
+         * including the relhasindex flag (which we need to have updated).
+         * Unfortunately, there are always two indices defined on each
+         * catalog causing us to update the same pg_class tuple twice for
+         * each catalog getting an index during bootstrap resulting in the
+         * ghost tuple problem (see heap_replace).  To get around this we
+         * change the relhasindex field ourselves in this routine keeping
+         * track of what catalogs we already changed so that we don't
+         * modify those tuples twice.  The normal mechanism for updating
+         * pg_class is disabled during bootstrap.
+         *
+         *      -mer
+         */
+        heap = heap_openr(ILHead->il_heap);
+
+        if (!BootstrapAlreadySeen(heap->rd_id))
+            UpdateStats(heap->rd_id, 0, true);
+
+        ILHead = ILHead->il_next;
+    }
+}
+
diff --git a/src/backend/bootstrap/bootstrap.h b/src/backend/bootstrap/bootstrap.h

new file mode 100644 (file)

index 0000000..8ade766
--- /dev/null
+++ b/src/backend/bootstrap/bootstrap.h
@@ -0,0 +1,78 @@
+/*-------------------------------------------------------------------------
+ *
+ * bootstrap.h--
+ *    include file for the bootstrapping code
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: bootstrap.h,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef BOOTSTRAP_H
+#define BOOTSTRAP_H
+
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <ctype.h>
+
+#include "access/htup.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "utils/tqual.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"    /* for BufferManagerFlush */
+#include "utils/portal.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+#define    MAXATTR 40      /* max. number of attributes in a relation */
+
+/*
+ * One entry of a hash chain.  Nodes that fall into the same bucket are
+ * linked through 'next'; the string itself lives in the string table
+ * and is referred to by its index.
+ */
+typedef struct hashnode {
+    int        strnum;     /* Index into string table */
+    struct hashnode    *next;  /* next node in the same chain, or NULL */
+} hashnode;
+
+/* prints the "> " prompt on stdout */
+#define EMITPROMPT printf("> ")
+
+/* global variables (defined outside this header) */
+extern Relation reldesc;
+extern AttributeTupleForm attrtypes[MAXATTR];
+extern int numattr;
+extern int DebugMode;
+
+extern int BootstrapMain(int ac, char *av[]);
+/* records an index declaration; build_indices() builds it later */
+extern void index_register(char *heap,
+              char *ind,
+              int natts,
+              AttrNumber *attnos,
+              uint16 nparams,
+              Datum *params,
+              FuncIndexInfo *finfo,
+              PredInfo *predInfo);
+
+extern void err(void);
+extern void InsertOneTuple(Oid objectid);
+extern void closerel(char *name);
+extern void boot_openrel(char *name);
+/* returns the string table entry stored at index ident_num */
+extern char *LexIDStr(int ident_num);
+
+extern void DefineAttr(char *name, char *type, int attnum);
+extern void InsertOneValue(Oid objectid, char *value, int i);
+extern void InsertOneNull(int i);
+extern bool BootstrapAlreadySeen(Oid id);
+extern void cleanup(void);
+extern int gettype(char *type);
+/* returns a zero-filled, malloc'ed attribute tuple form */
+extern AttributeTupleForm AllocateAttribute(void);
+/* maps "basetype[...]" to "_basetype"; result is in static storage */
+extern char* MapArrayTypeName(char *s);
+extern char* CleanUpStr(char *s);
+
+/* string table and hash table routines */
+extern int EnterString (char *str);
+extern int CompHash (char *str, int len);
+extern hashnode *FindStr (char *str, int length, hashnode *mderef);
+extern hashnode *AddStr(char *str, int strlength, int mderef);
+
+/* builds all indices recorded by index_register() */
+extern void build_indices(void);
+
+#endif /* BOOTSTRAP_H */
diff --git a/src/backend/catalog/Makefile.inc b/src/backend/catalog/Makefile.inc

new file mode 100644 (file)

index 0000000..b29a0bf
--- /dev/null
+++ b/src/backend/catalog/Makefile.inc
@@ -0,0 +1,69 @@
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+#    Makefile for the system catalogs module
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:14 scrappy Exp $
+#
+#-------------------------------------------------------------------------
+
+# Directory of the catalog module, added to VPATH so that its sources
+# and headers can be found by make.
+catdir=$(CURDIR)/catalog
+VPATH:=$(VPATH):$(catdir)
+
+
+# C sources of the catalog module.
+SRCS_CATALOG= catalog.c heap.c index.c indexing.c \
+   pg_aggregate.c pg_operator.c pg_proc.c pg_type.c
+
+# Headers of the catalog module, appended to the HEADERS list.
+HEADERS+= catalog.h catname.h heap.h index.h indexing.h pg_aggregate.h \
+   pg_am.h pg_amop.h pg_amproc.h pg_attribute.h pg_database.h \
+   pg_defaults.h pg_demon.h pg_group.h pg_index.h pg_inheritproc.h \
+   pg_inherits.h pg_ipl.h pg_language.h pg_listener.h \
+   pg_log.h pg_magic.h pg_opclass.h pg_operator.h pg_parg.h \
+   pg_proc.h pg_class.h \
+   pg_rewrite.h pg_server.h pg_statistic.h pg_time.h pg_type.h \
+   pg_user.h pg_variable.h pg_version.h
+
+#
+# The following is to create the .bki files.
+# TODO: sort headers (and figure out some automatic way of determining
+#  the bki sources?)
+#
+# XXX - more grot.  includes names and uid's in the header file.  FIX THIS
+#  (not sure if i got this right - which do i need - or should i 
+#   burn the whole damned thing)
+#
+# When ALLOW_PG_GROUP is set, pass -DALLOW_PG_GROUP to genbki.sh
+# through BKIOPTS.
+ifdef ALLOW_PG_GROUP
+BKIOPTS= -DALLOW_PG_GROUP
+endif
+
+# The generator script and the .bki files produced by the rules below.
+GENBKI= $(catdir)/genbki.sh
+BKIFILES= global1.bki local1_template1.bki 
+
+# Catalog headers that are fed to genbki.sh to produce global1.bki.
+GLOBALBKI_SRCS= pg_database.h pg_demon.h pg_magic.h pg_defaults.h \
+   pg_variable.h pg_server.h pg_user.h pg_hosts.h \
+   pg_group.h pg_log.h pg_time.h
+
+# Catalog headers that are fed to genbki.sh to produce
+# local1_template1.bki.
+LOCALBKI_SRCS= pg_proc.h pg_type.h pg_attribute.h pg_class.h \
+   pg_inherits.h pg_index.h pg_version.h pg_statistic.h pg_operator.h \
+   pg_opclass.h pg_am.h pg_amop.h pg_amproc.h pg_language.h pg_parg.h \
+   pg_aggregate.h pg_ipl.h pg_inheritproc.h \
+   pg_rewrite.h pg_listener.h indexing.h
+
+# Run genbki.sh over the GLOBALBKI_SRCS headers.  The script itself is a
+# prerequisite (so the file is regenerated when it changes) but must not
+# be passed to itself as input, so it is filtered out of $^.  The output
+# is written into $(objdir) under the target's file name.
+global1.bki: $(GENBKI) $(GLOBALBKI_SRCS)
+   sh $(SHOPTS) $(GENBKI) $(BKIOPTS) $(filter-out $(GENBKI),$^) \
+       > $(objdir)/$(@F)
+
+
+# Run genbki.sh over the LOCALBKI_SRCS headers.  The script itself is a
+# prerequisite (so the file is regenerated when it changes) but must not
+# be passed to itself as input, so it is filtered out of $^.  The output
+# is written into $(objdir) under the target's file name.
+local1_template1.bki: $(GENBKI) $(LOCALBKI_SRCS)
+   sh $(SHOPTS) $(GENBKI) $(BKIOPTS) $(filter-out $(GENBKI),$^) \
+       > $(objdir)/$(@F)
+
+
+#${PROG}: ${BKIFILES}
+#
+
+# The generated .bki files are added to the list of files to clean.
+CLEANFILES+= ${BKIFILES}
diff --git a/src/backend/catalog/README b/src/backend/catalog/README

new file mode 100644 (file)

index 0000000..5bfc359
--- /dev/null
+++ b/src/backend/catalog/README
@@ -0,0 +1,66 @@
+$Header: /cvsroot/pgsql/src/backend/catalog/README,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+
+This directory contains .c files that manipulate the system catalogs
+as well as .h files that define the structure of the system catalogs.
+
+When the compile-time scripts (such as Gen_fmgrtab.sh and genbki.sh)
+execute, they grep the DATA statements out of the .h files and munge
+these in order to generate the .bki files.  The .bki files are then
+used as input to initdb (which is just a wrapper around postgres
+running single-user in bootstrapping mode) in order to generate the
+initial (template) system catalog relation files.
+
+-----------------------------------------------------------------
+
+People who are going to hose around with the .h files should be aware
+of the following facts:
+
+- It is very important that the DATA statements be properly formatted
+(e.g., no broken lines, proper use of white-space and _null_).  The
+scripts are line-oriented and break easily.  In addition, the only
+documentation on the proper format for them is the code in the
+bootstrap/ directory.  Just be careful when adding new DATA
+statements.
+
+- Some catalogs require that OIDs be preallocated to tuples because
+certain catalogs contain circular references.  For example, pg_type
+contains pointers into pg_proc (pg_type.typinput), and pg_proc
+contains back-pointers into pg_type (pg_proc.proargtypes).  In these
+cases, the references may be explicitly set by use of the "OID ="
+clause of the .bki insert statement.  If no such pointers are required
+to a given tuple, then the OID may be set to the wildcard value 0
+(i.e., the system generates a random OID in the usual way).
+
+If you need to find a valid OID for a set of tuples that refer to each
+other, use the unused_oids script.  It generates inclusive ranges of
+*unused* OIDs (i.e., the line "45-900" means OIDs 45 through 900 have
+not been allocated yet).  However, you should not rely 100% on this
+script, since it only looks at the .h files in the catalog/ directory.
+Do a pg_grepsrc (recursive grep) of the source tree to ensure that
+there aren't any hidden crocks (i.e., explicit use of a numeric OID)
+anywhere in the code.
+
+-----------------------------------------------------------------
+
+When munging the .c files, you should be aware of certain conventions:
+
+- The system catalog cache code (and most catalog-munging code in
+general) assumes that the fixed-length portion of every system catalog
+tuple is in fact present.  That is, only the variable-length
+portions of a catalog tuple are permitted to be NULL.  For example,
+if you set pg_type.typdelim to be NULL, a
+piece of code will likely perform "typetup->typdelim" (or, worse,
+"typetup->typelem", which follows typdelim).  This will result in
+random errors or even segmentation violations.  Hence, do NOT insert
+catalog tuples that contain NULL attributes except in their
+variable-length portions!
+
+- Modification of the catalogs must be performed with the proper
+updating of catalog indexes!  That is, several catalogs have indexes
+on them; when you munge them using the executor, the executor will
+take care of doing the index updates, but if you make direct access
+method calls to insert new or modified tuples into a heap, you must
+also make the calls to insert the tuple into ALL of its indexes!  If
+not, the new tuple will generally be "invisible" to the system because
+most of the accesses to the catalogs in question will be through the
+associated indexes.
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c

new file mode 100644 (file)

index 0000000..25588c0
--- /dev/null
+++ b/src/backend/catalog/catalog.c
@@ -0,0 +1,205 @@
+/*-------------------------------------------------------------------------
+ *
+ * catalog.c--
+ *    
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/catalog.c,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>    /* XXX */
+#include "postgres.h"
+#include "miscadmin.h"  /* for DataDir */
+#include "access/htup.h"
+#include "storage/buf.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+
+#include "utils/syscache.h"
+#include "catalog/catname.h"   /* NameIs{,Shared}SystemRelationName */
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_type.h"
+#include "catalog/catalog.h"
+#include "storage/bufmgr.h"
+#include "access/transam.h"
+
+
+#ifndef    MAXPATHLEN
+#define    MAXPATHLEN  80
+#endif
+
+/*
+ * relpath     - path to the relation
+ *
+ * For a shared system relation the result is a freshly palloc'ed
+ * string "<DataDir>/<relname>", with at most NAMEDATALEN characters
+ * taken from relname.  For every other relation the relname argument
+ * itself is returned, so the caller cannot assume the result was
+ * allocated.
+ *
+ * Perhaps this should be in-line code in relopen().
+ */
+char *
+relpath(char relname[])
+{
+    char    *shared_path;
+
+    if (!IsSharedSystemRelationName(relname))
+        return(relname);
+
+    /* room for DataDir, the '/', a full name and the terminating NUL */
+    shared_path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2);
+    sprintf(shared_path, "%s/%.*s", DataDir, NAMEDATALEN, relname);
+    return (shared_path);
+}
+
+/*
+ * issystem    - returns non-zero iff relname is a system catalog
+ *
+ * System catalog relations are required to have names that begin
+ * with "pg_", and user relations are forbidden to use that prefix,
+ * so the test is a simple three-character prefix comparison.
+ *
+ * XXX this is way bogus. -- pma
+ */
+bool
+issystem(char relname[])
+{
+    /* strncmp stops at a NUL, so names shorter than 3 chars never match */
+    return (strncmp(relname, "pg_", 3) == 0);
+}
+
+/*
+ * IsSystemRelationName --
+ * True iff name is the name of a system catalog relation.
+ *
+ * System catalog relations are required to have names that begin
+ * with "pg_", and user relations are forbidden to use that prefix,
+ * so the test is a simple three-character prefix comparison.
+ *
+ * XXX this is way bogus. -- pma
+ */
+bool
+IsSystemRelationName(char *relname)
+{
+    /* strncmp stops at a NUL, so names shorter than 3 chars never match */
+    return (strncmp(relname, "pg_", 3) == 0);
+}
+
+/*
+ * IsSharedSystemRelationName --
+ * True iff name is the name of a shared system catalog relation,
+ * i.e. it is a system relation name that also appears in the
+ * NULL-terminated SharedSystemRelationNames list.
+ */
+bool
+IsSharedSystemRelationName(char *relname)
+{
+    char **shared;
+
+    /*
+     * Quick out: if it's not a system relation, it can't be a shared
+     * system relation.
+     */
+    if (!IsSystemRelationName(relname))
+        return FALSE;
+
+    for (shared = SharedSystemRelationNames; *shared != NULL; shared++) {
+        if (strcmp(*shared, relname) == 0)
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+ * newoid      - returns a unique identifier across all catalogs.
+ *
+ * Object Id allocation is now done by GetNewObjectID in
+ * access/transam/varsup.c.  oids are now allocated correctly.
+ * This routine only fetches the next oid from there and reports
+ * elog(WARN) if the value obtained is not a valid oid.
+ *
+ * old comments:
+ * This needs to change soon, it fails if there are too many more
+ * than one call per second when postgres restarts after it dies.
+ *
+ * The distribution of OID's should be done by the POSTMASTER.
+ * Also there needs to be a facility to preallocate OID's.  Ie.,
+ * for a block of OID's to be declared as invalid ones to allow
+ * user programs to use them for temporary object identifiers.
+ */
+Oid newoid()
+{
+    Oid     fresh;
+
+    GetNewObjectId(&fresh);
+
+    if (OidIsValid(fresh))
+        return fresh;
+
+    elog(WARN, "newoid: GetNewObjectId returns invalid oid");
+    return fresh;
+}
+
+/*
+ * fillatt     - fills the ATTRIBUTE relation fields from the TYP
+ *
+ * Expects that the atttypid domain is set for each att[].
+ * Returns with the attnum, and attlen domains set.
+ * attnum, attproc, atttyparg, ... should be set by the user.
+ *
+ * For each attribute of tupleDesc the type is looked up in the system
+ * cache by atttypid.  attnum is set to the attribute's 1-based position;
+ * attlen and attbyval are copied from the type unless the attribute is a
+ * set or has type BPCHAROID or VARCHAROID.  An unknown type is reported
+ * with elog(WARN) and that attribute is left untouched.
+ *
+ * In the future, attnum may not be set?!? or may be passed as an arg?!?
+ *
+ * Current implementation is very inefficient--should cache the
+ * information if this is at all possible.
+ *
+ * Check to see if this is really needed, and especially in the case
+ * of index tuples.
+ */
+void
+fillatt(TupleDesc tupleDesc)
+{
+    register TypeTupleForm typp;
+    HeapTuple      tuple;
+    int            i;
+    int natts = tupleDesc->natts;
+    AttributeTupleForm *att = tupleDesc->attrs;
+
+    if (natts < 0 || natts > MaxHeapAttributeNumber)
+        elog(WARN, "fillatt: %d attributes is too large", natts);
+    if (natts == 0) {
+        elog(DEBUG, "fillatt: called with natts == 0");
+        return;
+    }
+
+    /*
+     * The index advances on every iteration, so the walk is bounded by
+     * natts even if elog(WARN) were ever to return for a bad type.
+     */
+    for (i = 0; i < natts; i++) {
+        AttributeTupleForm attr = att[i];
+
+        tuple = SearchSysCacheTuple(TYPOID,
+                                    Int32GetDatum(attr->atttypid),
+                                    0,0,0);
+        if (!HeapTupleIsValid(tuple)) {
+            /* cast so that the argument really matches the %ld format */
+            elog(WARN, "fillatt: unknown atttypid %ld",
+                 (long) attr->atttypid);
+            continue;
+        }
+
+        attr->attnum = (int16) (i + 1);
+
+        /* Check if the attr is a set before messing with the length
+           and byval, since those were already set in
+           TupleDescInitEntry.  In fact, this seems redundant
+           here, but who knows what I'll break if I take it out...
+
+           same for char() and varchar() stuff. I share the same
+           sentiments. This function is poorly written anyway. -ay 6/95
+           */
+        if (!attr->attisset &&
+            attr->atttypid != BPCHAROID &&
+            attr->atttypid != VARCHAROID) {
+
+            typp = (TypeTupleForm) GETSTRUCT(tuple);  /* XXX */
+            attr->attlen = typp->typlen;
+            attr->attbyval = typp->typbyval;
+        }
+    }
+}
diff --git a/src/backend/catalog/catalog.h b/src/backend/catalog/catalog.h

new file mode 100644 (file)

index 0000000..9a54e83
--- /dev/null
+++ b/src/backend/catalog/catalog.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * catalog.h--
+ *    prototypes for functions in lib/catalog/catalog.c
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: catalog.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef CATALOG_H
+#define CATALOG_H
+
+#include "access/tupdesc.h"
+
+/* palloc'ed "<DataDir>/<relname>" for shared system relations, else relname itself */
+extern char *relpath(char relname[]);
+/* true iff relname begins with "pg_" */
+extern bool IsSystemRelationName(char *relname);
+/* true iff relname is a system relation name listed in SharedSystemRelationNames */
+extern bool IsSharedSystemRelationName(char *relname);
+/* returns a new object id obtained from GetNewObjectId() */
+extern Oid newoid(void);
+/* sets attnum and, for most attributes, attlen/attbyval from the type */
+extern void fillatt(TupleDesc att);
+
+#endif /* CATALOG_H */
diff --git a/src/backend/catalog/catname.h b/src/backend/catalog/catname.h

new file mode 100644 (file)

index 0000000..8d96541
--- /dev/null
+++ b/src/backend/catalog/catname.h
@@ -0,0 +1,52 @@
+/*-------------------------------------------------------------------------
+ *
+ * catname.h--
+ *    POSTGRES system catalog relation name definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: catname.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    CATNAME_H
+#define CATNAME_H
+
+#include "postgres.h"
+
+
+/*
+ * Names of the system catalog relations, as string literals.
+ * All of them carry the "pg_" prefix.
+ */
+#define  AggregateRelationName "pg_aggregate"
+#define  AccessMethodRelationName "pg_am"
+#define  AccessMethodOperatorRelationName "pg_amop"
+#define  AccessMethodProcedureRelationName "pg_amproc"
+#define  AttributeRelationName "pg_attribute"
+#define  DatabaseRelationName "pg_database"
+#define  DefaultsRelationName "pg_defaults"
+#define  DemonRelationName "pg_demon"
+#define  GroupRelationName "pg_group"
+#define  HostsRelationName "pg_hosts"
+#define  IndexRelationName "pg_index"
+#define  InheritProcedureRelationName "pg_inheritproc"
+#define  InheritsRelationName "pg_inherits"
+#define  InheritancePrecidenceListRelationName "pg_ipl"
+#define  LanguageRelationName "pg_language"
+#define  ListenerRelationName "pg_listener"
+#define  LogRelationName "pg_log"
+#define  MagicRelationName "pg_magic"
+#define  OperatorClassRelationName "pg_opclass"
+#define  OperatorRelationName "pg_operator"
+#define  ProcedureRelationName "pg_proc"
+#define  RelationRelationName "pg_class"
+#define  RewriteRelationName "pg_rewrite"
+#define  ServerRelationName "pg_server"
+#define  StatisticRelationName "pg_statistic"
+#define  TimeRelationName "pg_time"
+#define  TypeRelationName "pg_type"
+#define  UserRelationName "pg_user"
+#define  VariableRelationName "pg_variable"
+#define  VersionRelationName "pg_version"
+
+/* list of shared system relation names; defined outside this header */
+extern char *SharedSystemRelationNames[];
+
+#endif /* CATNAME_H */
diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh

new file mode 100644 (file)

index 0000000..2f7e402
--- /dev/null
+++ b/src/backend/catalog/genbki.sh
@@ -0,0 +1,218 @@
+#!/bin/sh
+#-------------------------------------------------------------------------
+#
+# genbki.sh--
+#    shell script which generates .bki files from specially formatted .h
+#    files.  These .bki files are used to initialize the postgres template
+#    database.
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+#
+# NOTES
+#    non-essential whitespace is removed from the generated file.
+#    if this is ever a problem, then the sed script at the very
+#    end can be changed into another awk script or something smarter..
+#
+#-------------------------------------------------------------------------
+
+PATH=$PATH:/lib:/usr/ccs/lib       # to find cpp
+BKIOPTS=''
+if [ $? != 0 ]
+then
+    echo `basename $0`: Bad option
+    exit 1
+fi
+
+for opt in $*
+do
+    case $opt in
+    -D) BKIOPTS="$BKIOPTS -D$2"; shift; shift;;
+    -D*) BKIOPTS="$BKIOPTS $1";shift;;
+    --) shift; break;;
+    esac
+done
+
+# ----------------
+#  collect nodefiles
+# ----------------
+SYSFILES=''
+x=1
+numargs=$#
+while test $x -le $numargs ; do
+    SYSFILES="$SYSFILES $1"
+    x=`expr $x + 1`
+    shift
+done
+
+# ----------------
+#  strip comments and trash from .h before we generate
+#  the .bki file...
+#
+#  The sed stage below, in order:
+#    - deletes comment text that opens and closes on the same line
+#      (from the first comment opener to the last closer on that line)
+#    - deletes a semicolon at the end of a line, together with any of
+#      the characters in the bracket expression that follow it
+#    - rewrites " Oid" and "(Oid" as " oid" and "(oid"
+#    - rewrites " NameData" and "(NameData" as " name" and "(name"
+#  Its output is piped into the awk program that follows.
+# ----------------
+#  also, change Oid to oid. -- AY 8/94.
+#  also, change NameData to name. -- jolly 8/21/95.
+#
+cat $SYSFILES | \
+sed -e 's/\/\*.*\*\///g' \
+    -e 's/;[   ]*$//g'  \
+    -e 's/\ Oid/\ oid/g' \
+    -e 's/\ NameData/\ name/g' \
+    -e 's/(NameData/(name/g' \
+    -e 's/(Oid/(oid/g' | \
+awk '
+# ----------------
+#  now use awk to process remaining .h file..
+#
+#  state variables, all starting at 0:
+#    inside     set to 1 while scanning the contents of a catalog
+#               definition
+#    raw        set to 1 while inside a BKI_BEGIN .. BKI_END block
+#    bootstrap  set to 1 when the CATALOG line mentions BOOTSTRAP
+#    nc         the number of catalogs seen so far
+#    reln_open  set to 1 when a relation is open and still has to
+#               be closed
+# ----------------
+BEGIN {
+    reln_open = 0;
+    nc = 0;
+    bootstrap = 0;
+    raw = 0;
+    inside = 0;
+}
+
+# ----------------
+#  lines between a BKI_BEGIN line and a BKI_END line are printed
+#  unchanged; the two marker lines themselves are dropped.
+# ----------------
+/^BKI_BEGIN/ {
+    raw = 1;
+    next;
+}
+
+/^BKI_END/ {
+    raw = 0;
+    next;
+}
+
+raw == 1 {
+    print $0;
+    next;
+}
+
+# ----------------
+#  DATA() statements should get passed right through after
+#  stripping off the DATA( and the ) on the end.
+#
+#  substr starts after the five characters of "DATA(" and leaves out
+#  the last character of the line, so the line must end with the
+#  closing parenthesis (no trailing blanks).
+# ----------------
+/^DATA\(/ {
+   data = substr($0, 6, length($0) - 6);
+   print data;
+   next;
+}
+
+/^DECLARE_INDEX\(/ {
+# ----
+#  close any relation left open by earlier catalog data before
+#  declaring an index
+# ----
+    if (reln_open == 1) {
+        print "close " catalog;
+        reln_open = 0;
+    }
+
+# ----
+#  emit the text that follows "DECLARE_INDEX(", leaving out the last
+#  character of the line (the closing parenthesis)
+# ----
+    data = substr($0, 15, length($0) - 15);
+    print "declare index " data;
+}
+
+# ----
+#  a BUILD_INDICES line becomes the "build indices" command
+# ----
+/^BUILD_INDICES/ {
+    print "build indices";
+}
+   
+# ----------------
+#  CATALOG() definitions take some more work.
+# ----------------
+/^CATALOG\(/ {
+# ----
+#  close the previously opened relation, if any, before starting a
+#  new catalog
+# ----
+    if (reln_open == 1) {
+        print "close " catalog;
+        reln_open = 0;
+    }
+
+# ----
+#  the catalog name is the part of the first field between
+#  "CATALOG(" and the first ")"
+# ----
+    pos = index($1, ")");
+    catalog = substr($1, 9, pos - 9);
+
+# ----
+#  BOOTSTRAP anywhere on the line selects "create bootstrap" later
+# ----
+    if ($0 ~ /BOOTSTRAP/)
+        bootstrap = 1;
+
+# ----
+#  count the catalog and start collecting attributes at index 1
+# ----
+    nc++;
+    i = 1;
+    inside = 1;
+    next;
+}
+
+# ----------------
+#  process the contents of the catalog definition
+#
+#  attname[ x ] contains the attribute name for attribute x
+#  atttype[ x ] contains the attribute type for attribute x
+#  i is the index at which the next attribute will be stored
+# ----------------
+inside == 1 {
+# ----
+#  skip lines without any field (empty or blank only); otherwise they
+#  would be recorded as an attribute with an empty name and type
+# ----
+    if (NF == 0)
+        next;
+
+# ----
+#  ignore a leading brace line..
+# ----
+    if ($1 ~ /\{/)
+        next;
+
+# ----
+#  if this is the last line, then output the bki catalog stuff.
+# ----
+    if ($1 ~ /}/) {
+        if (bootstrap) {
+            print "create bootstrap " catalog;
+        } else {
+            print "create " catalog;
+        }
+        print "\t(";
+
+#       every attribute but the last one is followed by a comma
+        for (j=1; j<i-1; j++) {
+            print "\t " attname[ j ] " = " atttype[ j ] " ,";
+        }
+        print "\t " attname[ j ] " = " atttype[ j ] ;
+        print "\t)";
+
+#       a bootstrap catalog gets no explicit open command
+        if (! bootstrap) {
+            print "open " catalog;
+        }
+
+        i = 1;
+        reln_open = 1;
+        inside = 0;
+        bootstrap = 0;
+        next;
+    }
+
+# ----
+#  if we are inside the catalog definition, then keep sucking up
+#  attribute names and types
+# ----
+    if ($2 ~ /\[.*\]/) {            # array attribute
+        idlen = index($2,"[") - 1;
+        atttype[ i ] = $1 "[]";     # variable-length only..
+        attname[ i ] = substr($2,1,idlen);
+    } else {
+        atttype[ i ] = $1;
+        attname[ i ] = $2;
+    }
+    i++;
+    next;
+}
+
+# ----
+#  at end of input, close the relation that is still open, if any
+# ----
+END {
+    if (reln_open == 1) {
+        print "close " catalog;
+        reln_open = 0;
+    }
+}
+' | \
+cpp $BKIOPTS | \
+sed -e '/^[    ]*$/d' \
+    -e 's/[    ][  ]*/ /g'
+
+# ----------------
+#  all done
+#
+#  The awk output above was run through cpp with the collected -D
+#  options; the final sed deleted the lines matched by its first
+#  expression and replaced each run matched by its second expression
+#  with a single blank.  The script then exits with status 0
+#  unconditionally, whatever the exit status of the pipeline was.
+# ----------------
+exit 0
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

new file mode 100644 (file)

index 0000000..6e9634a
--- /dev/null
+++ b/src/backend/catalog/heap.c
@@ -0,0 +1,1428 @@
+/*-------------------------------------------------------------------------
+ *
+ * heap.c--
+ *    code to create and destroy POSTGRES heap relations
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ * INTERFACE ROUTINES
+ * heap_creatr()       - Create an uncataloged heap relation
+ * heap_create()       - Create a cataloged relation
+ * heap_destroy()      - Removes named relation from catalogs
+ *
+ * NOTES
+ *    this code taken from access/heap/create.c, which contains
+ *    the old heap_creater, amcreate, and amdestroy.  those routines
+ *    will soon call these routines using the function manager,
+ *    just like the poorly named "NewXXX" routines do.  The
+ *    "New" routines are all going to die soon, once and for all!
+ * -cim 1/13/91
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <stdio.h> /* for sprintf() */
+#include <sys/file.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/htup.h"
+#include "access/istrat.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "utils/tqual.h"   /* for NowTimeQual */
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/itemptr.h"
+#include "lib/hasht.h"
+#include "miscadmin.h"
+#include "fmgr.h"
+#include "utils/builtins.h"
+#include "utils/elog.h"            /* XXX */
+#include "utils/mcxt.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_ipl.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+
+#include "catalog/catalog.h"
+#include "parser/catalog_utils.h"
+
+#include "storage/lmgr.h"
+
+#include "rewrite/rewriteRemove.h"
+
+static void AddNewAttributeTuples(Oid new_rel_oid, TupleDesc tupdesc);
+static void CheckAttributeNames(TupleDesc tupdesc);
+
+/* ----------------------------------------------------------------
+ *     XXX UGLY HARD CODED BADNESS FOLLOWS XXX
+ *
+ * these should all be moved to someplace in the lib/catalog
+ * module, if not obliterated first.
+ * ----------------------------------------------------------------
+ */
+
+
+/*
+ * Note:
+ * Should the executor special case these attributes in the future?
+ * Advantage:  consume 1/2 the space in the ATTRIBUTE relation.
+ * Disadvantage:  having rules to compute values in these tuples may
+ *     be more difficult if not impossible.
+ */
+
+/*
+ * Template pg_attribute rows, one per system attribute (ctid, oid, xmin,
+ * cmin, xmax, cmax, chain, anchor, tmin, tmax, vtype).
+ *
+ * AddNewAttributeTuples() stamps each template's attrelid with the oid of
+ * the relation being created and then inserts it into pg_attribute, so
+ * these statics are modified at run time.  CheckAttributeNames() compares
+ * user attribute names against the attname of each template.
+ *
+ * NOTE(review): the initializers are positional; 0xffffffff is presumably
+ * the attrelid placeholder and the quoted string the attname -- confirm
+ * against the FormData_pg_attribute declaration before reordering fields.
+ */
+static FormData_pg_attribute a1 = {
+    0xffffffff, {"ctid"}, 27l, 0l, 0l, 0l, sizeof (ItemPointerData),
+    SelfItemPointerAttributeNumber, 0, '\0', '\001', 0l, 'i'
+};
+
+static FormData_pg_attribute a2 = {
+    0xffffffff, {"oid"}, 26l, 0l, 0l, 0l, sizeof(Oid),
+    ObjectIdAttributeNumber, 0, '\001', '\001', 0l, 'i'
+};
+
+static FormData_pg_attribute a3 = {
+    0xffffffff, {"xmin"}, 28l, 0l, 0l, 0l, sizeof (TransactionId),
+    MinTransactionIdAttributeNumber, 0, '\0', '\001', 0l, 'i',
+};
+
+static FormData_pg_attribute a4 = {
+    0xffffffff, {"cmin"}, 29l, 0l, 0l, 0l, sizeof (CommandId),
+    MinCommandIdAttributeNumber, 0, '\001', '\001', 0l, 's'
+};
+
+static FormData_pg_attribute a5 = {
+    0xffffffff, {"xmax"}, 28l, 0l, 0l, 0l, sizeof (TransactionId),
+    MaxTransactionIdAttributeNumber, 0, '\0', '\001', 0l, 'i'
+};
+
+static FormData_pg_attribute a6 = {
+    0xffffffff, {"cmax"}, 29l, 0l, 0l, 0l, sizeof (CommandId),
+    MaxCommandIdAttributeNumber, 0, '\001', '\001', 0l, 's'
+};
+
+static FormData_pg_attribute a7 = {
+    0xffffffff, {"chain"}, 27l, 0l, 0l, 0l, sizeof (ItemPointerData),
+    ChainItemPointerAttributeNumber, 0, '\0', '\001', 0l, 'i',
+};
+
+static FormData_pg_attribute a8 = {
+    0xffffffff, {"anchor"}, 27l, 0l, 0l, 0l, sizeof (ItemPointerData),
+    AnchorItemPointerAttributeNumber, 0, '\0', '\001', 0l, 'i'
+};
+
+static FormData_pg_attribute a9 = {
+    0xffffffff, {"tmin"}, 20l, 0l, 0l, 0l, sizeof (AbsoluteTime),
+    MinAbsoluteTimeAttributeNumber, 0, '\001', '\001', 0l, 'i'
+};
+
+static FormData_pg_attribute a10 = {
+    0xffffffff, {"tmax"}, 20l, 0l, 0l, 0l, sizeof (AbsoluteTime),
+    MaxAbsoluteTimeAttributeNumber, 0, '\001', '\001', 0l, 'i'
+};
+
+static FormData_pg_attribute a11 = {
+    0xffffffff, {"vtype"}, 18l, 0l, 0l, 0l, sizeof (char),
+    VersionTypeAttributeNumber, 0, '\001', '\001', 0l, 'c'
+};
+
+/*
+ * All system attribute templates, in one array.  AddNewAttributeTuples()
+ * walks the first (-1 - FirstLowInvalidHeapAttributeNumber) entries and
+ * CheckAttributeNames() walks the whole array, so the array length must
+ * stay in step with FirstLowInvalidHeapAttributeNumber.
+ */
+static AttributeTupleForm HeapAtt[] =
+{ &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, &a9, &a10, &a11 };
+
+/* ----------------------------------------------------------------
+ *     XXX END OF UGLY HARD CODED BADNESS XXX
+ * ----------------------------------------------------------------
+ */
+
+/*
+ * The TempRelList holds the temporary uncataloged relations that have
+ * been created.  These relations should be destroyed at the end of the
+ * transaction.  heap_creatr() hands every relation it creates under a
+ * generated "temp_<oid>" name to AddToTempRelList().
+ */
+typedef struct tempRelList {
+    Relation *rels; /* array of relation descriptors */
+    int  num; /* number of temporary relations */
+    int size; /* size of space allocated for the rels array */
+} TempRelList;
+
+/* NOTE(review): presumably the initial/growth size of rels -- confirm at the point of use */
+#define TEMP_REL_LIST_SIZE  32
+
+/* the list itself; NULL until something allocates it */
+static TempRelList *tempRels = NULL;   
+
+
+/* ----------------------------------------------------------------
+ * heap_creatr - Create an uncataloged heap relation
+ *
+ * Fields relpages, reltuples, relkeys, relhistory,
+ * relisindexed, and relkind of rdesc->rd_rel are initialized
+ * to all zeros, as are rd_last and rd_hook.  Rd_refcnt is set to 1.
+ *
+ * Remove the system relation specific code to elsewhere eventually.
+ *
+ * Eventually, must place information about this temporary relation
+ * into the transaction context block.
+ *
+ *  
+ * if heap_creatr is called with "" as the name, then heap_creatr will create a
+ * temporary name   "temp_$RELOID" for the relation
+ * ----------------------------------------------------------------
+ */
+Relation
+heap_creatr(char *name,
+            unsigned smgr,
+            TupleDesc tupDesc)
+{
+    /*
+     * Build a relation descriptor for an uncataloged heap relation and
+     * have the storage manager create its file.
+     *
+     *   name    - relation name; "" asks for a generated "temp_<oid>" name
+     *   smgr    - storage manager passed through to smgrcreate()
+     *   tupDesc - attribute layout; copied, the caller's descriptor is kept
+     *
+     * Returns the new descriptor with its reference count set to 1.  The
+     * descriptor is allocated in CacheCxt so that it outlives the current
+     * transaction's memory context.
+     */
+    int             i;      /* signed, to match natts */
+    Oid             relid;
+    Relation        rdesc;
+    bool            nailme = false;
+    bool            isTemp = false;
+    char           *relname = name;
+    char            tempname[40];   /* "temp_" + decimal oid + NUL fits easily */
+    int             natts = tupDesc->natts;
+
+    extern GlobalMemory CacheCxt;
+    MemoryContext   oldcxt;
+
+    /* ----------------
+     *  sanity checks
+     * ----------------
+     */
+    AssertArg(natts > 0);
+
+    if (IsSystemRelationName(relname) && IsNormalProcessingMode())
+    {
+        elog(WARN,
+             "Illegal class name: %s -- pg_ is reserved for system catalogs",
+             relname);
+    }
+
+    /* ----------------
+     *  switch to the cache context so the descriptor is not lost at
+     *  the end of this transaction.
+     * ----------------
+     */
+    if (!CacheCxt)
+        CacheCxt = CreateGlobalMemory("Cache");
+
+    oldcxt = MemoryContextSwitchTo((MemoryContext) CacheCxt);
+
+    /* ----------------
+     *  pick the relation oid.  The four core catalogs get their fixed
+     *  oids and are nailed; everything else gets a fresh oid.
+     * ----------------
+     */
+    if (!strcmp(RelationRelationName, relname))
+    {
+        relid = RelOid_pg_class;
+        nailme = true;
+    }
+    else if (!strcmp(AttributeRelationName, relname))
+    {
+        relid = RelOid_pg_attribute;
+        nailme = true;
+    }
+    else if (!strcmp(ProcedureRelationName, relname))
+    {
+        relid = RelOid_pg_proc;
+        nailme = true;
+    }
+    else if (!strcmp(TypeRelationName, relname))
+    {
+        relid = RelOid_pg_type;
+        nailme = true;
+    }
+    else
+    {
+        relid = newoid();
+
+        if (name[0] == '\0')
+        {
+            /*
+             * Oid is unsigned: print it with %u so that large oids do not
+             * produce a negative number in the generated name.
+             */
+            sprintf(tempname, "temp_%u", relid);
+            relname = tempname;
+            isTemp = true;
+        }
+    }
+
+    /* ----------------
+     *  allocate a new, zeroed relation descriptor.
+     *
+     *  XXX the length computation may be incorrect, handle elsewhere
+     * ----------------
+     */
+    rdesc = (Relation) palloc(sizeof(RelationData));
+    memset((char *) rdesc, 0, sizeof(RelationData));
+
+    /* create a new tuple descriptor from the one passed in */
+    rdesc->rd_att = CreateTupleDescCopy(tupDesc);
+
+    /* ----------------
+     *  nail the reldesc if this is a bootstrap create reln and
+     *  we may need it in the cache later on in the bootstrap
+     *  process so we don't ever want it kicked out.  e.g. pg_attribute!!!
+     * ----------------
+     */
+    if (nailme)
+        rdesc->rd_isnailed = true;
+
+    RelationSetReferenceCount(rdesc, 1);
+
+    rdesc->rd_rel = (Form_pg_class) palloc(sizeof *rdesc->rd_rel);
+    memset((char *) rdesc->rd_rel, 0, sizeof *rdesc->rd_rel);
+
+    namestrcpy(&(rdesc->rd_rel->relname), relname);
+    rdesc->rd_rel->relkind = RELKIND_UNCATALOGED;
+    rdesc->rd_rel->relnatts = natts;
+    rdesc->rd_rel->relsmgr = smgr;
+
+    /* every attribute of the copied descriptor belongs to the new relation */
+    for (i = 0; i < natts; i++)
+        rdesc->rd_att->attrs[i]->attrelid = relid;
+
+    rdesc->rd_id = relid;
+
+    if (nailme)
+    {
+        /* for system relations, set the reltype field here */
+        rdesc->rd_rel->reltype = relid;
+    }
+
+    /* ----------------
+     *  have the storage manager create the relation.
+     * ----------------
+     */
+    rdesc->rd_fd = (File) smgrcreate(smgr, rdesc);
+
+    RelationRegisterRelation(rdesc);
+
+    MemoryContextSwitchTo(oldcxt);
+
+    /*
+     * add all temporary relations to the tempRels list so they can be
+     * properly disposed of at the end of transaction
+     */
+    if (isTemp)
+        AddToTempRelList(rdesc);
+
+    return (rdesc);
+}
+
+
+/* ----------------------------------------------------------------
+ * heap_create - Create a cataloged relation
+ *
+ * this is done in 7 steps:
+ *
+ * 1) CheckAttributeNames() is used to make certain the tuple
+ *    descriptor contains a valid set of attribute names
+ *
+ * 2) pg_class is opened and RelationAlreadyExists()
+ *    performs a scan to ensure that no relation with the
+ *         same name already exists.
+ *
+ * 3) heap_creatr() is called to create the new relation on
+ *    disk.
+ *
+ * 4) TypeDefine() is called to define a new type corresponding
+ *    to the new relation.
+ *
+ * 5) AddNewAttributeTuples() is called to register the
+ *    new relation's schema in pg_attribute.
+ *
+ * 6) AddPgRelationTuple() is called to register the
+ *    relation itself in the catalogs.
+ *
+ * 7) the relations are closed and the new relation's oid
+ *    is returned.
+ *
+ * old comments:
+ * A new relation is inserted into the RELATION relation
+ * with the specified attribute(s) (newly inserted into
+ * the ATTRIBUTE relation).  How does concurrency control
+ * work?  Is it automatic now?  Expects the caller to have
+ * attname, atttypid, atttyparg, attproc, and attlen domains filled.
+ * Create fills the attnum domains sequentially from zero,
+ * fills the attnvals domains with zeros, and fills the
+ * attrelid fields with the relid.
+ *
+ * scan relation catalog for name conflict
+ * scan type catalog for typids (if not arg)
+ * create and insert attribute(s) into attribute catalog
+ * create new relation
+ * insert new relation into attribute catalog
+ *
+ * Should coordinate with heap_creater().  Either it should
+ * not be called or there should be a way to prevent
+ * the relation from being removed at the end of the
+ * transaction if it is successful ('u'/'r' may be enough).
+ * Also, if the transaction does not commit, then the
+ * relation should be removed.
+ *
+ * XXX amcreate ignores "off" when inserting (for now).
+ * XXX amcreate (like the other utilities) needs to understand indexes.
+ * 
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * CheckAttributeNames
+ *
+ * this is used to make certain the tuple descriptor contains a
+ * valid set of attribute names.  a problem simply generates
+ * elog(WARN) which aborts the current transaction.
+ * --------------------------------
+ */
+static void
+CheckAttributeNames(TupleDesc tupdesc)
+{
+    /*
+     * Validate the attribute names in tupdesc.  A clash with a system
+     * attribute name, or the same name used twice, is reported through
+     * elog(WARN); an attribute of unknown type only draws an
+     * elog(NOTICE).
+     */
+    unsigned    cur;
+    unsigned    other;
+    int         natts = tupdesc->natts;
+
+    /*
+     * Pass 1: every user attribute against every system attribute
+     * template, followed by the unknown-type notice for that attribute.
+     */
+    for (cur = 0; cur < natts; cur++)
+    {
+        AttributeTupleForm userAtt = tupdesc->attrs[cur];
+
+        for (other = 0; other < sizeof HeapAtt / sizeof HeapAtt[0]; other++)
+        {
+            AttributeTupleForm sysAtt = HeapAtt[other];
+
+            if (nameeq(&(sysAtt->attname), &(userAtt->attname)))
+                elog(WARN,
+                     "create: system attribute named \"%s\"",
+                     sysAtt->attname.data);
+        }
+
+        if (userAtt->atttypid == UNKNOWNOID)
+            elog(NOTICE,
+                 "create: attribute named \"%s\" has an unknown type",
+                 userAtt->attname.data);
+    }
+
+    /*
+     * Pass 2: each attribute against all attributes that precede it, to
+     * catch repeated names.
+     */
+    for (cur = 1; cur < natts; cur++)
+    {
+        for (other = 0; other < cur; other++)
+        {
+            AttributeTupleForm earlier = tupdesc->attrs[other];
+
+            if (nameeq(&(earlier->attname),
+                       &(tupdesc->attrs[cur]->attname)))
+                elog(WARN,
+                     "create: repeated attribute \"%s\"",
+                     earlier->attname.data);
+        }
+    }
+}
+
+/* --------------------------------
+ * RelationAlreadyExists
+ *
+ * this performs a scan of pg_class to ensure that
+ * no relation with the same name already exists.  The caller
+ * has to open pg_class and pass an open descriptor.
+ * --------------------------------
+ */
+int
+RelationAlreadyExists(Relation pg_class_desc, char relname[])
+{
+    /*
+     * Report whether pg_class already holds a relation called relname.
+     * pg_class_desc must be an open descriptor for pg_class.
+     */
+    ScanKeyData     nameKey;
+    HeapScanDesc    classScan;
+    HeapTuple       found;
+
+    /*
+     * Outside of bootstrap (initdb) time the catalog index on pg_class
+     * answers the question directly; the tuple it hands back is ours
+     * to free.
+     */
+    if (!IsBootstrapProcessingMode())
+    {
+        found = ClassNameIndexScan(pg_class_desc, relname);
+        if (!HeapTupleIsValid(found))
+            return ((int) false);
+
+        pfree(found);
+        return ((int) true);
+    }
+
+    /*
+     * At bootstrap time we have to do it the hard way: a heap scan of
+     * pg_class keyed on relname.
+     */
+    ScanKeyEntryInitialize(&nameKey,
+                           0,
+                           (AttrNumber) Anum_pg_class_relname,
+                           (RegProcedure) NameEqualRegProcedure,
+                           (Datum) relname);
+
+    classScan = heap_beginscan(pg_class_desc,
+                               0,
+                               NowTimeQual,
+                               1,
+                               &nameKey);
+
+    /* a NULL tuple means no relation with that name was found */
+    found = heap_getnext(classScan, 0, (Buffer *) NULL);
+
+    /* end the scan and return existence of the relation */
+    heap_endscan(classScan);
+
+    return (PointerIsValid(found) == true);
+}
+
+/* --------------------------------
+ * AddNewAttributeTuples
+ *
+ * this registers the new relation's schema by adding
+ * tuples to pg_attribute.
+ * --------------------------------
+ */
+static void
+AddNewAttributeTuples(Oid new_rel_oid,
+                      TupleDesc tupdesc)
+{
+    /*
+     * Register the schema of relation new_rel_oid in pg_attribute: one
+     * tuple per user attribute in tupdesc, then one per system attribute
+     * template in HeapAtt.  Index entries are added as well when
+     * pg_attribute is marked as having indices.
+     */
+    AttributeTupleForm *userAtts;
+    AttributeTupleForm  att;
+    unsigned    idx;
+    HeapTuple   attTuple;
+    Relation    attrCatalog;
+    bool        indexed;
+    Relation    indexDescs[Num_pg_attr_indices];
+    int         userCount = tupdesc->natts;
+
+    /* open pg_attribute and find out whether it has indices */
+    attrCatalog = heap_openr(AttributeRelationName);
+
+    Assert(attrCatalog);
+    Assert(attrCatalog->rd_rel);
+    indexed = RelationGetRelationTupleForm(attrCatalog)->relhasindex;
+    if (indexed)
+        CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices,
+                           indexDescs);
+
+    /*
+     * Initialize the tuple descriptor.  setheapoverride() brackets the
+     * call so that the effects of the TypeDefine() done previously are
+     * visible.
+     */
+    setheapoverride(true);
+    fillatt(tupdesc);
+    setheapoverride(false);
+
+    /* first the user attributes ... */
+    userAtts = tupdesc->attrs;
+    for (idx = 0; idx < userCount; idx++)
+    {
+        att = userAtts[idx];
+        att->attrelid = new_rel_oid;
+        att->attnvals = 0l;
+
+        attTuple = heap_addheader(Natts_pg_attribute,
+                                  ATTRIBUTE_TUPLE_SIZE,
+                                  (char *) att);
+        heap_insert(attrCatalog, attTuple);
+        if (indexed)
+            CatalogIndexInsert(indexDescs, Num_pg_attr_indices,
+                               attrCatalog, attTuple);
+        pfree(attTuple);
+    }
+
+    /* ... then the system attributes (attnvals is left untouched) */
+    for (idx = 0; idx < -1 - FirstLowInvalidHeapAttributeNumber; idx++)
+    {
+        att = HeapAtt[idx];
+        att->attrelid = new_rel_oid;
+
+        attTuple = heap_addheader(Natts_pg_attribute,
+                                  ATTRIBUTE_TUPLE_SIZE,
+                                  (char *) att);
+        heap_insert(attrCatalog, attTuple);
+        if (indexed)
+            CatalogIndexInsert(indexDescs, Num_pg_attr_indices,
+                               attrCatalog, attTuple);
+        pfree(attTuple);
+    }
+
+    heap_close(attrCatalog);
+
+    /* close pg_attribute indices */
+    if (indexed)
+        CatalogCloseIndices(Num_pg_attr_indices, indexDescs);
+}
+
+/* --------------------------------
+ * AddPgRelationTuple
+ *
+ * this registers the new relation in the catalogs by
+ * adding a tuple to pg_class.
+ * --------------------------------
+ */
+/*
+ * pg_class_desc - open descriptor for pg_class
+ * new_rel_desc  - descriptor of the relation being cataloged; its rd_rel
+ *                 is updated in place and used as the tuple contents
+ * new_rel_oid   - oid to stamp on the new pg_class tuple
+ * arch          - stored in relarch
+ * natts         - stored in relnatts
+ */
+void
+AddPgRelationTuple(Relation pg_class_desc,
+          Relation new_rel_desc,
+          Oid new_rel_oid,
+          int arch,
+          unsigned natts)
+{
+    Form_pg_class  new_rel_reltup;
+    HeapTuple      tup;
+    Relation       idescs[Num_pg_class_indices];
+    bool       isBootstrap;
+    
+    /* ----------------
+     * first we munge some of the information in our
+     *  uncataloged relation's relation descriptor.
+     * ----------------
+     */
+    new_rel_reltup = new_rel_desc->rd_rel;
+    
+    /* CHECK should get new_rel_oid first via an insert then use XXX */
+    /*   new_rel_reltup->reltuples = 1; */ /* XXX */
+    
+    new_rel_reltup->relowner = GetUserId();
+    new_rel_reltup->relkind = RELKIND_RELATION;
+    new_rel_reltup->relarch = arch;
+    new_rel_reltup->relnatts = natts;
+    
+    /* ----------------
+     * now form a tuple to add to pg_class
+     *  XXX Natts_pg_class_fixed is a hack - see pg_class.h
+     * ----------------
+     */
+    tup = heap_addheader(Natts_pg_class_fixed,
+            CLASS_TUPLE_SIZE,
+            (char *) new_rel_reltup);
+    tup->t_oid = new_rel_oid;
+    
+    /* ----------------
+     *  finally insert the new tuple and free it.
+     *
+     *  Note: I have no idea why we do a
+     *     SetProcessingMode(BootstrapProcessing);
+     *        here -cim 6/14/90
+     *
+     *  The mode on entry is remembered first.  When it was not
+     *  bootstrap, the mode is set back to NormalProcessing below
+     *  (not to whatever it was on entry), after the index inserts.
+     * ----------------
+     */
+    isBootstrap = IsBootstrapProcessingMode() ? true : false;
+    
+    SetProcessingMode(BootstrapProcessing);
+    
+    heap_insert(pg_class_desc, tup);
+    
+    if (! isBootstrap) {
+   /*
+    *  First, open the catalog indices and insert index tuples for
+    *  the new relation.
+    */
+   
+   CatalogOpenIndices(Num_pg_class_indices, Name_pg_class_indices, idescs);
+   CatalogIndexInsert(idescs, Num_pg_class_indices, pg_class_desc, tup);
+   CatalogCloseIndices(Num_pg_class_indices, idescs);
+   
+   /* now restore processing mode */
+   SetProcessingMode(NormalProcessing);
+    }
+    
+    pfree(tup);
+}
+
+
+/* --------------------------------
+ * addNewRelationType -
+ *
+ * define a complex type corresponding to the new relation
+ * --------------------------------
+ */
+void
+addNewRelationType(char *typeName, Oid new_rel_oid)
+{
+    /*
+     * Create the catalog type that goes with a new relation.
+     *
+     *   typeName    - name for the new type (heap_create() passes the
+     *                 relation name)
+     *   new_rel_oid - oid of the relation the type belongs to
+     *
+     * The oid returned by TypeCreate() is not needed here and is
+     * deliberately discarded.
+     *
+     * The sizes are set to oid size because it makes implementing sets MUCH
+     * easier, and no one (we hope) uses these fields to figure out
+     * how much space to allocate for the type.
+     * An oid is the type used for a set definition.  When a user
+     * requests a set, what they actually get is the oid of a tuple in
+     * the pg_proc catalog, so the size of the "set" is the size
+     * of an oid.
+     * Similarly, byval being true makes sets much easier, and
+     * it isn't used by anything else.
+     * Note the assumption that OIDs are the same size as int4s.
+     */
+    (void) TypeCreate(typeName,             /* type name */
+                      new_rel_oid,          /* relation oid */
+                      tlen(type("oid")),    /* internal size */
+                      tlen(type("oid")),    /* external size */
+                      'c',                  /* type-type (catalog) */
+                      ',',                  /* default array delimiter */
+                      "int4in",             /* input procedure */
+                      "int4out",            /* output procedure */
+                      "int4in",             /* send procedure */
+                      "int4out",            /* receive procedure */
+                      NULL,                 /* array element type - irrelevant */
+                      "-",                  /* default type value */
+                      (bool) 1,             /* passed by value */
+                      'i');                 /* default alignment */
+}
+
+/* --------------------------------
+ * heap_create 
+ *
+ * creates a new cataloged relation.  see comments above.
+ * --------------------------------
+ */
+Oid
+heap_create(char relname[],
+            char *typename, /* not used currently */
+            int arch,
+            unsigned smgr,
+            TupleDesc tupdesc)
+{
+    /*
+     * Create a cataloged relation called relname with the attributes
+     * described by tupdesc, and return its oid.  arch is recorded in
+     * pg_class and smgr selects the storage manager.
+     */
+    Relation    classCatalog;
+    Relation    newRel;
+    Oid         newRelOid;
+    int         attCount = tupdesc->natts;
+
+    /* sanity checks */
+    AssertState(IsNormalProcessingMode() || IsBootstrapProcessingMode());
+    if (!(attCount != 0 && attCount <= MaxHeapAttributeNumber))
+        elog(WARN, "amcreate: from 1 to %d attributes must be specified",
+             MaxHeapAttributeNumber);
+
+    CheckAttributeNames(tupdesc);
+
+    /* open pg_class and make sure the name is not taken yet */
+    classCatalog = heap_openr(RelationRelationName);
+
+    if (RelationAlreadyExists(classCatalog, relname))
+    {
+        heap_close(classCatalog);
+        elog(WARN, "amcreate: %s relation already exists", relname);
+    }
+
+    /*
+     * The name is free: create the uncataloged relation (this is where
+     * the disk file gets made) and read the new oid back out of its
+     * first attribute.
+     */
+    newRel = heap_creatr(relname, smgr, tupdesc);
+    newRelOid = newRel->rd_att->attrs[0]->attrelid;
+
+    /* defining a relation also defines a complex type of the same name */
+    addNewRelationType(relname, newRelOid);
+
+    /* register the attributes in pg_attribute ... */
+    AddNewAttributeTuples(newRelOid, tupdesc);
+
+    /* ... and the relation itself in pg_class */
+    AddPgRelationTuple(classCatalog,
+                       newRel,
+                       newRelOid,
+                       arch,
+                       attCount);
+
+    /*
+     * The relation is cataloged; close both descriptors and hand back
+     * the oid.
+     *
+     * SOMEDAY: fill the STATISTIC relation properly.
+     */
+    heap_close(newRel);
+    heap_close(classCatalog);
+
+    return newRelOid;
+}
+
+
+/* ----------------------------------------------------------------
+ * heap_destroy    - removes all record of named relation from catalogs
+ *
+ * 1)  open relation, check for existence, etc.
+ * 2)  remove inheritance information
+ * 3)  remove indexes
+ * 4)  remove pg_class tuple
+ * 5)  remove pg_attribute tuples
+ * 6)  remove pg_type tuples
+ * 7)  unlink relation
+ *
+ * old comments
+ * Except for vital relations, removes relation from
+ * relation catalog, and related attributes from
+ * attribute catalog (needed?).  (Anything else???)
+ *
+ * get proper relation from relation catalog (if not arg)
+ * check if relation is vital (strcmp()/reltype???)
+ * scan attribute catalog deleting attributes of reldesc
+ *     (necessary?)
+ * delete relation from relation catalog
+ * (How are the tuples of the relation discarded???)
+ *
+ * XXX Must fix to work with indexes.
+ * There may be a better order for doing things.
+ * Problems with destroying a deleted database--cannot create
+ * a struct reldesc without having an open file descriptor.
+ * ----------------------------------------------------------------
+ */
+
+/* --------------------------------
+ * RelationRemoveInheritance
+ *
+ *     Note: for now, we cause an exception if relation is a
+ * superclass.  Someday, we may want to allow this and merge
+ * the type info into subclass procedures....  this seems like
+ * lots of work.
+ * --------------------------------
+ */
+void
+RelationRemoveInheritance(Relation relation)
+{
+    /*
+     * Remove the inheritance bookkeeping for `relation`.
+     *
+     * If any pg_inherits tuple names this relation as a parent, the
+     * removal is refused with elog(WARN).  Otherwise the pg_inherits
+     * tuples whose inhrel is this relation, and the pg_ipl tuples whose
+     * iplrel is this relation, are deleted.
+     */
+    Relation        catalogRelation;
+    HeapTuple       tuple;
+    HeapScanDesc    scan;
+    ScanKeyData     entry;
+
+    /* ----------------
+     *  open pg_inherits
+     * ----------------
+     */
+    catalogRelation = heap_openr(InheritsRelationName);
+
+    /* ----------------
+     *  form a scan key for the subclasses of this class
+     *  and begin scanning
+     * ----------------
+     */
+    ScanKeyEntryInitialize(&entry, 0x0, Anum_pg_inherits_inhparent,
+                           ObjectIdEqualRegProcedure,
+                           ObjectIdGetDatum(RelationGetRelationId(relation)));
+
+    scan = heap_beginscan(catalogRelation,
+                          false,
+                          NowTimeQual,
+                          1,
+                          &entry);
+
+    /* ----------------
+     *  if any subclasses exist, then we disallow the deletion.
+     * ----------------
+     */
+    tuple = heap_getnext(scan, 0, (Buffer *) NULL);
+    if (HeapTupleIsValid(tuple))
+    {
+        /*
+         * Pull the subclass oid out of the tuple while the scan is still
+         * open; the tuple must not be touched once the scan has ended.
+         */
+        Oid     subclass = ((InheritsTupleForm) GETSTRUCT(tuple))->inhrel;
+
+        heap_endscan(scan);
+        heap_close(catalogRelation);
+
+        elog(WARN, "relation <%u> inherits \"%s\"",
+             subclass,
+             RelationGetRelationName(relation));
+    }
+
+    /* no subclass found: finish this scan before starting the next one */
+    heap_endscan(scan);
+
+    /* ----------------
+     *  If we get here, it means the relation has no subclasses
+     *  so we can trash it.  First we remove dead INHERITS tuples.
+     *  The scan key is reused with only the attribute number changed.
+     * ----------------
+     */
+    entry.sk_attno = Anum_pg_inherits_inhrel;
+
+    scan = heap_beginscan(catalogRelation,
+                          false,
+                          NowTimeQual,
+                          1,
+                          &entry);
+
+    for (;;)
+    {
+        tuple = heap_getnext(scan, 0, (Buffer *) NULL);
+        if (!HeapTupleIsValid(tuple))
+            break;
+        heap_delete(catalogRelation, &tuple->t_ctid);
+    }
+
+    heap_endscan(scan);
+    heap_close(catalogRelation);
+
+    /* ----------------
+     *  now remove dead IPL tuples
+     * ----------------
+     */
+    catalogRelation =
+        heap_openr(InheritancePrecidenceListRelationName);
+
+    entry.sk_attno = Anum_pg_ipl_iplrel;
+
+    scan = heap_beginscan(catalogRelation,
+                          false,
+                          NowTimeQual,
+                          1,
+                          &entry);
+
+    for (;;)
+    {
+        tuple = heap_getnext(scan, 0, (Buffer *) NULL);
+        if (!HeapTupleIsValid(tuple))
+            break;
+        heap_delete(catalogRelation, &tuple->t_ctid);
+    }
+
+    heap_endscan(scan);
+    heap_close(catalogRelation);
+}
+
+/* --------------------------------
+ * RelationRemoveIndexes
+ * 
+ * --------------------------------
+ */
+void
+RelationRemoveIndexes(Relation relation)
+{
+    /*
+     * Destroy every index recorded in pg_index for `relation`: scan
+     * pg_index for tuples whose indrelid is this relation and call
+     * index_destroy() on each indexrelid found.
+     */
+    Relation        pgIndex;
+    HeapScanDesc    indexScan;
+    HeapTuple       indexTuple;
+    ScanKeyData     relidKey;
+
+    pgIndex = heap_openr(IndexRelationName);
+
+    ScanKeyEntryInitialize(&relidKey, 0x0, Anum_pg_index_indrelid,
+                           ObjectIdEqualRegProcedure,
+                           ObjectIdGetDatum(RelationGetRelationId(relation)));
+
+    indexScan = heap_beginscan(pgIndex,
+                               false,
+                               NowTimeQual,
+                               1,
+                               &relidKey);
+
+    /* fetch, test, destroy -- until the scan runs dry */
+    while (indexTuple = heap_getnext(indexScan, 0, (Buffer *) NULL),
+           HeapTupleIsValid(indexTuple))
+    {
+        index_destroy(((IndexTupleForm) GETSTRUCT(indexTuple))->indexrelid);
+    }
+
+    heap_endscan(indexScan);
+    heap_close(pgIndex);
+}
+
+/* --------------------------------
+ * DeletePgRelationTuple
+ *
+ * --------------------------------
+ */
+void
+DeletePgRelationTuple(Relation rdesc)
+{
+    /*
+     * Delete the pg_class tuple that describes rdesc.  The tuple is
+     * located by oid, using the attrelid stored in the relation's first
+     * attribute.  A missing tuple is reported with elog(WARN).
+     */
+    Relation        pg_class_desc;
+    HeapScanDesc    pg_class_scan;
+    ScanKeyData     key;
+    HeapTuple       tup;
+    ItemPointerData tid;
+
+    /* ----------------
+     *  open pg_class
+     * ----------------
+     */
+    pg_class_desc = heap_openr(RelationRelationName);
+
+    /* ----------------
+     *  create a scan key to locate the relation oid of the
+     *  relation to delete
+     * ----------------
+     */
+    ScanKeyEntryInitialize(&key, 0, ObjectIdAttributeNumber,
+                           F_INT4EQ, rdesc->rd_att->attrs[0]->attrelid);
+
+    pg_class_scan = heap_beginscan(pg_class_desc,
+                                   0,
+                                   NowTimeQual,
+                                   1,
+                                   &key);
+
+    /* ----------------
+     *  use heap_getnext() to fetch the pg_class tuple.  If this
+     *  tuple is not valid then something's wrong.
+     * ----------------
+     */
+    tup = heap_getnext(pg_class_scan, 0, (Buffer *) NULL);
+
+    if (!PointerIsValid(tup))
+    {
+        heap_endscan(pg_class_scan);
+        heap_close(pg_class_desc);
+        /* pass the character data, not the address of the NameData */
+        elog(WARN, "DeletePgRelationTuple: %s relation nonexistent",
+             rdesc->rd_rel->relname.data);
+    }
+
+    /* ----------------
+     *  delete the relation tuple from pg_class, and finish up.
+     *
+     *  The tuple id is copied out before the scan is ended so that
+     *  the tuple itself is never touched after heap_endscan().  The
+     *  scan is still ended before heap_delete(), as before.
+     * ----------------
+     */
+    tid = tup->t_ctid;
+    heap_endscan(pg_class_scan);
+    heap_delete(pg_class_desc, &tid);
+
+    heap_close(pg_class_desc);
+}
+
+/* --------------------------------
+ * DeletePgAttributeTuples
+ *
+ * --------------------------------
+ */
+void
+DeletePgAttributeTuples(Relation rdesc)
+{
+    /*
+     * Delete every pg_attribute tuple whose attrelid matches rdesc (the
+     * value is taken from the relation's first attribute).
+     */
+    Relation        attrCatalog;
+    HeapScanDesc    attrScan;
+    ScanKeyData     relidKey;
+    HeapTuple       attrTuple;
+
+    /* open pg_attribute */
+    attrCatalog = heap_openr(AttributeRelationName);
+
+    /* scan key selecting this relation's attribute tuples */
+    ScanKeyEntryInitialize(&relidKey, 0, Anum_pg_attribute_attrelid,
+                           F_INT4EQ, rdesc->rd_att->attrs[0]->attrelid);
+
+    /* get a write lock _before_ getting the read lock in the scan */
+    RelationSetLockForWrite(attrCatalog);
+
+    attrScan = heap_beginscan(attrCatalog,
+                              0,
+                              NowTimeQual,
+                              1,
+                              &relidKey);
+
+    /* delete tuples one at a time until the scan returns nothing */
+    for (;;)
+    {
+        attrTuple = heap_getnext(attrScan, 0, (Buffer *) NULL);
+        if (!PointerIsValid(attrTuple))
+            break;
+
+        heap_delete(attrCatalog, &attrTuple->t_ctid);
+    }
+
+    /* finish up: end the scan, release the write lock, close */
+    heap_endscan(attrScan);
+    RelationUnsetLockForWrite(attrCatalog);
+    heap_close(attrCatalog);
+}
+
+
+/* --------------------------------
+ * DeletePgTypeTuple
+ *
+ * Delete the pg_type tuple whose typrelid matches the relation
+ * described by rdesc.  Before deleting, pg_attribute is probed
+ * for any attribute whose atttypid is that type; if one is
+ * found the destroy is refused via elog(WARN).
+ * --------------------------------
+ */
+void
+DeletePgTypeTuple(Relation rdesc)
+{
+    Relation       typeRel;
+    HeapScanDesc   typeScan;
+    ScanKeyData    typeKey;
+    HeapTuple      typeTup;
+    Relation       attRel;
+    HeapScanDesc   attScan;
+    ScanKeyData    attKey;
+    HeapTuple      userTup;
+    Oid            relTypeOid;
+    
+    /* locate the pg_type tuple belonging to this relation */
+    typeRel = heap_openr(TypeRelationName);
+    
+    ScanKeyEntryInitialize(&typeKey, 0, Anum_pg_type_typrelid, F_INT4EQ,
+                           rdesc->rd_att->attrs[0]->attrelid);
+    
+    typeScan = heap_beginscan(typeRel, 0, NowTimeQual, 1, &typeKey);
+    typeTup = heap_getnext(typeScan, 0, (Buffer *) NULL);
+    
+    /* no type tuple: clean up and complain */
+    if (! PointerIsValid(typeTup)) {
+        heap_endscan(typeScan);
+        heap_close(typeRel);
+        elog(WARN, "DeletePgTypeTuple: %s type nonexistent",
+             &rdesc->rd_rel->relname);
+    }
+    
+    /*
+     * look in pg_attribute for any attribute declared with this
+     * relation's type; the oid of the type is the oid of the tuple
+     * just fetched.
+     */
+    relTypeOid = typeTup->t_oid;
+    
+    attRel = heap_openr(AttributeRelationName);
+    
+    ScanKeyEntryInitialize(&attKey, 0, Anum_pg_attribute_atttypid,
+                           F_INT4EQ, relTypeOid);
+    
+    attScan = heap_beginscan(attRel, 0, NowTimeQual, 1, &attKey);
+    userTup = heap_getnext(attScan, 0, (Buffer *) NULL);
+    
+    /* a hit means something depends on the type: refuse the delete */
+    if (PointerIsValid(userTup)) {
+        Oid relid = ((AttributeTupleForm) GETSTRUCT(userTup))->attrelid;
+        
+        heap_endscan(typeScan);
+        heap_close(typeRel);
+        heap_endscan(attScan);
+        heap_close(attRel);
+        
+        elog(WARN, "DeletePgTypeTuple: att of type %s exists in relation %d",
+             &rdesc->rd_rel->relname, relid);
+    }
+    heap_endscan(attScan);
+    heap_close(attRel);
+    
+    /*
+     * nothing depends on the type, so delete its tuple.  The pg_type
+     * scan is ended first so that its read lock is released before
+     * the delete (per the original -mer 13 Aug 1991 note).
+     */
+    heap_endscan(typeScan);
+    heap_delete(typeRel, &typeTup->t_ctid);
+    
+    heap_close(typeRel);
+}
+
+/* --------------------------------
+ * heap_destroy
+ *
+ * Destroy the relation named relname.  In order, this routine:
+ * opens the relation, refuses system relations, calls
+ * RelationRemoveInheritance(), RelationRemoveIndexes() (when
+ * relhasindex is set) and RelationRemoveRules() (when rd_rules
+ * is non-NULL), deletes the pg_attribute, pg_type and pg_class
+ * tuples, invalidates the relcache entry, unlinks the storage
+ * through smgrunlink() and closes the descriptor.
+ *
+ * Reports elog(WARN) when the relation cannot be opened or is a
+ * system relation.
+ * --------------------------------
+ */
+void
+heap_destroy(char *relname)
+{
+    Relation   rdesc;
+    
+    /* ----------------
+     * first open the relation.  if the relation does not exist,
+     *  heap_openr() returns NULL.
+     * ----------------
+     */
+    rdesc = heap_openr(relname);
+    if (rdesc == NULL)
+   elog(WARN,"Relation %s Does Not Exist!", relname);
+    
+    /* ----------------
+     * prevent deletion of system relations
+     * ----------------
+     */
+    if (IsSystemRelationName(RelationGetRelationName(rdesc)->data))
+   elog(WARN, "amdestroy: cannot destroy %s relation",
+        &rdesc->rd_rel->relname);
+    
+    /* ----------------
+     * remove inheritance information
+     * ----------------
+     */
+    RelationRemoveInheritance(rdesc);
+    
+    /* ----------------
+     * remove indexes if necessary
+     * ----------------
+     */
+    if (rdesc->rd_rel->relhasindex) {
+   RelationRemoveIndexes(rdesc);
+    }
+
+    /* ----------------
+     * remove rules if necessary
+     * ----------------
+     */
+    if (rdesc->rd_rules != NULL) {
+   RelationRemoveRules(rdesc->rd_id);
+    }
+    
+    /* ----------------
+     * delete attribute tuples
+     * ----------------
+     */
+    DeletePgAttributeTuples(rdesc);
+    
+    /* ----------------
+     * delete type tuple.  here we want to see the effects
+     *  of the deletions we just did, so we use setheapoverride().
+     *  (DeletePgTypeTuple scans pg_attribute for users of the type,
+     *  so the attribute tuples deleted above must already be gone
+     *  from its point of view.)
+     * ----------------
+     */
+    setheapoverride(true);
+    DeletePgTypeTuple(rdesc);
+    setheapoverride(false);
+    
+    /* ----------------
+     * delete relation tuple
+     * ----------------
+     */
+    DeletePgRelationTuple(rdesc);
+    
+    /* ----------------
+     * flush the relation from the relcache
+     *
+     * NOTE(review): rdesc is still dereferenced below by smgrunlink()
+     *  and heap_close(); presumably the invalidation does not free an
+     *  open descriptor -- confirm against the relcache code.
+     * ----------------
+     */
+    RelationIdInvalidateRelationCacheByRelationId(rdesc->rd_id);
+
+    /* ----------------
+     * unlink the relation and finish up.
+     *  the result of smgrunlink() is deliberately discarded.
+     * ----------------
+     */
+    (void) smgrunlink(rdesc->rd_rel->relsmgr, rdesc);
+    heap_close(rdesc);
+}
+
+/*
+ * heap_destroyr
+ *    destroy and close temporary relations
+ *
+ *    No catalog tuples are touched here.  The routine calls
+ *    ReleaseTmpRelBuffers(), unlinks the relation's storage through
+ *    smgrunlink() (result discarded), closes the descriptor and then
+ *    clears the relation's slot in the temporary relation list.
+ *    RemoveFromTempRelList() only compares the pointer value, so it
+ *    is called after heap_close() without dereferencing rdesc.
+ *
+ */
+
+void 
+heap_destroyr(Relation rdesc)
+{
+    ReleaseTmpRelBuffers(rdesc);
+    (void) smgrunlink(rdesc->rd_rel->relsmgr, rdesc);
+    heap_close(rdesc);
+    RemoveFromTempRelList(rdesc);
+}
+
+
+/**************************************************************
+  functions to deal with the list of temporary relations 
+**************************************************************/
+
+/* --------------
+   InitTempRellist():
+
+   initialize temporary relations list
+   the tempRelList is a list of temporary relations that
+   are created in the course of the transactions
+   they need to be destroyed properly at the end of the transactions
+
+   Any list left over from an earlier call is freed first.  The new
+   list starts with TEMP_REL_LIST_SIZE slots, all zeroed, and
+   num == 0.
+
+   MODIFIES the global variable tempRels
+
+   Reports elog(WARN) if either allocation fails; in that case
+   tempRels is left NULL, which the other list routines tolerate.
+
+ >> NOTE <<
+
+   malloc is used instead of palloc because we KNOW when we are
+   going to free these things.  Keeps us away from the memory context
+   hairyness
+
+*/
+void
+InitTempRelList()
+{
+    /* discard any list left over from a previous call */
+    if (tempRels) {
+        free(tempRels->rels);
+        free(tempRels);
+        tempRels = NULL;
+    }
+
+    tempRels = (TempRelList*)malloc(sizeof(TempRelList));
+    if (tempRels == NULL)
+        elog(WARN, "InitTempRelList: out of memory");
+
+    tempRels->size = TEMP_REL_LIST_SIZE;
+    tempRels->num = 0;
+    tempRels->rels = (Relation*)malloc(sizeof(Relation) * tempRels->size);
+    if (tempRels->rels == NULL) {
+        /* do not leave a half-built list behind */
+        free(tempRels);
+        tempRels = NULL;
+        elog(WARN, "InitTempRelList: out of memory");
+    }
+
+    /*
+     * memset() takes (ptr, value, length).  The original call passed
+     * the length as the fill value and 0 as the length, so the slots
+     * were never actually cleared.
+     */
+    memset(tempRels->rels, 0, sizeof(Relation) * tempRels->size);
+}
+
+/*
+   drop a relation from the TempRelList
+
+   MODIFIES the global variable tempRels
+      the slot holding r (first match only) is overwritten with NULL;
+      the list is not compacted, and DestroyTempRels skips NULL slots
+*/
+void
+RemoveFromTempRelList(Relation r)
+{
+    int slot;
+
+    if (tempRels == NULL)
+        return;
+
+    /* advance to the first slot holding r, or to the end of the list */
+    slot = 0;
+    while (slot < tempRels->num && tempRels->rels[slot] != r)
+        slot++;
+
+    if (slot < tempRels->num)
+        tempRels->rels[slot] = NULL;
+}
+
+/*
+   add a temporary relation to the TempRelList
+
+   Does nothing when the list has not been initialized.  When the
+   list is full it is grown by TEMP_REL_LIST_SIZE slots; elog(WARN)
+   is reported if that growth fails, and the existing list is left
+   intact.
+
+   MODIFIES the global variable tempRels
+*/
+void
+AddToTempRelList(Relation r)
+{
+    if (!tempRels)
+        return;
+
+    if (tempRels->num == tempRels->size) {
+        Relation *grown;
+
+        /*
+         * realloc() takes a size in bytes.  The original passed the
+         * element count, which shrank the buffer below what the list
+         * needs and let the store below write past the allocation.
+         */
+        grown = (Relation *) realloc(tempRels->rels,
+                                     sizeof(Relation) *
+                                     (tempRels->size + TEMP_REL_LIST_SIZE));
+        if (grown == NULL)
+            elog(WARN, "AddToTempRelList: out of memory");
+
+        /* commit the new buffer and capacity only after success */
+        tempRels->rels = grown;
+        tempRels->size += TEMP_REL_LIST_SIZE;
+    }
+    tempRels->rels[tempRels->num] = r;
+    tempRels->num++;
+}
+
+/*
+   walk the tempRels list, destroy every relation still recorded in
+   it, then free the list itself and reset tempRels to NULL
+*/
+void
+DestroyTempRels()
+{
+    int idx;
+
+    if (tempRels == NULL)
+        return;
+
+    for (idx = 0; idx < tempRels->num; idx++) {
+        Relation victim = tempRels->rels[idx];
+
+        /* a NULL slot marks a relation already removed from the list */
+        if (victim == NULL)
+            continue;
+
+        heap_destroyr(victim);
+    }
+
+    free(tempRels->rels);
+    free(tempRels);
+    tempRels = NULL;
+}
+
diff --git a/src/backend/catalog/heap.h b/src/backend/catalog/heap.h

new file mode 100644 (file)

index 0000000..edcd5bf
--- /dev/null
+++ b/src/backend/catalog/heap.h
@@ -0,0 +1,42 @@
+/*-------------------------------------------------------------------------
+ *
+ * heap.h--
+ *    prototypes for functions in lib/catalog/heap.c
+ *
+ *    The declarations fall into three groups:
+ *      - relation creation helpers (heap_creatr, RelationAlreadyExists,
+ *        addNewRelationType, AddPgRelationTuple, heap_create);
+ *      - relation destruction: heap_destroy() takes a relation name and
+ *        removes the catalog entries through the RelationRemove* and
+ *        DeletePg* helpers, while heap_destroyr() takes an already open
+ *        temporary relation;
+ *      - the temporary relation list (InitTempRelList, AddToTempRelList,
+ *        RemoveFromTempRelList, DestroyTempRels).
+ *
+ *    InitTempRelList and DestroyTempRels are declared with empty
+ *    parameter lists; they are called with no arguments.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: heap.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HEAP_H
+#define HEAP_H
+
+extern Relation heap_creatr(char *relname, unsigned smgr, TupleDesc att);
+
+extern int RelationAlreadyExists(Relation pg_class_desc, char relname[]);
+extern void addNewRelationType(char *typeName, Oid new_rel_oid);
+
+extern void AddPgRelationTuple(Relation pg_class_desc,
+   Relation new_rel_desc, Oid new_rel_oid, int arch, unsigned natts);
+
+extern Oid heap_create(char relname[], 
+              char *typename,
+              int arch, 
+              unsigned smgr, TupleDesc tupdesc);
+
+extern void RelationRemoveInheritance(Relation relation);
+extern void RelationRemoveIndexes(Relation relation);
+extern void DeletePgRelationTuple(Relation rdesc);
+extern void DeletePgAttributeTuples(Relation rdesc);
+extern void DeletePgTypeTuple(Relation rdesc);
+extern void heap_destroy(char relname[]);
+extern void heap_destroyr(Relation r);
+ 
+extern void InitTempRelList();
+extern void AddToTempRelList(Relation r);
+extern void RemoveFromTempRelList(Relation r);
+extern void DestroyTempRels();
+
+#endif /* HEAP_H */
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

new file mode 100644 (file)

index 0000000..b04010b
--- /dev/null
+++ b/src/backend/catalog/index.c
@@ -0,0 +1,1655 @@
+/*-------------------------------------------------------------------------
+ *
+ * index.c--
+ *    code to create and destroy POSTGRES index relations
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *
+ * INTERFACE ROUTINES
+ * index_create()      - Create a cataloged index relation
+ * index_destroy()     - Removes index relation from catalogs
+ *
+ * NOTES
+ *    Much of this code uses hardcoded sequential heap relation scans
+ *    to fetch information from the catalogs.  These should all be
+ *    rewritten to use the system caches lookup routines like
+ *    SearchSysCacheTuple, which can do efficient lookup and
+ *    caching.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "access/tupdesc.h"
+#include "access/funcindex.h"
+#include "access/xact.h"
+
+#include "storage/smgr.h"
+#include "miscadmin.h"
+#include "utils/mcxt.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/elog.h"
+
+#include "bootstrap/bootstrap.h"
+
+#include "catalog/catname.h"
+#include "catalog/catalog.h"
+#include "utils/syscache.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+#include "catalog/indexing.h"
+
+#include "catalog/heap.h"
+
+#include "nodes/execnodes.h"
+#include "nodes/plannodes.h"
+
+#include "catalog/index.h"
+
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+
+#include "optimizer/clauses.h"
+#include "optimizer/prep.h"
+
+#include "parser/catalog_utils.h"
+
+#include "machine.h"
+
+/*
+ * macros used in guessing how many tuples are on a page.
+ *
+ * NTUPLES_PER_PAGE(natts) divides BLCKSZ by natts * AVG_TUPLE_SIZE using
+ * integer division, so natts must be non-zero.  AVG_TUPLE_SIZE is
+ * multiplied by the attribute count, i.e. it acts as a per-attribute
+ * size estimate.
+ */
+#define AVG_TUPLE_SIZE 8
+#define NTUPLES_PER_PAGE(natts) (BLCKSZ/((natts)*AVG_TUPLE_SIZE))
+
+/* non-export function prototypes (all static: private to this file) */
+static Oid RelationNameGetObjectId(char *relationName, Relation pg_class,
+                  bool setHasIndexAttribute);
+static Oid GetHeapRelationOid(char *heapRelationName, char *indexRelationName);
+static TupleDesc BuildFuncTupleDesc(FuncIndexInfo *funcInfo);
+static TupleDesc ConstructTupleDescriptor(Oid heapoid, Relation heapRelation,
+                     int numatts, AttrNumber attNums[]);
+
+static void ConstructIndexReldesc(Relation indexRelation, Oid amoid);
+static Oid UpdateRelationRelation(Relation indexRelation);
+static void InitializeAttributeOids(Relation indexRelation,
+                   int numatts,
+                   Oid indexoid);
+static void
+AppendAttributeTuples(Relation indexRelation, int numatts);
+static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
+   FuncIndexInfo *funcInfo, int natts,
+   AttrNumber attNums[], Oid classOids[], Node *predicate);
+static void DefaultBuild(Relation heapRelation, Relation indexRelation,
+   int numberOfAttributes, AttrNumber attributeNumber[],
+   IndexStrategy indexStrategy, uint16 parameterCount,
+   Datum parameter[], FuncIndexInfoPtr funcInfo, PredInfo *predInfo);
+
+/* ----------------------------------------------------------------
+ *    sysatts is a structure containing attribute tuple forms
+ *    for system attributes (numbered -1, -2, ...).  This really
+ *    should be generated or eliminated or moved elsewhere. -cim 1/19/91
+ *
+ * typedef struct FormData_pg_attribute {
+ * Oid attrelid;
+ * NameData    attname;
+ * Oid atttypid;
+ * Oid attdefrel;
+ * uint32      attnvals;
+ * Oid atttyparg;  type arg for arrays/spquel/procs
+ * int16       attlen;
+ * AttrNumber  attnum;
+ * uint16      attbound;
+ * bool        attbyval;
+ * bool        attcanindex;
+ * Oid     attproc;    spquel?
+ * } FormData_pg_attribute;
+ *
+ *    The data in this table was taken from local1_template.ami
+ *    but tmin and tmax were switched because local1 was incorrect.
+ *
+ *    The table holds 11 entries, for attribute numbers -1 through -11;
+ *    the entry for attribute number n lives at index (-n) - 1, which is
+ *    how ConstructTupleDescriptor() looks it up.
+ *
+ *    NOTE(review): each initializer below supplies 13 values while the
+ *    typedef quoted above lists only 12 fields, so the quoted typedef
+ *    looks out of date.  The trailing character ('i', 's' or 'c') is
+ *    presumably an alignment code -- confirm against pg_attribute.h.
+ * ----------------------------------------------------------------
+ */
+static FormData_pg_attribute   sysatts[] = {
+   { 0l, {"ctid"},   27l,  0l, 0l, 0l,  6,  -1, 0,   '\0', '\001', 0l, 'i' },
+   { 0l, {"oid"},    26l,  0l, 0l, 0l,  4,  -2, 0, '\001', '\001', 0l, 'i' },
+   { 0l, {"xmin"},   28l,  0l, 0l, 0l,  5,  -3, 0,   '\0', '\001', 0l, 'i' },
+   { 0l, {"cmin"},   29l,  0l, 0l, 0l,  1,  -4, 0, '\001', '\001', 0l, 's' },
+   { 0l, {"xmax"},   28l,  0l, 0l, 0l,  5,  -5, 0,   '\0', '\001', 0l, 'i' },
+   { 0l, {"cmax"},   29l,  0l, 0l, 0l,  1,  -6, 0, '\001', '\001', 0l, 's' },
+   { 0l, {"chain"},  27l,  0l, 0l, 0l,  6,  -7, 0,   '\0', '\001', 0l, 'i' },
+   { 0l, {"anchor"}, 27l,  0l, 0l, 0l,  6,  -8, 0,   '\0', '\001', 0l, 'i' },
+   { 0l, {"tmin"},   20l,  0l, 0l, 0l,  4,  -9, 0, '\001', '\001', 0l, 'i' },
+   { 0l, {"tmax"},   20l,  0l, 0l, 0l,  4, -10, 0, '\001', '\001', 0l, 'i' },
+   { 0l, {"vtype"},  18l,  0l, 0l, 0l,  1, -11, 0, '\001', '\001', 0l, 'c' },
+};
+
+/* ----------------------------------------------------------------
+ * RelationNameGetObjectId --
+ * Returns the object identifier for a relation given its name.
+ *
+ * >   The HASINDEX attribute for the relation with this name will
+ * >   be set if it exists and if it is indicated by the call argument.
+ * What a load of bull.  This setHasIndexAttribute is totally ignored.
+ * This is yet another silly routine to scan the catalogs which should
+ * probably be replaced by SearchSysCacheTuple. -cim 1/19/91
+ *
+ * Note:
+ * Assumes relation name is valid.
+ * Assumes relation descriptor is valid.
+ * ----------------------------------------------------------------
+ */
+static Oid
+RelationNameGetObjectId(char *relationName,
+                        Relation pg_class,
+                        bool setHasIndexAttribute)
+{
+    HeapScanDesc   scan;
+    HeapTuple      classTuple;
+    Oid            result = InvalidOid;
+    Buffer         buf;
+    ScanKeyData    skey;
+    
+    /*
+     * Outside bootstrap mode, look the name up with
+     * ClassNameIndexScan().  The tuple it hands back is pfree'd here
+     * once its oid has been saved.  (setHasIndexAttribute is not
+     * consulted on either path.)
+     */
+    if (!IsBootstrapProcessingMode()) {
+        classTuple = ClassNameIndexScan(pg_class, relationName);
+        if (! HeapTupleIsValid(classTuple))
+            return InvalidOid;
+        
+        result = classTuple->t_oid;
+        pfree(classTuple);
+        return result;
+    }
+    
+    /*
+     * Bootstrap mode: scan pg_class directly for a tuple whose
+     * relname equals relationName.
+     */
+    ScanKeyEntryInitialize(&skey, 0, Anum_pg_class_relname,
+                           NameEqualRegProcedure,
+                           PointerGetDatum(relationName));
+    
+    scan = heap_beginscan(pg_class, 0, NowTimeQual, 1, &skey);
+    classTuple = heap_getnext(scan, 0, &buf);
+    
+    /* on a hit the relation id is the oid of the tuple found */
+    if (HeapTupleIsValid(classTuple)) {
+        result = classTuple->t_oid;
+        ReleaseBuffer(buf);
+    }
+    
+    heap_endscan(scan);
+    
+    return result;
+}
+
+
+/* ----------------------------------------------------------------
+ * GetHeapRelationOid
+ * ----------------------------------------------------------------
+ */
+static Oid
+GetHeapRelationOid(char *heapRelationName, char *indexRelationName)
+{
+    Relation   classRel;
+    Oid        existingIndexOid;
+    Oid        heapRelOid;
+    
+    /* XXX ADD INDEXING HERE */
+    
+    classRel = heap_openr(RelationRelationName);
+    
+    /* the index name must not already be taken in pg_class */
+    existingIndexOid = RelationNameGetObjectId(indexRelationName,
+                                               classRel, false);
+    if (OidIsValid(existingIndexOid))
+        elog(WARN, "Cannot create index: '%s' already exists",
+             indexRelationName);
+    
+    /* the heap relation to be indexed must exist */
+    heapRelOid = RelationNameGetObjectId(heapRelationName,
+                                         classRel, true);
+    if (! OidIsValid(heapRelOid))
+        elog(WARN, "Cannot create index on '%s': relation does not exist",
+             heapRelationName);
+    
+    /* done with pg_class; hand back the heap relation's oid */
+    heap_close(classRel);
+    
+    return heapRelOid;
+}
+
+static TupleDesc
+BuildFuncTupleDesc(FuncIndexInfo *funcInfo)
+{
+    TupleDesc           result;
+    AttributeTupleForm  att;
+    HeapTuple           cacheTuple;
+    TypeTupleForm       typeForm;
+    Oid                 resultType;
+    char               *funcname;
+    int4                nargs;
+    Oid                *argtypes;
+    
+    /*
+     * Build a one-attribute descriptor whose single attribute starts
+     * out fully zeroed.
+     */
+    result = CreateTemplateTupleDesc(1);
+    att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE);
+    result->attrs[0] = att;
+    memset(att, 0, ATTRIBUTE_TUPLE_SIZE);
+    
+    /*
+     * Find the function through the PRONAME cache to learn its
+     * return type.
+     */
+    funcname = FIgetname(funcInfo);
+    nargs = FIgetnArgs(funcInfo);
+    argtypes = FIgetArglist(funcInfo);
+    cacheTuple = SearchSysCacheTuple(PRONAME,
+                                     PointerGetDatum(funcname),
+                                     Int32GetDatum(nargs),
+                                     PointerGetDatum(argtypes),
+                                     0);
+    
+    if (!HeapTupleIsValid(cacheTuple))
+        func_error("BuildFuncTupleDesc", funcname, nargs, (int*)argtypes);
+    
+    resultType = ((Form_pg_proc)GETSTRUCT(cacheTuple))->prorettype;
+    
+    /*
+     * Fetch the return type's pg_type entry for its length and
+     * by-value flag.
+     */
+    cacheTuple = SearchSysCacheTuple(TYPOID,
+                                     ObjectIdGetDatum(resultType),
+                                     0,0,0);
+    if (!HeapTupleIsValid(cacheTuple))
+        elog(WARN,"Function %s return type does not exist",FIgetname(funcInfo));
+    
+    typeForm = (TypeTupleForm)GETSTRUCT(cacheTuple);
+    
+    /*
+     * Fill in the fields we know; everything else stays zero.
+     */
+    att->attlen = typeForm->typlen;
+    att->atttypid = resultType;
+    att->attnum = 1;
+    att->attbyval = typeForm->typbyval;
+    att->attcanindex = 0;
+    
+    /*
+     * The attribute is named after the function.
+     */
+    namestrcpy(&att->attname, funcname);
+    
+    return result;
+}
+
+/* ----------------------------------------------------------------
+ * ConstructTupleDescriptor
+ * ----------------------------------------------------------------
+ */
+static TupleDesc
+ConstructTupleDescriptor(Oid heapoid,
+                         Relation heapRelation,
+                         int numatts,
+                         AttrNumber attNums[])
+{
+    TupleDesc  heapDesc;
+    TupleDesc  result;
+    int        heapNatts;      /* relnatts of the heap relation */
+    int        attpos;
+    
+    heapNatts = RelationGetRelationTupleForm(heapRelation)->relnatts;
+    
+    /* allocate the descriptor that will describe the index tuples */
+    result = CreateTemplateTupleDesc(numatts);
+    
+    /*
+     * Each indexed attribute gets a copy of its attribute tuple form,
+     * taken either from the static sysatts[] table (system attributes)
+     * or from the heap relation's own tuple descriptor.
+     */
+    for (attpos = 0; attpos < numatts; attpos++) {
+        AttrNumber          attno = attNums[attpos];
+        AttributeTupleForm  dst;
+        char               *src;
+        
+        /* reject attribute numbers beyond the heap's last attribute */
+        if (attno > heapNatts)
+            elog(WARN, "Cannot create index: attribute %d does not exist",
+                 attno);
+        
+        dst = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE);
+        result->attrs[attpos] = dst;
+        
+        if (AttrNumberIsForUserDefinedAttr(attno)) {
+            /* ordinary attribute (1...n): copy from the heap descriptor */
+            heapDesc = RelationGetTupleDescriptor(heapRelation);
+            src = (char *) (heapDesc->attrs[AttrNumberGetAttrOffset(attno)]);
+        } else {
+            /*
+             * system attribute: its (negative) number n maps to slot
+             * (-n) - 1 of sysatts[].  Zero and anything at or below
+             * FirstLowInvalidHeapAttributeNumber is refused.
+             */
+            if (attno <= FirstLowInvalidHeapAttributeNumber || attno >= 0 )
+                elog(WARN, "Cannot create index on system attribute: attribute number out of range (%d)", attno);
+            
+            src = (char *) (& sysatts[(-attno) - 1]);
+        }
+        
+        /* copy the chosen form, then stamp the heap's oid into attrelid */
+        memcpy((char *) dst, src, ATTRIBUTE_TUPLE_SIZE);
+        dst->attrelid = heapoid;
+    }
+    
+    return result;
+}
+
+/* ----------------------------------------------------------------
+ * AccessMethodObjectIdGetAccessMethodTupleForm --
+ * Returns the formatted access method tuple given its object identifier.
+ *
+ * XXX ADD INDEXING
+ *
+ * Note:
+ * Assumes object identifier is valid.
+ * ----------------------------------------------------------------
+ */
+Form_pg_am
+AccessMethodObjectIdGetAccessMethodTupleForm(Oid accessMethodObjectId)
+{
+    Relation       amRel;
+    HeapScanDesc   amScan;
+    HeapTuple      amTuple;
+    ScanKeyData    skey;
+    Form_pg_am     result = NULL;     /* stays NULL when nothing is found */
+    
+    /* scan key: tuple oid == requested access method oid */
+    ScanKeyEntryInitialize(&skey, 0, ObjectIdAttributeNumber,
+                           ObjectIdEqualRegProcedure,
+                           ObjectIdGetDatum(accessMethodObjectId));
+    
+    /* look for the access method tuple in pg_am */
+    amRel = heap_openr(AccessMethodRelationName);
+    amScan = heap_beginscan(amRel, 0, NowTimeQual, 1, &skey);
+    
+    amTuple = heap_getnext(amScan, 0, (Buffer *)NULL);
+    
+    /*
+     * when a tuple was found, return a palloc'd copy of its form
+     * rather than a pointer into the scanned tuple
+     */
+    if (HeapTupleIsValid(amTuple)) {
+        result = (Form_pg_am)palloc(sizeof *result);
+        memcpy(result, GETSTRUCT(amTuple), sizeof *result);
+    }
+    
+    heap_endscan(amScan);
+    heap_close(amRel);
+    
+    return result;
+}
+
+/* ----------------------------------------------------------------
+ * ConstructIndexReldesc
+ * ----------------------------------------------------------------
+ */
+static void
+ConstructIndexReldesc(Relation indexRelation, Oid amoid)
+{
+    extern GlobalMemory  CacheCxt;
+    MemoryContext        callerCxt;
+    
+    /*
+     * The access method form is allocated while CacheCxt is the
+     * current memory context, so that it does not vanish when the
+     * context changes.  CacheCxt is created on first use.
+     */
+    if (CacheCxt == NULL)
+        CacheCxt = CreateGlobalMemory("Cache");
+    
+    callerCxt = MemoryContextSwitchTo((MemoryContext)CacheCxt);
+    indexRelation->rd_am = AccessMethodObjectIdGetAccessMethodTupleForm(amoid);
+    MemoryContextSwitchTo(callerCxt);
+    
+    /*
+     * Fill in the pg_class form for the index.
+     * XXX some other fields are still left uninitialized here
+     */
+    indexRelation->rd_rel->relowner = GetUserId();
+    indexRelation->rd_rel->relam = amoid;
+    indexRelation->rd_rel->relkind = RELKIND_INDEX;
+    
+    /* XXX placeholder values */
+    indexRelation->rd_rel->reltuples = 1;
+    indexRelation->rd_rel->relexpires = 0;
+    indexRelation->rd_rel->relpreserved = 0;
+    indexRelation->rd_rel->relarch = 'n';
+}
+
+/* ----------------------------------------------------------------
+ * UpdateRelationRelation
+ * ----------------------------------------------------------------
+ */
+static Oid
+UpdateRelationRelation(Relation indexRelation)
+{
+    Relation   classRel;
+    HeapTuple  classTuple;
+    Oid        newOid;
+    Relation   indexDescs[Num_pg_class_indices];
+    
+    classRel = heap_openr(RelationRelationName);
+    
+    /*
+     * Wrap the index's pg_class form in a heap tuple.
+     * XXX Natts_pg_class_fixed is a hack - see pg_class.h
+     */
+    classTuple = heap_addheader(Natts_pg_class_fixed,
+                                sizeof(*indexRelation->rd_rel),
+                                (char *) indexRelation->rd_rel);
+    
+    /*
+     * Force the tuple's oid to the id already held by the relcache
+     * entry for the index, so the two agree, and insert it.
+     */
+    classTuple->t_oid = indexRelation->rd_id;
+    heap_insert(classRel, classTuple);
+    
+    /*
+     * In normal processing the pg_class indices are updated right
+     * away.  Bootstrap (initdb) skips this because the indices are
+     * made correct just before it exits.
+     */
+    if (!IsBootstrapProcessingMode()) {
+        CatalogOpenIndices(Num_pg_class_indices, Name_pg_class_indices,
+                           indexDescs);
+        CatalogIndexInsert(indexDescs, Num_pg_class_indices,
+                           classRel, classTuple);
+        CatalogCloseIndices(Num_pg_class_indices, indexDescs);
+    }
+    
+    /* remember the oid before the tuple is freed */
+    newOid = classTuple->t_oid;
+    pfree(classTuple);
+    heap_close(classRel);
+    
+    return newOid;
+}
+
+/* ----------------------------------------------------------------
+ * InitializeAttributeOids
+ * ----------------------------------------------------------------
+ */
+static void
+InitializeAttributeOids(Relation indexRelation,
+                        int numatts,
+                        Oid indexoid)
+{
+    TupleDesc  desc = RelationGetTupleDescriptor(indexRelation);
+    int        attpos = 0;
+    
+    /* stamp indexoid into attrelid of the first numatts attributes */
+    while (attpos < numatts) {
+        desc->attrs[attpos]->attrelid = indexoid;
+        attpos++;
+    }
+}
+
+/* ----------------------------------------------------------------
+ * AppendAttributeTuples
+ *
+ *     Insert one tuple into the attribute catalog for each of the
+ *     first 'numatts' attributes described by the index relation's
+ *     tuple descriptor.  The tuple for attribute 1 is built from
+ *     attrs[0]; each following tuple is produced by overwriting the
+ *     previous tuple's data area with attrs[i] and then replacing its
+ *     attnum field with i + 1.
+ *
+ *     When not in bootstrap mode and the attribute catalog is marked
+ *     as having indices, each inserted tuple is also entered into the
+ *     catalog indices.
+ *
+ *     XXX For now, only change the ATTNUM attribute value
+ * ----------------------------------------------------------------
+ */
+static void
+AppendAttributeTuples(Relation indexRelation, int numatts)
+{
+    Relation    pg_attribute;
+    HeapTuple   tuple;
+    HeapTuple   newtuple;
+    bool        hasind;
+    Relation    idescs[Num_pg_attr_indices];
+
+    Datum       value[ Natts_pg_attribute ];
+    char        nullv[ Natts_pg_attribute ];
+    char        replace[ Natts_pg_attribute ];
+
+    TupleDesc   indexTupDesc;
+    int         i;
+
+    /* ----------------
+     * open the attribute relation
+     *  XXX ADD INDEXING
+     * ----------------
+     */
+    pg_attribute = heap_openr(AttributeRelationName);
+
+    /* ----------------
+     * initialize nullv[] and replace[]: ' ' everywhere means
+     * "not null" / "do not replace".  value[] is only read for
+     * slots whose replace[] entry is 'r', so it is left unset.
+     * ----------------
+     */
+    (void) memset(nullv, ' ', Natts_pg_attribute);
+    (void) memset(replace, ' ', Natts_pg_attribute);
+
+    /* ----------------
+     *  create the first attribute tuple.
+     * XXX For now, only change the ATTNUM attribute value
+     * ----------------
+     */
+    replace[ Anum_pg_attribute_attnum - 1 ] = 'r';
+
+    value[ Anum_pg_attribute_attnum - 1 ] = Int16GetDatum(1);
+
+    tuple = heap_addheader(Natts_pg_attribute,
+                           sizeof *(indexRelation->rd_att->attrs[0]),
+                           (char *)(indexRelation->rd_att->attrs[0]));
+
+    hasind = false;
+    if (!IsBootstrapProcessingMode() && pg_attribute->rd_rel->relhasindex) {
+        hasind = true;
+        CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, idescs);
+    }
+
+    /* ----------------
+     *  insert the first attribute tuple.
+     *
+     *  NOTE(review): the header tuple returned by heap_addheader is
+     *  overwritten here without being freed; if heap_modifytuple
+     *  always returns a fresh copy (as the loop below assumes) this
+     *  leaks one tuple until the memory context is reset -- confirm.
+     * ----------------
+     */
+    tuple = heap_modifytuple(tuple,
+                             InvalidBuffer,
+                             pg_attribute,
+                             value,
+                             nullv,
+                             replace);
+
+    heap_insert(pg_attribute, tuple);
+    if (hasind)
+        CatalogIndexInsert(idescs, Num_pg_attr_indices, pg_attribute, tuple);
+
+    /* ----------------
+     * now we use the information in the index tuple
+     *  descriptor to form the remaining attribute tuples.
+     * ----------------
+     */
+    indexTupDesc = RelationGetTupleDescriptor(indexRelation);
+
+    for (i = 1; i < numatts; i += 1) {
+        /* ----------------
+         *  process the remaining attributes...
+         *
+         *  Copy the whole attribute form.  The size must be that of
+         *  the structure attrs[i] points at (the same size used for
+         *  the first tuple above), not the size of a form typedef
+         *  that may merely be a pointer type.
+         * ----------------
+         */
+        memmove(GETSTRUCT(tuple),
+                (char *)indexTupDesc->attrs[i],
+                sizeof *(indexTupDesc->attrs[i]));
+
+        value[ Anum_pg_attribute_attnum - 1 ] = Int16GetDatum(i + 1);
+
+        newtuple = heap_modifytuple(tuple,
+                                    InvalidBuffer,
+                                    pg_attribute,
+                                    value,
+                                    nullv,
+                                    replace);
+
+        heap_insert(pg_attribute, newtuple);
+        if (hasind)
+            CatalogIndexInsert(idescs, Num_pg_attr_indices, pg_attribute, newtuple);
+
+        /* ----------------
+         *  heap_modifytuple returns a new copy of a tuple
+         *  so we free the original and use the copy..
+         * ----------------
+         */
+        pfree(tuple);
+        tuple = newtuple;
+    }
+
+    /* ----------------
+     * close the attribute relation and free the tuple
+     * ----------------
+     */
+    heap_close(pg_attribute);
+
+    if (hasind)
+        CatalogCloseIndices(Num_pg_attr_indices, idescs);
+
+    pfree(tuple);
+}
+
+/* ----------------------------------------------------------------
+ * UpdateIndexRelation
+ *
+ *     Build an index tuple form describing the new index (heap oid,
+ *     index oid, optional index procedure, key attribute numbers,
+ *     operator classes and the predicate in text form) and insert it
+ *     into the index catalog.
+ * ----------------------------------------------------------------
+ */
+static void
+UpdateIndexRelation(Oid indexoid,
+                    Oid heapoid,
+                    FuncIndexInfo *funcInfo,
+                    int natts,
+                    AttrNumber attNums[],
+                    Oid classOids[],
+                    Node *predicate)
+{
+    IndexTupleForm  indexForm;
+    text           *predText;
+    int             predLen;
+    int             itupLen;
+    Relation        pg_index;
+    HeapTuple       tuple;
+    int             i;
+
+    /*
+     * Turn the predicate into a text value; an absent predicate is
+     * stored as the empty string.
+     */
+    if (predicate == NULL) {
+        predText = (text *)fmgr(F_TEXTIN, "");
+    } else {
+        char   *predString = nodeToString(predicate);
+
+        predText = (text *)fmgr(F_TEXTIN, predString);
+        pfree(predString);
+    }
+
+    /*
+     * The form is allocated large enough to carry the predicate text
+     * in addition to the fixed part of the structure.
+     */
+    predLen = VARSIZE(predText);
+    itupLen = predLen + sizeof(FormData_pg_index);
+    indexForm = (IndexTupleForm) palloc(itupLen);
+
+    memmove((char *)& indexForm->indpred, (char *)predText, predLen);
+
+    /* oid fields */
+    indexForm->indrelid = heapoid;
+    indexForm->indexrelid = indexoid;
+    if (PointerIsValid(funcInfo))
+        indexForm->indproc = FIgetProcOid(funcInfo);
+    else
+        indexForm->indproc = InvalidOid;
+
+    /* start from all-zero key and class arrays */
+    memset((char *)& indexForm->indkey[0], 0, sizeof indexForm->indkey);
+    memset((char *)& indexForm->indclass[0], 0, sizeof indexForm->indclass);
+
+    /* copy index key and op class information */
+    for (i = 0; i < natts; i++) {
+        indexForm->indkey[i] = attNums[i];
+        indexForm->indclass[i] = classOids[i];
+    }
+
+    /* for a functional index, add the remaining attribute arguments */
+    if (PointerIsValid(funcInfo)) {
+        for (i = 1; i < FIgetnArgs(funcInfo); i++)
+            indexForm->indkey[i] = attNums[i];
+    }
+
+    indexForm->indisclustered = '\0';      /* XXX constant */
+    indexForm->indisarchived = '\0';       /* XXX constant */
+
+    /* open the index catalog, wrap the form in a tuple, insert it */
+    pg_index = heap_openr(IndexRelationName);
+
+    tuple = heap_addheader(Natts_pg_index,
+                           itupLen,
+                           (char *)indexForm);
+
+    /* XXX ADD INDEX TUPLES TOO */
+    heap_insert(pg_index, tuple);
+
+    /* close the relation and release everything allocated here */
+    heap_close(pg_index);
+    pfree(predText);
+    pfree(indexForm);
+    pfree(tuple);
+}
+
+/* ----------------------------------------------------------------
+ * UpdateIndexPredicate
+ *
+ *     Replace the stored predicate of the index identified by
+ *     'indexoid'.  The new predicate is the CNF form of
+ *     (predicate OR oldPred); when 'predicate' is NULL the stored
+ *     predicate becomes the empty string.
+ *
+ *     Raises elog(WARN) if no index catalog tuple exists for
+ *     'indexoid'.
+ * ----------------------------------------------------------------
+ */
+void
+UpdateIndexPredicate(Oid indexoid, Node *oldPred, Node *predicate)
+{
+    Node           *newPred;
+    char           *predString;
+    text           *predText;
+    Relation        pg_index;
+    HeapTuple       tuple;
+    HeapTuple       newtup;
+    ScanKeyData     entry;
+    HeapScanDesc    scan;
+    Buffer          buffer;
+    int             i;
+    Datum           values[Natts_pg_index];
+    char            nulls[Natts_pg_index];
+    char            replace[Natts_pg_index];
+
+    /*
+     * Construct newPred as a CNF expression equivalent to the OR of the
+     * original partial-index predicate ("oldPred") and the extension
+     * predicate ("predicate").
+     *
+     * This should really try to process the result to change things like
+     * "a>2 OR a>1" to simply "a>1", but for now all it does is make sure
+     * that if the extension predicate is NULL (i.e., it is being extended
+     * to be a complete index), then newPred will be NULL - in effect,
+     * changing "a>2 OR TRUE" to "TRUE". --Nels, Jan '93
+     */
+    newPred = NULL;
+    if (predicate != NULL) {
+        newPred =
+            (Node*)make_orclause(lcons(make_andclause((List*)predicate),
+                                       lcons(make_andclause((List*)oldPred),
+                                             NIL)));
+        newPred = (Node*)cnfify((Expr*)newPred, true);
+    }
+
+    /* translate the index-predicate to string form */
+    if (newPred != NULL) {
+        predString = nodeToString(newPred);
+        predText = (text *)fmgr(F_TEXTIN, predString);
+        pfree(predString);
+    } else {
+        predText = (text *)fmgr(F_TEXTIN, "");
+    }
+
+    /* open the index system catalog relation */
+    pg_index = heap_openr(IndexRelationName);
+
+    ScanKeyEntryInitialize(&entry, 0x0, Anum_pg_index_indexrelid,
+                           ObjectIdEqualRegProcedure,
+                           ObjectIdGetDatum(indexoid));
+
+    scan = heap_beginscan(pg_index, 0, NowTimeQual, 1, &entry);
+    tuple = heap_getnext(scan, 0, &buffer);
+    heap_endscan(scan);
+
+    /*
+     * Without this check a missing catalog entry would be dereferenced
+     * by heap_attisnull() below.  Release what we hold before raising
+     * the error, in the same way UpdateStats closes its relation
+     * before elog(WARN).
+     *
+     * NOTE(review): 'tuple' and 'buffer' are used after heap_endscan();
+     * whether the buffer is still pinned at that point cannot be told
+     * from here -- confirm against the heap access method.
+     */
+    if (!HeapTupleIsValid(tuple)) {
+        heap_close(pg_index);
+        pfree(predText);
+        elog(WARN, "UpdateIndexPredicate: no pg_index tuple for index id %d",
+             indexoid);
+    }
+
+    /* keep every field as it is, except the predicate */
+    for (i = 0; i < Natts_pg_index; i++) {
+        nulls[i] = heap_attisnull(tuple, i+1) ? 'n' : ' ';
+        replace[i] = ' ';
+        values[i] = (Datum) NULL;
+    }
+
+    replace[Anum_pg_index_indpred - 1] = 'r';
+    values[Anum_pg_index_indpred - 1] = (Datum) predText;
+
+    newtup = heap_modifytuple(tuple, buffer, pg_index, values, nulls, replace);
+
+    (void) heap_replace(pg_index, &(newtup->t_ctid), newtup);
+
+    heap_close(pg_index);
+    pfree(predText);
+}
+
+/* ----------------------------------------------------------------
+ * InitIndexStrategy
+ *
+ *     Allocate the index strategy structure (and, when the access
+ *     method declares support procedures, the support procedure
+ *     array) for a new index, fill them in via IndexSupportInitialize
+ *     and attach them to the index relation descriptor.
+ * ----------------------------------------------------------------
+ */
+void
+InitIndexStrategy(int numatts,
+                  Relation indexRelation,
+                  Oid accessMethodObjectId)
+{
+    IndexStrategy   newStrategy;
+    RegProcedure   *supportProcs = (RegProcedure *) NULL;
+    uint16          nStrategies;
+    uint16          nSupport;
+    Oid             indexRelid;
+    Size            strategyBytes;
+    extern GlobalMemory CacheCxt;
+
+    /* pull what we need out of the index relation descriptor */
+    indexRelid = indexRelation->rd_att->attrs[0]->attrelid;
+    nStrategies = indexRelation->rd_am->amstrategies;
+    nSupport = indexRelation->rd_am->amsupport;
+
+    /* size of the strategy structure for this many attributes */
+    strategyBytes = AttributeNumberGetIndexStrategySize(numatts, nStrategies);
+
+    /*
+     * The index strategy has to be allocated in the same context as
+     * the relation descriptor cache or else it will be lost at the
+     * end of the transaction.  Create that context on first use.
+     */
+    if (!CacheCxt)
+        CacheCxt = CreateGlobalMemory("Cache");
+
+    newStrategy = (IndexStrategy)
+        MemoryContextAlloc((MemoryContext)CacheCxt, strategyBytes);
+
+    /* the support array is only needed when the AM has support procs */
+    if (nSupport > 0) {
+        Size    supportBytes = numatts * (nSupport * sizeof(RegProcedure));
+
+        supportProcs = (RegProcedure *)
+            MemoryContextAlloc((MemoryContext)CacheCxt, supportBytes);
+    }
+
+    /*
+     * Fill in the index strategy structure with information from the
+     * catalogs.  Heap override mode is switched on around the call so
+     * that the correct catalog information is seen even though our
+     * transaction has not yet committed.
+     */
+    setheapoverride(1);
+
+    IndexSupportInitialize(newStrategy, supportProcs,
+                           indexRelid, accessMethodObjectId,
+                           nStrategies, nSupport, numatts);
+
+    setheapoverride(0);
+
+    /* store the strategy information in the index reldesc */
+    RelationSetIndexSupport(indexRelation, newStrategy, supportProcs);
+}
+
+
+/* ----------------------------------------------------------------
+ *     index_create
+ *
+ *     Create a new index relation named 'indexRelationName' on the
+ *     heap relation named 'heapRelationName'.
+ *
+ *     heapRelationName     - name of the relation being indexed
+ *     indexRelationName    - name of the index to create
+ *     funcInfo             - functional index description, or an
+ *                            invalid pointer for an ordinary index
+ *     accessMethodObjectId - oid of the access method to use
+ *     numatts              - number of index attributes (must be >= 1)
+ *     attNums              - heap attribute numbers being indexed
+ *     classObjectId        - operator class oid for each attribute
+ *     parameterCount,
+ *     parameter            - passed through unchanged to
+ *                            index_register() / index_build()
+ *     predicate            - partial-index predicate, or NULL
+ *
+ *     The steps are strictly ordered: the catalog rows are written
+ *     first (relation, attribute and index catalogs), then the index
+ *     strategy is initialized, and finally the index is either built
+ *     or, in bootstrap mode, registered for a later build.
+ * ----------------------------------------------------------------
+ */
+void
+index_create(char *heapRelationName,
+        char *indexRelationName,
+        FuncIndexInfo *funcInfo,
+        Oid accessMethodObjectId,
+        int numatts,
+        AttrNumber attNums[],
+        Oid classObjectId[],
+        uint16 parameterCount,
+        Datum parameter[],
+        Node *predicate)
+{
+    Relation       heapRelation;
+    Relation       indexRelation;
+    TupleDesc      indexTupDesc;
+    Oid            heapoid;
+    Oid            indexoid;
+    PredInfo       *predInfo;
+    
+    /* ----------------
+     * check parameters
+     *
+     * NOTE(review): execution is written as if elog(WARN) does not
+     * return here -- confirm.
+     * ----------------
+     */
+    if (numatts < 1)
+   elog(WARN, "must index at least one attribute");
+    
+    /* ----------------
+     *    get heap relation oid and open the heap relation
+     *   XXX ADD INDEXING
+     * ----------------
+     */
+    heapoid = GetHeapRelationOid(heapRelationName, indexRelationName);
+    
+    heapRelation = heap_open(heapoid);
+    
+    /* ----------------
+     * write lock heap to guarantee exclusive access
+     * ---------------- 
+     */
+    
+    RelationSetLockForWrite(heapRelation);
+    
+    /* ----------------
+     *    construct new tuple descriptor
+     *
+     *    a functional index gets its descriptor from the function
+     *    description; an ordinary index derives it from the indexed
+     *    heap attributes.
+     * ----------------
+     */
+    if (PointerIsValid(funcInfo))
+       indexTupDesc = BuildFuncTupleDesc(funcInfo);
+    else
+   indexTupDesc = ConstructTupleDescriptor(heapoid,
+                       heapRelation,
+                       numatts,
+                       attNums);
+    
+    /* ----------------
+     * create the index relation
+     * ----------------
+     */
+    indexRelation = heap_creatr(indexRelationName,
+               DEFAULT_SMGR,
+               indexTupDesc);
+    
+    /* ----------------
+     *    construct the index relation descriptor
+     *
+     *    XXX should have a proper way to create cataloged relations
+     * ----------------
+     */
+    ConstructIndexReldesc(indexRelation, accessMethodObjectId);
+    
+    /* ----------------
+     *    add index to catalogs
+     *    (append RELATION tuple)
+     *
+     *    the value returned is used below as the oid of the new index.
+     * ----------------
+     */
+    indexoid = UpdateRelationRelation(indexRelation);
+    
+    /* ----------------
+     * Now get the index procedure (only relevant for functional indices).
+     *
+     * The procedure is looked up by name, argument count and argument
+     * list; its oid is stored back into funcInfo so that
+     * UpdateIndexRelation() below can record it.
+     * ----------------
+     */
+    
+    if (PointerIsValid(funcInfo))
+   {
+       HeapTuple proc_tup;
+       
+       proc_tup = SearchSysCacheTuple(PRONAME,
+                  PointerGetDatum(FIgetname(funcInfo)),
+                  Int32GetDatum(FIgetnArgs(funcInfo)),
+                  PointerGetDatum(FIgetArglist(funcInfo)),
+                  0);
+       
+       /*
+        * NOTE(review): proc_tup is dereferenced right after this
+        * check, so func_error() is presumably expected not to
+        * return -- confirm.
+        */
+       if (!HeapTupleIsValid(proc_tup)) {
+       func_error("index_create", FIgetname(funcInfo),
+              FIgetnArgs(funcInfo), 
+              (int*) FIgetArglist(funcInfo));
+       }
+       FIgetProcOid(funcInfo) = proc_tup->t_oid;
+   }
+    
+    /* ----------------
+     * now update the object id's of all the attribute
+     *  tuple forms in the index relation's tuple descriptor
+     * ----------------
+     */
+    InitializeAttributeOids(indexRelation, numatts, indexoid);
+    
+    /* ----------------
+     *    append ATTRIBUTE tuples
+     * ----------------
+     */
+    AppendAttributeTuples(indexRelation, numatts);
+    
+    /* ----------------
+     *    update pg_index
+     *    (append INDEX tuple)
+     *
+     *    Note that this stows away a representation of "predicate".
+     *    (Or, could define a rule to maintain the predicate) --Nels, Feb '92
+     * ----------------
+     */
+    UpdateIndexRelation(indexoid, heapoid, funcInfo,
+           numatts, attNums, classObjectId, predicate);
+    
+    /*
+     * Package the predicate for the build step.  oldPred is NULL
+     * because a freshly created index has no earlier predicate.
+     */
+    predInfo = (PredInfo*)palloc(sizeof(PredInfo));
+    predInfo->pred = predicate;
+    predInfo->oldPred = NULL;
+    
+    /* ----------------
+     *    initialize the index strategy
+     * ----------------
+     */
+    InitIndexStrategy(numatts, indexRelation, accessMethodObjectId);
+    
+    /*
+     *  If this is bootstrap (initdb) time, then we don't actually
+     *  fill in the index yet.  We'll be creating more indices and classes
+     *  later, so we delay filling them in until just before we're done
+     *  with bootstrapping.  Otherwise, we call the routine that constructs
+     *  the index.  The heap and index relations are closed by index_build().
+     *
+     *  NOTE(review): in the non-bootstrap path the heap is opened a
+     *  second time by name, replacing the descriptor obtained from
+     *  heap_open() above -- confirm the first reference is released
+     *  somewhere.
+     */
+    if (IsBootstrapProcessingMode()) {
+   index_register(heapRelationName, indexRelationName, numatts, attNums,
+              parameterCount, parameter, funcInfo, predInfo);
+    } else {
+   heapRelation = heap_openr(heapRelationName);
+   index_build(heapRelation, indexRelation, numatts, attNums,
+           parameterCount, parameter, funcInfo, predInfo);
+    }
+}
+
+/* ----------------------------------------------------------------
+ * index_destroy
+ *
+ *     Remove the index identified by 'indexId': delete its tuple from
+ *     the relation catalog, all of its tuples from the attribute
+ *     catalog and its tuple from the index catalog, then unlink the
+ *     index's file.
+ *
+ *     A missing index catalog tuple is reported with a NOTICE and
+ *     otherwise ignored; a failed unlink raises elog(WARN).
+ *
+ * XXX break into modules like index_create
+ * ----------------------------------------------------------------
+ */
+void
+index_destroy(Oid indexId)
+{
+    Relation        indexRelation;
+    Relation        catalogRelation;
+    HeapTuple       tuple;
+    HeapScanDesc    scan;
+    ScanKeyData     entry;
+
+    Assert(OidIsValid(indexId));
+
+    indexRelation = index_open(indexId);
+
+    /* ----------------
+     * fix RELATION relation
+     * ----------------
+     */
+    catalogRelation = heap_openr(RelationRelationName);
+
+    ScanKeyEntryInitialize(&entry, 0x0, ObjectIdAttributeNumber,
+                           ObjectIdEqualRegProcedure,
+                           ObjectIdGetDatum(indexId));
+
+    scan = heap_beginscan(catalogRelation, 0, NowTimeQual, 1, &entry);
+    tuple = heap_getnext(scan, 0, (Buffer *)NULL);
+
+    AssertState(HeapTupleIsValid(tuple));
+
+    heap_delete(catalogRelation, &tuple->t_ctid);
+    heap_endscan(scan);
+    heap_close(catalogRelation);
+
+    /* ----------------
+     * fix ATTRIBUTE relation
+     *
+     * the same scan key is reused; only the attribute number it is
+     * compared against changes.
+     * ----------------
+     */
+    catalogRelation = heap_openr(AttributeRelationName);
+
+    entry.sk_attno = Anum_pg_attribute_attrelid;
+
+    scan = heap_beginscan(catalogRelation, 0, NowTimeQual, 1, &entry);
+
+    while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
+           HeapTupleIsValid(tuple)) {
+
+        heap_delete(catalogRelation, &tuple->t_ctid);
+    }
+    heap_endscan(scan);
+    heap_close(catalogRelation);
+
+    /* ----------------
+     * fix INDEX relation
+     * ----------------
+     */
+    catalogRelation = heap_openr(IndexRelationName);
+
+    entry.sk_attno = Anum_pg_index_indexrelid;
+
+    scan = heap_beginscan(catalogRelation, 0, NowTimeQual, 1,  &entry);
+    tuple = heap_getnext(scan, 0, (Buffer *)NULL);
+    if (HeapTupleIsValid(tuple)) {
+        heap_delete(catalogRelation, &tuple->t_ctid);
+    } else {
+        /*
+         * Only complain: there is nothing to delete, and touching
+         * tuple->t_ctid here would dereference an invalid tuple.
+         */
+        elog(NOTICE, "IndexRelationDestroy: %s's INDEX tuple missing",
+             RelationGetRelationName(indexRelation));
+    }
+    heap_endscan(scan);
+    heap_close(catalogRelation);
+
+    /*
+     * physically remove the file
+     */
+    if (FileNameUnlink(relpath(indexRelation->rd_rel->relname.data)) < 0)
+        elog(WARN, "amdestroyr: unlink: %m");
+
+    index_close(indexRelation);
+}
+
+/* ----------------------------------------------------------------
+ *         index_build support
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * FormIndexDatum
+ *
+ *     For each of the 'numberOfAttributes' index attributes, fetch
+ *     the value with GetIndexValue() and store it in datum[], and
+ *     record in nullv[] whether it is null ('n') or not (' ').
+ *     Both output arrays are indexed by zero-based attribute offset.
+ * ----------------
+ */
+void
+FormIndexDatum(int numberOfAttributes,
+               AttrNumber attributeNumber[],
+               HeapTuple heapTuple,
+               TupleDesc heapDescriptor,
+               Buffer buffer,
+               Datum *datum,
+               char *nullv,
+               FuncIndexInfoPtr fInfo)
+{
+    AttrNumber  attnum;
+
+    for (attnum = 1; attnum <= numberOfAttributes; attnum++) {
+        int     slotno = AttrNumberGetAttrOffset(attnum);
+        bool    attIsNull;
+
+        datum[slotno] = PointerGetDatum(GetIndexValue(heapTuple,
+                                                      heapDescriptor,
+                                                      slotno,
+                                                      attributeNumber,
+                                                      fInfo,
+                                                      &attIsNull,
+                                                      buffer));
+
+        if (attIsNull)
+            nullv[slotno] = 'n';
+        else
+            nullv[slotno] = ' ';
+    }
+}
+
+
+/* ----------------
+ * UpdateStats
+ *
+ *     Record new statistics for relation 'relid' in the relation
+ *     catalog: the number of pages (taken from the relation itself),
+ *     the number of tuples and the has-index flag.  When 'reltuples'
+ *     is 0 an estimate derived from the page count is stored instead.
+ *     The cached relation descriptor is patched in place as well.
+ *
+ *     Raises elog(WARN) if the relation, the catalog, or the
+ *     relation's catalog tuple cannot be found.
+ * ----------------
+ */
+void
+UpdateStats(Oid relid, long reltuples, bool hasindex)
+{
+    Relation        whichRel;
+    Relation        pg_class;
+    HeapScanDesc    pg_class_scan;
+    HeapTuple       htup;
+    HeapTuple       newtup;
+    long            relpages;
+    Buffer          buffer;
+    int             i;
+    Form_pg_class   rd_rel;
+    Relation        idescs[Num_pg_class_indices];
+
+    static ScanKeyData key[1] = {
+        { 0, ObjectIdAttributeNumber, ObjectIdEqualRegProcedure }
+    };
+    Datum           values[Natts_pg_class];
+    char            nulls[Natts_pg_class];
+    char            replace[Natts_pg_class];
+
+    fmgr_info(ObjectIdEqualRegProcedure, (func_ptr *) &key[0].sk_func,
+              &key[0].sk_nargs);
+
+    /* ----------------
+     * This routine handles updates for both the heap and index relation
+     * statistics.  In order to guarantee that we're able to *see* the index
+     * relation tuple, we bump the command counter id here.  The index
+     * relation tuple was created in the current transaction.
+     * ----------------
+     */
+    CommandCounterIncrement();
+
+    /* ----------------
+     * CommandCounterIncrement() flushes invalid cache entries, including
+     * those for the heap and index relations for which we're updating
+     * statistics.  Now that the cache is flushed, it's safe to open the
+     * relation again.  We need the relation open in order to figure out
+     * how many blocks it contains.
+     * ----------------
+     */
+
+    whichRel = RelationIdGetRelation(relid);
+
+    if (!RelationIsValid(whichRel))
+        elog(WARN, "UpdateStats: cannot open relation id %d", relid);
+
+    /* ----------------
+     * Find the RELATION relation tuple for the given relation.
+     * ----------------
+     */
+    pg_class = heap_openr(RelationRelationName);
+    if (! RelationIsValid(pg_class)) {
+        elog(WARN, "UpdateStats: could not open RELATION relation");
+    }
+    key[0].sk_argument = ObjectIdGetDatum(relid);
+
+    pg_class_scan =
+        heap_beginscan(pg_class, 0, NowTimeQual, 1, key);
+
+    if (! HeapScanIsValid(pg_class_scan)) {
+        heap_close(pg_class);
+        elog(WARN, "UpdateStats: cannot scan RELATION relation");
+    }
+
+    htup = heap_getnext(pg_class_scan, 0, &buffer);
+    heap_endscan(pg_class_scan);
+
+    /*
+     * Do not assume the scan found a tuple: without this check a
+     * missing catalog entry would be dereferenced by heap_attisnull()
+     * and GETSTRUCT() below.
+     *
+     * NOTE(review): 'htup' and 'buffer' are used after heap_endscan();
+     * whether the buffer is still pinned at that point cannot be told
+     * from here -- confirm against the heap access method.
+     */
+    if (! HeapTupleIsValid(htup)) {
+        heap_close(pg_class);
+        elog(WARN, "UpdateStats: cannot find RELATION tuple for relation id %d",
+             relid);
+    }
+
+    /* ----------------
+     * update statistics
+     * ----------------
+     */
+    relpages = RelationGetNumberOfBlocks(whichRel);
+
+    /*
+     *  We shouldn't have to do this, but we do...  Modify the reldesc
+     *  in place with the new values so that the cache contains the
+     *  latest copy.
+     *
+     *  NOTE(review): this stores the caller's 'reltuples' before the
+     *  estimate below is applied, so when reltuples is 0 the cached
+     *  descriptor and the catalog tuple end up with different values
+     *  -- confirm whether that is intended.
+     */
+
+    whichRel->rd_rel->relhasindex = hasindex;
+    whichRel->rd_rel->relpages = relpages;
+    whichRel->rd_rel->reltuples = reltuples;
+
+    /* by default keep every field of the existing tuple */
+    for (i = 0; i < Natts_pg_class; i++) {
+        nulls[i] = heap_attisnull(htup, i+1) ? 'n' : ' ';
+        replace[i] = ' ';
+        values[i] = (Datum) NULL;
+    }
+
+    /*
+     * If reltuples wasn't supplied take an educated guess.
+     */
+    if (reltuples == 0)
+        reltuples = relpages*NTUPLES_PER_PAGE(whichRel->rd_rel->relnatts);
+
+    if (IsBootstrapProcessingMode()) {
+
+        /*
+         *  At bootstrap time, we don't need to worry about concurrency
+         *  or visibility of changes, so we cheat and overwrite the
+         *  fields of the tuple we just fetched.
+         */
+
+        rd_rel = (Form_pg_class) GETSTRUCT(htup);
+        rd_rel->relpages = relpages;
+        rd_rel->reltuples = reltuples;
+        rd_rel->relhasindex = hasindex;
+    } else {
+        /*
+         * during normal processing, work harder: build a modified
+         * copy of the tuple, replace the old one with it and enter
+         * the new tuple into the catalog indices.
+         */
+        replace[Anum_pg_class_relpages - 1] = 'r';
+        values[Anum_pg_class_relpages - 1] = (Datum)relpages;
+        replace[Anum_pg_class_reltuples - 1] = 'r';
+        values[Anum_pg_class_reltuples - 1] = (Datum)reltuples;
+        replace[Anum_pg_class_relhasindex - 1] = 'r';
+        values[Anum_pg_class_relhasindex - 1] = CharGetDatum(hasindex);
+
+        newtup = heap_modifytuple(htup, buffer, pg_class, values,
+                                  nulls, replace);
+        (void) heap_replace(pg_class, &(newtup->t_ctid), newtup);
+        CatalogOpenIndices(Num_pg_class_indices, Name_pg_class_indices, idescs);
+        CatalogIndexInsert(idescs, Num_pg_class_indices, pg_class, newtup);
+        CatalogCloseIndices(Num_pg_class_indices, idescs);
+    }
+
+    heap_close(pg_class);
+    heap_close(whichRel);
+}
+
+
+/* -------------------------
+ * FillDummyExprContext
+ *     Prepare a dummy TupleTableSlot / ExprContext pair for use
+ *     with ExecQual: the slot is given the tuple descriptor and
+ *     buffer and marked as not owning its tuple, and the context is
+ *     pointed at the slot as its scan tuple with every other field
+ *     cleared.
+ * -------------------------
+ */
+void
+FillDummyExprContext(ExprContext *econtext,
+                     TupleTableSlot *slot,
+                     TupleDesc tupdesc,
+                     Buffer buffer)
+{
+    /* the slot describes the tuples that will be placed in it */
+    slot->ttc_shouldFree = false;
+    slot->ttc_buffer = buffer;
+    slot->ttc_tupleDescriptor = tupdesc;
+
+    /* the context only knows about a scan tuple */
+    econtext->ecxt_range_table = NULL;
+    econtext->ecxt_param_list_info = NULL;
+    econtext->ecxt_outertuple = NULL;
+    econtext->ecxt_innertuple = NULL;
+    econtext->ecxt_scantuple = slot;
+}
+
+
+/* ----------------
+ * DefaultBuild
+ * ----------------
+ */
+static void
+DefaultBuild(Relation heapRelation,
+        Relation indexRelation,
+        int numberOfAttributes,
+        AttrNumber attributeNumber[],
+        IndexStrategy indexStrategy, /* not used */
+        uint16 parameterCount, /* not used */
+        Datum  parameter[], /* not used */
+        FuncIndexInfoPtr funcInfo,
+        PredInfo *predInfo)
+{
+    HeapScanDesc   scan;
+    HeapTuple      heapTuple;
+    Buffer     buffer;
+    
+    IndexTuple     indexTuple;
+    TupleDesc      heapDescriptor;
+    TupleDesc      indexDescriptor;
+    Datum      *datum;
+    char       *nullv;
+    long       reltuples, indtuples;
+    ExprContext        *econtext;
+    TupleTable     tupleTable;
+    TupleTableSlot *slot;
+    Node       *predicate;
+    Node       *oldPred;
+    
+    InsertIndexResult  insertResult;
+    
+    /* ----------------
+     * more & better checking is needed
+     * ----------------
+     */
+    Assert(OidIsValid(indexRelation->rd_rel->relam));  /* XXX */
+    
+    /* ----------------
+     * get the tuple descriptors from the relations so we know
+     *  how to form the index tuples..
+     * ----------------
+     */
+    heapDescriptor =  RelationGetTupleDescriptor(heapRelation);
+    indexDescriptor = RelationGetTupleDescriptor(indexRelation);
+    
+    /* ----------------
+     * datum and null are arrays in which we collect the index attributes
+     *  when forming a new index tuple.
+     * ----------------
+     */
+    datum = (Datum *) palloc(numberOfAttributes * sizeof *datum);
+    nullv =  (char *)  palloc(numberOfAttributes * sizeof *nullv);
+    
+    /*
+     * If this is a predicate (partial) index, we will need to evaluate the
+     * predicate using ExecQual, which requires the current tuple to be in a
+     * slot of a TupleTable.  In addition, ExecQual must have an ExprContext
+     * referring to that slot.  Here, we initialize dummy TupleTable and
+     * ExprContext objects for this purpose. --Nels, Feb '92
+     */
+
+    predicate = predInfo->pred;
+    oldPred = predInfo->oldPred;
+
+#ifndef OMIT_PARTIAL_INDEX
+    if (predicate != NULL || oldPred != NULL) {
+   tupleTable = ExecCreateTupleTable(1);
+   slot = ExecAllocTableSlot(tupleTable);
+   econtext = makeNode(ExprContext);
+   FillDummyExprContext(econtext, slot, heapDescriptor, buffer);
+    }
+#endif /* OMIT_PARTIAL_INDEX */        
+
+    /* ----------------
+     * Ok, begin our scan of the base relation.
+     * ----------------
+     */
+    scan = heap_beginscan(heapRelation,    /* relation */
+             0,            /* start at end */
+             NowTimeQual,          /* time range */
+             0,            /* number of keys */
+             (ScanKey) NULL);  /* scan key */
+    
+    reltuples = indtuples = 0;
+    
+    /* ----------------
+     * for each tuple in the base relation, we create an index
+     *  tuple and add it to the index relation.  We keep a running
+     *  count of the number of tuples so that we can update pg_class
+     *  with correct statistics when we're done building the index.
+     * ----------------
+     */
+    while (heapTuple = heap_getnext(scan, 0, &buffer),
+      HeapTupleIsValid(heapTuple)) {
+   
+   reltuples++;
+   
+   /*
+    * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+    * this tuple if it was already in the existing partial index
+    */
+   if (oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+       /*SetSlotContents(slot, heapTuple); */
+       slot->val = heapTuple;
+       if (ExecQual((List*)oldPred, econtext) == true) {
+       indtuples++;
+       continue;
+       }
+#endif /* OMIT_PARTIAL_INDEX */        
+   }
+   
+   /* Skip this tuple if it doesn't satisfy the partial-index predicate */
+   if (predicate != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+       /*SetSlotContents(slot, heapTuple); */
+       slot->val = heapTuple;
+       if (ExecQual((List*)predicate, econtext) == false)
+       continue;
+#endif /* OMIT_PARTIAL_INDEX */        
+   }
+   
+   indtuples++;
+   
+   /* ----------------
+    *  FormIndexDatum fills in its datum and null parameters
+    *  with attribute information taken from the given heap tuple.
+    * ----------------
+    */
+   FormIndexDatum(numberOfAttributes,  /* num attributes */
+              attributeNumber,     /* array of att nums to extract */
+              heapTuple,       /* tuple from base relation */
+              heapDescriptor,      /* heap tuple's descriptor */
+              buffer,          /* buffer used in the scan */
+              datum,       /* return: array of attributes */
+              nullv,       /* return: array of char's */
+              funcInfo);
+   
+   indexTuple = index_formtuple(indexDescriptor,
+                    datum,
+                    nullv);
+   
+   indexTuple->t_tid = heapTuple->t_ctid;
+   
+   insertResult = index_insert(indexRelation, indexTuple);
+
+   if (insertResult) pfree(insertResult);
+   pfree(indexTuple);
+    }
+    
+    heap_endscan(scan);
+    
+    if (predicate != NULL || oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+   ExecDestroyTupleTable(tupleTable, false);
+#endif /* OMIT_PARTIAL_INDEX */        
+    }
+    
+    pfree(nullv);
+    pfree(datum);
+    
+    /*
+     *  Okay, now update the reltuples and relpages statistics for both
+     *  the heap relation and the index.  These statistics are used by
+     *  the planner to choose a scan type.  They are maintained generally
+     *  by the vacuum daemon, but we update them here to make the index
+     *  useful as soon as possible.
+     */
+    UpdateStats(heapRelation->rd_id, reltuples, true);
+    UpdateStats(indexRelation->rd_id, indtuples, false);
+    if (oldPred != NULL) {
+   if (indtuples == reltuples) predicate = NULL;
+   UpdateIndexPredicate(indexRelation->rd_id, oldPred, predicate);
+    }
+}
+
+/* ----------------
+ * index_build
+ *
+ *     Fill a newly created index.  If the index's access method
+ *     supplies a build procedure it is invoked through fmgr();
+ *     otherwise DefaultBuild() does the work.  Both receive the same
+ *     argument list.
+ * ----------------
+ */
+void
+index_build(Relation heapRelation,
+            Relation indexRelation,
+            int numberOfAttributes,
+            AttrNumber attributeNumber[],
+            uint16  parameterCount,
+            Datum   parameter[],
+            FuncIndexInfo *funcInfo,
+            PredInfo *predInfo)
+{
+    RegProcedure    buildProc;
+
+    /* sanity checks */
+    Assert(RelationIsValid(indexRelation));
+    Assert(PointerIsValid(indexRelation->rd_am));
+
+    buildProc = indexRelation->rd_am->ambuild;
+
+    /* no access method build procedure: fall back to the default */
+    if (!RegProcedureIsValid(buildProc)) {
+        DefaultBuild(heapRelation,
+                     indexRelation,
+                     numberOfAttributes,
+                     attributeNumber,
+                     RelationGetIndexStrategy(indexRelation),
+                     parameterCount,
+                     parameter,
+                     funcInfo,
+                     predInfo);
+        return;
+    }
+
+    /* use the access method build procedure */
+    (void) fmgr(buildProc,
+                heapRelation,
+                indexRelation,
+                numberOfAttributes,
+                attributeNumber,
+                RelationGetIndexStrategy(indexRelation),
+                parameterCount,
+                parameter,
+                funcInfo,
+                predInfo);
+}
+
+
diff --git a/src/backend/catalog/index.h b/src/backend/catalog/index.h

new file mode 100644 (file)

index 0000000..1734f86
--- /dev/null
+++ b/src/backend/catalog/index.h
@@ -0,0 +1,59 @@
+/*-------------------------------------------------------------------------
+ *
+ * index.h--
+ *    prototypes for index.c.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: index.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef    INDEX_H 
+#define INDEX_H
+
+#include "access/funcindex.h"
+#include "access/itup.h"
+#include "nodes/execnodes.h"
+
+
+extern Form_pg_am
+AccessMethodObjectIdGetAccessMethodTupleForm(Oid accessMethodObjectId);
+
+extern void
+UpdateIndexPredicate(Oid indexoid, Node *oldPred, Node *predicate);
+
+extern void InitIndexStrategy(int numatts,
+                 Relation indexRelation,
+                 Oid accessMethodObjectId);
+
+extern void index_create(char *heapRelationName, 
+            char* indexRelationName,
+            FuncIndexInfo *funcInfo, 
+            Oid accessMethodObjectId,
+            int numatts, 
+            AttrNumber attNums[],
+            Oid classObjectId[], 
+            uint16 parameterCount,
+            Datum parameter[], 
+            Node *predicate);
+
+extern void index_destroy(Oid indexId);
+
+extern void FormIndexDatum(int numberOfAttributes,
+   AttrNumber attributeNumber[], HeapTuple heapTuple,
+   TupleDesc heapDescriptor, Buffer buffer, Datum *datum,
+   char *nullv, FuncIndexInfoPtr fInfo);
+
+/*
+ * Record a tuple count for the relation identified by relid.  The index
+ * build code in index.c calls this once for the heap (hasindex = true)
+ * and once for the index itself (hasindex = false).
+ */
+extern void UpdateStats(Oid relid, long reltuples, bool hasindex);
+
+/* NOTE(review): defined in index.c, outside this header -- see there for semantics. */
+extern void FillDummyExprContext(ExprContext *econtext, TupleTableSlot *slot,
+             TupleDesc tupdesc, Buffer buffer);
+
+/*
+ * Build an index: uses the access method's ambuild procedure when one is
+ * registered for indexRelation, and the default build routine otherwise.
+ */
+extern void index_build(Relation heapRelation, Relation indexRelation,
+   int numberOfAttributes, AttrNumber attributeNumber[],
+   uint16 parameterCount, Datum parameter[], FuncIndexInfo *funcInfo,
+   PredInfo *predInfo);
+
+#endif /* INDEX_H */
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c

new file mode 100644 (file)

index 0000000..74bf48a
--- /dev/null
+++ b/src/backend/catalog/indexing.c
@@ -0,0 +1,561 @@
+/*-------------------------------------------------------------------------
+ *
+ * indexing.c--
+ *    This file contains routines to support indices defined on system
+ *    catalogs.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/indexing.c,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/elog.h"
+#include "utils/oidcompos.h"
+#include "utils/palloc.h"
+#include "access/htup.h"
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/attnum.h"
+#include "access/funcindex.h"
+#include "access/skey.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "nodes/execnodes.h"
+#include "catalog/catalog.h"
+#include "catalog/catname.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_attribute.h"
+#include "utils/syscache.h"
+#include "catalog/indexing.h"
+#include "catalog/index.h"
+
+/*
+ * Names of indices on the following system catalogs:
+ *
+ * pg_attribute
+ * pg_proc
+ * pg_type
+ * pg_naming
+ * pg_class
+ */
+/*
+static NameData    AttributeNameIndexData = { "pg_attnameind" };
+static NameData    AttributeNumIndexData  = { "pg_attnumind" };
+static NameData AttributeRelidIndexData= { "pg_attrelidind" };
+static NameData    ProcedureNameIndexData = { "pg_procnameind" };
+static NameData    ProcedureOidIndexData  = { "pg_procidind" };
+static NameData    ProcedureSrcIndexData  = { "pg_procsrcind" };
+static NameData    TypeNameIndexData      = { "pg_typenameind" };
+static NameData    TypeOidIndexData       = { "pg_typeidind" };
+static NameData ClassNameIndexData     = { "pg_classnameind" };
+static NameData ClassOidIndexData      = { "pg_classoidind" };
+
+Name   AttributeNameIndex = &AttributeNameIndexData;
+Name   AttributeNumIndex  = &AttributeNumIndexData;
+Name   AttributeRelidIndex= &AttributeRelidIndexData;
+Name   ProcedureNameIndex = &ProcedureNameIndexData;
+Name   ProcedureOidIndex  = &ProcedureOidIndexData;
+Name   ProcedureSrcIndex  = &ProcedureSrcIndexData;
+Name   TypeNameIndex      = &TypeNameIndexData;
+Name   TypeOidIndex       = &TypeOidIndexData;
+Name   ClassNameIndex     = &ClassNameIndexData;
+Name   ClassOidIndex      = &ClassOidIndexData;
+char *Name_pg_attr_indices[Num_pg_attr_indices] = {AttributeNameIndexData.data,
+                          AttributeNumIndexData.data,
+                          AttributeRelidIndexData.data};
+char *Name_pg_proc_indices[Num_pg_proc_indices] = {ProcedureNameIndexData.data,
+                          ProcedureOidIndexData.data,
+                          ProcedureSrcIndexData.data};char *Name_pg_type_indices[Num_pg_type_indices] = {TypeNameIndexData.data,
+                          TypeOidIndexData.data};
+char *Name_pg_class_indices[Num_pg_class_indices]= {ClassNameIndexData.data,
+                          ClassOidIndexData.data};
+*/
+
+/*
+ * Per-catalog tables of index names.  The entries are the string macros
+ * from catalog/indexing.h and each table is sized by the matching
+ * Num_pg_*_indices constant.
+ */
+char *Name_pg_attr_indices[Num_pg_attr_indices] = {
+    AttributeNameIndex,
+    AttributeNumIndex,
+    AttributeRelidIndex
+};
+
+char *Name_pg_proc_indices[Num_pg_proc_indices] = {
+    ProcedureNameIndex,
+    ProcedureOidIndex,
+    ProcedureSrcIndex
+};
+
+char *Name_pg_type_indices[Num_pg_type_indices] = {
+    TypeNameIndex,
+    TypeOidIndex
+};
+
+char *Name_pg_class_indices[Num_pg_class_indices] = {
+    ClassNameIndex,
+    ClassOidIndex
+};
+
+
+static HeapTuple CatalogIndexFetchTuple(Relation heapRelation,
+                   Relation idesc,
+                   ScanKey skey);
+
+
+/*
+ * CatalogOpenIndices
+ *
+ * Changes (appends) to catalogs can happen, and do happen, at various
+ * places throughout the code.  This generic routine opens all of the
+ * indices defined on a given catalog and returns the relation
+ * descriptors associated with them.
+ *
+ *  nIndices - number of entries in names[] and idescs[]
+ *  names    - index relation names to open
+ *  idescs   - output: idescs[k] receives the descriptor for names[k]
+ */
+void
+CatalogOpenIndices(int nIndices, char *names[], Relation idescs[])
+{
+    int slot = 0;
+
+    while (slot < nIndices) {
+        idescs[slot] = index_openr(names[slot]);
+        slot++;
+    }
+}
+
+/*
+ * CatalogCloseIndices
+ *
+ * Inverse of CatalogOpenIndices(): closes each of the first nIndices
+ * index descriptors in idescs[], in array order.
+ */
+void
+CatalogCloseIndices(int nIndices, Relation *idescs)
+{
+    int slot = 0;
+
+    while (slot < nIndices) {
+        index_close(idescs[slot]);
+        slot++;
+    }
+}
+
+
+/*
+ * CatalogIndexInsert
+ *
+ * For the same reasons outlined above CatalogOpenIndices() we need a
+ * routine that takes a new catalog tuple and inserts an associated index
+ * tuple into each catalog index.
+ *
+ *  idescs       - open index relation descriptors
+ *  nIndices     - number of entries in idescs[]
+ *  heapRelation - the catalog the tuple belongs to
+ *  heapTuple    - the catalog tuple to be indexed; its t_ctid is copied
+ *                 into every index tuple formed here
+ */
+void
+CatalogIndexInsert(Relation *idescs,
+          int nIndices,
+          Relation heapRelation,
+          HeapTuple heapTuple)
+{
+    HeapTuple pgIndexTup;
+    TupleDesc heapDescriptor;
+    IndexTupleForm pgIndexP;
+    IndexTuple newIndxTup;
+    /*
+     * One slot per possible key, sized like nulls[] below: both buffers
+     * are handed to FormIndexDatum together with the key count, so they
+     * must be able to hold the same number of entries.
+     */
+    Datum datum[INDEX_MAX_KEYS];
+    int natts;
+    AttrNumber *attnumP;
+    FuncIndexInfo finfo, *finfoP;
+    char nulls[INDEX_MAX_KEYS];
+    int i;
+
+    heapDescriptor = RelationGetTupleDescriptor(heapRelation);
+
+    for (i = 0; i < nIndices; i++)
+    {
+        TupleDesc     indexDescriptor;
+        InsertIndexResult indexRes;
+
+        indexDescriptor = RelationGetTupleDescriptor(idescs[i]);
+        pgIndexTup = SearchSysCacheTuple(INDEXRELID,
+                                         Int32GetDatum(idescs[i]->rd_id),
+                                         0,0,0);
+        Assert(pgIndexTup);
+        pgIndexP = (IndexTupleForm)GETSTRUCT(pgIndexTup);
+
+        /*
+         * Compute the number of attributes we are indexing upon.
+         * very important - can't assume one if this is a functional
+         * index.  The scan is capped at INDEX_MAX_KEYS so that a key
+         * vector with no InvalidAttrNumber terminator cannot send us
+         * past the capacity of datum[] and nulls[].
+         */
+        for (attnumP = (&pgIndexP->indkey[0]), natts = 0;
+             natts < INDEX_MAX_KEYS && *attnumP != InvalidAttrNumber;
+             attnumP++, natts++)
+            ;
+
+        if (pgIndexP->indproc != InvalidOid)
+        {
+            /*
+             * Functional index: the key columns counted above become
+             * the function's arguments and the index has a single key.
+             */
+            FIgetnArgs(&finfo) = natts;
+            natts = 1;
+            FIgetProcOid(&finfo) = pgIndexP->indproc;
+            *(FIgetname(&finfo)) = '\0';
+            finfoP = &finfo;
+        }
+        else
+            finfoP = (FuncIndexInfo *)NULL;
+
+        FormIndexDatum(natts,
+                       (AttrNumber *)&pgIndexP->indkey[0],
+                       heapTuple,
+                       heapDescriptor,
+                       InvalidBuffer,
+                       datum,
+                       nulls,
+                       finfoP);
+
+        newIndxTup = (IndexTuple)index_formtuple(indexDescriptor,
+                                                 datum, nulls);
+        Assert(newIndxTup);
+        /*
+         * Doing this structure assignment makes me quake in my boots when I
+         * think about portability.
+         */
+        newIndxTup->t_tid = heapTuple->t_ctid;
+
+        indexRes = index_insert(idescs[i], newIndxTup);
+        if (indexRes) pfree(indexRes);
+    }
+}
+
+/*
+ * CatalogHasIndex
+ *
+ * This is needed at initialization when reldescs for some of the crucial
+ * system catalogs are created and nailed into the cache.
+ *
+ * Returns false while bootstrapping.  In init processing mode the answer
+ * comes from the IndexedCatalogNames table; otherwise it is the
+ * relhasindex field of the pg_class tuple for catId (false, with a
+ * NOTICE, if no such tuple is found).
+ */
+bool
+CatalogHasIndex(char *catName, Oid catId)
+{
+    Relation    classRel;
+    HeapTuple   classTup;
+    Form_pg_class classForm;
+    int idx;
+
+    Assert(IsSystemRelationName(catName));
+
+    /*
+     * If we're bootstrapping we don't have pg_class (or any indices).
+     */
+    if (IsBootstrapProcessingMode())
+        return false;
+
+    if (IsInitProcessingMode()) {
+        /* look the name up in the NULL-terminated table of indexed catalogs */
+        idx = 0;
+        while (IndexedCatalogNames[idx] != NULL) {
+            if (strcmp(IndexedCatalogNames[idx], catName) == 0)
+                return (true);
+            idx++;
+        }
+        return (false);
+    }
+
+    /* normal processing: consult the catalog's pg_class entry */
+    classRel = heap_openr(RelationRelationName);
+    classTup = ClassOidIndexScan(classRel, catId);
+    heap_close(classRel);
+
+    if (HeapTupleIsValid(classTup)) {
+        classForm = (Form_pg_class)GETSTRUCT(classTup);
+        return (classForm->relhasindex);
+    }
+
+    elog(NOTICE, "CatalogHasIndex: no relation with oid %d", catId);
+    return false;
+}
+
+/*
+ *  CatalogIndexFetchTuple() -- Get a tuple that satisfies a scan key
+ *             from a catalog relation.
+ *
+ * The index may contain pointers to dead tuples, so index entries are
+ * fetched one after another until the heap yields a valid tuple or the
+ * scan is exhausted.  A valid tuple is returned as a copy (its buffer
+ * is released here); otherwise the result is not a valid tuple.
+ */
+static HeapTuple
+CatalogIndexFetchTuple(Relation heapRelation,
+              Relation idesc,
+              ScanKey skey)
+{
+    IndexScanDesc scan;
+    RetrieveIndexResult hit;
+    HeapTuple result = (HeapTuple)NULL;
+    Buffer buf;
+
+    scan = index_beginscan(idesc, false, 1, skey);
+
+    for (;;) {
+        hit = index_getnext(scan, ForwardScanDirection);
+        if (!hit)
+            break;          /* no more index entries */
+
+        result = heap_fetch(heapRelation, NowTimeQual,
+                            &hit->heap_iptr, &buf);
+        pfree(hit);
+
+        if (HeapTupleIsValid(result))
+            break;          /* found a live tuple */
+    }
+
+    if (HeapTupleIsValid(result)) {
+        /* hand back a private copy so the buffer can be released */
+        result = heap_copytuple(result);
+        ReleaseBuffer(buf);
+    }
+
+    /* end the scan, then free the descriptor and its private state */
+    index_endscan(scan);
+    if (scan->opaque)
+        pfree(scan->opaque);
+    pfree(scan);
+    return (result);
+}
+
+/*
+ * The remainder of the file is for individual index scan routines.  Each
+ * index should be scanned according to how it was defined during bootstrap
+ * (that is, functional or normal) and what arguments the cache lookup
+ * requires.  Each routine returns the heap tuple that qualifies.
+ */
+/*
+ * AttributeNameIndexScan
+ *
+ * Look up a pg_attribute tuple through the AttributeNameIndex index,
+ * using a composite (relid, attname) key built with mkoidname().
+ */
+HeapTuple
+AttributeNameIndexScan(Relation heapRelation,
+              Oid relid,
+              char *attname)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    OidName compositeKey;
+    HeapTuple result;
+
+    /* build the composite search value; it is freed again below */
+    compositeKey = mkoidname(relid, attname);
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)OidNameEqRegProcedure,
+                           (Datum)compositeKey);
+
+    indexRel = index_openr(AttributeNameIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    pfree(compositeKey);
+
+    return result;
+}
+
+/*
+ * AttributeNumIndexScan
+ *
+ * Look up a pg_attribute tuple through the AttributeNumIndex index,
+ * using a composite (relid, attnum) key built with mkoidint2().
+ */
+HeapTuple
+AttributeNumIndexScan(Relation heapRelation,
+             Oid relid,
+             AttrNumber attnum)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    OidInt2 compositeKey;
+    HeapTuple result;
+
+    /* build the composite search value; it is freed again below */
+    compositeKey = mkoidint2(relid, (uint16)attnum);
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)OidInt2EqRegProcedure,
+                           (Datum)compositeKey);
+
+    indexRel = index_openr(AttributeNumIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    pfree(compositeKey);
+
+    return result;
+}
+
+/*
+ * ProcedureOidIndexScan
+ *
+ * Look up a pg_proc tuple by procedure oid through ProcedureOidIndex.
+ */
+HeapTuple
+ProcedureOidIndexScan(Relation heapRelation, Oid procId)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    HeapTuple result;
+
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)ObjectIdEqualRegProcedure,
+                           (Datum)procId);
+
+    indexRel = index_openr(ProcedureOidIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    return result;
+}
+
+/*
+ * ProcedureNameIndexScan
+ *
+ * Look up a pg_proc tuple by name through ProcedureNameIndex, then
+ * filter the candidates by argument count and argument types, since
+ * the index only covers the name.
+ *
+ *  heapRelation - the relation the index entries point into
+ *  procName     - procedure name used as the index key
+ *  nargs        - required value of pronargs
+ *  argTypes     - argument type vector compared with oid8eq()
+ *
+ * Returns a copy of the first matching tuple, or NULL when the index
+ * scan is exhausted without a match.
+ */
+HeapTuple
+ProcedureNameIndexScan(Relation heapRelation,
+              char *procName,
+              int nargs,
+              Oid *argTypes)
+{
+    Relation idesc;
+    ScanKeyData skey;
+    HeapTuple tuple;
+    IndexScanDesc sd;
+    RetrieveIndexResult indexRes;
+    Buffer buffer;
+    Form_pg_proc pgProcP;
+    bool bufferUsed = FALSE;
+
+    ScanKeyEntryInitialize(&skey,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)NameEqualRegProcedure,
+                           (Datum)procName);
+
+    idesc = index_openr(ProcedureNameIndex);
+
+    sd = index_beginscan(idesc, false, 1, &skey);
+
+    /*
+     * for now, we do the work usually done by CatalogIndexFetchTuple
+     * by hand, so that we can check that the other keys match.  when
+     * multi-key indices are added, they will be used here.
+     */
+    do {
+        tuple = (HeapTuple)NULL;
+        /* drop the buffer of a candidate rejected on the previous pass */
+        if (bufferUsed) {
+            ReleaseBuffer(buffer);
+            bufferUsed = FALSE;
+        }
+
+        indexRes = index_getnext(sd, ForwardScanDirection);
+        if (indexRes) {
+            ItemPointer iptr;
+
+            iptr = &indexRes->heap_iptr;
+            tuple = heap_fetch(heapRelation, NowTimeQual, iptr, &buffer);
+            pfree(indexRes);
+            if (HeapTupleIsValid(tuple)) {
+                pgProcP = (Form_pg_proc)GETSTRUCT(tuple);
+                bufferUsed = TRUE;
+            }
+        } else
+            break;
+        /* pgProcP is only examined when the tuple is valid (short-circuit) */
+    } while (!HeapTupleIsValid(tuple) ||
+             pgProcP->pronargs != nargs ||
+             !oid8eq(&(pgProcP->proargtypes[0]), argTypes));
+
+    if (HeapTupleIsValid(tuple)) {
+        tuple = heap_copytuple(tuple);
+        ReleaseBuffer(buffer);
+    }
+
+    /*
+     * Tear the scan down the same way CatalogIndexFetchTuple does:
+     * ending the scan is followed by freeing the descriptor and its
+     * private state, which were previously left allocated here.
+     */
+    index_endscan(sd);
+    if (sd->opaque)
+        pfree(sd->opaque);
+    pfree(sd);
+    index_close(idesc);
+
+    return tuple;
+}
+
+/*
+ * ProcedureSrcIndexScan
+ *
+ * Look up a pg_proc tuple by its source text through ProcedureSrcIndex.
+ * Only the first index entry is examined; the result is a copy of the
+ * heap tuple it points to, or an invalid tuple if there is no entry or
+ * the heap fetch fails.
+ *
+ * NOTE(review): unlike CatalogIndexFetchTuple this does not step past
+ * index entries whose heap tuple is no longer valid -- confirm whether
+ * that is intended.
+ * NOTE(review): the scan key names attribute Anum_pg_proc_prosrc, while
+ * every other scan in this file uses attribute 1 of the index --
+ * confirm which numbering the access method expects.
+ */
+HeapTuple
+ProcedureSrcIndexScan(Relation heapRelation, text *procSrc)
+{
+    Relation idesc;
+    IndexScanDesc sd;
+    ScanKeyData skey;
+    RetrieveIndexResult indexRes;
+    HeapTuple tuple;
+    Buffer buffer;
+
+    ScanKeyEntryInitialize(&skey,
+                           (bits16)0x0,
+                           (AttrNumber)Anum_pg_proc_prosrc,
+                           (RegProcedure)TextEqualRegProcedure,
+                           (Datum)procSrc);
+
+    idesc = index_openr(ProcedureSrcIndex);
+    sd = index_beginscan(idesc, false, 1, &skey);
+
+    indexRes = index_getnext(sd, ForwardScanDirection);
+    if (indexRes) {
+        ItemPointer iptr;
+
+        iptr = &indexRes->heap_iptr;
+        tuple = heap_fetch(heapRelation, NowTimeQual, iptr, &buffer);
+        pfree(indexRes);
+    } else
+        tuple = (HeapTuple)NULL;
+
+    if (HeapTupleIsValid(tuple)) {
+        tuple = heap_copytuple(tuple);
+        ReleaseBuffer(buffer);
+    }
+
+    /*
+     * Release everything acquired above.  The scan descriptor is freed
+     * the same way CatalogIndexFetchTuple frees its own, and the index
+     * relation opened with index_openr() is closed again, as in every
+     * other scan routine in this file (it was previously left open).
+     */
+    index_endscan(sd);
+    if (sd->opaque)
+        pfree(sd->opaque);
+    pfree(sd);
+    index_close(idesc);
+
+    return tuple;
+}
+
+/*
+ * TypeOidIndexScan
+ *
+ * Look up a pg_type tuple by type oid through TypeOidIndex.
+ */
+HeapTuple
+TypeOidIndexScan(Relation heapRelation, Oid typeId)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    HeapTuple result;
+
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)ObjectIdEqualRegProcedure,
+                           (Datum)typeId);
+
+    indexRel = index_openr(TypeOidIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    return result;
+}
+
+/*
+ * TypeNameIndexScan
+ *
+ * Look up a pg_type tuple by type name through TypeNameIndex.
+ */
+HeapTuple
+TypeNameIndexScan(Relation heapRelation, char *typeName)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    HeapTuple result;
+
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)NameEqualRegProcedure,
+                           (Datum)typeName);
+
+    indexRel = index_openr(TypeNameIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    return result;
+}
+
+/*
+ * ClassNameIndexScan
+ *
+ * Look up a pg_class tuple by relation name through ClassNameIndex.
+ */
+HeapTuple
+ClassNameIndexScan(Relation heapRelation, char *relName)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    HeapTuple result;
+
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)NameEqualRegProcedure,
+                           (Datum)relName);
+
+    indexRel = index_openr(ClassNameIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    return result;
+}
+
+/*
+ * ClassOidIndexScan
+ *
+ * Look up a pg_class tuple by relation oid through ClassOidIndex.
+ */
+HeapTuple
+ClassOidIndexScan(Relation heapRelation, Oid relId)
+{
+    Relation indexRel;
+    ScanKeyData key;
+    HeapTuple result;
+
+    ScanKeyEntryInitialize(&key,
+                           (bits16)0x0,
+                           (AttrNumber)1,
+                           (RegProcedure)ObjectIdEqualRegProcedure,
+                           (Datum)relId);
+
+    indexRel = index_openr(ClassOidIndex);
+    result = CatalogIndexFetchTuple(heapRelation, indexRel, &key);
+    index_close(indexRel);
+
+    return result;
+}
diff --git a/src/backend/catalog/indexing.h b/src/backend/catalog/indexing.h

new file mode 100644 (file)

index 0000000..c1a83cb
--- /dev/null
+++ b/src/backend/catalog/indexing.h
@@ -0,0 +1,103 @@
+/*-------------------------------------------------------------------------
+ *
+ * indexing.h--
+ *    This include provides some definitions to support indexing 
+ *    on system catalogs
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: indexing.h,v 1.1.1.1 1996/07/09 06:21:15 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef INDEXING_H
+#define INDEXING_H
+
+#include "utils/rel.h"
+
+/*
+ * Number of indices defined on each indexed system catalog
+ * (pg_attribute, pg_proc, pg_type and pg_class)
+ */
+#define Num_pg_attr_indices    3
+#define Num_pg_proc_indices    3
+#define Num_pg_type_indices    2
+#define Num_pg_class_indices   2
+
+
+/*
+ * Names of indices on system catalogs
+ */
+#define AttributeNameIndex "pg_attnameind"
+#define AttributeNumIndex  "pg_attnumind"
+#define AttributeRelidIndex "pg_attrelidind"
+#define ProcedureNameIndex "pg_procnameind"
+#define ProcedureOidIndex  "pg_procidind"
+#define ProcedureSrcIndex  "pg_procsrcind"
+#define TypeNameIndex      "pg_typenameind"
+#define TypeOidIndex       "pg_typeidind"
+#define ClassNameIndex     "pg_classnameind"
+#define ClassOidIndex      "pg_classoidind"
+
+extern char *Name_pg_attr_indices[];
+extern char *Name_pg_proc_indices[];
+extern char *Name_pg_type_indices[];
+extern char *Name_pg_class_indices[];
+
+extern char *IndexedCatalogNames[];
+
+/*
+ * indexing.c prototypes 
+ *
+ * Functions for each index to perform the necessary scan on a cache miss.
+ */
+extern void CatalogOpenIndices(int nIndices, char *names[], Relation idescs[]);
+extern void CatalogCloseIndices(int nIndices, Relation *idescs);
+extern void CatalogIndexInsert(Relation *idescs,
+                  int nIndices,
+                  Relation heapRelation,
+                  HeapTuple heapTuple);
+extern bool CatalogHasIndex(char *catName, Oid catId);
+
+extern HeapTuple AttributeNameIndexScan(Relation heapRelation,
+                   Oid relid,
+                   char *attname);
+
+extern HeapTuple AttributeNumIndexScan(Relation heapRelation,
+                      Oid relid,
+                      AttrNumber attnum);
+extern HeapTuple ProcedureOidIndexScan(Relation heapRelation, Oid procId);
+extern HeapTuple ProcedureNameIndexScan(Relation heapRelation,
+   char *procName, int nargs, Oid *argTypes);
+extern HeapTuple ProcedureSrcIndexScan(Relation heapRelation, text *procSrc);
+extern HeapTuple TypeOidIndexScan(Relation heapRelation, Oid typeId);
+extern HeapTuple TypeNameIndexScan(Relation heapRelation, char *typeName);
+extern HeapTuple ClassNameIndexScan(Relation heapRelation, char *relName);
+extern HeapTuple ClassOidIndexScan(Relation heapRelation, Oid relId);
+
+
+/*
+ * What follows are lines processed by genbki.sh to create the statements
+ * the bootstrap parser will turn into DefineIndex commands.
+ *
+ * The keyword is DECLARE_INDEX every thing after that is just like in a
+ * normal specification of the 'define index' POSTQUEL command.
+ */
+DECLARE_INDEX(pg_attnameind on pg_attribute using btree (mkoidname(attrelid, attname) oidname_ops));
+DECLARE_INDEX(pg_attnumind  on pg_attribute using btree (mkoidint2(attrelid, attnum) oidint2_ops));
+DECLARE_INDEX(pg_attrelidind on pg_attribute using btree (attrelid oid_ops));
+
+DECLARE_INDEX(pg_procidind on pg_proc using btree (Oid oid_ops));
+DECLARE_INDEX(pg_procnameind on pg_proc using btree (proname name_ops));
+DECLARE_INDEX(pg_procsrcind on pg_proc using btree (prosrc text_ops));
+
+DECLARE_INDEX(pg_typeidind on pg_type using btree (Oid oid_ops));
+DECLARE_INDEX(pg_typenameind on pg_type using btree (typname name_ops));
+
+DECLARE_INDEX(pg_classnameind on pg_class using btree (relname name_ops));
+DECLARE_INDEX(pg_classoidind on pg_class using btree (Oid oid_ops));
+
+/* now build indices in the initialization scripts */
+BUILD_INDICES
+
+#endif /* INDEXING_H */
diff --git a/src/backend/catalog/pg_aggregate.c b/src/backend/catalog/pg_aggregate.c

new file mode 100644 (file)

index 0000000..7fe895e
--- /dev/null
+++ b/src/backend/catalog/pg_aggregate.c
@@ -0,0 +1,325 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_aggregate.c--
+ *    routines to support manipulation of the pg_aggregate relation
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/pg_aggregate.c,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "access/htup.h"
+#include "access/tupdesc.h"
+#include "utils/rel.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/builtins.h"
+#include "fmgr.h"
+
+#include "catalog/catname.h"
+#include "utils/syscache.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_aggregate.h"
+
+/* ----------------
+ * AggregateCreate
+ *
+ * Define a new aggregate by inserting one tuple into pg_aggregate.
+ *
+ * aggregates overloading has been added.  Instead of the full
+ * overload support we have for functions, aggregate overloading only
+ * applies to exact basetype matches.  That is, we don't check the
+ * inheritance hierarchy.
+ *
+ *  aggName              name of the new aggregate (required)
+ *  aggtransfn1Name      transition function 1, looked up as
+ *                       fn(transtype1, basetype); may be NULL
+ *  aggtransfn2Name      transition function 2, looked up as
+ *                       fn(transtype2); may be NULL
+ *  aggfinalfnName       final function, looked up as
+ *                       fn(transtype1, transtype2); required if and
+ *                       only if both transition functions are given
+ *  aggbasetypeName      type of the data the aggregate operates on
+ *  aggtransfn1typeName  return type of transition function 1
+ *  aggtransfn2typeName  return type of transition function 2
+ *  agginitval1          initial value for transition state 1, or NULL
+ *  agginitval2          initial value for transition state 2; required
+ *                       whenever transition function 2 is given
+ *
+ * At least one transition function must be supplied.  Every failure is
+ * reported through elog(WARN, ...).
+ *
+ * OLD COMMENTS:
+ * Currently, redefining aggregates using the same name is not
+ * supported.  In such a case, a warning is printed that the
+ * aggregate already exists.  If such is not the case, a new tuple
+ * is created and inserted in the aggregate relation.  The fields
+ * of this tuple are aggregate name, owner id, 2 transition functions
+ * (called aggtransfn1 and aggtransfn2), final function (aggfinalfn),
+ * type of data on which aggtransfn1 operates (aggbasetype), return
+ * types of the two transition functions (aggtranstype1 and
+ * aggtranstype2), final return type (aggfinaltype), and initial values
+ * for the two state transition functions (agginitval1 and agginitval2).
+ * All types and functions must have been defined
+ * prior to defining the aggregate.
+ *
+ * ---------------
+ */
+void
+AggregateCreate(char *aggName,
+       char *aggtransfn1Name,
+       char *aggtransfn2Name,
+       char *aggfinalfnName,
+       char *aggbasetypeName,
+       char *aggtransfn1typeName,
+       char *aggtransfn2typeName,
+       char *agginitval1,
+       char *agginitval2)
+{
+    register int   i;
+    Relation       aggdesc;
+    HeapTuple      tup;
+    char           nulls[Natts_pg_aggregate];
+    Datum          values[Natts_pg_aggregate];
+    Form_pg_proc   proc;
+    Oid            xfn1 = InvalidOid;
+    Oid            xfn2 = InvalidOid;
+    Oid            ffn = InvalidOid;
+    Oid            xbase = InvalidOid;
+    Oid            xret1 = InvalidOid;
+    Oid            xret2 = InvalidOid;
+    Oid            fret = InvalidOid;
+    Oid            fnArgs[8];
+    TupleDesc      tupDesc;
+
+    /* argument type vector used as part of the PRONAME cache key */
+    memset(fnArgs, 0, 8 * sizeof(Oid));
+
+    /* sanity checks */
+    if (!aggName)
+        elog(WARN, "AggregateCreate: no aggregate name supplied");
+
+    if (!aggtransfn1Name && !aggtransfn2Name)
+        elog(WARN, "AggregateCreate: aggregate must have at least one transition function");
+
+    /* resolve the base type */
+    tup = SearchSysCacheTuple(TYPNAME,
+                              PointerGetDatum(aggbasetypeName),
+                              0,0,0);
+    if (!HeapTupleIsValid(tup))
+        elog(WARN, "AggregateCreate: Type '%s' undefined",aggbasetypeName);
+    xbase = tup->t_oid;
+
+    /* transition function 1: fn(transtype1, basetype) returns transtype1 */
+    if (aggtransfn1Name) {
+        tup = SearchSysCacheTuple(TYPNAME,
+                                  PointerGetDatum(aggtransfn1typeName),
+                                  0,0,0);
+        if (!HeapTupleIsValid(tup))
+            elog(WARN, "AggregateCreate: Type '%s' undefined",
+                 aggtransfn1typeName);
+        xret1 = tup->t_oid;
+
+        fnArgs[0] = xret1;
+        fnArgs[1] = xbase;
+        tup = SearchSysCacheTuple(PRONAME,
+                                  PointerGetDatum(aggtransfn1Name),
+                                  Int32GetDatum(2),
+                                  PointerGetDatum(fnArgs),
+                                  0);
+        if (!HeapTupleIsValid(tup))
+            elog(WARN, "AggregateCreate: '%s'('%s', '%s') does not exist",
+                 aggtransfn1Name,  aggtransfn1typeName, aggbasetypeName);
+        if (((Form_pg_proc) GETSTRUCT(tup))->prorettype != xret1)
+            elog(WARN, "AggregateCreate: return type of '%s' is not '%s'",
+                 aggtransfn1Name,
+                 aggtransfn1typeName);
+        xfn1 = tup->t_oid;
+        /* report the function being checked, not the final function */
+        if (!OidIsValid(xfn1) || !OidIsValid(xret1) ||
+            !OidIsValid(xbase))
+            elog(WARN, "AggregateCreate: bogus function '%s'", aggtransfn1Name);
+    }
+
+    /* transition function 2: fn(transtype2) returns transtype2 */
+    if (aggtransfn2Name) {
+        tup = SearchSysCacheTuple(TYPNAME,
+                                  PointerGetDatum(aggtransfn2typeName),
+                                  0,0,0);
+        if (!HeapTupleIsValid(tup))
+            elog(WARN, "AggregateCreate: Type '%s' undefined",
+                 aggtransfn2typeName);
+        xret2 = tup->t_oid;
+
+        fnArgs[0] = xret2;
+        fnArgs[1] = 0;
+        tup = SearchSysCacheTuple(PRONAME,
+                                  PointerGetDatum(aggtransfn2Name),
+                                  Int32GetDatum(1),
+                                  PointerGetDatum(fnArgs),
+                                  0);
+        if (!HeapTupleIsValid(tup))
+            elog(WARN, "AggregateCreate: '%s'('%s') does not exist",
+                 aggtransfn2Name, aggtransfn2typeName);
+        if (((Form_pg_proc) GETSTRUCT(tup))->prorettype != xret2)
+            elog(WARN, "AggregateCreate: return type of '%s' is not '%s'",
+                 aggtransfn2Name, aggtransfn2typeName);
+        xfn2 = tup->t_oid;
+        /* report the function being checked, not the final function */
+        if (!OidIsValid(xfn2) || !OidIsValid(xret2))
+            elog(WARN, "AggregateCreate: bogus function '%s'", aggtransfn2Name);
+    }
+
+    /* an aggregate is identified by its name together with its base type */
+    tup = SearchSysCacheTuple(AGGNAME, PointerGetDatum(aggName),
+                              ObjectIdGetDatum(xbase),
+                              0,0);
+    if (HeapTupleIsValid(tup))
+        elog(WARN,
+             "AggregateCreate: aggregate '%s' with base type '%s' already exists",
+             aggName, aggbasetypeName);
+
+    /* more sanity checks */
+    if (aggtransfn1Name && aggtransfn2Name && !aggfinalfnName)
+        elog(WARN, "AggregateCreate: Aggregate must have final function with both transition functions");
+
+    if ((!aggtransfn1Name || !aggtransfn2Name) && aggfinalfnName)
+        elog(WARN, "AggregateCreate: Aggregate cannot have final function without both transition functions");
+
+    /* final function: fn(transtype1, transtype2) */
+    if (aggfinalfnName) {
+        fnArgs[0] = xret1;
+        fnArgs[1] = xret2;
+        tup = SearchSysCacheTuple(PRONAME,
+                                  PointerGetDatum(aggfinalfnName),
+                                  Int32GetDatum(2),
+                                  PointerGetDatum(fnArgs),
+                                  0);
+        if (!HeapTupleIsValid(tup))
+            elog(WARN, "AggregateCreate: '%s'('%s','%s') does not exist",
+                 aggfinalfnName, aggtransfn1typeName, aggtransfn2typeName);
+        ffn = tup->t_oid;
+        proc = (Form_pg_proc) GETSTRUCT(tup);
+        fret = proc->prorettype;
+        if (!OidIsValid(ffn) || !OidIsValid(fret))
+            elog(WARN, "AggregateCreate: bogus function '%s'", aggfinalfnName);
+    }
+
+    /*
+     * If transition function 2 is defined, it must have an initial value,
+     * whereas transition function 1 does not, which allows max and min
+     * aggregates to return NULL if they are evaluated on empty sets.
+     */
+    if (OidIsValid(xfn2) && !agginitval2)
+        elog(WARN, "AggregateCreate: transition function 2 MUST have an initial value");
+
+    /* initialize nulls and values */
+    for (i = 0; i < Natts_pg_aggregate; i++) {
+        nulls[i] = ' ';
+        values[i] = (Datum)NULL;
+    }
+    values[Anum_pg_aggregate_aggname-1] = PointerGetDatum(aggName);
+    values[Anum_pg_aggregate_aggowner-1] =
+        Int32GetDatum(GetUserId());
+    values[Anum_pg_aggregate_aggtransfn1-1] =
+        ObjectIdGetDatum(xfn1);
+    values[Anum_pg_aggregate_aggtransfn2-1] =
+        ObjectIdGetDatum(xfn2);
+    values[Anum_pg_aggregate_aggfinalfn-1] =
+        ObjectIdGetDatum(ffn);
+
+    values[Anum_pg_aggregate_aggbasetype-1] =
+        ObjectIdGetDatum(xbase);
+
+    /*
+     * With a single transition function, the aggregate's result type is
+     * that function's return type; with both, it is the return type of
+     * the final function.
+     */
+    if (!OidIsValid(xfn1)) {
+        values[Anum_pg_aggregate_aggtranstype1-1] =
+            ObjectIdGetDatum(InvalidOid);
+        values[Anum_pg_aggregate_aggtranstype2-1] =
+            ObjectIdGetDatum(xret2);
+        values[Anum_pg_aggregate_aggfinaltype-1] =
+            ObjectIdGetDatum(xret2);
+    }
+    else if (!OidIsValid(xfn2)) {
+        values[Anum_pg_aggregate_aggtranstype1-1] =
+            ObjectIdGetDatum(xret1);
+        values[Anum_pg_aggregate_aggtranstype2-1] =
+            ObjectIdGetDatum(InvalidOid);
+        values[Anum_pg_aggregate_aggfinaltype-1] =
+            ObjectIdGetDatum(xret1);
+    }
+    else {
+        values[Anum_pg_aggregate_aggtranstype1-1] =
+            ObjectIdGetDatum(xret1);
+        values[Anum_pg_aggregate_aggtranstype2-1] =
+            ObjectIdGetDatum(xret2);
+        values[Anum_pg_aggregate_aggfinaltype-1] =
+            ObjectIdGetDatum(fret);
+    }
+
+    /* initial values are stored as text; a missing one is stored as NULL */
+    if (agginitval1)
+        values[Anum_pg_aggregate_agginitval1-1] = PointerGetDatum(textin(agginitval1));
+    else
+        nulls[Anum_pg_aggregate_agginitval1-1] = 'n';
+
+    if (agginitval2)
+        values[Anum_pg_aggregate_agginitval2-1] = PointerGetDatum(textin(agginitval2));
+    else
+        nulls[Anum_pg_aggregate_agginitval2-1] = 'n';
+
+    if (!RelationIsValid(aggdesc = heap_openr(AggregateRelationName)))
+        elog(WARN, "AggregateCreate: could not open '%s'",
+             AggregateRelationName);
+
+    tupDesc = aggdesc->rd_att;
+    if (!HeapTupleIsValid(tup = heap_formtuple(tupDesc,
+                                               values,
+                                               nulls)))
+        elog(WARN, "AggregateCreate: heap_formtuple failed");
+    if (!OidIsValid(heap_insert(aggdesc, tup)))
+        elog(WARN, "AggregateCreate: heap_insert failed");
+    heap_close(aggdesc);
+
+}
+
+/*
+ * AggNameGetInitVal
+ *
+ * Fetch the initial value of one of an aggregate's transition states.
+ *
+ *  aggName  - name of the aggregate
+ *  basetype - base type oid; with aggName it forms the AGGNAME cache key
+ *  xfuncno  - 1 or 2, selecting agginitval1/aggtranstype1 or
+ *             agginitval2/aggtranstype2
+ *  isNull   - output: set to true when no initial value is stored
+ *
+ * Returns NULL when *isNull is set.  Otherwise the stored text is
+ * converted to a C string and passed to the input function (typinput)
+ * of the selected transition type, and that function's result is
+ * returned.
+ */
+char *
+AggNameGetInitVal(char *aggName, Oid basetype, int xfuncno, bool *isNull)
+{
+    HeapTuple  tup;
+    Relation   aggRel;
+    int        initValAttno;
+    Oid        transtype;
+    text       *textInitVal;
+    char       *strInitVal, *initVal;
+    extern char    *textout();
+
+    Assert(PointerIsValid(aggName));
+    Assert(PointerIsValid(isNull));
+    Assert(xfuncno == 1 || xfuncno == 2);
+
+    /* basetype is an oid, so build the key the way AggregateCreate does */
+    tup = SearchSysCacheTuple(AGGNAME,
+                              PointerGetDatum(aggName),
+                              ObjectIdGetDatum(basetype),
+                              0,0);
+    if (!HeapTupleIsValid(tup))
+        elog(WARN, "AggNameGetInitVal: cache lookup failed for aggregate '%s'",
+             aggName);
+    if (xfuncno == 1) {
+        transtype = ((Form_pg_aggregate) GETSTRUCT(tup))->aggtranstype1;
+        initValAttno = Anum_pg_aggregate_agginitval1;
+    }
+    else if (xfuncno == 2) {
+        transtype = ((Form_pg_aggregate) GETSTRUCT(tup))->aggtranstype2;
+        initValAttno = Anum_pg_aggregate_agginitval2;
+    }
+
+    /* the relation is opened only to obtain its tuple descriptor */
+    aggRel = heap_openr(AggregateRelationName);
+    if (!RelationIsValid(aggRel))
+        /*
+         * plain %s: the former "%-.*s" conversion consumed an int
+         * precision argument that was never supplied
+         */
+        elog(WARN, "AggNameGetInitVal: could not open \"%s\"",
+             AggregateRelationName);
+    /*
+     * must use fastgetattr in case one or other of the init values is NULL
+     */
+    textInitVal = (text *) fastgetattr(tup, initValAttno,
+                                       RelationGetTupleDescriptor(aggRel),
+                                       isNull);
+    if (!PointerIsValid(textInitVal))
+        *isNull = true;
+    if (*isNull) {
+        heap_close(aggRel);
+        return((char *) NULL);
+    }
+    strInitVal = textout(textInitVal);
+    heap_close(aggRel);
+
+    /* find the transition type's input function to parse the string */
+    tup = SearchSysCacheTuple(TYPOID, ObjectIdGetDatum(transtype),
+                              0,0,0);
+    if (!HeapTupleIsValid(tup)) {
+        pfree(strInitVal);
+        elog(WARN, "AggNameGetInitVal: cache lookup failed on aggregate transition function return type");
+    }
+    initVal = fmgr(((TypeTupleForm) GETSTRUCT(tup))->typinput, strInitVal, -1);
+    pfree(strInitVal);
+    return(initVal);
+}
diff --git a/src/backend/catalog/pg_aggregate.h b/src/backend/catalog/pg_aggregate.h

new file mode 100644 (file)

index 0000000..7ed9835
--- /dev/null
+++ b/src/backend/catalog/pg_aggregate.h
@@ -0,0 +1,132 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_aggregate.h--
+ *    definition of the system "aggregate" relation (pg_aggregate)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_aggregate.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_AGGREGATE_H
+#define PG_AGGREGATE_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------------------------------------------------------
+ * pg_aggregate definition.
+ *
+ * cpp turns this into typedef struct FormData_pg_aggregate
+ *
+ *  aggname        name of the aggregate
+ *  aggtransfn1        transition function 1
+ *  aggtransfn2        transition function 2
+ *  aggfinalfn     final function
+ *  aggbasetype        type of data on which aggregate operates
+ *  aggtranstype1  output type of transition function 1
+ *  aggtranstype2  output type of transition function 2
+ *  aggfinaltype   output type of the final function
+ *  agginitval1        initial value for transition state 1
+ *  agginitval2        initial value for transition state 2
+ * ----------------------------------------------------------------
+ */ 
+CATALOG(pg_aggregate) {
+    NameData       aggname;  /* name of the aggregate (avg, sum, max, min, count in the initial rows) */
+    Oid            aggowner;  /* initial rows store PGUID here; presumably the owning user's id -- TODO confirm */
+    regproc        aggtransfn1;  /* transition function 1; "-" in the initial count row */
+    regproc        aggtransfn2;  /* transition function 2; "-" in the initial sum/max/min rows */
+    regproc        aggfinalfn;  /* final function; only the initial avg rows name one */
+    Oid            aggbasetype;  /* type of data on which the aggregate operates; 0 in the initial count row */
+    Oid            aggtranstype1;  /* result type of transition function 1 */
+    Oid            aggtranstype2;  /* result type of transition function 2; 0 where aggtransfn2 is "-" */
+    Oid            aggfinaltype;  /* result type of the final function */
+    text       agginitval1;    /* VARIABLE LENGTH FIELD: initial value for transition state 1, _null_ in some initial rows */
+    text       agginitval2;    /* VARIABLE LENGTH FIELD: initial value for transition state 2, _null_ in some initial rows */
+} FormData_pg_aggregate;
+
+/* ----------------
+ * Form_pg_aggregate corresponds to a pointer to a tuple with
+ * the format of pg_aggregate relation.
+ * ----------------
+ */
+typedef FormData_pg_aggregate  *Form_pg_aggregate;
+
+/* ----------------
+ * compiler constants for pg_aggregate
+ * ----------------
+ */
+
+#define Natts_pg_aggregate     11  /* attribute count; matches the 11 fields of CATALOG(pg_aggregate) */
+#define Anum_pg_aggregate_aggname  1  /* Anum_* are 1-based attribute numbers in field declaration order */
+#define Anum_pg_aggregate_aggowner 2
+#define Anum_pg_aggregate_aggtransfn1  3
+#define Anum_pg_aggregate_aggtransfn2  4
+#define Anum_pg_aggregate_aggfinalfn   5
+#define Anum_pg_aggregate_aggbasetype  6
+#define Anum_pg_aggregate_aggtranstype1    7
+#define Anum_pg_aggregate_aggtranstype2    8
+#define Anum_pg_aggregate_aggfinaltype 9
+#define Anum_pg_aggregate_agginitval1  10  /* read by AggNameGetInitVal when xfuncno == 1 */
+#define Anum_pg_aggregate_agginitval2  11  /* read by AggNameGetInitVal when xfuncno == 2 */
+
+
+/* ----------------
+ * initial contents of pg_aggregate
+ * ---------------
+ */
+
+DATA(insert OID = 0 ( avg   PGUID int4pl  int4inc  int4div  23  23  23  23 0  0 ));
+DATA(insert OID = 0 ( avg   PGUID int2pl  int2inc  int2div  21  21  21  21  0  0 ));
+DATA(insert OID = 0 ( avg PGUID float4pl float4inc float4div  700  700  700  700 0.0 0.0 ));
+DATA(insert OID = 0 ( avg PGUID float8pl float8inc float8div  701  701  701  701 0.0 0.0 ));
+
+DATA(insert OID = 0 ( sum   PGUID int4pl   - -  23  23  0  23  0   _null_ ));
+DATA(insert OID = 0 ( sum   PGUID int2pl   - -  21  21  0  21  0   _null_ ));
+DATA(insert OID = 0 ( sum PGUID float4pl - - 700  700 0  700  0.0 _null_ ));
+DATA(insert OID = 0 ( sum PGUID float8pl - - 701  701 0  701  0.0 _null_ ));
+
+DATA(insert OID = 0 ( max   PGUID int4larger   - -  23  23  0  23  _null_ _null_ ));
+DATA(insert OID = 0 ( max   PGUID int2larger   - -  21  21  0  21  _null_ _null_ ));
+DATA(insert OID = 0 ( max PGUID float4larger - - 700  700 0  700  _null_ _null_ ));
+DATA(insert OID = 0 ( max PGUID float8larger - - 701  701 0  701  _null_ _null_ ));
+
+DATA(insert OID = 0 ( min   PGUID int4smaller   - -  23  23  0  23  _null_ _null_ ));
+DATA(insert OID = 0 ( min   PGUID int2smaller   - -  21  21  0  21    _null_ _null_ ));
+DATA(insert OID = 0 ( min PGUID float4smaller - - 700  700 0  700   _null_ _null_ ));
+DATA(insert OID = 0 ( min PGUID float8smaller - - 701  701 0  701  _null_ _null_ ));
+
+DATA(insert OID = 0 ( count     PGUID - int4inc - 0 0 23 23  _null_ 0 ));
+
+/*
+ * prototypes for functions in pg_aggregate.c
+ */
+extern void AggregateCreate(char *aggName, /* judging by the parameter names, takes the names and initial-value text for a new aggregate -- confirm in pg_aggregate.c */
+               char *aggtransfn1Name,
+               char *aggtransfn2Name,
+               char *aggfinalfnName,
+               char *aggbasetypeName,
+               char *aggtransfn1typeName,
+               char *aggtransfn2typeName,
+               char *agginitval1,
+               char *agginitval2);
+extern char *AggNameGetInitVal(char *aggName, Oid basetype, /* aggregate is looked up by (aggName, basetype); result is the init value passed through the transition type's input function */
+                  int xfuncno, bool *isNull);  /* xfuncno must be 1 or 2 (selects agginitval1/agginitval2); returns NULL with *isNull set when that value is null */
+
+#endif /* PG_AGGREGATE_H */
+
+
+
+
diff --git a/src/backend/catalog/pg_am.h b/src/backend/catalog/pg_am.h

new file mode 100644 (file)

index 0000000..0f36e7c
--- /dev/null
+++ b/src/backend/catalog/pg_am.h
@@ -0,0 +1,115 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_am.h--
+ *    definition of the system "am" relation (pg_am)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_am.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ * the genbki.sh script reads this file and generates .bki
+ * information from the DATA() statements.
+ *
+ * XXX do NOT break up DATA() statements into multiple lines!
+ *     the scripts are not as smart as you might think...
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_AM_H
+#define PG_AM_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_am definition.  cpp turns this into
+ * typedef struct FormData_pg_am
+ * ----------------
+ */ 
+CATALOG(pg_am) {
+    NameData   amname;  /* access method name: hash, rtree, btree (and nobtree under NOBTREE) in the initial rows */
+    Oid    amowner;  /* initial rows store PGUID here; presumably the owning user's id -- TODO confirm */
+    char   amkind;  /* "o" in every initial row; the meaning of the code is not stated in this file */
+    int2   amstrategies;  /* initial rows: hash 1, rtree 8, btree 5; presumably the number of operator strategies (cf. amopstrategy in pg_amop.h) */
+    int2   amsupport;  /* initial rows: hash 1, rtree 3, btree 1; presumably the number of support procedures (cf. amprocnum in pg_amproc.h) */
+    regproc    amgettuple;  /* procedure columns start here, e.g. btgettuple for btree */
+    regproc    aminsert;  /* e.g. btinsert */
+    regproc    amdelete;  /* e.g. btdelete */
+    regproc    amgetattr;  /* "-" in every initial row */
+    regproc    amsetlock;  /* "-" in every initial row */
+    regproc    amsettid;  /* "-" in every initial row */
+    regproc    amfreetuple;  /* "-" in every initial row */
+    regproc    ambeginscan;  /* e.g. btbeginscan */
+    regproc    amrescan;  /* e.g. btrescan */
+    regproc    amendscan;  /* e.g. btendscan */
+    regproc    ammarkpos;  /* e.g. btmarkpos */
+    regproc    amrestrpos;  /* e.g. btrestrpos */
+    regproc    amopen;  /* "-" in every initial row */
+    regproc    amclose;  /* "-" in every initial row */
+    regproc    ambuild;  /* e.g. btbuild */
+    regproc    amcreate; /* "-" in every initial row */
+    regproc    amdestroy;  /* "-" in every initial row */
+} FormData_pg_am;
+
+/* ----------------
+ * Form_pg_am corresponds to a pointer to a tuple with
+ * the format of pg_am relation.
+ * ----------------
+ */
+typedef FormData_pg_am *Form_pg_am;
+
+/* ----------------
+ * compiler constants for pg_am
+ * ----------------
+ */
+#define Natts_pg_am            22  /* attribute count; matches the 22 fields of CATALOG(pg_am) */
+#define Anum_pg_am_amname      1  /* Anum_* are 1-based attribute numbers in field declaration order */
+#define Anum_pg_am_amowner     2
+#define Anum_pg_am_amkind      3
+#define Anum_pg_am_amstrategies        4
+#define Anum_pg_am_amsupport       5
+#define Anum_pg_am_amgettuple      6
+#define Anum_pg_am_aminsert        7
+#define Anum_pg_am_amdelete        8
+#define Anum_pg_am_amgetattr       9
+#define Anum_pg_am_amsetlock       10
+#define Anum_pg_am_amsettid        11
+#define Anum_pg_am_amfreetuple     12
+#define Anum_pg_am_ambeginscan     13
+#define Anum_pg_am_amrescan        14
+#define Anum_pg_am_amendscan       15
+#define Anum_pg_am_ammarkpos       16
+#define Anum_pg_am_amrestrpos      17
+#define Anum_pg_am_amopen      18
+#define Anum_pg_am_amclose     19
+#define Anum_pg_am_ambuild     20
+#define Anum_pg_am_amcreate        21
+#define Anum_pg_am_amdestroy       22
+
+/* ----------------
+ * initial contents of pg_am
+ * ----------------
+ */
+
+DATA(insert OID = 405 (  hash PGUID "o"  1 1 hashgettuple hashinsert hashdelete - - - - hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos - - hashbuild - - ));
+DATA(insert OID = 402 (  rtree PGUID "o" 8 3 rtgettuple rtinsert rtdelete - - - - rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos - - rtbuild - - ));
+DATA(insert OID = 403 (  btree PGUID "o" 5 1 btgettuple btinsert btdelete - - - - btbeginscan btrescan btendscan btmarkpos btrestrpos - - btbuild - - ));
+#define BTREE_AM_OID 403
+
+BKI_BEGIN
+#ifdef NOBTREE
+BKI_END
+DATA(insert OID = 404 (  nobtree PGUID "o" 5 1 nobtgettuple nobtinsert nobtdelete - - - - nobtbeginscan nobtrescan nobtendscan nobtmarkpos nobtrestrpos - - nobtbuild - - ));
+BKI_BEGIN
+#endif /* NOBTREE */
+BKI_END
+
+#endif /* PG_AM_H */
diff --git a/src/backend/catalog/pg_amop.h b/src/backend/catalog/pg_amop.h

new file mode 100644 (file)

index 0000000..e9d1212
--- /dev/null
+++ b/src/backend/catalog/pg_amop.h
@@ -0,0 +1,546 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_amop.h--
+ *    definition of the system "amop" relation (pg_amop)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_amop.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *   the genbki.sh script reads this file and generates .bki
+ *   information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_AMOP_H
+#define PG_AMOP_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+#include "access/istrat.h"
+
+/* ----------------
+ * pg_amop definition.  cpp turns this into
+ * typedef struct FormData_pg_amop
+ * ----------------
+ */ 
+CATALOG(pg_amop) {
+    Oid    amopid;  /* access method OID; initial rows use 402 (rtree), 403 (btree), 404 (nobtree), 405 (hash) as assigned in pg_am.h */
+    Oid    amopclaid;  /* operator class id, e.g. 422 in the rows labelled box_ops; presumably a pg_opclass OID -- TODO confirm */
+    Oid    amopopr;  /* operator id; presumably a pg_operator OID -- TODO confirm */
+    int2   amopstrategy;  /* strategy number: 1..8 in rtree rows, 1..5 in btree rows, 1 in hash rows */
+    regproc    amopselect;  /* rtsel / btreesel / hashsel in the initial rows; presumably the selectivity estimator */
+    regproc    amopnpages;  /* rtnpage / btreenpage / hashnpage in the initial rows; presumably the page-count estimator */
+} FormData_pg_amop;
+
+/* ----------------
+ * Form_pg_amop corresponds to a pointer to a tuple with
+ * the format of pg_amop relation.
+ * ----------------
+ */
+typedef FormData_pg_amop   *Form_pg_amop;
+
+/* ----------------
+ * compiler constants for pg_amop
+ * ----------------
+ */
+/* #define Name_pg_amop            "pg_amop" (left disabled in the original source) */
+#define Natts_pg_amop          6  /* attribute count; matches the 6 fields of CATALOG(pg_amop) */
+#define Anum_pg_amop_amopid        1  /* Anum_* are 1-based attribute numbers in field declaration order */
+#define Anum_pg_amop_amopclaid         2
+#define Anum_pg_amop_amopopr       3
+#define Anum_pg_amop_amopstrategy  4
+#define Anum_pg_amop_amopselect        5
+#define Anum_pg_amop_amopnpages        6
+
+/* ----------------
+ * initial contents of pg_amop
+ * ----------------
+ */
+
+/*
+ *  rtree box_ops
+ */
+
+DATA(insert OID = 0 (  402 422 493 1 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 494 2 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 500 3 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 495 4 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 496 5 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 499 6 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 498 7 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 422 497 8 rtsel rtnpage ));
+
+/*
+ *  rtree bigbox_ops
+ */
+
+DATA(insert OID = 0 (  402 433 493 1 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 494 2 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 500 3 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 495 4 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 496 5 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 499 6 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 498 7 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 433 497 8 rtsel rtnpage ));
+
+/*
+ *  rtree poly_ops (supports polygons)
+ */
+
+DATA(insert OID = 0 (  402 434 485 1 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 486 2 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 487 3 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 488 4 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 489 5 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 490 6 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 491 7 rtsel rtnpage ));
+DATA(insert OID = 0 (  402 434 492 8 rtsel rtnpage ));
+
+/*
+ *  nbtree int2_ops
+ */
+
+DATA(insert OID = 0 (  403 421  95 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 421 522 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 421  94 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 421 524 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 421 520 5 btreesel btreenpage ));
+
+/*
+ *  nbtree float8_ops
+ */
+
+DATA(insert OID = 0 (  403 423 672 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 423 673 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 423 670 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 423 675 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 423 674 5 btreesel btreenpage ));
+
+/*
+ *  nbtree int24_ops
+ */
+
+DATA(insert OID = 0 (  403 424 534 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 424 540 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 424 532 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 424 542 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 424 536 5 btreesel btreenpage ));
+
+/*
+ *  nbtree int42_ops
+ */
+
+DATA(insert OID = 0 (  403 425 535 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 425 541 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 425 533 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 425 543 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 425 537 5 btreesel btreenpage ));
+
+/*
+ *  nbtree int4_ops
+ */
+
+DATA(insert OID = 0 (  403 426  97 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 426 523 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 426  96 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 426 525 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 426 521 5 btreesel btreenpage ));
+
+/*
+ *  nbtree oid_ops
+ */
+
+DATA(insert OID = 0 (  403 427 609 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 427 611 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 427 607 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 427 612 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 427 610 5 btreesel btreenpage ));
+
+/*
+ *  nbtree float4_ops
+ */
+
+DATA(insert OID = 0 (  403 428 622 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 428 624 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 428 620 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 428 625 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 428 623 5 btreesel btreenpage ));
+
+/*
+ *  nbtree char_ops
+ */
+
+DATA(insert OID = 0 (  403 429 631 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 429 632 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 429 92 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 429 634 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 429 633 5 btreesel btreenpage ));
+
+/*
+ *  nbtree char2_ops
+ */
+
+DATA(insert OID = 0 (  403 406 418 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 406 457 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 406 412 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 406 463 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 406 460 5 btreesel btreenpage ));
+
+/*
+ *  nbtree char4_ops
+ */
+
+DATA(insert OID = 0 (  403 407 419 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 407 458 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 407 413 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 407 464 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 407 461 5 btreesel btreenpage ));
+
+/*
+ *  nbtree char8_ops
+ */
+
+DATA(insert OID = 0 (  403 408 420 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 408 459 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 408 414 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 408 465 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 408 462 5 btreesel btreenpage ));
+
+/*
+ *  nbtree name_ops
+ */
+
+DATA(insert OID = 0 (  403 409 660 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 409 661 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 409 93 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 409 663 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 409 662 5 btreesel btreenpage ));
+
+/*
+ *  nbtree char16_ops
+ */
+
+DATA(insert OID = 0 (  403 430 645 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 430 646 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 430 99 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 430 648 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 430 647 5 btreesel btreenpage ));
+
+/*
+ *  nbtree text_ops
+ */
+
+DATA(insert OID = 0 (  403 431 664 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 431 665 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 431 98 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 431 667 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 431 666 5 btreesel btreenpage ));
+
+/*
+ *  nbtree abstime_ops
+ */
+
+DATA(insert OID = 0 (  403 432 562 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 432 564 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 432 560 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 432 565 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 432 563 5 btreesel btreenpage ));
+
+/*
+ *  nbtree oidint4_ops
+ */
+
+DATA(insert OID = 0 (  403 435 930 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 435 931 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 435 932 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 435 933 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 435 934 5 btreesel btreenpage ));
+
+/*
+ *  nbtree oidint2_ops
+ */
+
+DATA(insert OID = 0 (  403 437 830 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 437 831 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 437 832 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 437 833 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 437 834 5 btreesel btreenpage ));
+
+/*
+ *  nbtree oidname_ops
+ */
+
+DATA(insert OID = 0 (  403 436 676 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 436 677 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 436 678 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 436 679 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 436 680 5 btreesel btreenpage ));
+
+/*
+ *  nbtree bpchar_ops
+ */
+
+DATA(insert OID = 0 (  403 1076 1058 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1076 1059 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1076 1054 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1076 1061 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1076 1060 5 btreesel btreenpage ));
+
+/*
+ *  nbtree varchar_ops
+ */
+
+DATA(insert OID = 0 (  403 1077 1066 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1077 1067 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1077 1062 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1077 1069 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1077 1068 5 btreesel btreenpage ));
+
+/*
+ *  nbtree date_ops
+ */
+
+DATA(insert OID = 0 (  403 1114 1095 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1114 1096 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1114 1093 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1114 1098 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1114 1097 5 btreesel btreenpage ));
+
+
+/*
+ *  nbtree time_ops
+ */
+
+DATA(insert OID = 0 (  403 1115 1110 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1115 1111 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1115 1108 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1115 1113 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  403 1115 1112 5 btreesel btreenpage ));
+
+BKI_BEGIN
+#ifdef NOBTREE
+BKI_END
+/*
+ *  nobtree int2_ops
+ */
+
+DATA(insert OID = 0 (  404 421  95 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 421 522 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 421  94 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 421 524 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 421 520 5 btreesel btreenpage ));
+
+/*
+ *  nobtree float8_ops
+ */
+
+DATA(insert OID = 0 (  404 423 672 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 423 673 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 423 670 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 423 675 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 423 674 5 btreesel btreenpage ));
+
+/*
+ *  nobtree int24_ops
+ */
+
+DATA(insert OID = 0 (  404 424 534 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 424 540 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 424 532 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 424 542 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 424 536 5 btreesel btreenpage ));
+
+/*
+ *  nobtree int42_ops
+ */
+
+DATA(insert OID = 0 (  404 425 535 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 425 541 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 425 533 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 425 543 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 425 537 5 btreesel btreenpage ));
+
+/*
+ *  nobtree int4_ops
+ */
+
+DATA(insert OID = 0 (  404 426  97 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 426 523 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 426  96 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 426 525 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 426 521 5 btreesel btreenpage ));
+
+/*
+ *  nobtree oid_ops
+ */
+
+DATA(insert OID = 0 (  404 427 609 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 427 611 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 427 607 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 427 612 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 427 610 5 btreesel btreenpage ));
+
+/*
+ *  nobtree float4_ops
+ */
+
+DATA(insert OID = 0 (  404 428 622 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 428 624 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 428 620 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 428 625 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 428 623 5 btreesel btreenpage ));
+
+/*
+ *  nobtree char_ops
+ */
+
+DATA(insert OID = 0 (  404 429 631 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 429 632 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 429 92 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 429 634 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 429 633 5 btreesel btreenpage ));
+
+/*
+ *  nobtree char2_ops
+ */
+
+DATA(insert OID = 0 (  404 406 418 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 406 457 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 406 412 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 406 463 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 406 460 5 btreesel btreenpage ));
+
+/*
+ *  nobtree char4_ops
+ */
+
+DATA(insert OID = 0 (  404 407 419 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 407 458 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 407 413 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 407 464 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 407 461 5 btreesel btreenpage ));
+
+/*
+ *  nobtree char8_ops
+ */
+
+DATA(insert OID = 0 (  404 408 420 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 408 459 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 408 414 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 408 465 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 408 462 5 btreesel btreenpage ));
+
+/*
+ *  nobtree char16_ops
+ */
+
+DATA(insert OID = 0 (  404 430 645 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 430 646 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 430 99 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 430 648 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 430 647 5 btreesel btreenpage ));
+
+/*
+ *  nobtree name_ops
+ */
+
+DATA(insert OID = 0 (  404 409 660 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 409 661 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 409 93 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 409 663 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 409 662 5 btreesel btreenpage ));
+
+/*
+ *  nobtree text_ops
+ */
+
+DATA(insert OID = 0 (  404 431 664 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 431 665 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 431 98 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 431 667 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 431 666 5 btreesel btreenpage ));
+
+/*
+ *  nobtree abstime_ops
+ */
+
+DATA(insert OID = 0 (  404 432 562 1 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 432 564 2 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 432 560 3 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 432 565 4 btreesel btreenpage ));
+DATA(insert OID = 0 (  404 432 563 5 btreesel btreenpage ));
+
+BKI_BEGIN
+#endif /* NOBTREE */
+BKI_END
+
+/*
+ *  hash table int2_ops (hash rows, AM 405, use the hash estimators hashsel/hashnpage like every other hash opclass)
+ */
+DATA(insert OID = 0 (  405 421  94 1 hashsel hashnpage ));
+/*
+ *  hash table float8_ops (uses hashsel/hashnpage, not the btree estimators)
+ */
+DATA(insert OID = 0 (  405 423 670 1 hashsel hashnpage ));
+/*
+ *  hash table int4_ops
+ */
+DATA(insert OID = 0 (  405 426  96 1 hashsel hashnpage ));
+/*
+ *  hash table oid_ops
+ */
+DATA(insert OID = 0 (  405 427 607 1 hashsel hashnpage ));
+/*
+ *  hash table float4_ops
+ */
+DATA(insert OID = 0 (  405 428 620 1 hashsel hashnpage ));
+/*
+ *  hash table char_ops
+ */
+DATA(insert OID = 0 (  405 429 92 1 hashsel hashnpage ));
+/*
+ *  hash table char2_ops
+ */
+DATA(insert OID = 0 (  405 406 412 1 hashsel hashnpage ));
+/*
+ *  hash table char4_ops
+ */
+DATA(insert OID = 0 (  405 407 413 1 hashsel hashnpage ));
+/*
+ *  hash table char8_ops
+ */
+DATA(insert OID = 0 (  405 408 414 1 hashsel hashnpage ));
+/*
+ *  hash table char16_ops
+ */
+DATA(insert OID = 0 (  405 430 99 1 hashsel hashnpage ));
+/*
+ *  hash table name_ops
+ */
+DATA(insert OID = 0 (  405 409 93 1 hashsel hashnpage ));
+/*
+ *  hash table text_ops
+ */
+DATA(insert OID = 0 (  405 431 98 1 hashsel hashnpage ));
+
+/*
+ *  hash table bpchar_ops
+ */
+DATA(insert OID = 0 (  405 1076 1054 1 hashsel hashnpage ));
+
+/*
+ *  hash table varchar_ops
+ */
+DATA(insert OID = 0 (  405 1077 1062 1 hashsel hashnpage ));
+
+
+#endif /* PG_AMOP_H */
diff --git a/src/backend/catalog/pg_amproc.h b/src/backend/catalog/pg_amproc.h

new file mode 100644 (file)

index 0000000..cacc2b7
--- /dev/null
+++ b/src/backend/catalog/pg_amproc.h
@@ -0,0 +1,134 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_amproc.h--
+ *    definition of the system "amproc" relation (pg_amproc)
+ *    along with the relation's initial contents.  The amproc
+ *    catalog is used to store procedures used by indexed access
+ *    methods that aren't associated with operators.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_amproc.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_AMPROC_H
+#define PG_AMPROC_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_amproc definition.  cpp turns this into
+ * typedef struct FormData_pg_amproc
+ * ----------------
+ */ 
+CATALOG(pg_amproc) {
+    Oid    amid;  /* access method OID; initial rows use 402 (rtree), 403 (btree), 404 (nobtree), 405 (hash) */
+    Oid    amopclaid;  /* operator class id; the initial rows reuse ids that appear in pg_amop.h (e.g. 422, 421) */
+    Oid    amproc;  /* procedure id; presumably a pg_proc OID -- TODO confirm */
+    int2   amprocnum;  /* support procedure number: 1..3 in rtree rows, 1 in btree and hash rows */
+} FormData_pg_amproc;
+
+/* ----------------
+ * Form_pg_amproc corresponds to a pointer to a tuple with
+ * the format of pg_amproc relation.
+ * ----------------
+ */
+typedef FormData_pg_amproc *Form_pg_amproc;
+
+/* ----------------
+ * compiler constants for pg_amproc
+ * ----------------
+ */
+#define Natts_pg_amproc            4  /* attribute count; matches the 4 fields of CATALOG(pg_amproc) */
+#define Anum_pg_amproc_amid        1  /* Anum_* are 1-based attribute numbers in field declaration order */
+#define Anum_pg_amproc_amopclaid   2
+#define Anum_pg_amproc_amproc      3
+#define Anum_pg_amproc_amprocnum   4
+
+/* ----------------
+ * initial contents of pg_amproc
+ * ----------------
+ */
+
+DATA(insert OID = 0 (402 422 193 1));
+DATA(insert OID = 0 (402 422 194 2));
+DATA(insert OID = 0 (402 422 195 3));
+DATA(insert OID = 0 (402 433 193 1));
+DATA(insert OID = 0 (402 433 194 2));
+DATA(insert OID = 0 (402 433 196 3));
+DATA(insert OID = 0 (402 434 197 1));
+DATA(insert OID = 0 (402 434 198 2));
+DATA(insert OID = 0 (402 434 199 3));
+DATA(insert OID = 0 (403 421 350 1));
+DATA(insert OID = 0 (403 423 355 1));
+DATA(insert OID = 0 (403 424 353 1));
+DATA(insert OID = 0 (403 425 352 1));
+DATA(insert OID = 0 (403 426 351 1));
+DATA(insert OID = 0 (403 427 356 1));
+DATA(insert OID = 0 (403 428 354 1));
+DATA(insert OID = 0 (403 429 358 1));
+DATA(insert OID = 0 (403 406 689 1));
+DATA(insert OID = 0 (403 407 690 1));
+DATA(insert OID = 0 (403 408 691 1));
+DATA(insert OID = 0 (403 409 359 1));
+DATA(insert OID = 0 (403 430 374 1));
+DATA(insert OID = 0 (403 431 360 1));
+DATA(insert OID = 0 (403 432 357 1));
+DATA(insert OID = 0 (403 435 928 1));
+DATA(insert OID = 0 (403 436 948 1));
+DATA(insert OID = 0 (403 437 828 1));
+DATA(insert OID = 0 (403 1076 1078 1));
+DATA(insert OID = 0 (403 1077 1079 1));
+DATA(insert OID = 0 (403 1114 1092 1));
+DATA(insert OID = 0 (403 1115 1107 1));
+
+BKI_BEGIN
+#ifdef NOBTREE
+BKI_END
+DATA(insert OID = 0 (404 421 350 1));
+DATA(insert OID = 0 (404 423 355 1));
+DATA(insert OID = 0 (404 424 353 1));
+DATA(insert OID = 0 (404 425 352 1));
+DATA(insert OID = 0 (404 426 351 1));
+DATA(insert OID = 0 (404 427 356 1));
+DATA(insert OID = 0 (404 428 354 1));
+DATA(insert OID = 0 (404 429 358 1));
+DATA(insert OID = 0 (404 406 689 1));
+DATA(insert OID = 0 (404 407 690 1));
+DATA(insert OID = 0 (404 408 691 1));
+DATA(insert OID = 0 (404 409 359 1));
+DATA(insert OID = 0 (404 430 374 1));
+DATA(insert OID = 0 (404 431 360 1));
+DATA(insert OID = 0 (404 432 357 1));
+BKI_BEGIN
+#endif /* NOBTREE */
+BKI_END
+
+DATA(insert OID = 0 (405 421 449 1));
+DATA(insert OID = 0 (405 423 452 1));
+DATA(insert OID = 0 (405 426 450 1));
+DATA(insert OID = 0 (405 427 453 1));
+DATA(insert OID = 0 (405 428 451 1));
+DATA(insert OID = 0 (405 429 454 1));
+DATA(insert OID = 0 (405 406 692 1));
+DATA(insert OID = 0 (405 407 693 1));
+DATA(insert OID = 0 (405 408 694 1));
+DATA(insert OID = 0 (405 409 455 1));
+DATA(insert OID = 0 (405 430 499 1));
+DATA(insert OID = 0 (405 431 456 1));
+DATA(insert OID = 0 (405 1076 1080 1));
+DATA(insert OID = 0 (405 1077 1081 1));
+
+#endif /* PG_AMPROC_H */
diff --git a/src/backend/catalog/pg_attribute.h b/src/backend/catalog/pg_attribute.h

new file mode 100644 (file)

index 0000000..d813317
--- /dev/null
+++ b/src/backend/catalog/pg_attribute.h
@@ -0,0 +1,512 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_attribute.h--
+ *    definition of the system "attribute" relation (pg_attribute)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_attribute.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *    utils/cache/relcache.c requires some hard-coded tuple descriptors
+ *    for some of the system catalogs so if the schema for any of
+ *    these changes, be sure and change the appropriate Schema_xxx
+ *    macros!  -cim 2/5/91
+ *
+ *    fastgetattr() now uses attcacheoff to cache byte offsets of
+ *    attributes in heap tuples.  The data actually stored in 
+ *    pg_attribute (-1) indicates no cached value.  But when we copy
+ *    these tuples into a tuple descriptor, we may then update attcacheoff
+ *    in the copies.  This speeds up the attribute walking process.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_ATTRIBUTE_H
+#define PG_ATTRIBUTE_H
+   
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+#include "access/attnum.h"
+
+/* ----------------
+ * pg_attribute definition.  cpp turns this into
+ * typedef struct FormData_pg_attribute
+ *
+ *      If you change the following, make sure you change the structs for
+ *      system attributes in heap.c and index.c also.
+ * ----------------
+ */
+CATALOG(pg_attribute) BOOTSTRAP {
+    Oid    attrelid;      /* OID of the relation this attribute belongs to */
+    NameData   attname; /* attribute (column) name */
+    Oid    atttypid; /* OID of the attribute's type -- presumably a pg_type OID, TODO confirm */
+    Oid    attdefrel;
+    int4   attnvals;
+    Oid    atttyparg;  /* type arg for arrays/spquel/procs */
+    int2   attlen; /* size in bytes; the initial rows use -1 where no fixed size is given */
+    int2   attnum; /* attribute number; the initial rows use negative values for ctid, oid, xmin, ... */
+    int2   attbound;
+    bool   attbyval; /* presumably "passed by value" -- TODO confirm against users of this field */
+    bool   attcanindex;
+    Oid    attproc;    /* spquel? */
+    int4   attnelems;
+    int4   attcacheoff; /* cached byte offset used by fastgetattr(); stored as -1 = no cached value */
+    bool        attisset;
+    char   attalign;   /* alignment (c=char, s=short, i=int, d=double) */
+} FormData_pg_attribute;
+
+/*
+ * ATTRIBUTE_TUPLE_SIZE is the number of bytes of a pg_attribute tuple
+ * that are actually copied around: everything up to and including
+ * attalign, the last field.  sizeof(FormData_pg_attribute) is not used
+ * because the size of the C struct is not the same as the size of the
+ * tuple.  Someone should still figure out how to do this properly.
+ */
+#define ATTRIBUTE_TUPLE_SIZE \
+    (sizeof(char) + offsetof(FormData_pg_attribute, attalign))
+
+/* ----------------
+ * AttributeTupleForm is a pointer to a FormData_pg_attribute, i.e. to
+ * a tuple with the format of the pg_attribute relation.
+ * ----------------
+ */
+typedef FormData_pg_attribute  *AttributeTupleForm;
+
+/* ----------------
+ * compiler constants for pg_attribute
+ *
+ * Natts_pg_attribute is the number of fields in FormData_pg_attribute.
+ * Each Anum_pg_attribute_xxx is the 1-based position of field xxx in
+ * the struct's declaration order.  If a field is added, removed or
+ * moved, update these constants and every Schema_xxx macro and DATA
+ * row below, since they list their values in the same order.
+ * ----------------
+ */
+
+#define Natts_pg_attribute     16
+#define Anum_pg_attribute_attrelid 1
+#define Anum_pg_attribute_attname  2
+#define Anum_pg_attribute_atttypid 3
+#define Anum_pg_attribute_attdefrel    4
+#define Anum_pg_attribute_attnvals 5
+#define Anum_pg_attribute_atttyparg    6
+#define Anum_pg_attribute_attlen   7
+#define Anum_pg_attribute_attnum   8
+#define Anum_pg_attribute_attbound 9
+#define Anum_pg_attribute_attbyval 10
+#define Anum_pg_attribute_attcanindex  11
+#define Anum_pg_attribute_attproc  12
+#define Anum_pg_attribute_attnelems    13
+#define Anum_pg_attribute_attcacheoff  14
+#define Anum_pg_attribute_attisset      15
+#define Anum_pg_attribute_attalign      16
+
+
+/* ----------------
+ * SCHEMA_ macros for declaring hardcoded tuple descriptors.
+ * these are used in utils/cache/relcache.c
+ * ----------------
+#define SCHEMA_NAME(x) CppConcat(Name_,x)
+#define SCHEMA_DESC(x) CppConcat(Desc_,x)
+#define SCHEMA_NATTS(x) CppConcat(Natts_,x)
+#define SCHEMA_DEF(x) \
+    FormData_pg_attribute \
+    SCHEMA_DESC(x) [ SCHEMA_NATTS(x) ] = \
+    { \
+   CppConcat(Schema_,x) \
+    }
+ */
+
+/* ----------------
+ * initial contents of pg_attribute
+ * ----------------
+ */
+
+/* ----------------
+ * pg_type schema
+ *
+ * Schema_pg_type expands to one brace-enclosed FormData_pg_attribute
+ * initializer per pg_type column (attnum 1..16), with the values in
+ * struct field order.  Booleans are spelled '\001' (true) and '\0'
+ * (false).  These hard-coded descriptors are used by
+ * utils/cache/relcache.c and must be kept in step with the DATA rows
+ * for relation 71 that follow.
+ *
+ * NOTE(review): the macro puts the relation OID (71) in the attdefrel
+ * slot while the DATA rows put 0 there -- confirm which is intended.
+ * ----------------
+ */
+#define Schema_pg_type \
+{ 71l, {"typname"},      19l, 71l, 0l, 0l, NAMEDATALEN,  1, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typowner"},     26l, 71l, 0l, 0l,  4,  2, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typlen"},       21l, 71l, 0l, 0l,  2,  3, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 71l, {"typprtlen"},    21l, 71l, 0l, 0l,  2,  4, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 71l, {"typbyval"},     16l, 71l, 0l, 0l,  1,  5, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 71l, {"typtype"},      18l, 71l, 0l, 0l,  1,  6, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 71l, {"typisdefined"}, 16l, 71l, 0l, 0l,  1,  7, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 71l, {"typdelim"},     18l, 71l, 0l, 0l,  1,  8, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 71l, {"typrelid"},     26l, 71l, 0l, 0l,  4,  9, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typelem"},      26l, 71l, 0l, 0l,  4, 10, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typinput"},     24l, 71l, 0l, 0l,  4, 11, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typoutput"},    24l, 71l, 0l, 0l,  4, 12, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typreceive"},   24l, 71l, 0l, 0l,  4, 13, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typsend"},      24l, 71l, 0l, 0l,  4, 14, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 71l, {"typalign"},     18l, 71l, 0l, 0l,  1, 15, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 71l, {"typdefault"},   25l, 71l, 0l, 0l, -1, 16, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }
+
+/*
+ * Catalog rows for relation 71 (pg_type).  Values are listed in
+ * FormData_pg_attribute field order.  attnum 1..16 are the columns of
+ * pg_type itself; the rows with negative attnum (ctid .. vtype) are the
+ * system attributes.
+ *
+ * typinput, typoutput, typreceive and typsend use type 24 so that these
+ * rows agree with the hard-coded Schema_pg_type descriptor; they were
+ * previously listed as type 26 here.
+ */
+DATA(insert OID = 0 (  71 typname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typowner         26 0 0 0  4   2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typlen           21 0 0 0  2   3 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  71 typprtlen        21 0 0 0  2   4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  71 typbyval         16 0 0 0  1   5 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  71 typtype          18 0 0 0  1   6 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  71 typisdefined     16 0 0 0  1   7 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  71 typdelim         18 0 0 0  1   8 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  71 typrelid         26 0 0 0  4   9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typelem          26 0 0 0  4  10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typinput         24 0 0 0  4  11 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typoutput        24 0 0 0  4  12 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typreceive       24 0 0 0  4  13 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typsend          24 0 0 0  4  14 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 typalign         18 0 0 0  1  15 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  71 typdefault       25 0 0 0 -1  16 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  71 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  71 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  71 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+
+/* ----------------
+ * pg_database
+ *
+ * Catalog rows for relation 88.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order (attrelid,
+ * attname, atttypid, ..., attisset, attalign).  attnum 1..3 are the
+ * relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ * ----------------
+ */
+DATA(insert OID = 0 (  88 datname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 datdba           26 0 0 0  4   2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 datpath          25 0 0 0 -1   3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  88 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  88 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  88 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_demon
+ *
+ * Catalog rows for relation 76.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order.  attnum 1..4 are
+ * the relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ * ----------------
+ */
+DATA(insert OID = 0 (  76 demserid         26 0 0 0  4   1 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 demname          19 0 0 0 NAMEDATALEN   2 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 demowner         26 0 0 0  4   3 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 demcode          24 0 0 0  4   4 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+
+DATA(insert OID = 0 (  76 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  76 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  76 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  76 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_proc
+ *
+ * Schema_pg_proc expands to one brace-enclosed FormData_pg_attribute
+ * initializer per pg_proc column (attnum 1..16), in struct field
+ * order, with booleans spelled '\001' (true) and '\0' (false).  It is
+ * one of the hard-coded descriptors used by utils/cache/relcache.c.
+ *
+ * The DATA rows after the macro describe the same sixteen columns of
+ * relation 81 for the catalog itself, followed by the system
+ * attributes (negative attnum, ctid .. vtype).  Keep the macro and
+ * the rows in step.
+ *
+ * NOTE(review): the macro puts the relation OID (81) in the attdefrel
+ * slot while the DATA rows put 0 there -- confirm which is intended.
+ * ----------------
+ */
+#define Schema_pg_proc \
+{ 81l, {"proname"},       19l, 81l, 0l, 0l, NAMEDATALEN,  1, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 81l, {"proowner"},      26l, 81l, 0l, 0l,  4,  2, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 81l, {"prolang"},       26l, 81l, 0l, 0l,  4,  3, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 81l, {"proisinh"},      16l, 81l, 0l, 0l,  1,  4, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 81l, {"proistrusted"},  16l, 81l, 0l, 0l,  1,  5, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 81l, {"proiscachable"}, 16l, 81l, 0l, 0l,  1,  6, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 81l, {"pronargs"},      21l, 81l, 0l, 0l,  2,  7, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 81l, {"proretset"},     16l, 81l, 0l, 0l,  1,  8, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 81l, {"prorettype"},    26l, 81l, 0l, 0l,  4,  9, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 81l, {"proargtypes"},   30l, 81l, 0l, 0l, 32, 10, 0,   '\0', '\001', 0l, 0l, \
+      -1l, '\0', 'i' }, \
+{ 81l, {"probyte_pct"},   23l, 81l, 0l, 0l,  4, 11, 0, '\001', '\001', 0l, 0l, \
+      -1l, '\0', 'i' }, \
+{ 81l, {"properbyte_cpu"},   23l, 81l, 0l, 0l,  4, 12, 0, '\001', '\001', 0l, 0l,      -1l, '\0', 'i' }, \
+{ 81l, {"propercall_cpu"},   23l, 81l, 0l, 0l,  4, 13, 0, '\001', '\001', 0l, 0l,      -1l, '\0', 'i' }, \
+{ 81l, {"prooutin_ratio"},   23l, 81l, 0l, 0l,  4, 14, 0, '\001', '\001', 0l, 0l,      -1l, '\0', 'i' }, \
+{ 81l, {"prosrc"},        25l, 81l, 0l, 0l, -1,  15, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 81l, {"probin"},        17l, 81l, 0l, 0l, -1,  16, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }
+
+DATA(insert OID = 0 (  81 proname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 proowner         26 0 0 0  4   2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 prolang          26 0 0 0  4   3 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 proisinh         16 0 0 0  1   4 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  81 proistrusted     16 0 0 0  1   5 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  81 proiscachable    16 0 0 0  1   6 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  81 pronargs         21 0 0 0  2   7 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  81 proretset        16 0 0 0  1   8 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  81 prorettype       26 0 0 0  4   9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 proargtypes      30 0 0 0 32  10 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 probyte_pct      23 0 0 0  4  11 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 properbyte_cpu   23 0 0 0  4  12 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 propercall_cpu   23 0 0 0  4  13 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 prooutin_ratio   23 0 0 0  4  14 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 prosrc           25 0 0 0 -1  15 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 probin           17 0 0 0 -1  16 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  81 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 cmax             29 0 0 0  2 -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  81 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  81 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_server
+ *
+ * Catalog rows for relation 82.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order.  attnum 1..3 are
+ * the relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ * ----------------
+ */
+DATA(insert OID = 0 (  82 sername          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 serpid           21 0 0 0  2   2 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  82 serport          21 0 0 0  2   3 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  82 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  82 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  82 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  82 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_user
+ *
+ * Catalog rows for relation 86.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order.  attnum 1..6 are
+ * the relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ *
+ * usesysid is a 4-byte column of type 23, so its alignment is 'i'
+ * like every other 4-byte column in this file (it used to be listed
+ * with the 2-byte alignment 's').
+ * ----------------
+ */
+DATA(insert OID = 0 (  86 usename          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 usesysid         23 0 0 0  4   2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 usecreatedb      16 0 0 0  1   3 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  86 usetrace         16 0 0 0  1   4 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  86 usesuper         16 0 0 0  1   5 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  86 usecatupd        16 0 0 0  1   6 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  86 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  86 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  86 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  86 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+
+/* ----------------
+ * pg_group
+ *
+ * Catalog rows for relation 87.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order.  attnum 1..3 are
+ * the relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ *
+ * grosysid is a 4-byte column of type 23, so its alignment is 'i'
+ * like every other 4-byte column in this file (it used to be listed
+ * with the 2-byte alignment 's').
+ * ----------------
+ */
+DATA(insert OID = 0 (  87 groname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 grosysid         23 0 0 0  4   2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 grolist        1007 0 0 0 -1   3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  87 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  87 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  87 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_attribute
+ *
+ * pg_attribute describes its own columns here.  Schema_pg_attribute
+ * expands to one brace-enclosed FormData_pg_attribute initializer per
+ * column (attnum 1..16), in struct field order, with booleans spelled
+ * '\001' (true) and '\0' (false).  It is one of the hard-coded
+ * descriptors used by utils/cache/relcache.c.
+ *
+ * The DATA rows after the macro describe the same sixteen columns of
+ * relation 75 for the catalog itself, followed by the system
+ * attributes (negative attnum, ctid .. vtype).  Both lists must match
+ * the FormData_pg_attribute struct and Natts_pg_attribute.
+ *
+ * NOTE(review): the macro puts the relation OID (75) in the attdefrel
+ * slot while the DATA rows put 0 there -- confirm which is intended.
+ * ----------------
+ */
+#define Schema_pg_attribute \
+{ 75l, {"attrelid"},    26l, 75l, 0l, 0l,  4,  1, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attname"},     19l, 75l, 0l, 0l, NAMEDATALEN,  2, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"atttypid"},    26l, 75l, 0l, 0l,  4,  3, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attdefrel"},   26l, 75l, 0l, 0l,  4,  4, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attnvals"},    23l, 75l, 0l, 0l,  4,  5, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"atttyparg"},   26l, 75l, 0l, 0l,  4,  6, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attlen"},      21l, 75l, 0l, 0l,  2,  7, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 75l, {"attnum"},      21l, 75l, 0l, 0l,  2,  8, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 75l, {"attbound"},    21l, 75l, 0l, 0l,  2,  9, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 75l, {"attbyval"},    16l, 75l, 0l, 0l,  1, 10, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 75l, {"attcanindex"}, 16l, 75l, 0l, 0l,  1, 11, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 75l, {"attproc"},     26l, 75l, 0l, 0l,  4, 12, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attnelems"},   23l, 75l, 0l, 0l,  4, 13, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attcacheoff"}, 23l, 75l, 0l, 0l,  4, 14, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 75l, {"attisset"},    16l, 75l, 0l, 0l,  1, 15, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 75l, {"attalign"},    18l, 75l, 0l, 0l,  1, 16, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }
+
+DATA(insert OID = 0 (  75 attrelid         26 0 0 0  4   1 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attname          19 0 0 0 NAMEDATALEN   2 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 atttypid         26 0 0 0  4   3 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attdefrel        26 0 0 0  4   4 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attnvals         23 0 0 0  4   5 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 atttyparg        26 0 0 0  4   6 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attlen           21 0 0 0  2   7 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  75 attnum           21 0 0 0  2   8 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  75 attbound         21 0 0 0  2   9 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  75 attbyval         16 0 0 0  1  10 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  75 attcanindex      16 0 0 0  1  11 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  75 attproc          26 0 0 0  4  12 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attnelems        23 0 0 0  4  13 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attcacheoff      23 0 0 0  4  14 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 attisset         16 0 0 0  1  15 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  75 attalign         18 0 0 0  1  16 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  75 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  75 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  75 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  75 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_class
+ *
+ * Schema_pg_class expands to one brace-enclosed FormData_pg_attribute
+ * initializer per pg_class column, in struct field order, with
+ * booleans spelled '\001' (true) and '\0' (false).  It is one of the
+ * hard-coded descriptors used by utils/cache/relcache.c.  The DATA
+ * rows after the macro describe the same columns of relation 83 for
+ * the catalog itself, followed by the system attributes (negative
+ * attnum, ctid .. vtype).
+ *
+ * pg_class has eighteen columns, so attnum runs 1..18.  reltype and
+ * relowner used to share attnum 2, which left every later column one
+ * too low; each column now carries its own position.
+ *
+ * relpreserved is type 703 in both lists; the DATA row used to say
+ * 702 while the macro said 703.
+ * NOTE(review): 703 was chosen to match the hard-coded descriptor --
+ * confirm against the field's declaration in pg_class.h.
+ *
+ * NOTE(review): the macro puts the relation OID (83) in the attdefrel
+ * slot while the DATA rows put 0 there -- confirm which is intended.
+ * ----------------
+ */
+#define Schema_pg_class \
+{ 83l, {"relname"},      19l, 83l, 0l, 0l, NAMEDATALEN,  1, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"reltype"},      26l, 83l, 0l, 0l,  4,  2, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relowner"},     26l, 83l, 0l, 0l,  4,  3, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relam"},        26l, 83l, 0l, 0l,  4,  4, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relpages"},     23,  83l, 0l, 0l,  4,  5, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"reltuples"},    23,  83l, 0l, 0l,  4,  6, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relexpires"},   702,  83l, 0l, 0l,  4,  7, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relpreserved"}, 703,  83l, 0l, 0l,  4,  8, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relhasindex"},  16,  83l, 0l, 0l,  1,  9, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 83l, {"relisshared"},  16,  83l, 0l, 0l,  1, 10, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 83l, {"relkind"},      18,  83l, 0l, 0l,  1, 11, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 83l, {"relarch"},      18,  83l, 0l, 0l,  1, 12, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 83l, {"relnatts"},     21,  83l, 0l, 0l,  2, 13, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 83l, {"relsmgr"},      210l,  83l, 0l, 0l,  2, 14, 0, '\001', '\001', 0l, 0l, -1l, '\0', 's' }, \
+{ 83l, {"relkey"},       22,  83l, 0l, 0l, 16, 15, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relkeyop"},     30,  83l, 0l, 0l, 32, 16, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }, \
+{ 83l, {"relhasrules"},  16,  83l, 0l, 0l,  1, 17, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'c' }, \
+{ 83l, {"relacl"},     1034l, 83l, 0l, 0l, -1, 18, 0,   '\0', '\001', 0l, 0l, -1l, '\0', 'i' }
+
+DATA(insert OID = 0 (  83 relname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 reltype          26 0 0 0  4   2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relowner         26 0 0 0  4   3 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relam            26 0 0 0  4   4 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relpages         23 0 0 0  4   5 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 reltuples        23 0 0 0  4   6 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relexpires      702 0 0 0  4   7 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relpreserved    703 0 0 0  4   8 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relhasindex      16 0 0 0  1   9 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  83 relisshared      16 0 0 0  1  10 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  83 relkind          18 0 0 0  1  11 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  83 relarch          18 0 0 0  1  12 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  83 relnatts         21 0 0 0  2  13 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  83 relsmgr         210 0 0 0  2  14 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  83 relkey           22 0 0 0 16  15 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relkeyop         30 0 0 0 32  16 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 relhasrules      16 0 0 0  1  17 0 t t 0 0 -1 f c));
+DATA(insert OID = 0 (  83 relacl         1034 0 0 0 -1  18 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  83 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  83 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  83 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_magic
+ *
+ * Catalog rows for relation 80.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order.  attnum 1..2 are
+ * the relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ * ----------------
+ */
+DATA(insert OID = 0 (  80 magname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 magvalue         19 0 0 0 NAMEDATALEN   2 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  80 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  80 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  80 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+/* ----------------
+ * pg_defaults
+ *
+ * Catalog rows for relation 89.  Each row lists the sixteen
+ * FormData_pg_attribute fields in declaration order.  attnum 1..2 are
+ * the relation's own columns; the rows with negative attnum (ctid ..
+ * vtype) are the system attributes.
+ * ----------------
+ */
+DATA(insert OID = 0 (  89 defname          19 0 0 0 NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 defvalue         19 0 0 0 NAMEDATALEN   2 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 ctid             27 0 0 0  6  -1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 oid              26 0 0 0  4  -2 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 xmin             28 0 0 0  4  -3 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 cmin             29 0 0 0  2  -4 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  89 xmax             28 0 0 0  4  -5 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 cmax             29 0 0 0  2  -6 0 t t 0 0 -1 f s));
+DATA(insert OID = 0 (  89 chain            27 0 0 0  6  -7 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 anchor           27 0 0 0  6  -8 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 tmax            702 0 0 0  4  -9 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 tmin            702 0 0 0  4 -10 0 t t 0 0 -1 f i));
+DATA(insert OID = 0 (  89 vtype            18 0 0 0  1 -11 0 t t 0 0 -1 f c));
+    
+
+/* ----------------
+ * pg_hosts - this relation is used to store host-based authentication
+ *            info.
+ *
+ * Catalog rows for relation 101, one per column (attnum 1..3), with
+ * the sixteen FormData_pg_attribute fields in declaration order.
+ * Unlike most other catalogs in this file, no rows with negative
+ * attnum (ctid, oid, ...) are listed for this relation.
+ * ----------------
+ */
+DATA(insert OID = 0 (  101 dbName           19 0 0 0  NAMEDATALEN   1 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  101 address           25 0 0 0  -1   2 0 f t 0 0 -1 f i));
+DATA(insert OID = 0 (  101 mask           25 0 0 0  -1   3 0 f t 0 0 -1 f i));
+
+/* ----------------
+ * pg_variable - this relation is modified by special-purpose access
+ *           method code.  The single attribute "varfoo" declared
+ *           below is a placeholder (garbage), needed only so that
+ *           the reldesc code works properly.
+ *
+ * Schema_pg_variable is the hard-coded FormData_pg_attribute
+ * initializer for that attribute; the DATA row is the matching
+ * catalog entry for relation 90.
+ * ----------------
+ */
+#define Schema_pg_variable \
+{ 90l, {"varfoo"},  26l, 90l, 0l, 0l, 4, 1, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }
+    
+DATA(insert OID = 0 (  90 varfoo           26 0 0 0  4   1 0 t t 0 0 -1 f i));
+    
+/* ----------------
+ * pg_log - this relation is modified by special-purpose access
+ *           method code.  The single attribute "logfoo" declared
+ *           below is a placeholder (garbage), needed only so that
+ *           the reldesc code works properly.
+ *
+ * Schema_pg_log is the hard-coded FormData_pg_attribute initializer
+ * for that attribute; the DATA row is the matching catalog entry for
+ * relation 99.
+ * ----------------
+ */
+#define Schema_pg_log \
+{ 99l, {"logfoo"},  26l, 99l, 0l, 0l, 4, 1, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }
+
+DATA(insert OID = 0 (  99 logfoo           26 0 0 0  4   1 0 t t 0 0 -1 f i));
+    
+/* ----------------
+ * pg_time - this relation is modified by special-purpose access
+ *           method code.  The single attribute "timefoo" declared
+ *           below is a placeholder (garbage), needed only so that
+ *           the reldesc code works properly.
+ *
+ * Schema_pg_time is the hard-coded FormData_pg_attribute initializer
+ * for that attribute; the DATA row is the matching catalog entry for
+ * relation 100.
+ * ----------------
+ */
+#define Schema_pg_time \
+{ 100l, {"timefoo"},  26l, 100l, 0l, 0l, 4, 1, 0, '\001', '\001', 0l, 0l, -1l, '\0', 'i' }
+
+DATA(insert OID = 0 (  100 timefoo         26 0 0 0  4   1 0 t t 0 0 -1 f i));
+    
+#endif /* PG_ATTRIBUTE_H */
diff --git a/src/backend/catalog/pg_class.h b/src/backend/catalog/pg_class.h

new file mode 100644 (file)

index 0000000..b1adb68
--- /dev/null
+++ b/src/backend/catalog/pg_class.h
@@ -0,0 +1,162 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_class.h--
+ *    definition of the system "relation" relation (pg_class)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_class.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    ``pg_relation'' is being replaced by ``pg_class''.  currently
+ *    we are only changing the name in the catalogs but someday the
+ *    code will be changed too. -cim 2/26/90
+ *    [it finally happens.  -ay 11/5/94]
+ *
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_RELATION_H
+#define PG_RELATION_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+#include "utils/nabstime.h"
+
+/* ----------------
+ * pg_class definition.  cpp turns this into
+ * typedef struct FormData_pg_class
+ *
+ * Note: the #if 0, #endif around the BKI_BEGIN.. END block
+ *       below keeps cpp from seeing what is meant for the
+ *       genbki script: pg_relation is now called pg_class, but
+ *       only in the catalogs -cim 2/26/90
+ * ----------------
+ */
+
+/* ----------------
+ * This structure is actually variable-length (the last attribute is
+ * a POSTGRES array).  Hence, sizeof(FormData_pg_class) does not
+ * describe the fixed-length or actual size of the structure.
+ * FormData_pg_class.relacl may not be correctly aligned, either,
+ * if aclitem and struct varlena don't align together.  Hence,
+ * you MUST use heap_getattr() to get the relacl field.
+ *
+ * The fields are declared in attribute-number order: relname is
+ * attribute 1 and relacl is attribute 18 (see Anum_pg_class_* below).
+ * ----------------
+ */
+CATALOG(pg_class) BOOTSTRAP {
+     NameData  relname;
+     Oid        reltype;          
+     Oid   relowner;
+     Oid   relam;
+     int4  relpages;
+     int4  reltuples;
+     int4  relexpires; /* really used as an abstime, but fudge it for now */
+     int4  relpreserved;/* really used as a reltime, but fudge it for now */
+     bool  relhasindex;
+     bool  relisshared;
+     char  relkind; /* one of the RELKIND_* codes defined below */
+     char  relarch; /* 'h' = heavy, 'l' = light, 'n' = no archival */
+     int2  relnatts;
+     int2  relsmgr;
+     int28     relkey;         /* not used */
+     oid8  relkeyop;       /* not used */
+     bool  relhasrules;
+     aclitem   relacl[1];      /* this is here for the catalog */
+} FormData_pg_class;
+
+/*
+ * Size of the structure up to and including relhasrules, i.e. without
+ * the trailing relacl field.
+ */
+#define CLASS_TUPLE_SIZE \
+     (offsetof(FormData_pg_class,relhasrules) + sizeof(bool))
+
+/* ----------------
+ * Form_pg_class corresponds to a pointer to a tuple with
+ * the format of pg_class relation.
+ * ----------------
+ */
+typedef FormData_pg_class  *Form_pg_class;
+
+/* ----------------
+ * attribute counts and attribute numbers for pg_class
+ *
+ * Natts_pg_class_fixed leaves out the trailing relacl attribute.  It
+ * tells routines that insert new pg_class tuples (as opposed to
+ * replacing old ones) that there is no relacl field.
+ * ----------------
+ */
+#define Natts_pg_class_fixed            17
+#define Natts_pg_class                  18
+#define Anum_pg_class_relname           1
+#define Anum_pg_class_reltype           2
+#define Anum_pg_class_relowner          3
+#define Anum_pg_class_relam             4
+#define Anum_pg_class_relpages          5
+#define Anum_pg_class_reltuples         6
+#define Anum_pg_class_relexpires        7
+#define Anum_pg_class_relpreserved      8
+#define Anum_pg_class_relhasindex       9
+#define Anum_pg_class_relisshared       10
+#define Anum_pg_class_relkind           11
+#define Anum_pg_class_relarch           12
+#define Anum_pg_class_relnatts          13
+#define Anum_pg_class_relsmgr           14
+#define Anum_pg_class_relkey            15
+#define Anum_pg_class_relkeyop          16
+#define Anum_pg_class_relhasrules       17
+#define Anum_pg_class_relacl            18
+
+/* ----------------
+ * initial contents of pg_class
+ *
+ * Each row supplies the 18 pg_class attributes in struct order:
+ *   relname reltype relowner relam relpages reltuples relexpires
+ *   relpreserved relhasindex relisshared relkind relarch relnatts
+ *   relsmgr relkey relkeyop relhasrules relacl
+ * In every row the OID and the reltype value are the same number, and
+ * that number is the one given by the matching RelOid_* constant below.
+ *
+ * NOTE(review): pg_variable is listed with relnatts 2, but only one
+ * attribute row (varfoo) is visible for relation 90 in pg_attribute.h
+ * -- confirm the intended count.
+ * ----------------
+ */
+
+DATA(insert OID =  71 (  pg_type 71          PGUID 0 0 0 0 0 f f r n 16 0 - - f _null_ ));
+DATA(insert OID =  75 (  pg_attribute 75      PGUID 0 0 0 0 0 f f r n 16 0 - - f _null_ ));
+DATA(insert OID =  76 (  pg_demon 76          PGUID 0 0 0 0 0 f t r n 4 0 - - f _null_ ));
+DATA(insert OID =  80 (  pg_magic 80         PGUID 0 0 0 0 0 f t r n 2 0 - - f _null_ ));
+DATA(insert OID =  81 (  pg_proc 81          PGUID 0 0 0 0 0 f f r n 16 0 - - f _null_ ));
+DATA(insert OID =  82 (  pg_server 82         PGUID 0 0 0 0 0 f t r n 3 0 - - f _null_ ));
+DATA(insert OID =  83 (  pg_class 83         PGUID 0 0 0 0 0 f f r n 17 0 - - f _null_ ));    
+DATA(insert OID =  86 (  pg_user 86          PGUID 0 0 0 0 0 f t r n 6 0 - - f _null_ ));
+DATA(insert OID =  87 (  pg_group 87          PGUID 0 0 0 0 0 f t s n 3 0 - - f _null_ ));
+DATA(insert OID =  88 (  pg_database 88      PGUID 0 0 0 0 0 f t r n 3 0 - - f _null_ ));
+DATA(insert OID =  89 (  pg_defaults 89       PGUID 0 0 0 0 0 f t r n 2 0 - - f _null_ ));
+DATA(insert OID =  90 (  pg_variable 90        PGUID 0 0 0 0 0 f t s n 2 0 - - f _null_ ));
+DATA(insert OID =  99 (  pg_log  99           PGUID 0 0 0 0 0 f t s n 1 0 - - f _null_ ));
+DATA(insert OID = 100 (  pg_time 100           PGUID 0 0 0 0 0 f t s n 1 0 - - f _null_ ));
+DATA(insert OID = 101 (  pg_hosts 101           PGUID 0 0 0 0 0 f t s n 3 0 - - f _null_ ));
+
+/* OIDs of the system relations listed in the DATA rows above, in OID order */
+#define RelOid_pg_type          71
+#define RelOid_pg_attribute     75
+#define RelOid_pg_demon         76
+#define RelOid_pg_magic         80
+#define RelOid_pg_proc          81
+#define RelOid_pg_server        82
+#define RelOid_pg_class         83
+#define RelOid_pg_user          86
+#define RelOid_pg_group         87
+#define RelOid_pg_database      88
+#define RelOid_pg_defaults      89
+#define RelOid_pg_variable      90
+#define RelOid_pg_log           99
+#define RelOid_pg_time          100
+#define RelOid_pg_hosts         101
+
+/* largest OID among the RelOid_* constants */
+#define MAX_SYSTEM_RELOID       101
+
+/* single-character relation kind codes */
+#define RELKIND_INDEX           'i'     /* secondary index */
+#define RELKIND_RELATION        'r'     /* cataloged heap */
+#define RELKIND_SPECIAL         's'     /* special (non-heap) */
+#define RELKIND_UNCATALOGED     'u'     /* temporary heap */
+
+#endif /* PG_RELATION_H */
diff --git a/src/backend/catalog/pg_database.h b/src/backend/catalog/pg_database.h

new file mode 100644 (file)

index 0000000..78a657e
--- /dev/null
+++ b/src/backend/catalog/pg_database.h
@@ -0,0 +1,57 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_database.h--
+ *    definition of the system "database" relation (pg_database)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_database.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_DATABASE_H
+#define PG_DATABASE_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_database definition.  cpp turns this into
+ * typedef struct FormData_pg_database
+ *
+ * Fields are declared in attribute-number order (1..3); see the
+ * Anum_pg_database_* constants below.
+ * ----------------
+ */
+CATALOG(pg_database) BOOTSTRAP {
+    NameData   datname;
+    Oid    datdba;
+    text   datpath;    /* VARIABLE LENGTH FIELD */
+} FormData_pg_database;
+
+/* ----------------
+ * Form_pg_database corresponds to a pointer to a tuple with
+ * the format of pg_database relation.
+ * ----------------
+ */
+typedef FormData_pg_database   *Form_pg_database;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_database
+ * ----------------
+ */
+#define Natts_pg_database           3
+#define Anum_pg_database_datname    1
+#define Anum_pg_database_datdba     2
+#define Anum_pg_database_datpath    3
+
+
+#endif /* PG_DATABASE_H */
diff --git a/src/backend/catalog/pg_defaults.h b/src/backend/catalog/pg_defaults.h

new file mode 100644 (file)

index 0000000..66efb7b
--- /dev/null
+++ b/src/backend/catalog/pg_defaults.h
@@ -0,0 +1,55 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_defaults.h--
+ *   definition of the system "defaults" relation (pg_defaults)
+ *   along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_defaults.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_DEFAULTS_H
+#define PG_DEFAULTS_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_defaults definition.  cpp turns this into
+ * typedef struct FormData_pg_defaults
+ *
+ * Fields are declared in attribute-number order (1..2); see the
+ * Anum_pg_defaults_* constants below.
+ * ----------------
+ */
+CATALOG(pg_defaults) BOOTSTRAP {
+    NameData   defname;
+    NameData   defvalue;
+} FormData_pg_defaults;
+
+/* ----------------
+ * Form_pg_defaults corresponds to a pointer to a tuple with
+ * the format of pg_defaults relation.
+ * ----------------
+ */
+typedef FormData_pg_defaults   *Form_pg_defaults;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_defaults
+ * ----------------
+ */
+#define Natts_pg_defaults           2
+#define Anum_pg_defaults_defname    1
+#define Anum_pg_defaults_defvalue   2
+
+
+#endif /* PG_DEFAULTS_H */
diff --git a/src/backend/catalog/pg_demon.h b/src/backend/catalog/pg_demon.h

new file mode 100644 (file)

index 0000000..1089f57
--- /dev/null
+++ b/src/backend/catalog/pg_demon.h
@@ -0,0 +1,58 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_demon.h--
+ *   definition of the system "demon" relation (pg_demon)
+ *   along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_demon.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_DEMON_H 
+#define PG_DEMON_H 
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_demon definition.  cpp turns this into
+ * typedef struct FormData_pg_demon
+ *
+ * Fields are declared in attribute-number order (1..4); see the
+ * Anum_pg_demon_* constants below.
+ * ----------------
+ */
+CATALOG(pg_demon) BOOTSTRAP {
+    Oid    demserid;
+    NameData   demname;
+    Oid    demowner;
+    regproc    demcode;
+} FormData_pg_demon;
+
+/* ----------------
+ * Form_pg_demon corresponds to a pointer to a tuple with
+ * the format of pg_demon relation.
+ * ----------------
+ */
+typedef FormData_pg_demon  *Form_pg_demon;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_demon
+ * ----------------
+ */
+#define Natts_pg_demon              4
+#define Anum_pg_demon_demserid      1
+#define Anum_pg_demon_demname       2
+#define Anum_pg_demon_demowner      3
+#define Anum_pg_demon_demcode       4
+
+#endif /* PG_DEMON_H */
diff --git a/src/backend/catalog/pg_group.h b/src/backend/catalog/pg_group.h

new file mode 100644 (file)

index 0000000..76d51be
--- /dev/null
+++ b/src/backend/catalog/pg_group.h
@@ -0,0 +1,42 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_group.h--
+ *    
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_group.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_GROUP_H
+#define PG_GROUP_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/*
+ * pg_group definition.  Fields are declared in attribute-number order
+ * (1..3); see the Anum_pg_group_* constants below.  grolist is declared
+ * with one element, but the structure is marked variable-length.
+ */
+CATALOG(pg_group) BOOTSTRAP {
+        NameData groname;
+        int4    grosysid;
+   int4    grolist[1];
+} FormData_pg_group;
+/* VARIABLE LENGTH STRUCTURE */
+
+/* Form_pg_group is a pointer to a FormData_pg_group. */
+typedef FormData_pg_group  *Form_pg_group;
+
+/*
+ * Attribute count and attribute numbers for pg_group.
+ *
+ * Natts_pg_group covers all three fields of FormData_pg_group
+ * (groname, grosysid, grolist).  It was previously defined as 1, which
+ * disagreed with Anum_pg_group_grolist (3) and with the relnatts value
+ * of 3 in the pg_group row of pg_class.h.
+ */
+#define Natts_pg_group     3
+#define Anum_pg_group_groname  1
+#define Anum_pg_group_grosysid 2
+#define Anum_pg_group_grolist  3
+
+#endif /* PG_GROUP_H */
diff --git a/src/backend/catalog/pg_hosts.h b/src/backend/catalog/pg_hosts.h

new file mode 100644 (file)

index 0000000..3924c26
--- /dev/null
+++ b/src/backend/catalog/pg_hosts.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_hosts.h--
+ *
+ *    The pg_hosts system catalog provides host-based access to the
+ *    backend.  Only those hosts that are listed in pg_hosts are meant
+ *    to be granted access.
+ *
+ *    Currently this table is not used; file-based host authentication
+ *    is used instead.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_hosts.h,v 1.1.1.1 1996/07/09 06:21:16 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PG_HOSTS_H
+#define PG_HOSTS_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/*
+ * pg_hosts definition.  Fields are declared in attribute-number order
+ * (1..3); see the Anum_pg_hosts_* constants below.
+ */
+CATALOG(pg_hosts) BOOTSTRAP {
+        NameData dbName;   
+   text    address; 
+   text    mask;
+} FormData_pg_hosts;
+
+/* Form_pg_hosts is a pointer to a FormData_pg_hosts. */
+typedef FormData_pg_hosts  *Form_pg_hosts;
+/* attribute count and attribute numbers for pg_hosts */
+#define Natts_pg_hosts              3
+#define Anum_pg_hosts_dbName        1
+#define Anum_pg_hosts_address       2
+#define Anum_pg_hosts_mask          3
+
+#endif /* PG_HOSTS_H */
diff --git a/src/backend/catalog/pg_index.h b/src/backend/catalog/pg_index.h

new file mode 100644 (file)

index 0000000..da75b02
--- /dev/null
+++ b/src/backend/catalog/pg_index.h
@@ -0,0 +1,71 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_index.h--
+ *    definition of the system "index" relation (pg_index)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_index.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_INDEX_H
+#define PG_INDEX_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_index definition.  cpp turns this into
+ * typedef struct FormData_pg_index.  The oid of the index relation
+ * is stored in indexrelid; the oid of the indexed relation is stored
+ * in indrelid.
+ *
+ * Fields are declared in attribute-number order (1..8); see the
+ * Anum_pg_index_* constants below.
+ * ----------------
+ */ 
+CATALOG(pg_index) {
+    Oid    indexrelid;
+    Oid    indrelid;
+    Oid        indproc; /* registered procedure for functional index */
+    int28  indkey;
+    oid8   indclass;
+    bool   indisclustered;
+    bool   indisarchived;
+    text   indpred;    /* query plan for partial index predicate */
+} FormData_pg_index;
+
+/*
+ * NOTE(review): presumably 8 because indkey and indclass use the
+ * int28 and oid8 types -- confirm against those type definitions.
+ */
+#define INDEX_MAX_KEYS 8  /* maximum number of keys in an index definition */
+
+/* ----------------
+ * IndexTupleForm corresponds to a pointer to a tuple with
+ * the format of pg_index relation.  (Other catalogs name this
+ * typedef Form_pg_<catalog>; this one does not.)
+ * ----------------
+ */
+typedef FormData_pg_index  *IndexTupleForm;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_index
+ * ----------------
+ */
+#define Natts_pg_index                  8
+#define Anum_pg_index_indexrelid        1
+#define Anum_pg_index_indrelid          2
+#define Anum_pg_index_indproc           3
+#define Anum_pg_index_indkey            4
+#define Anum_pg_index_indclass          5
+#define Anum_pg_index_indisclustered    6
+#define Anum_pg_index_indisarchived     7
+#define Anum_pg_index_indpred           8
+
+
+#endif /* PG_INDEX_H */
diff --git a/src/backend/catalog/pg_inheritproc.h b/src/backend/catalog/pg_inheritproc.h

new file mode 100644 (file)

index 0000000..1527e99
--- /dev/null
+++ b/src/backend/catalog/pg_inheritproc.h
@@ -0,0 +1,59 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_inheritproc.h--
+ *    definition of the system "inheritproc" relation (pg_inheritproc)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_inheritproc.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_INHERITPROC_H
+#define PG_INHERITPROC_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_inheritproc definition.  cpp turns this into
+ * typedef struct FormData_pg_inheritproc
+ *
+ * Fields are declared in attribute-number order (1..4); see the
+ * Anum_pg_inheritproc_* constants below.
+ * ----------------
+ */ 
+CATALOG(pg_inheritproc) {
+     NameData  inhproname;
+     Oid   inhargrel;
+     Oid   inhdefrel;
+     Oid   inhproc;
+} FormData_pg_inheritproc;
+
+/* ----------------
+ * Form_pg_inheritproc corresponds to a pointer to a tuple with
+ * the format of pg_inheritproc relation.
+ * ----------------
+ */
+typedef FormData_pg_inheritproc    *Form_pg_inheritproc;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_inheritproc
+ * ----------------
+ */
+#define Natts_pg_inheritproc            4
+#define Anum_pg_inheritproc_inhproname  1
+#define Anum_pg_inheritproc_inhargrel   2
+#define Anum_pg_inheritproc_inhdefrel   3
+#define Anum_pg_inheritproc_inhproc     4
+
+
+#endif /* PG_INHERITPROC_H */
diff --git a/src/backend/catalog/pg_inherits.h b/src/backend/catalog/pg_inherits.h

new file mode 100644 (file)

index 0000000..1caa1cd
--- /dev/null
+++ b/src/backend/catalog/pg_inherits.h
@@ -0,0 +1,57 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_inherits.h--
+ *    definition of the system "inherits" relation (pg_inherits)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_inherits.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_INHERITS_H
+#define PG_INHERITS_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_inherits definition.  cpp turns this into
+ * typedef struct FormData_pg_inherits
+ *
+ * Fields are declared in attribute-number order (1..3); see the
+ * Anum_pg_inherits_* constants below.
+ * ----------------
+ */ 
+CATALOG(pg_inherits) {
+    Oid    inhrel;
+    Oid    inhparent;
+    int4   inhseqno;
+} FormData_pg_inherits;
+
+/* ----------------
+ * InheritsTupleForm corresponds to a pointer to a tuple with
+ * the format of pg_inherits relation.  (Other catalogs name this
+ * typedef Form_pg_<catalog>; this one does not.)
+ * ----------------
+ */
+typedef FormData_pg_inherits   *InheritsTupleForm;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_inherits
+ * ----------------
+ */
+#define Natts_pg_inherits           3
+#define Anum_pg_inherits_inhrel     1
+#define Anum_pg_inherits_inhparent  2
+#define Anum_pg_inherits_inhseqno   3
+
+
+#endif /* PG_INHERITS_H */
diff --git a/src/backend/catalog/pg_ipl.h b/src/backend/catalog/pg_ipl.h

new file mode 100644 (file)

index 0000000..df90cd4
--- /dev/null
+++ b/src/backend/catalog/pg_ipl.h
@@ -0,0 +1,57 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_ipl.h--
+ *    definition of the system "ipl" relation (pg_ipl)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_ipl.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_IPL_H
+#define PG_IPL_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_ipl definition.  cpp turns this into
+ * typedef struct FormData_pg_ipl
+ *
+ * Fields are declared in attribute-number order (1..3); see the
+ * Anum_pg_ipl_* constants below.
+ * ----------------
+ */ 
+CATALOG(pg_ipl) {
+     Oid   iplrel;
+     Oid   iplipl;
+     int4  iplseqno;
+} FormData_pg_ipl;
+
+/* ----------------
+ * Form_pg_ipl corresponds to a pointer to a tuple with
+ * the format of pg_ipl relation.
+ * ----------------
+ */
+typedef FormData_pg_ipl    *Form_pg_ipl;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_ipl
+ * ----------------
+ */
+#define Natts_pg_ipl            3
+#define Anum_pg_ipl_iplrel      1
+#define Anum_pg_ipl_iplipl      2
+#define Anum_pg_ipl_iplseqno    3
+
+
+#endif /* PG_IPL_H */
diff --git a/src/backend/catalog/pg_language.h b/src/backend/catalog/pg_language.h

new file mode 100644 (file)

index 0000000..7e5a31a
--- /dev/null
+++ b/src/backend/catalog/pg_language.h
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_language.h--
+ *    definition of the system "language" relation (pg_language)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_language.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_LANGUAGE_H
+#define PG_LANGUAGE_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_language definition.  cpp turns this into
+ * typedef struct FormData_pg_language
+ *
+ * Fields are declared in attribute-number order (1..2); see the
+ * Anum_pg_language_* constants below.
+ * ----------------
+ */ 
+CATALOG(pg_language) {
+    NameData   lanname;
+    text   lancompiler;    /* VARIABLE LENGTH FIELD */
+} FormData_pg_language;
+
+/* ----------------
+ * Form_pg_language corresponds to a pointer to a tuple with
+ * the format of pg_language relation.
+ * ----------------
+ */
+typedef FormData_pg_language   *Form_pg_language;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_language
+ * ----------------
+ */
+#define Natts_pg_language               2
+#define Anum_pg_language_lanname        1
+#define Anum_pg_language_lancompiler    2
+
+/* ----------------
+ * initial contents of pg_language
+ *
+ * Each row gives lanname and lancompiler, in that order.  Each
+ * *languageId constant repeats the OID of the row directly above it;
+ * the lisp row (OID 12) has no such constant.
+ * ----------------
+ */
+
+DATA(insert OID = 11 ( internal "n/a" ));
+#define INTERNALlanguageId 11
+DATA(insert OID = 12 ( lisp "/usr/ucb/liszt" ));
+DATA(insert OID = 13 ( "C" "/bin/cc" ));
+#define ClanguageId 13
+DATA(insert OID = 14 ( "sql" "postgres"));
+#define SQLlanguageId 14
+
+    
+#endif /* PG_LANGUAGE_H */
+
+
+
+
+
+
+
diff --git a/src/backend/catalog/pg_listener.h b/src/backend/catalog/pg_listener.h

new file mode 100644 (file)

index 0000000..05e077e
--- /dev/null
+++ b/src/backend/catalog/pg_listener.h
@@ -0,0 +1,56 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_listener.h--
+ *    Asynchronous notification
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_listener.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_LISTENER_H
+#define PG_LISTENER_H
+
+/* ----------------
+ *      postgres.h contains the system type definitions and the
+ *      CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ *      can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------------------------------------------------------
+ *      pg_listener definition.
+ *
+ *      cpp turns this into typedef struct FormData_pg_listener
+ *
+ *      Fields are declared in attribute-number order (1..3).  Note
+ *      that the Anum_pg_listener_* constants below use shortened
+ *      names (pid, notify) for listenerpid and notification.
+ * ----------------------------------------------------------------
+ */
+
+CATALOG(pg_listener) {
+    NameData       relname;
+    int4         listenerpid;
+    int4         notification;
+} FormData_pg_listener;
+
+/* ----------------
+ *      attribute count and attribute numbers for pg_listener
+ *      (_pid and _notify number the listenerpid and notification fields)
+ * ----------------
+ */
+#define Natts_pg_listener           3
+#define Anum_pg_listener_relname    1
+#define Anum_pg_listener_pid        2
+#define Anum_pg_listener_notify     3
+
+/* ----------------
+ *      initial contents of pg_listener are NOTHING.
+ * ----------------
+ */
+
+
+#endif /* PG_LISTENER_H */
diff --git a/src/backend/catalog/pg_log.h b/src/backend/catalog/pg_log.h

new file mode 100644 (file)

index 0000000..987825a
--- /dev/null
+++ b/src/backend/catalog/pg_log.h
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_log.h--
+ *    the system log relation "pg_log" is not a "heap" relation.
+ *    it is automatically created by the transam/ code and the
+ *    information here is all bogus and is just here to make the
+ *    relcache code happy.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_log.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    The structures and macros used by the transam/ code
+ *    to access pg_log should some day go here -cim 6/18/90
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_LOG_H
+#define PG_LOG_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/*
+ * Placeholder layout with a single Oid field.  Per the file header,
+ * pg_log is not a heap relation and this definition exists only to
+ * keep the relcache code happy.
+ */
+CATALOG(pg_log) BOOTSTRAP {
+    Oid    logfoo;
+} FormData_pg_log;
+
+/* Form_pg_log is a pointer to a FormData_pg_log. */
+typedef FormData_pg_log    *Form_pg_log;
+
+/* attribute count and attribute number for pg_log */
+#define Natts_pg_log            1
+#define Anum_pg_log_logfoo      1
+
+#endif /* PG_LOG_H */
diff --git a/src/backend/catalog/pg_magic.h b/src/backend/catalog/pg_magic.h

new file mode 100644 (file)

index 0000000..c5e0d98
--- /dev/null
+++ b/src/backend/catalog/pg_magic.h
@@ -0,0 +1,54 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_magic.h--
+ *    definition of the system "magic" relation (pg_magic)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_magic.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_MAGIC_H
+#define PG_MAGIC_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_magic definition.  cpp turns this into
+ * typedef struct FormData_pg_magic
+ *
+ * Fields are declared in attribute-number order (1..2); see the
+ * Anum_pg_magic_* constants below.
+ * ----------------
+ */ 
+CATALOG(pg_magic) BOOTSTRAP {
+    NameData   magname;
+    NameData   magvalue;
+} FormData_pg_magic;
+
+/* ----------------
+ * Form_pg_magic corresponds to a pointer to a tuple with
+ * the format of pg_magic relation.
+ * ----------------
+ */
+typedef FormData_pg_magic  *Form_pg_magic;
+
+/* ----------------
+ * attribute count and attribute numbers for pg_magic
+ * ----------------
+ */
+#define Natts_pg_magic              2
+#define Anum_pg_magic_magname       1
+#define Anum_pg_magic_magvalue      2
+
+#endif /* PG_MAGIC_H */
diff --git a/src/backend/catalog/pg_opclass.h b/src/backend/catalog/pg_opclass.h

new file mode 100644 (file)

index 0000000..46aecd3
--- /dev/null
+++ b/src/backend/catalog/pg_opclass.h
@@ -0,0 +1,85 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_opclass.h--
+ *    definition of the system "opclass" relation (pg_opclass)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_opclass.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_OPCLASS_H
+#define PG_OPCLASS_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_opclass definition.  cpp turns this into
+ * typedef struct FormData_pg_opclass
+ *
+ * The relation has a single attribute, opcname (attribute 1).
+ * ----------------
+ */ 
+
+CATALOG(pg_opclass) {
+    NameData opcname;
+} FormData_pg_opclass;
+
+/* ----------------
+ * Form_pg_opclass corresponds to a pointer to a tuple with
+ * the format of pg_opclass relation.
+ * ----------------
+ */
+typedef FormData_pg_opclass    *Form_pg_opclass;
+
+/* ----------------
+ * attribute count and attribute number for pg_opclass
+ * ----------------
+ */
+#define Natts_pg_opclass            1
+#define Anum_pg_opclass_opcname     1
+
+/* ----------------
+ * initial contents of pg_opclass
+ *
+ * Each row supplies only opcname; the row OID identifies the operator
+ * class.  INT4_OPS_OID repeats the OID of the int4_ops row.
+ * ----------------
+ */
+
+DATA(insert OID = 406 (    char2_ops ));
+DATA(insert OID = 407 (    char4_ops ));
+DATA(insert OID = 408 (    char8_ops ));
+DATA(insert OID = 409 (    name_ops ));
+DATA(insert OID = 421 (    int2_ops ));
+DATA(insert OID = 422 (    box_ops ));
+DATA(insert OID = 423 (    float8_ops ));
+DATA(insert OID = 424 (    int24_ops ));
+DATA(insert OID = 425 (    int42_ops ));
+DATA(insert OID = 426 (    int4_ops ));
+#define INT4_OPS_OID 426
+DATA(insert OID = 427 (    oid_ops ));
+DATA(insert OID = 428 (    float4_ops ));
+DATA(insert OID = 429 (    char_ops ));
+DATA(insert OID = 430 (    char16_ops ));
+DATA(insert OID = 431 (    text_ops ));
+DATA(insert OID = 432 (    abstime_ops ));
+DATA(insert OID = 433 (    bigbox_ops));
+DATA(insert OID = 434 (    poly_ops));
+DATA(insert OID = 435 (    oidint4_ops));
+DATA(insert OID = 436 (    oidname_ops));
+DATA(insert OID = 437 (    oidint2_ops));
+DATA(insert OID = 1076 (   bpchar_ops));
+DATA(insert OID = 1077 (   varchar_ops));
+DATA(insert OID = 1114 (   date_ops));
+DATA(insert OID = 1115 (   time_ops));
+
+#endif /* PG_OPCLASS_H */
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c

new file mode 100644 (file)

index 0000000..2784297
--- /dev/null
+++ b/src/backend/catalog/pg_operator.c
@@ -0,0 +1,1077 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_operator.c--
+ *    routines to support manipulation of the pg_operator relation
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    these routines moved here from commands/define.c and somewhat cleaned up.
+ * 
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "access/htup.h"
+#include "utils/rel.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "parser/catalog_utils.h"
+
+#include "catalog/catname.h"
+#include "utils/syscache.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_proc.h"
+#include "storage/bufmgr.h"
+
+#include "fmgr.h"
+
+/*
+ * Prototypes for the file-local helpers defined further down.
+ * Parameter names match the corresponding definitions.
+ */
+
+/* look up an operator oid by name and operand type oids */
+static Oid OperatorGetWithOpenRelation(Relation pg_operator_desc,
+                                       char *operatorName,
+                                       Oid leftObjectId,
+                                       Oid rightObjectId);
+
+/* look up an operator oid by name and operand type names */
+static Oid OperatorGet(char *operatorName,
+                       char *leftTypeName,
+                       char *rightTypeName);
+
+/* insert a placeholder ("shell") operator tuple, operand types given as oids */
+static Oid OperatorShellMakeWithOpenRelation(Relation pg_operator_desc,
+                                             char *operatorName,
+                                             Oid leftObjectId,
+                                             Oid rightObjectId);
+
+/* insert a placeholder ("shell") operator tuple, operand types given by name */
+static Oid OperatorShellMake(char *operatorName,
+                             char *leftTypeName,
+                             char *rightTypeName);
+
+/* create an operator tuple, or fill in an existing shell when definedOK */
+static void OperatorDef(char *operatorName,
+                        int definedOK,
+                        char *leftTypeName,
+                        char *rightTypeName,
+                        char *procedureName,
+                        uint16 precedence,
+                        bool isLeftAssociative,
+                        char *commutatorName,
+                        char *negatorName,
+                        char *restrictionName,
+                        char *joinName,
+                        bool canHash,
+                        char *leftSortName,
+                        char *rightSortName);
+
+/* back-patch commutator/negator links that point at a new operator */
+static void OperatorUpd(Oid baseId, Oid commId, Oid negId);
+     
+/* ----------------------------------------------------------------
+ *     OperatorGetWithOpenRelation
+ *
+ * performs a scan on pg_operator for an operator tuple
+ * with given name and left/right type oids.
+ * ----------------------------------------------------------------
+ *    pg_operator_desc -- reldesc for pg_operator
+ *    operatorName -- name of operator to fetch
+ *    leftObjectId -- left oid of operator to fetch
+ *    rightObjectId    -- right oid of operator to fetch
+ */
+static Oid
+OperatorGetWithOpenRelation(Relation pg_operator_desc,
+                            char *operatorName,
+                            Oid leftObjectId,
+                            Oid rightObjectId)
+{
+    /*
+     * Three-part scan key: operator name, left operand type oid and
+     * right operand type oid.  The array is static, so the comparison
+     * function info and the arguments are (re)filled on every call.
+     */
+    static ScanKeyData opKey[3] = {
+        { 0, Anum_pg_operator_oprname,  NameEqualRegProcedure },
+        { 0, Anum_pg_operator_oprleft,  ObjectIdEqualRegProcedure },
+        { 0, Anum_pg_operator_oprright, ObjectIdEqualRegProcedure },
+    };
+    HeapScanDesc    scan;
+    HeapTuple       found;
+    Oid             result;
+
+    /* resolve the comparison routine for each key */
+    fmgr_info(NameEqualRegProcedure,
+              &opKey[0].sk_func, &opKey[0].sk_nargs);
+    fmgr_info(ObjectIdEqualRegProcedure,
+              &opKey[1].sk_func, &opKey[1].sk_nargs);
+    fmgr_info(ObjectIdEqualRegProcedure,
+              &opKey[2].sk_func, &opKey[2].sk_nargs);
+
+    /* plug in the values we are searching for */
+    opKey[0].sk_argument = PointerGetDatum(operatorName);
+    opKey[1].sk_argument = ObjectIdGetDatum(leftObjectId);
+    opKey[2].sk_argument = ObjectIdGetDatum(rightObjectId);
+
+    /* scan the already-open relation and take the first tuple returned */
+    scan = heap_beginscan(pg_operator_desc, 0, SelfTimeQual, 3, opKey);
+    found = heap_getnext(scan, 0, (Buffer *) 0);
+
+    /* the tuple's oid is read before the scan is shut down */
+    if (HeapTupleIsValid(found))
+        result = found->t_oid;
+    else
+        result = InvalidOid;
+
+    heap_endscan(scan);
+
+    return result;
+}
+
+/* ----------------------------------------------------------------
+ *     OperatorGet
+ *
+ * finds the operator associated with the specified name
+ * and left and right type names.
+ * ----------------------------------------------------------------
+ */
+static Oid
+OperatorGet(char *operatorName,
+            char *leftTypeName,
+            char *rightTypeName)
+{
+    /*
+     * Resolves the (optional) operand type names to type oids, then
+     * looks the operator up in pg_operator.  Returns whatever
+     * OperatorGetWithOpenRelation returns (InvalidOid when no tuple
+     * matches).  Unknown or undefined types are reported via elog(WARN).
+     */
+    Relation    rel;
+    Oid         opOid;
+    Oid         leftOid = InvalidOid;
+    Oid         rightOid = InvalidOid;
+    bool        leftOk = false;
+    bool        rightOk = false;
+    bool        haveLeft;
+    bool        haveRight;
+
+    /* types must be defined before the operators that use them */
+    if (leftTypeName) {
+        leftOid = TypeGet(leftTypeName, &leftOk);
+
+        if (!(OidIsValid(leftOid) && leftOk))
+            elog(WARN, "OperatorGet: left type '%s' nonexistent",leftTypeName);
+    }
+
+    if (rightTypeName) {
+        rightOid = TypeGet(rightTypeName, &rightOk);
+
+        if (!(OidIsValid(rightOid) && rightOk))
+            elog(WARN, "OperatorGet: right type '%s' nonexistent",
+                 rightTypeName);
+    }
+
+    /* at least one operand type has to be usable */
+    haveLeft = OidIsValid(leftOid) && leftOk;
+    haveRight = OidIsValid(rightOid) && rightOk;
+    if (!haveLeft && !haveRight)
+        elog(WARN, "OperatorGet: no argument types??");
+
+    /* open pg_operator, search it, close it again */
+    rel = heap_openr(OperatorRelationName);
+    opOid = OperatorGetWithOpenRelation(rel,
+                                        operatorName,
+                                        leftOid,
+                                        rightOid);
+    heap_close(rel);
+
+    return opOid;
+}
+
+/* ----------------------------------------------------------------
+ *     OperatorShellMakeWithOpenRelation
+ *
+ * ----------------------------------------------------------------
+ */
+static Oid
+OperatorShellMakeWithOpenRelation(Relation pg_operator_desc,
+                                  char *operatorName,
+                                  Oid leftObjectId,
+                                  Oid rightObjectId)
+{
+    /*
+     * Inserts a placeholder ("shell") tuple into the already-open
+     * pg_operator relation.  Only the operator name and the two operand
+     * type oids carry real information; every other column gets a zero
+     * or InvalidOid filler.  Returns the oid of the inserted tuple.
+     */
+    int         n;
+    int         k;
+    HeapTuple   shell;
+    Datum       values[ Natts_pg_operator ];
+    char        nulls[ Natts_pg_operator ];
+    Oid         shellOid;
+    TupleDesc   desc;
+
+    /* start from an all-blank nulls[] and all-zero values[] */
+    for (n = 0; n < Natts_pg_operator; n++) {
+        nulls[n] = ' ';
+        values[n] = (Datum)NULL;    /* redundant, but safe */
+    }
+
+    /* fill the columns positionally, in catalog order */
+    n = 0;
+    values[n++] = PointerGetDatum(operatorName);    /* operator name */
+    values[n++] = ObjectIdGetDatum(InvalidOid);
+    values[n++] = (Datum) (uint16) 0;
+
+    values[n++] = (Datum)'b';   /* fill oprkind with a bogus value */
+
+    values[n++] = (Datum) (bool) 0;
+    values[n++] = (Datum) (bool) 0;
+    values[n++] = ObjectIdGetDatum(leftObjectId);   /* <-- left oid */
+    values[n++] = ObjectIdGetDatum(rightObjectId);  /* <-- right oid */
+
+    /* the eight remaining columns are all left as InvalidOid */
+    for (k = 0; k < 8; k++)
+        values[n++] = ObjectIdGetDatum(InvalidOid);
+
+    /* build the tuple from the relation's own descriptor and insert it */
+    desc = pg_operator_desc->rd_att;
+    shell = heap_formtuple(desc, values, nulls);
+
+    heap_insert(pg_operator_desc, shell);
+    shellOid = shell->t_oid;
+
+    /* the caller owns the relation; we only free our tuple */
+    pfree(shell);
+
+    return shellOid;
+}
+
+/* ----------------------------------------------------------------
+ *     OperatorShellMake
+ *
+ *     Specify operator name and left and right type names,
+ * fill an operator struct with this info and NULL's,
+ * call heap_insert and return the Oid
+ * to the caller.
+ * ----------------------------------------------------------------
+ */
+static Oid
+OperatorShellMake(char *operatorName,
+                  char *leftTypeName,
+                  char *rightTypeName)
+{
+    /*
+     * Name-based front end for OperatorShellMakeWithOpenRelation:
+     * resolves the operand type names, opens pg_operator, inserts the
+     * shell tuple and returns its oid.
+     */
+    Relation    rel;
+    Oid         shellOid;
+    Oid         leftOid = InvalidOid;
+    Oid         rightOid = InvalidOid;
+    bool        leftOk = false;
+    bool        rightOk = false;
+    bool        usableLeft;
+    bool        usableRight;
+
+    /* either type name may be NULL; look up the ones we were given */
+    if (leftTypeName)
+        leftOid = TypeGet(leftTypeName, &leftOk);
+
+    if (rightTypeName)
+        rightOid = TypeGet(rightTypeName, &rightOk);
+
+    /* at least one operand type has to be valid and defined */
+    usableLeft = OidIsValid(leftOid) && leftOk;
+    usableRight = OidIsValid(rightOid) && rightOk;
+    if (!usableLeft && !usableRight)
+        elog(WARN, "OperatorShellMake: no valid argument types??");
+
+    /* open pg_operator, add the shell tuple, close pg_operator */
+    rel = heap_openr(OperatorRelationName);
+    shellOid = OperatorShellMakeWithOpenRelation(rel,
+                                                 operatorName,
+                                                 leftOid,
+                                                 rightOid);
+    heap_close(rel);
+
+    return shellOid;
+}
+
+/* --------------------------------
+ * OperatorDef
+ *
+ * This routine gets complicated because it allows the user to
+ * specify operators that do not exist.  For example, if operator
+ * "op" is being defined, the negator operator "negop" and the
+ * commutator "commop" can also be defined without specifying
+ * any information other than their names.  Since in order to
+ * add "op" to the PG_OPERATOR catalog, all the Oid's for these
+ * operators must be placed in the fields of "op", a forward
+ * declaration is done on the commutator and negator operators.
+ * This is called creating a shell, and its main effect is to
+ * create a tuple in the PG_OPERATOR catalog with minimal
+ * information about the operator (just its name and types).
+ * Forward declaration is used only for this purpose, it is
+ * not available to the user as it is for type definition.
+ *
+ * Algorithm:
+ * 
+ * check if operator already defined 
+ *    if so issue error if not definedOk, this is a duplicate
+ *    but if definedOk, save the Oid -- filling in a shell
+ * get the attribute types from relation descriptor for pg_operator
+ * assign values to the fields of the operator:
+ *   operatorName
+ *   owner id (simply the user id of the caller)
+ *   precedence
+ *   operator "kind" either "b" for binary or "l" for left unary
+ *   isLeftAssociative boolean
+ *   canHash boolean
+ *   leftTypeObjectId -- type must already be defined
+ *   rightTypeObjectId -- this is optional, enter ObjectId=0 if none specified
+ *   resultType -- defer this, since it must be determined from
+ *                 the pg_procedure catalog
+ *   commutatorObjectId -- if this is NULL, enter ObjectId=0
+ *                    else if this already exists, enter its ObjectId
+ *                    else if this does not yet exist, and is not
+ *                      the same as the main operatorName, then create
+ *                      a shell and enter the new ObjectId
+ *                    else if this does not exist but IS the same
+ *                      name as the main operator, set the ObjectId=0.
+ *                      Later OperatorCreate will make another call
+ *                      to OperatorDef which will cause this field
+ *                      to be filled in (because even though the names
+ *                      will be switched, they are the same name and
+ *                      at this point this ObjectId will then be defined)
+ *   negatorObjectId   -- same as for commutatorObjectId
+ *   leftSortObjectId  -- same as for commutatorObjectId
+ *   rightSortObjectId -- same as for commutatorObjectId
+ *   operatorProcedure -- must access the pg_procedure catalog to get the
+ *        ObjectId of the procedure that actually does the operator
+ *        actions this is required.  Do an amgetattr to find out the
+ *                 return type of the procedure 
+ *   restrictionProcedure -- must access the pg_procedure catalog to get
+ *                 the ObjectId but this is optional
+ *   joinProcedure -- same as restrictionProcedure
+ * now either insert or replace the operator into the pg_operator catalog
+ * if the operator shell is being filled in
+ *   access the catalog in order to get a valid buffer
+ *   create a tuple using ModifyHeapTuple
+ *   get the t_ctid from the modified tuple and call RelationReplaceHeapTuple
+ * else if a new operator is being created
+ *   create a tuple using heap_formtuple
+ *   call heap_insert
+ * --------------------------------
+ *     "X" indicates an optional argument (i.e. one that can be NULL)
+ *     operatorName;       -- operator name
+ * definedOK;      -- operator can already have an oid?
+ *     leftTypeName;       -- X left type name
+ *     rightTypeName;      -- X right type name
+ *     procedureName;      -- name of the procedure for operator code
+ *     precedence;         -- operator precedence 
+ *     isLeftAssociative;  -- operator is left associative?
+ *     commutatorName;     -- X commutator operator name
+ *     negatorName;        -- X negator operator name
+ *     restrictionName;    -- X restriction sel. procedure name
+ *     joinName;       -- X join sel. procedure name
+ *     canHash;        -- possible hash operator?
+ *     leftSortName;       -- X left sort operator
+ *     rightSortName;      -- X right sort operator
+ */
+static void
+OperatorDef(char *operatorName,
+            int definedOK,
+            char *leftTypeName,
+            char *rightTypeName,
+            char *procedureName,
+            uint16 precedence,
+            bool isLeftAssociative,
+            char *commutatorName,
+            char *negatorName,
+            char *restrictionName,
+            char *joinName,
+            bool canHash,
+            char *leftSortName,
+            char *rightSortName)
+{
+    /*
+     * Builds the pg_operator tuple for operatorName.  If a tuple with
+     * this name and operand types already exists, it is an error unless
+     * definedOK is set, in which case that tuple is overwritten in place
+     * (every column is marked for replacement).  Otherwise a new tuple
+     * is inserted.  Commutator, negator and sort operators that are
+     * named but not yet in the catalog get shell tuples so that their
+     * oids can be stored.  When definedOK is false, OperatorUpd is
+     * called at the end with the commutator and negator oids that were
+     * found by lookup (not with oids of freshly made shells).
+     *
+     * Failures are reported through elog(WARN, ...) or func_error().
+     */
+    register int    i, j;
+    Relation        pg_operator_desc;
+
+    HeapScanDesc    pg_operator_scan;
+    HeapTuple       tup;
+    Buffer          buffer;
+    ItemPointerData itemPointerData;
+    char            nulls[ Natts_pg_operator ];
+    char            replaces[ Natts_pg_operator ];
+    Datum           values[ Natts_pg_operator ];
+    Oid             other_oid;
+    Oid             operatorObjectId;
+    Oid             leftTypeId = InvalidOid;
+    Oid             rightTypeId = InvalidOid;
+    Oid             commutatorId = InvalidOid;
+    Oid             negatorId = InvalidOid;
+    bool            leftDefined = false;
+    bool            rightDefined = false;
+    char           *name[4];
+    Oid             typeId[8];
+    int             nargs;
+    TupleDesc       tupDesc;
+
+    /* scan key used to re-find an existing tuple: name + operand types */
+    static ScanKeyData opKey[3] = {
+        { 0, Anum_pg_operator_oprname, NameEqualRegProcedure },
+        { 0, Anum_pg_operator_oprleft, ObjectIdEqualRegProcedure },
+        { 0, Anum_pg_operator_oprright, ObjectIdEqualRegProcedure },
+    };
+
+    fmgr_info(NameEqualRegProcedure,
+              &opKey[0].sk_func, &opKey[0].sk_nargs);
+    fmgr_info(ObjectIdEqualRegProcedure,
+              &opKey[1].sk_func, &opKey[1].sk_nargs);
+    fmgr_info(ObjectIdEqualRegProcedure,
+              &opKey[2].sk_func, &opKey[2].sk_nargs);
+
+    /* ----------------
+     * is there already a tuple for this operator?
+     * ----------------
+     */
+    operatorObjectId = OperatorGet(operatorName,
+                                   leftTypeName,
+                                   rightTypeName);
+
+    if (OidIsValid(operatorObjectId) && !definedOK)
+        elog(WARN, "OperatorDef: operator \"%-.*s\" already defined",
+             NAMEDATALEN, operatorName);
+
+    /* ----------------
+     * resolve the operand types; at least one must be valid and defined
+     * ----------------
+     */
+    if (leftTypeName)
+        leftTypeId = TypeGet(leftTypeName, &leftDefined);
+
+    if (rightTypeName)
+        rightTypeId = TypeGet(rightTypeName, &rightDefined);
+
+    if (!((OidIsValid(leftTypeId) && leftDefined) ||
+          (OidIsValid(rightTypeId) && rightDefined)))
+        elog(WARN, "OperatorDef: no argument types??");
+
+    /* every column starts out zeroed, non-null and marked for replacement */
+    for (i = 0; i < Natts_pg_operator; ++i) {
+        values[i] = (Datum)NULL;
+        replaces[i] = 'r';
+        nulls[i] = ' ';
+    }
+
+    /* ----------------
+     * Look up registered procedures -- find the return type
+     * of procedureName to place in "result" field.
+     * Do this before shells are created so we don't
+     * have to worry about deleting them later.
+     * ----------------
+     */
+    memset(typeId, 0, 8 * sizeof(Oid));
+    if (!leftTypeName) {
+        typeId[0] = rightTypeId;
+        nargs = 1;
+    }
+    else if (!rightTypeName) {
+        typeId[0] = leftTypeId;
+        nargs = 1;
+    }
+    else {
+        typeId[0] = leftTypeId;
+        typeId[1] = rightTypeId;
+        nargs = 2;
+    }
+    tup = SearchSysCacheTuple(PRONAME,
+                              PointerGetDatum(procedureName),
+                              Int32GetDatum(nargs),
+                              PointerGetDatum(typeId),
+                              0);
+
+    if (!PointerIsValid(tup))
+        func_error("OperatorDef", procedureName, nargs, (int*)typeId);
+
+    values[ Anum_pg_operator_oprcode-1 ] = ObjectIdGetDatum(tup->t_oid);
+    values[ Anum_pg_operator_oprresult-1 ] =
+        ObjectIdGetDatum(((Form_pg_proc)
+                          GETSTRUCT(tup))->prorettype);
+
+    /* ----------------
+     * find restriction
+     * ----------------
+     */
+    if (restrictionName) {      /* optional */
+        memset(typeId, 0, 8 * sizeof(Oid));
+        typeId[0] = OIDOID;     /* operator OID */
+        typeId[1] = OIDOID;     /* relation OID */
+        typeId[2] = INT2OID;    /* attribute number */
+        typeId[3] = 0;          /* value - can be any type  */
+        typeId[4] = INT4OID;    /* flags - left or right selectivity */
+        /* typeId is an array, so it is passed as a pointer datum */
+        tup = SearchSysCacheTuple(PRONAME,
+                                  PointerGetDatum(restrictionName),
+                                  Int32GetDatum(5),
+                                  PointerGetDatum(typeId),
+                                  0);
+        if (!HeapTupleIsValid(tup))
+            func_error("OperatorDef", restrictionName, 5, (int*)typeId);
+
+        values[ Anum_pg_operator_oprrest-1 ] = ObjectIdGetDatum(tup->t_oid);
+    } else
+        values[ Anum_pg_operator_oprrest-1 ] = ObjectIdGetDatum(InvalidOid);
+
+    /* ----------------
+     * find join - only valid for binary operators
+     * ----------------
+     */
+    if (joinName) {             /* optional */
+        memset(typeId, 0, 8 * sizeof(Oid));
+        typeId[0] = OIDOID;     /* operator OID */
+        typeId[1] = OIDOID;     /* relation OID 1 */
+        typeId[2] = INT2OID;    /* attribute number 1 */
+        typeId[3] = OIDOID;     /* relation OID 2 */
+        typeId[4] = INT2OID;    /* attribute number 2 */
+
+        /* typeId is an array, so it is passed as a pointer datum */
+        tup = SearchSysCacheTuple(PRONAME,
+                                  PointerGetDatum(joinName),
+                                  Int32GetDatum(5),
+                                  PointerGetDatum(typeId),
+                                  0);
+        if (!HeapTupleIsValid(tup))
+            func_error("OperatorDef", joinName, 5, (int*)typeId);
+
+        values[Anum_pg_operator_oprjoin-1] = ObjectIdGetDatum(tup->t_oid);
+    } else
+        values[Anum_pg_operator_oprjoin-1] = ObjectIdGetDatum(InvalidOid);
+
+    /* ----------------
+     * set up values in the operator tuple
+     * ----------------
+     */
+    i = 0;
+    values[i++] = PointerGetDatum(operatorName);
+    values[i++] = Int32GetDatum(GetUserId());
+    values[i++] = UInt16GetDatum(precedence);
+    /* kind: 'b' if both operand types given, 'r' if only left, 'l' if only right */
+    values[i++] = leftTypeName ? (rightTypeName ? 'b' : 'r') : 'l';
+    values[i++] = Int8GetDatum(isLeftAssociative);
+    values[i++] = Int8GetDatum(canHash);
+    values[i++] = ObjectIdGetDatum(leftTypeId);
+    values[i++] = ObjectIdGetDatum(rightTypeId);
+
+    ++i;    /* Skip the result-type column (oprresult), it was set above */
+
+    /*
+     * Set up the other operators.  If they do not currently exist,
+     * set up shells in order to get ObjectId's and call OperatorDef
+     * again later to fill in the shells.
+     */
+    name[0] = commutatorName;
+    name[1] = negatorName;
+    name[2] = leftSortName;
+    name[3] = rightSortName;
+
+    for (j = 0; j < 4; ++j) {
+        if (name[j]) {
+
+            /* for the commutator, switch order of arguments */
+            if (j == 0) {
+                other_oid = OperatorGet(name[j], rightTypeName,leftTypeName);
+                commutatorId = other_oid;
+            } else {
+                other_oid = OperatorGet(name[j], leftTypeName,rightTypeName);
+                if (j == 1)
+                    negatorId = other_oid;
+            }
+
+            if (OidIsValid(other_oid))  /* already in catalogs */
+                values[i++] = ObjectIdGetDatum(other_oid);
+            else if (strcmp(operatorName, name[j]) != 0) {
+                /* not in catalogs, different from operator */
+
+                /* for the commutator, switch order of arguments */
+                if (j == 0) {
+                    other_oid = OperatorShellMake(name[j],
+                                                  rightTypeName,
+                                                  leftTypeName);
+                } else {
+                    other_oid = OperatorShellMake(name[j],
+                                                  leftTypeName,
+                                                  rightTypeName);
+                }
+
+                if (!OidIsValid(other_oid))
+                    elog(WARN,
+                         "OperatorDef: can't create operator '%s'",
+                         name[j]);
+                values[i++] = ObjectIdGetDatum(other_oid);
+
+            } else  /* not in catalogs, same as operator ??? */
+                values[i++] = ObjectIdGetDatum(InvalidOid);
+
+        } else  /* new operator is optional */
+            values[i++] = ObjectIdGetDatum(InvalidOid);
+    }
+
+    /* last three fields were filled in first */
+
+    /*
+     * If we are adding to an operator shell, get its t_ctid and a
+     * buffer.
+     */
+    pg_operator_desc = heap_openr(OperatorRelationName);
+
+    if (operatorObjectId) {
+        /* existing tuple: find it again and overwrite it in place */
+        opKey[0].sk_argument = PointerGetDatum(operatorName);
+        opKey[1].sk_argument = ObjectIdGetDatum(leftTypeId);
+        opKey[2].sk_argument = ObjectIdGetDatum(rightTypeId);
+
+        pg_operator_scan = heap_beginscan(pg_operator_desc,
+                                          0,
+                                          SelfTimeQual,
+                                          3,
+                                          opKey);
+
+        tup = heap_getnext(pg_operator_scan, 0, &buffer);
+        if (HeapTupleIsValid(tup)) {
+            tup = heap_modifytuple(tup,
+                                   buffer,
+                                   pg_operator_desc,
+                                   values,
+                                   nulls,
+                                   replaces);
+
+            ItemPointerCopy(&tup->t_ctid, &itemPointerData);
+            setheapoverride(true);
+            (void) heap_replace(pg_operator_desc, &itemPointerData, tup);
+            setheapoverride(false);
+        } else
+            /* report the operator we were looking for */
+            elog(WARN, "OperatorDef: no operator %u", operatorObjectId);
+
+        heap_endscan(pg_operator_scan);
+
+    } else {
+        /* brand new operator: form a tuple and insert it */
+        tupDesc = pg_operator_desc->rd_att;
+        tup = heap_formtuple(tupDesc, values, nulls);
+
+        heap_insert(pg_operator_desc, tup);
+        operatorObjectId = tup->t_oid;
+    }
+
+    heap_close(pg_operator_desc);
+
+    /*
+     *  It's possible that we're creating a skeleton operator here for
+     *  the commute or negate attributes of a real operator.  If we are,
+     *  then we're done.  If not, we may need to update the negator and
+     *  commutator for this attribute.  The reason for this is that the
+     *  user may want to create two operators (say < and >=).  When he
+     *  defines <, if he uses >= as the negator or commutator, he won't
+     *  be able to insert it later, since (for some reason) define operator
+     *  defines it for him.  So what he does is to define > without a
+     *  negator or commutator.  Then he defines >= with < as the negator
+     *  and commutator.  As a side effect, this will update the > tuple
+     *  if it has no commutator or negator defined.
+     *
+     *  Alstublieft, Tom Vijlbrief.
+     */
+    if (!definedOK)
+        OperatorUpd(operatorObjectId, commutatorId, negatorId);
+}
+
+/* ----------------------------------------------------------------
+ * OperatorUpd
+ *
+ *  For a given operator, look up its negator and commutator operators.
+ *  If they are defined, but their negator and commutator operators
+ *  (respectively) are not, then use the new operator for neg and comm.
+ *  This solves a problem for users who need to insert two new operators
+ *  which are the negator or commutator of each other.
+ * ---------------------------------------------------------------- 
+ */
+static void
+OperatorUpd(Oid baseId, Oid commId, Oid negId)
+{
+    /*
+     * Looks up the tuples whose oids are commId and negId and, where
+     * their oprcom / oprnegate column is still invalid, stores baseId
+     * there.  When commId == negId a single tuple is examined and both
+     * columns are handled in one update; otherwise two separate scans
+     * are done, one per oid.
+     */
+    register       i;
+    Relation       pg_operator_desc;
+    HeapScanDesc   pg_operator_scan;
+    HeapTuple      tup;
+    Buffer         buffer;
+    ItemPointerData    itemPointerData;
+    char       nulls[ Natts_pg_operator ];
+    char       replaces[ Natts_pg_operator ];
+    Datum               values[ Natts_pg_operator ];
+    
+    /* single-key scan on the tuple's own oid */
+    static ScanKeyData opKey[1] = {
+   { 0, ObjectIdAttributeNumber, ObjectIdEqualRegProcedure },
+    };
+    
+    fmgr_info(ObjectIdEqualRegProcedure,
+         &opKey[0].sk_func, &opKey[0].sk_nargs);
+    
+    /*
+     * Start with no column flagged: replaces[] is all blanks here and
+     * individual entries are set to 'r' below just before an update.
+     * (presumably heap_modifytuple only overwrites columns flagged 'r'
+     * -- TODO confirm against heap_modifytuple)
+     */
+    for (i = 0; i < Natts_pg_operator; ++i) {
+   values[i] =  (Datum)NULL;
+   replaces[i] = ' ';
+   nulls[i] = ' ';
+    }
+    
+    pg_operator_desc = heap_openr(OperatorRelationName);
+    
+    /* check and update the commutator, if necessary */
+    opKey[0].sk_argument = ObjectIdGetDatum(commId);
+    
+    pg_operator_scan = heap_beginscan(pg_operator_desc,
+                     0,
+                     SelfTimeQual,
+                     1,
+                     opKey);
+    
+    tup = heap_getnext(pg_operator_scan, 0, &buffer);
+    
+    /* if the commutator and negator are the same operator, do one update */
+    if (commId == negId) {
+   if (HeapTupleIsValid(tup)) {
+       OperatorTupleForm t;
+       
+       t = (OperatorTupleForm) GETSTRUCT(tup);
+       /* only touch the tuple if at least one of the two links is unset */
+       if (!OidIsValid(t->oprcom)
+       || !OidIsValid(t->oprnegate)) {
+       
+       if (!OidIsValid(t->oprnegate)) {
+           values[Anum_pg_operator_oprnegate - 1] =
+           ObjectIdGetDatum(baseId);
+           replaces[ Anum_pg_operator_oprnegate - 1 ] = 'r';
+       }
+       
+       if (!OidIsValid(t->oprcom)) {
+           values[Anum_pg_operator_oprcom - 1] =
+           ObjectIdGetDatum(baseId);
+           replaces[ Anum_pg_operator_oprcom - 1 ] = 'r';
+       }
+       
+       tup = heap_modifytuple(tup,
+                      buffer,
+                      pg_operator_desc,
+                      values,
+                      nulls,
+                      replaces);
+       
+       ItemPointerCopy(&tup->t_ctid, &itemPointerData);
+       
+       setheapoverride(true);
+       (void) heap_replace(pg_operator_desc, &itemPointerData, tup);
+       setheapoverride(false);
+
+       }
+   }
+   /* single-update path ends here: close scan and relation, then return */
+   heap_endscan(pg_operator_scan);
+   
+   heap_close(pg_operator_desc);
+   
+   /* release the buffer properly */
+   if (BufferIsValid(buffer))
+       ReleaseBuffer(buffer);
+
+   return;
+    }
+    
+    /* if commutator and negator are different, do two updates */
+    if (HeapTupleIsValid(tup) &&
+   !(OidIsValid(((OperatorTupleForm) GETSTRUCT(tup))->oprcom))) {
+   values[ Anum_pg_operator_oprcom - 1] = ObjectIdGetDatum(baseId);
+   replaces[ Anum_pg_operator_oprcom - 1] = 'r';
+   tup = heap_modifytuple(tup,
+                  buffer,
+                  pg_operator_desc,
+                  values,
+                  nulls,
+                  replaces);
+   
+   ItemPointerCopy(&tup->t_ctid, &itemPointerData);
+   setheapoverride(true);
+   (void) heap_replace(pg_operator_desc, &itemPointerData, tup);
+   setheapoverride(false);
+   
+   /* reset the oprcom slot so the negator update below does not reuse it */
+   values[ Anum_pg_operator_oprcom - 1 ] = (Datum)NULL;
+   replaces[ Anum_pg_operator_oprcom - 1 ] = ' ';
+
+   /* release the buffer properly */
+   if (BufferIsValid(buffer))
+       ReleaseBuffer(buffer);
+
+    }
+    
+    /*
+     * NOTE(review): the commutator scan started above is not closed with
+     * heap_endscan before pg_operator_scan is reassigned below, and the
+     * buffer from that scan is released only inside the branch above --
+     * looks like a possible scan/buffer leak; confirm against
+     * heap_beginscan/heap_getnext semantics.
+     */
+    /* check and update the negator, if necessary */
+    opKey[0].sk_argument = ObjectIdGetDatum(negId);
+    
+    pg_operator_scan = heap_beginscan(pg_operator_desc,
+                     0,
+                     SelfTimeQual,
+                     1,
+                     opKey);
+    
+    tup = heap_getnext(pg_operator_scan, 0, &buffer);
+    if (HeapTupleIsValid(tup) &&
+   !(OidIsValid(((OperatorTupleForm) GETSTRUCT(tup))->oprnegate))) {
+   values[Anum_pg_operator_oprnegate-1] = ObjectIdGetDatum(baseId);
+   replaces[ Anum_pg_operator_oprnegate - 1 ] = 'r';
+   tup = heap_modifytuple(tup,
+                  buffer,
+                  pg_operator_desc,
+                  values,
+                  nulls,
+                  replaces);
+   
+   ItemPointerCopy(&tup->t_ctid, &itemPointerData);
+   
+   setheapoverride(true);
+   (void) heap_replace(pg_operator_desc, &itemPointerData, tup);
+   setheapoverride(false);
+    }
+
+    /* release the buffer properly */
+    if (BufferIsValid(buffer))
+   ReleaseBuffer(buffer);
+
+    /* closes the negator scan only (see review note above) */
+    heap_endscan(pg_operator_scan);
+    
+    heap_close(pg_operator_desc);
+}
+
+
+/* ----------------------------------------------------------------
+ * OperatorCreate
+ *
+ * Algorithm:
+ *
+ *  Since the commutator, negator, leftsortoperator, and rightsortoperator
+ *  can be defined implicitly through OperatorCreate, must check before
+ *  the main operator is added to see if they already exist.  If they
+ *  do not already exist, OperatorDef makes a "shell" for each undefined
+ *  one, and then OperatorCreate must call OperatorDef again to fill in
+ *  each shell.  All this is necessary in order to get the right ObjectId's 
+ *  filled into the right fields.
+ *
+ *  The "definedOk" flag indicates that OperatorDef can be called on
+ *  the operator even though it already has an entry in the PG_OPERATOR
+ *  relation.  This allows shells to be filled in.  The user cannot
+ *  forward declare operators, this is strictly an internal capability.
+ *
+ *  When the shells are filled in by subsequent calls to OperatorDef,
+ *  all the fields are the same as the definition of the original operator
+ *  except that the target operator name and the original operatorName
+ *  are switched.  In the case of commutator and negator, special flags
+ *  are set to indicate their status, telling the executor(?) that
+ *  the operands are to be switched, or the outcome of the procedure
+ *  negated.
+ * 
+ * ************************* NOTE NOTE NOTE ******************************
+ *  
+ *  If the execution of this utility is interrupted, the pg_operator
+ *  catalog may be left in an inconsistent state.  Similarly, if
+ *  something is removed from the pg_operator, pg_type, or pg_procedure
+ *  catalog while this is executing, the results may be inconsistent.
+ * ----------------------------------------------------------------
+ *
+ * "X" indicates an optional argument (i.e. one that can be NULL) 
+ *     operatorName;       -- operator name 
+ *     leftTypeName;       -- X left type name 
+ *     rightTypeName;      -- X right type name 
+ *     procedureName;      -- procedure for operator 
+ *     precedence;         -- operator precedence 
+ *     isLeftAssociative;  -- operator is left associative 
+ *     commutatorName;     -- X commutator operator name 
+ *     negatorName;        -- X negator operator name 
+ *     restrictionName;    -- X restriction sel. procedure 
+ *     joinName;       -- X join sel. procedure name 
+ *     canHash;        -- operator hashes 
+ *     leftSortName;       -- X left sort operator 
+ *     rightSortName;      -- X right sort operator 
+ * 
+ */
+void
+OperatorCreate(char *operatorName,
+          char *leftTypeName,
+          char *rightTypeName,
+          char *procedureName,
+          uint16 precedence,
+          bool isLeftAssociative,
+          char *commutatorName,
+          char *negatorName,
+          char *restrictionName,
+          char *joinName,
+          bool canHash,
+          char *leftSortName,
+          char *rightSortName)
+{
+    /*
+     * NOTE: each of these is assigned only when the corresponding
+     * operator name argument is non-NULL.  Every later use must
+     * therefore test the name *before* looking at the oid.
+     */
+    Oid    commObjectId, negObjectId;
+    Oid    leftSortObjectId, rightSortObjectId;
+    int        definedOK;
+    
+    if (!leftTypeName && !rightTypeName)
+   elog(WARN, "OperatorCreate : at least one of leftarg or rightarg must be defined");
+    
+    /* ----------------
+     * get the oid's of the operator's associated operators, if possible.
+     *
+     * The commutator and the right sort operator are looked up with the
+     * operand types swapped; the negator and the left sort operator use
+     * the types in the order given.
+     * ----------------
+     */
+    if (commutatorName)
+   commObjectId = OperatorGet(commutatorName,  /* commute type order */
+                  rightTypeName,
+                  leftTypeName);
+    
+    if (negatorName)
+   negObjectId  = OperatorGet(negatorName,
+                  leftTypeName,
+                  rightTypeName);
+    
+    if (leftSortName)
+   leftSortObjectId = OperatorGet(leftSortName,
+                      leftTypeName,
+                      rightTypeName);
+    
+    if (rightSortName)
+   rightSortObjectId = OperatorGet(rightSortName,
+                   rightTypeName,
+                   leftTypeName);
+    
+    /* ----------------
+     *  Use OperatorDef() to define the specified operator and
+     *  also create shells for the operator's associated operators
+     *  if they don't already exist.
+     *
+     * This operator should not be defined yet.
+     * ----------------
+     */
+    definedOK = 0;
+    
+    OperatorDef(operatorName,
+       definedOK,
+       leftTypeName,
+       rightTypeName,
+       procedureName,
+       precedence,
+       isLeftAssociative,
+       commutatorName,
+       negatorName,
+       restrictionName,
+       joinName,
+       canHash,
+       leftSortName,
+       rightSortName);
+    
+    /* ----------------
+     * Now fill in information in the operator's associated
+     *  operators.
+     *
+     *  These operators should be defined or have shells defined.
+     *
+     *  Each call below is made only when the associated operator was
+     *  named by the caller and the lookup above did not yield a valid
+     *  oid.  The name is tested first so that an oid variable that
+     *  was never assigned is never read.
+     * ----------------
+     */
+    definedOK = 1; 
+    
+    if (commutatorName && !OidIsValid(commObjectId))
+   OperatorDef(commutatorName,
+           definedOK,   
+           leftTypeName,   /* should eventually */
+           rightTypeName,      /* commute order */  
+           procedureName,
+           precedence,
+           isLeftAssociative,
+           operatorName,   /* commutator */
+           negatorName,
+           restrictionName,
+           joinName,
+           canHash,
+           rightSortName,
+           leftSortName);
+    
+    if (negatorName && !OidIsValid(negObjectId))
+   OperatorDef(negatorName,
+           definedOK,
+           leftTypeName,
+           rightTypeName,
+           procedureName,
+           precedence,
+           isLeftAssociative,
+           commutatorName,
+           operatorName,   /* negator */
+           restrictionName,
+           joinName,
+           canHash,
+           leftSortName,
+           rightSortName);
+    
+    if (leftSortName && !OidIsValid(leftSortObjectId))
+   OperatorDef(leftSortName,
+           definedOK,
+           leftTypeName,
+           rightTypeName,
+           procedureName,
+           precedence,
+           isLeftAssociative,
+           commutatorName,
+           negatorName,
+           restrictionName,
+           joinName,
+           canHash,
+           operatorName,   /* left sort */
+           rightSortName);
+    
+    if (rightSortName && !OidIsValid(rightSortObjectId))
+   OperatorDef(rightSortName,
+           definedOK,
+           leftTypeName,
+           rightTypeName,
+           procedureName,
+           precedence,
+           isLeftAssociative,
+           commutatorName,
+           negatorName,
+           restrictionName,
+           joinName,
+           canHash,
+           leftSortName,
+           operatorName);  /* right sort */
+}
diff --git a/src/backend/catalog/pg_operator.h b/src/backend/catalog/pg_operator.h

new file mode 100644 (file)

index 0000000..9f9533b
--- /dev/null
+++ b/src/backend/catalog/pg_operator.h
@@ -0,0 +1,480 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_operator.h--
+ *    definition of the system "operator" relation (pg_operator)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_operator.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *    XXX do NOT break up DATA() statements into multiple lines!
+ *        the scripts are not as smart as you might think...
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_OPERATOR_H
+#define PG_OPERATOR_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_operator definition.  cpp turns this into
+ * typedef struct FormData_pg_operator
+ *
+ * The 16 fields are listed in the same order as the values in
+ * each initial-contents row further down in this file and as the
+ * Anum_pg_operator_* attribute numbers.
+ *
+ * Field notes (inferred from the initial-contents rows and from the
+ * OperatorCreate() argument list -- confirm before relying on them):
+ * oprname is the operator symbol; oprowner the owner; oprprec
+ * the precedence; oprisleft the left-associativity flag;
+ * oprcanhash the "operator hashes" flag.
+ * oprkind is 'b' in rows where both oprleft and oprright
+ * are nonzero, 'l' in rows where oprleft is 0, and 'r' in
+ * rows where oprright is 0.
+ * oprleft, oprright and oprresult hold type oids.
+ * oprcom, oprnegate, oprlsortop and oprrsortop hold the
+ * oids of other pg_operator rows (0 when there is none).
+ * oprcode, oprrest and oprjoin name the implementing
+ * procedure and the restriction / join selectivity procedures.
+ *
+ * NOTE: genbki.sh parses this block textually; keep one field
+ * per line and do not put comments inside the braces without
+ * checking the script first.
+ * ----------------
+ */
+CATALOG(pg_operator) {
+    NameData   oprname;
+    Oid    oprowner;
+    int2   oprprec;
+    char   oprkind;
+    bool   oprisleft;
+    bool   oprcanhash;
+    Oid    oprleft;
+    Oid    oprright;
+    Oid        oprresult;
+    Oid    oprcom;
+    Oid    oprnegate;
+    Oid    oprlsortop;
+    Oid    oprrsortop;
+    regproc    oprcode;
+    regproc    oprrest;
+    regproc    oprjoin;
+} FormData_pg_operator;
+
+/* ----------------
+ * OperatorTupleForm is a pointer to a FormData_pg_operator,
+ * i.e. to tuple data laid out in the format of the pg_operator
+ * relation.
+ * ----------------
+ */
+typedef FormData_pg_operator   *OperatorTupleForm;
+
+/* ----------------
+ * compiler constants for pg_operator
+ *
+ * Natts_pg_operator is the number of attributes (it must equal
+ * the number of fields in the pg_operator definition above).
+ * Each Anum_pg_operator_<field> is the 1-based attribute number
+ * of <field>, in declaration order.  Keep these in sync with
+ * the catalog definition whenever a field is added, removed or
+ * reordered.
+ * ----------------
+ */
+
+#define Natts_pg_operator      16
+#define Anum_pg_operator_oprname   1
+#define Anum_pg_operator_oprowner  2
+#define Anum_pg_operator_oprprec   3
+#define Anum_pg_operator_oprkind   4
+#define Anum_pg_operator_oprisleft 5
+#define Anum_pg_operator_oprcanhash    6
+#define Anum_pg_operator_oprleft   7
+#define Anum_pg_operator_oprright  8
+#define Anum_pg_operator_oprresult 9
+#define Anum_pg_operator_oprcom        10
+#define Anum_pg_operator_oprnegate 11
+#define Anum_pg_operator_oprlsortop    12
+#define Anum_pg_operator_oprrsortop    13
+#define Anum_pg_operator_oprcode   14
+#define Anum_pg_operator_oprrest   15
+#define Anum_pg_operator_oprjoin   16
+
+/* ----------------
+ * initial contents of pg_operator
+ * ----------------
+ */
+
+DATA(insert OID = 85 (  "<>"       PGUID 0 b t f  16  16  16  85   91  0  0 boolne neqsel neqjoinsel ));
+DATA(insert OID = 91 (  "="        PGUID 0 b t t  16  16  16  91   85  0  0 booleq eqsel eqjoinsel ));
+#define BooleanEqualOperator   91
+
+DATA(insert OID = 92 (  "="        PGUID 0 b t t  18  18  16  92 630  631 631 chareq eqsel eqjoinsel ));
+DATA(insert OID = 93 (  "="        PGUID 0 b t t  19  19  16  93  643 660 660 nameeq eqsel eqjoinsel ));
+DATA(insert OID = 94 (  "="        PGUID 0 b t t  21  21  16  94 519 95 95 int2eq eqsel eqjoinsel ));
+DATA(insert OID = 95 (  "<"        PGUID 0 b t f  21  21  16 520 524 0 0 int2lt intltsel intltjoinsel ));
+DATA(insert OID = 96 (  "="        PGUID 0 b t t  23  23  16  96 518 97 97 int4eq eqsel eqjoinsel ));
+DATA(insert OID = 97 (  "<"        PGUID 0 b t f  23  23  16 521 525 0 0 int4lt intltsel intltjoinsel ));
+DATA(insert OID = 98 (  "="        PGUID 0 b t t  25  25  16  98 531 664 664 texteq eqsel eqjoinsel ));
+DATA(insert OID = 99 (  "="        PGUID 0 b t t  20  20  16  99 644 645 645 char16eq eqsel eqjoinsel ));
+DATA(insert OID = 329 (  "="       PGUID 0 b t t  1000  1000  16  329 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 349 (  "="       PGUID 0 b t t  1001  1001  16  349 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 374 (  "="       PGUID 0 b t t  1002  1002  16  374 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 375 (  "="       PGUID 0 b t t  1003  1003  16  375 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 376 (  "="       PGUID 0 b t t  1004  1004  16  376 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 377 (  "="       PGUID 0 b t t  1005  1005  16  377 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 378 (  "="       PGUID 0 b t t  1006  1006  16  378 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 379 (  "="       PGUID 0 b t t  1007  1007  16  379 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 380 (  "="       PGUID 0 b t t  1008  1008  16  380 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 381 (  "="       PGUID 0 b t t  1009  1009  16  381 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 382 (  "="       PGUID 0 b t t  1028  1028  16  382 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 383 (  "="       PGUID 0 b t t  1010  1010  16  383 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 384 (  "="       PGUID 0 b t t  1011  1011  16  384 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 385 (  "="       PGUID 0 b t t  1012  1012  16  385 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 386 (  "="       PGUID 0 b t t  1013  1013  16  386 0  0  0 array_eq eqsel eqjoinsel ));
+/*
+DATA(insert OID = 387 (  "="       PGUID 0 b t t  1014  1014  16  387 0  0  0 array_eq eqsel eqjoinsel ));
+*/
+DATA(insert OID = 388 (  "="       PGUID 0 b t t  1015  1015  16  388 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 389 (  "="       PGUID 0 b t t  1016  1016  16  389 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 390 (  "="       PGUID 0 b t t  1017  1017  16  390 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 391 (  "="       PGUID 0 b t t  1018  1018  16  391 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 392 (  "="       PGUID 0 b t t  1019  1019  16  392 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 393 (  "="       PGUID 0 b t t  1020  1020  16  393 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 394 (  "="       PGUID 0 b t t  1021  1021  16  394 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 395 (  "="       PGUID 0 b t t  1022  1022  16  395 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 396 (  "="       PGUID 0 b t t  1023  1023  16  396 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 397 (  "="       PGUID 0 b t t  1024  1024  16  397 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 398 (  "="       PGUID 0 b t t  1025  1025  16  398 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 399 (  "="       PGUID 0 b t t  1026  1026  16  399 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 400 (  "="       PGUID 0 b t t  1027  1027  16  400 0  0  0 array_eq eqsel eqjoinsel ));
+DATA(insert OID = 401 (  "="       PGUID 0 b t t  1034  1034  16  401 0  0  0 array_eq eqsel eqjoinsel ));
+
+DATA(insert OID = 412 (  "="       PGUID 0 b t t  409  409  16  412 415 418 418 char2eq eqsel eqjoinsel ));
+DATA(insert OID = 413 (  "="       PGUID 0 b t t  410  410  16  413 416 419 419 char4eq eqsel eqjoinsel ));
+DATA(insert OID = 414 (  "="       PGUID 0 b t t  411  411  16  414 417 420 420 char8eq eqsel eqjoinsel ));
+
+DATA(insert OID = 415 (  "<>"      PGUID 0 b t f  409  409  16 415 412  0 0 char2ne neqsel neqjoinsel ));
+DATA(insert OID = 416 (  "<>"      PGUID 0 b t f  410  410  16 416 413  0 0 char4ne neqsel neqjoinsel ));
+DATA(insert OID = 417 (  "<>"      PGUID 0 b t f  411  411  16 417 414  0 0 char8ne neqsel neqjoinsel ));
+DATA(insert OID = 418 (  "<"       PGUID 0 b t f  409  409  16 460 463  0 0 char2lt intltsel intltjoinsel ));
+DATA(insert OID = 419 (  "<"       PGUID 0 b t f  410  410  16 461 464  0 0 char4lt intltsel intltjoinsel ));
+DATA(insert OID = 420 (  "<"       PGUID 0 b t f  411  411  16 462 465  0 0 char8lt intltsel intltjoinsel ));
+
+DATA(insert OID = 457 (  "<="      PGUID 0 b t f  409  409  16 463 460  0 0 char2le intltsel intltjoinsel ));
+DATA(insert OID = 458 (  "<="      PGUID 0 b t f  410  410  16 464 461  0 0 char4le intltsel intltjoinsel ));
+DATA(insert OID = 459 (  "<="      PGUID 0 b t f  411  411  16 465 462  0 0 char8le intltsel intltjoinsel ));
+DATA(insert OID = 460 (  ">"       PGUID 0 b t f  409  409  16 418 457  0 0 char2gt intltsel intltjoinsel ));
+DATA(insert OID = 461 (  ">"       PGUID 0 b t f  410  410  16 419 458  0 0 char4gt intltsel intltjoinsel ));
+DATA(insert OID = 462 (  ">"       PGUID 0 b t f  411  411  16 420 459  0 0 char8gt intltsel intltjoinsel ));
+DATA(insert OID = 463 (  ">="      PGUID 0 b t f  409  409  16 457 418  0 0 char2ge intltsel intltjoinsel ));
+DATA(insert OID = 464 (  ">="      PGUID 0 b t f  410  410  16 458 419  0 0 char4ge intltsel intltjoinsel ));
+DATA(insert OID = 465 (  ">="      PGUID 0 b t f  411  411  16 459 420  0 0 char8ge intltsel intltjoinsel ));
+
+DATA(insert OID = 485 (  "<<"      PGUID 0 b t f 604 604  16   0   0   0   0 poly_left intltsel intltjoinsel ));
+DATA(insert OID = 486 (  "&<"      PGUID 0 b t f 604 604  16   0   0   0   0 poly_overleft intltsel intltjoinsel ));
+DATA(insert OID = 487 (  "&>"      PGUID 0 b t f 604 604  16   0   0   0   0 poly_overright intltsel intltjoinsel ));
+DATA(insert OID = 488 (  ">>"      PGUID 0 b t f 604 604  16   0   0   0   0 poly_right intltsel intltjoinsel ));
+DATA(insert OID = 489 (  "@"       PGUID 0 b t f 604 604  16   0   0   0   0 poly_contained intltsel intltjoinsel ));
+DATA(insert OID = 490 (  "~"       PGUID 0 b t f 604 604  16   0   0   0   0 poly_contain intltsel intltjoinsel ));
+DATA(insert OID = 491 (  "~="      PGUID 0 b t f 604 604  16   0   0   0   0 poly_same intltsel intltjoinsel ));
+DATA(insert OID = 492 (  "&&"      PGUID 0 b t f 604 604  16   0   0   0   0 poly_overlap intltsel intltjoinsel ));
+DATA(insert OID = 493 (  "<<"      PGUID 0 b t f 603 603  16   0   0   0   0 box_left intltsel intltjoinsel ));
+DATA(insert OID = 494 (  "&<"      PGUID 0 b t f 603 603  16   0   0   0   0 box_overleft intltsel intltjoinsel ));
+DATA(insert OID = 495 (  "&>"      PGUID 0 b t f 603 603  16   0   0   0   0 box_overright intltsel intltjoinsel ));
+DATA(insert OID = 496 (  ">>"      PGUID 0 b t f 603 603  16   0   0   0   0 box_right intltsel intltjoinsel ));
+DATA(insert OID = 497 (  "@"       PGUID 0 b t f 603 603  16   0   0   0   0 box_contained intltsel intltjoinsel ));
+DATA(insert OID = 498 (  "~"       PGUID 0 b t f 603 603  16   0   0   0   0 box_contain intltsel intltjoinsel ));
+DATA(insert OID = 499 (  "~="      PGUID 0 b t f 603 603  16   0   0   0   0 box_same intltsel intltjoinsel ));
+DATA(insert OID = 500 (  "&&"      PGUID 0 b t f 603 603  16   0   0   0   0 box_overlap intltsel intltjoinsel ));
+DATA(insert OID = 501 (  ">="      PGUID 0 b t f 603 603  16   0   0   0   0 box_ge areasel areajoinsel ));
+DATA(insert OID = 502 (  ">"       PGUID 0 b t f 603 603  16   0   0   0   0 box_gt areasel areajoinsel ));
+DATA(insert OID = 503 (  "="       PGUID 0 b t t 603 603  16   0   0   0   0 box_eq areasel areajoinsel ));
+DATA(insert OID = 504 (  "<"       PGUID 0 b t f 603 603  16   0   0   0   0 box_lt areasel areajoinsel ));
+DATA(insert OID = 505 (  "<="      PGUID 0 b t f 603 603  16   0   0   0   0 box_le areasel areajoinsel ));
+DATA(insert OID = 506 (  "!^"      PGUID 0 b t f 600 600  16   0   0   0   0 point_above intltsel intltjoinsel ));
+DATA(insert OID = 507 (  "!<"      PGUID 0 b t f 600 600  16   0   0   0   0 point_left intltsel intltjoinsel ));
+DATA(insert OID = 508 (  "!>"      PGUID 0 b t f 600 600  16   0   0   0   0 point_right intltsel intltjoinsel ));
+DATA(insert OID = 509 (  "!|"      PGUID 0 b t f 600 600  16   0   0   0   0 point_below intltsel intltjoinsel ));
+DATA(insert OID = 510 (  "=|="     PGUID 0 b t f 600 600  16   0   0   0   0 point_eq intltsel intltjoinsel ));
+DATA(insert OID = 511 (  "===>"    PGUID 0 b t f 600 603  16   0   0   0   0 on_pb intltsel intltjoinsel ));
+DATA(insert OID = 512 (  "===`"    PGUID 0 b t f 600 602  16   0   0   0   0 on_ppath intltsel intltjoinsel ));
+DATA(insert OID = 513 (  "@@"      PGUID 0 l t f   0 603 600   0   0   0   0 box_center intltsel intltjoinsel ));
+DATA(insert OID = 514 (  "*"       PGUID 0 b t f  23  23  23 514   0   0   0 int4mul intltsel intltjoinsel ));
+DATA(insert OID = 515 (  "!"       PGUID 0 r t f  23   0  23   0   0   0   0 int4fac intltsel intltjoinsel ));
+DATA(insert OID = 516 (  "!!"      PGUID 0 l t f   0  23  23   0   0   0   0 int4fac intltsel intltjoinsel ));
+DATA(insert OID = 517 (  "<===>"   PGUID 0 b t f 600 600  23   0   0   0   0 pointdist intltsel intltjoinsel ));
+DATA(insert OID = 518 (  "<>"      PGUID 0 b t f  23  23  16 518  96  0  0 int4ne neqsel neqjoinsel ));
+DATA(insert OID = 519 (  "<>"      PGUID 0 b t f  21  21  16 519  94  0  0 int2ne neqsel neqjoinsel ));
+DATA(insert OID = 520 (  ">"       PGUID 0 b t f  21  21  16  95 522  0  0 int2gt intgtsel intgtjoinsel ));
+DATA(insert OID = 521 (  ">"       PGUID 0 b t f  23  23  16  97 523  0  0 int4gt intgtsel intgtjoinsel ));
+DATA(insert OID = 522 (  "<="      PGUID 0 b t f  21  21  16 524 520  0  0 int2le intltsel intltjoinsel ));
+DATA(insert OID = 523 (  "<="      PGUID 0 b t f  23  23  16 525 521  0  0 int4le intltsel intltjoinsel ));
+DATA(insert OID = 524 (  ">="      PGUID 0 b t f  21  21  16 522  95  0  0 int2ge intgtsel intgtjoinsel ));
+DATA(insert OID = 525 (  ">="      PGUID 0 b t f  23  23  16 523  97  0  0 int4ge intgtsel intgtjoinsel ));
+DATA(insert OID = 526 (  "*"       PGUID 0 b t f  21  21  21 526   0  0  0 int2mul intltsel intltjoinsel ));
+DATA(insert OID = 527 (  "/"       PGUID 0 b t f  21  21  21   0   0  0  0 int2div intltsel intltjoinsel ));
+DATA(insert OID = 528 (  "/"       PGUID 0 b t f  23  23  23   0   0  0  0 int4div intltsel intltjoinsel ));
+DATA(insert OID = 529 (  "%"       PGUID 0 b t f  21  21  21   6   0  0  0 int2mod intltsel intltjoinsel ));
+DATA(insert OID = 530 (  "%"       PGUID 0 b t f  23  23  23   6   0  0  0 int4mod intltsel intltjoinsel ));
+DATA(insert OID = 531 (  "<>"      PGUID 0 b t f  25  25  16 531  98   0   0 textne neqsel neqjoinsel ));
+DATA(insert OID = 532 (  "="       PGUID 0 b t t  21  23  16 533 538  95  97 int24eq eqsel eqjoinsel ));
+DATA(insert OID = 533 (  "="       PGUID 0 b t t  23  21  16 532 539  97  95 int42eq eqsel eqjoinsel ));
+DATA(insert OID = 534 (  "<"       PGUID 0 b t f  21  23  16 537 542  0  0 int24lt intltsel intltjoinsel ));
+DATA(insert OID = 535 (  "<"       PGUID 0 b t f  23  21  16 536 543  0  0 int42lt intltsel intltjoinsel ));
+DATA(insert OID = 536 (  ">"       PGUID 0 b t f  21  23  16 535 540  0  0 int24gt intgtsel intgtjoinsel ));
+DATA(insert OID = 537 (  ">"       PGUID 0 b t f  23  21  16 534 541  0  0 int42gt intgtsel intgtjoinsel ));
+DATA(insert OID = 538 (  "<>"      PGUID 0 b t f  21  23  16 539 532  0  0 int24ne neqsel neqjoinsel ));
+DATA(insert OID = 539 (  "<>"      PGUID 0 b t f  23  21  16 538 533  0  0 int42ne neqsel neqjoinsel ));
+DATA(insert OID = 540 (  "<="      PGUID 0 b t f  21  23  16 543 536  0  0 int24le intltsel intltjoinsel ));
+DATA(insert OID = 541 (  "<="      PGUID 0 b t f  23  21  16 542 537  0  0 int42le intltsel intltjoinsel ));
+DATA(insert OID = 542 (  ">="      PGUID 0 b t f  21  23  16 541 534  0  0 int24ge intgtsel intgtjoinsel ));
+DATA(insert OID = 543 (  ">="      PGUID 0 b t f  23  21  16 540 535  0  0 int42ge intgtsel intgtjoinsel ));
+DATA(insert OID = 544 (  "*"       PGUID 0 b t f  21  23  23 545   0  0  0 int24mul intltsel intltjoinsel ));
+DATA(insert OID = 545 (  "*"       PGUID 0 b t f  23  21  23 544   0  0  0 int42mul intltsel intltjoinsel ));
+DATA(insert OID = 546 (  "/"       PGUID 0 b t f  21  23  23   0   0  0  0 int24div intltsel intltjoinsel ));
+DATA(insert OID = 547 (  "/"       PGUID 0 b t f  23  21  23   0   0  0  0 int42div intltsel intltjoinsel ));
+DATA(insert OID = 548 (  "%"       PGUID 0 b t f  21  23  23   6   0  0  0 int24mod intltsel intltjoinsel ));
+DATA(insert OID = 549 (  "%"       PGUID 0 b t f  23  21  23   6   0  0  0 int42mod intltsel intltjoinsel ));
+DATA(insert OID = 550 (  "+"       PGUID 0 b t f  21  21  21 550   0   0   0 int2pl intltsel intltjoinsel ));
+DATA(insert OID = 551 (  "+"       PGUID 0 b t f  23  23  23 551   0   0   0 int4pl intltsel intltjoinsel ));
+DATA(insert OID = 552 (  "+"       PGUID 0 b t f  21  23  23 553   0   0   0 int24pl intltsel intltjoinsel ));
+DATA(insert OID = 553 (  "+"       PGUID 0 b t f  23  21  23 552   0   0   0 int42pl intltsel intltjoinsel ));
+DATA(insert OID = 554 (  "-"       PGUID 0 b t f  21  21  21   0   0   0   0 int2mi intltsel intltjoinsel ));
+DATA(insert OID = 555 (  "-"       PGUID 0 b t f  23  23  23   0   0   0   0 int4mi intltsel intltjoinsel ));
+DATA(insert OID = 556 (  "-"       PGUID 0 b t f  21  23  23   0   0   0   0 int24mi intltsel intltjoinsel ));
+DATA(insert OID = 557 (  "-"       PGUID 0 b t f  23  21  23   0   0   0   0 int42mi intltsel intltjoinsel ));
+DATA(insert OID = 558   (  "-"       PGUID 0 l t f   0  23  23  0   0   0   0 int4um intltsel intltjoinsel ));
+DATA(insert OID = 559   (  "-"       PGUID 0 l t f   0  21  21  0   0   0   0 int2um intltsel intltjoinsel ));
+DATA(insert OID = 560 (  "="       PGUID 0 b t t 702 702  16 560 561 562 562 abstimeeq eqsel eqjoinsel ));
+DATA(insert OID = 561 (  "<>"      PGUID 0 b t f 702 702  16 561 560 0 0 abstimene neqsel neqjoinsel ));
+DATA(insert OID = 562 (  "<"       PGUID 0 b t f 702 702  16 563 565 0 0 abstimelt intltsel intltjoinsel ));
+DATA(insert OID = 563 (  ">"       PGUID 0 b t f 702 702  16 562 564 0 0 abstimegt intltsel intltjoinsel ));
+DATA(insert OID = 564 (  "<="      PGUID 0 b t f 702 702  16 565 563 0 0 abstimele intltsel intltjoinsel ));
+DATA(insert OID = 565 (  ">="      PGUID 0 b t f 702 702  16 564 562 0 0 abstimege intltsel intltjoinsel ));
+DATA(insert OID = 566 (  "="       PGUID 0 b t t 703 703  16 566 567 568 568 reltimeeq - - ));
+DATA(insert OID = 567 (  "<>"      PGUID 0 b t f 703 703  16 567 566 0 0 reltimene - - ));
+DATA(insert OID = 568 (  "<"       PGUID 0 b t f 703 703  16 569 571 0 0 reltimelt - - ));
+DATA(insert OID = 569 (  ">"       PGUID 0 b t f 703 703  16 568 570 0 0 reltimegt - - ));
+DATA(insert OID = 570 (  "<="      PGUID 0 b t f 703 703  16 571 569 0 0 reltimele - - ));
+DATA(insert OID = 571 (  ">="      PGUID 0 b t f 703 703  16 570 568 0 0 reltimege - - ));
+DATA(insert OID = 572 (  "="       PGUID 0 b t t 704 704  16 572   0   0   0 intervaleq - - ));
+DATA(insert OID = 573 (  "<<"      PGUID 0 b t f 704 704  16   0   0   0   0 intervalct - - ));
+DATA(insert OID = 574 (  "&&"      PGUID 0 b t f 704 704  16   0   0   0   0 intervalov - - ));
+DATA(insert OID = 575 (  "#="      PGUID 0 b t f 704 703  16   0 576   0 568 intervalleneq - - ));
+DATA(insert OID = 576 (  "#<>"     PGUID 0 b t f 704 703  16   0 575   0 568 intervallenne - - ));
+DATA(insert OID = 577 (  "#<"      PGUID 0 b t f 704 703  16   0 580   0 568 intervallenlt - - ));
+DATA(insert OID = 578 (  "#>"      PGUID 0 b t f 704 703  16   0 579   0 568 intervallengt - - ));
+DATA(insert OID = 579 (  "#<="     PGUID 0 b t f 704 703  16   0 578   0 568 intervallenle - - ));
+DATA(insert OID = 580 (  "#>="     PGUID 0 b t f 704 703  16   0 577   0 568 intervallenge - - ));
+DATA(insert OID = 581 (  "+"       PGUID 0 b t f 702 703 702 581   0 0 0 timepl - - ));
+DATA(insert OID = 582 (  "-"       PGUID 0 b t f 702 703 702   0   0 0 0 timemi - - ));
+DATA(insert OID = 583 (  "<?>"     PGUID 0 b t f 702 704  16   0   0 562   0 ininterval - - ));
+DATA(insert OID = 584 (  "-"       PGUID 0 l t f   0 700 700   0   0   0   0 float4um - - ));
+DATA(insert OID = 585 (  "-"       PGUID 0 l t f   0 701 701   0   0   0   0 float8um - - ));
+DATA(insert OID = 586 (  "+"       PGUID 0 b t f 700 700 700 586   0   0   0 float4pl - - ));
+DATA(insert OID = 587 (  "-"       PGUID 0 b t f 700 700 700   0   0   0   0 float4mi - - ));
+DATA(insert OID = 588 (  "/"       PGUID 0 b t f 700 700 700   0   0   0   0 float4div - - ));
+DATA(insert OID = 589 (  "*"       PGUID 0 b t f 700 700 700 589   0   0   0 float4mul - - ));
+DATA(insert OID = 590 (  "@"       PGUID 0 l t f   0 700 700   0   0   0   0 float4abs - - ));
+DATA(insert OID = 591 (  "+"       PGUID 0 b t f 701 701 701 591   0   0   0 float8pl - - ));
+DATA(insert OID = 592 (  "-"       PGUID 0 b t f 701 701 701   0   0   0   0 float8mi - - ));
+DATA(insert OID = 593 (  "/"       PGUID 0 b t f 701 701 701   0   0   0   0 float8div - - ));
+DATA(insert OID = 594 (  "*"       PGUID 0 b t f 701 701 701 594   0   0   0 float8mul - - ));
+DATA(insert OID = 595 (  "@"       PGUID 0 l t f   0 701 701   0   0   0   0 float8abs - - ));
+DATA(insert OID = 596 (  "|/"      PGUID 0 l t f   0 701 701   0   0   0   0 dsqrt - - ));
+DATA(insert OID = 597 (  "||/"     PGUID 0 l t f   0 701 701   0   0   0   0 dcbrt - - ));
+DATA(insert OID = 598 (  "%"       PGUID 0 l t f   0 701 701   0   0   0   0 dtrunc - - ));
+DATA(insert OID = 599 (  "%"       PGUID 0 r t f 701   0 701   0   0   0   0 dround - - ));
+DATA(insert OID = 601 (  ":"       PGUID 0 l t f   0 701 701   0   0   0   0 dexp - - ));
+DATA(insert OID = 602 (  ";"       PGUID 0 l t f   0 701 701   0   0   0   0 dlog1 - - ));
+DATA(insert OID = 603 (  "|"       PGUID 0 l t f   0 704 702   0   0   0   0 intervalstart - - ));
+DATA(insert OID = 606 (  "<#>"      PGUID 0 b t f 702 702 704   0   0   0   0 mktinterval - - ));
+DATA(insert OID = 607 (  "="       PGUID 0 b t t  26  26  16 607 608 97 97 oideq eqsel eqjoinsel ));
+#define    OIDEqualOperator 607    /* XXX planner/prep/semanopt.c crock */
+DATA(insert OID = 608 (  "<>"      PGUID 0 b t f  26  26  16 608 607  0  0 oidne neqsel neqjoinsel ));
+DATA(insert OID = 609 (  "<"       PGUID 0 b t f  26  26  16 610 612  0  0 int4lt intltsel intltjoinsel ));
+DATA(insert OID = 610 (  ">"       PGUID 0 b t f  26  26  16 609 611  0  0 int4gt intgtsel intgtjoinsel ));
+DATA(insert OID = 611 (  "<="      PGUID 0 b t f  26  26  16 612 610  0  0 int4le intltsel intltjoinsel ));
+DATA(insert OID = 612 (  ">="      PGUID 0 b t f  26  26  16 611 609  0  0 int4ge intgtsel intgtjoinsel ));
+DATA(insert OID = 620 (  "="       PGUID 0 b t t  700  700  16 620 621  622 622 float4eq eqsel eqjoinsel ));
+DATA(insert OID = 621 (  "<>"      PGUID 0 b t f  700  700  16 621 620  0 0 float4ne neqsel neqjoinsel ));
+DATA(insert OID = 622 (  "<"       PGUID 0 b t f  700  700  16 623 625  0 0 float4lt intltsel intltjoinsel ));
+DATA(insert OID = 623 (  ">"       PGUID 0 b t f  700  700  16 622 624  0 0 float4gt intgtsel intgtjoinsel ));
+DATA(insert OID = 624 (  "<="      PGUID 0 b t f  700  700  16 625 623  0 0 float4le intltsel intltjoinsel ));
+DATA(insert OID = 625 (  ">="      PGUID 0 b t f  700  700  16 624 622  0 0 float4ge intgtsel intgtjoinsel ));
+DATA(insert OID = 626 (  "!!="     PGUID 0 b t f  23   19   16 0   0    0   0   int4notin "-"     "-"));
+DATA(insert OID = 627 (  "!!="     PGUID 0 b t f  26   19   16 0   0    0   0   oidnotin "-"     "-"));
+#define OIDNotInOperator 627   /* XXX planner/prep/semanopt.c crock */
+DATA(insert OID = 630 (  "<>"      PGUID 0 b t f  18  18  16 630  92  0 0 charne neqsel neqjoinsel ));
+    
+DATA(insert OID = 631 (  "<"       PGUID 0 b t f  18  18  16 633 634  0 0 charlt intltsel intltjoinsel ));
+DATA(insert OID = 632 (  "<="      PGUID 0 b t f  18  18  16 634 633  0 0 charle intltsel intltjoinsel ));
+DATA(insert OID = 633 (  ">"       PGUID 0 b t f  18  18  16 631 632  0 0 chargt intltsel intltjoinsel ));
+DATA(insert OID = 634 (  ">="      PGUID 0 b t f  18  18  16 632 631  0 0 charge intltsel intltjoinsel ));
+    
+DATA(insert OID = 635 (  "+"       PGUID 0 b t f  18  18  18 0 0  0 0 charpl eqsel eqjoinsel ));
+DATA(insert OID = 636 (  "-"       PGUID 0 b t f  18  18  18 0 0  0 0 charmi eqsel eqjoinsel ));
+DATA(insert OID = 637 (  "*"       PGUID 0 b t f  18  18  18 0 0  0 0 charmul eqsel eqjoinsel ));
+DATA(insert OID = 638 (  "/"       PGUID 0 b t f  18  18  18 0 0  0 0 chardiv eqsel eqjoinsel ));
+
+DATA(insert OID = 639 (  "~"       PGUID 0 b t f  19  25  16 0 640  0 0 nameregexeq eqsel eqjoinsel ));
+DATA(insert OID = 640 (  "!~"      PGUID 0 b t f  19  25  16 0 639  0 0 nameregexne neqsel neqjoinsel ));
+DATA(insert OID = 641 (  "~"       PGUID 0 b t f  25  25  16 0 642  0 0 textregexeq eqsel eqjoinsel ));
+DATA(insert OID = 642 (  "!~"      PGUID 0 b t f  25  25  16 0 641  0 0 textregexne neqsel neqjoinsel ));
+DATA(insert OID = 643 (  "<>"      PGUID 0 b t f  19  19  16 643 93 0 0 namene neqsel neqjoinsel ));
+DATA(insert OID = 644 (  "<>"      PGUID 0 b t f  20  20  16 644 99 0 0 char16ne neqsel neqjoinsel ));
+DATA(insert OID = 645 (  "<"       PGUID 0 b t f  20  20  16 647 648  0 0 char16lt intltsel intltjoinsel ));
+DATA(insert OID = 646 (  "<="       PGUID 0 b t f  20  20  16 648 647  0 0 char16le intltsel intltjoinsel ));
+DATA(insert OID = 647 (  ">"       PGUID 0 b t f  20  20  16 645 646  0 0 char16gt intltsel intltjoinsel ));
+DATA(insert OID = 648 (  ">="       PGUID 0 b t f  20  20  16 646 645  0 0 char16ge intltsel intltjoinsel ));
+DATA(insert OID = 649 (  "~"       PGUID 0 b t f  20  25  16 0 650  0 0 char16regexeq intltsel intltjoinsel ));
+DATA(insert OID = 650 (  "!~"       PGUID 0 b t f  20  25  16 0 649  0 0 char16regexne intltsel intltjoinsel ));
+DATA(insert OID = 651 (  "~~"       PGUID 0 b t f  20  25  16 0 652  0 0 char16like eqsel eqjoinsel ));
+DATA(insert OID = 652 (  "!~~"       PGUID 0 b t f  20  25  16 0 651  0 0 char16nlike neqsel neqjoinsel ));
+
+/* name comparison operators */
+DATA(insert OID = 660 (  "<"       PGUID 0 b t f  19  19  16 662 663  0 0 namelt intltsel intltjoinsel ));
+DATA(insert OID = 661 (  "<="      PGUID 0 b t f  19  19  16 663 662  0 0 namele intltsel intltjoinsel ));
+DATA(insert OID = 662 (  ">"       PGUID 0 b t f  19  19  16 660 661  0 0 namegt intltsel intltjoinsel ));
+DATA(insert OID = 663 (  ">="      PGUID 0 b t f  19  19  16 661 660  0 0 namege intltsel intltjoinsel ));
+/* text comparison operators */
+DATA(insert OID = 664 (  "<"       PGUID 0 b t f  25  25  16 666 667  0 0 text_lt intltsel intltjoinsel ));
+DATA(insert OID = 665 (  "<="      PGUID 0 b t f  25  25  16 667 666  0 0 text_le intltsel intltjoinsel ));
+DATA(insert OID = 666 (  ">"       PGUID 0 b t f  25  25  16 664 665  0 0 text_gt intltsel intltjoinsel ));
+DATA(insert OID = 667 (  ">="      PGUID 0 b t f  25  25  16 665 664  0 0 text_ge intltsel intltjoinsel ));
+
+/* float8 comparison operators */
+DATA(insert OID = 670 (  "="       PGUID 0 b t f  701  701  16 670 671  0 0 float8eq eqsel eqjoinsel ));
+DATA(insert OID = 671 (  "<>"      PGUID 0 b t f  701  701  16 671 670  0 0 float8ne neqsel neqjoinsel ));
+DATA(insert OID = 672 (  "<"       PGUID 0 b t f  701  701  16 674 675  0 0 float8lt intltsel intltjoinsel ));
+DATA(insert OID = 673 (  "<="      PGUID 0 b t f  701  701  16 675 674  0 0 float8le intltsel intltjoinsel ));
+DATA(insert OID = 674 (  ">"       PGUID 0 b t f  701  701  16 672 673  0 0 float8gt intltsel intltjoinsel ));
+DATA(insert OID = 675 (  ">="      PGUID 0 b t f  701  701  16 673 672  0 0 float8ge intltsel intltjoinsel ));
+
+/* oidname comparison operators */
+DATA(insert OID = 676 (  "<"       PGUID 0 b t f  911  911  16 680 679  0 0 oidnamelt intltsel intltjoinsel ));
+DATA(insert OID = 677 (  "<="      PGUID 0 b t f  911  911  16 679 680  0 0 oidnamele intltsel intltjoinsel ));
+DATA(insert OID = 678 (  "="       PGUID 0 b t f  911  911  16 678 681  0 0 oidnameeq intltsel intltjoinsel ));
+DATA(insert OID = 679 (  ">="      PGUID 0 b t f  911  911  16 677 676  0 0 oidnamege intltsel intltjoinsel ));
+DATA(insert OID = 680 (  ">"       PGUID 0 b t f  911  911  16 676 677  0 0 oidnamegt intltsel intltjoinsel ));
+DATA(insert OID = 681 (  "<>"      PGUID 0 b t f  911  911  16 681 678  0 0 oidnamene intltsel intltjoinsel ));
+
+/* char8 regex match operators */
+DATA(insert OID = 697 (  "~"       PGUID 0 b t f  411  25  16 0 698  0 0 char8regexeq eqsel eqjoinsel ));
+DATA(insert OID = 698 (  "!~"      PGUID 0 b t f  411  25  16 0 697  0 0 char8regexne neqsel neqjoinsel ));
+
+/* oidint2 comparison operators */
+DATA(insert OID = 830 (  "<"       PGUID 0 b t f  810  810  16 834 833  0 0 oidint2lt intltsel intltjoinsel ));
+DATA(insert OID = 831 (  "<="      PGUID 0 b t f  810  810  16 833 834  0 0 oidint2le intltsel intltjoinsel ));
+DATA(insert OID = 832 (  "="       PGUID 0 b t f  810  810  16 832 835  0 0 oidint2eq intltsel intltjoinsel ));
+DATA(insert OID = 833 (  ">="      PGUID 0 b t f  810  810  16 831 830  0 0 oidint2ge intltsel intltjoinsel ));
+DATA(insert OID = 834 (  ">"       PGUID 0 b t f  810  810  16 830 831  0 0 oidint2gt intltsel intltjoinsel ));
+DATA(insert OID = 835 (  "<>"      PGUID 0 b t f  810  810  16 835 832  0 0 oidint2ne intltsel intltjoinsel ));
+
+/* char2 and char4 regex match operators */
+DATA(insert OID = 839 (  "~"       PGUID 0 b t f  409  25  16 0 841  0 0 char2regexeq eqsel eqjoinsel ));
+DATA(insert OID = 841 (  "!~"      PGUID 0 b t f  409  25  16 0 839  0 0 char2regexne neqsel neqjoinsel ));
+DATA(insert OID = 840 (  "~"       PGUID 0 b t f  410  25  16 0 842  0 0 char4regexeq eqsel eqjoinsel ));
+DATA(insert OID = 842 (  "!~"      PGUID 0 b t f  410  25  16 0 840  0 0 char4regexne neqsel neqjoinsel ));
+
+/* oidint4 comparison operators */
+DATA(insert OID = 930 (  "<"       PGUID 0 b t f  910  910  16 934 933  0 0 oidint4lt intltsel intltjoinsel ));
+DATA(insert OID = 931 (  "<="      PGUID 0 b t f  910  910  16 933 934  0 0 oidint4le intltsel intltjoinsel ));
+DATA(insert OID = 932 (  "="       PGUID 0 b t f  910  910  16 932 935  0 0 oidint4eq intltsel intltjoinsel ));
+DATA(insert OID = 933 (  ">="      PGUID 0 b t f  910  910  16 931 930  0 0 oidint4ge intltsel intltjoinsel ));
+DATA(insert OID = 934 (  ">"       PGUID 0 b t f  910  910  16 930 931  0 0 oidint4gt intltsel intltjoinsel ));
+DATA(insert OID = 935 (  "<>"      PGUID 0 b t f  910  910  16 935 932  0 0 oidint4ne intltsel intltjoinsel ));
+
+/* dpow on float8, then the acl insert/remove/contains operators */
+DATA(insert OID = 965 (  "^"       PGUID 0 b t f 701 701 701   0   0   0   0 dpow - - ));
+DATA(insert OID = 966 (  "+"       PGUID 0 b t f 1034 1033 1034 0 0 0 0 aclinsert   intltsel intltjoinsel ));
+DATA(insert OID =  967 (  "-"       PGUID 0 b t f 1034 1033 1034 0 0 0 0 aclremove   intltsel intltjoinsel ));
+DATA(insert OID =   968 (  "~"       PGUID 0 b t f 1034 1033   16 0 0 0 0 aclcontains intltsel intltjoinsel ));
+
+/* bpchar operators (the regex rows reuse the text regex functions) */
+DATA(insert OID = 1054 ( "="       PGUID 0 b t t  1042  1042  16  1054 1057 1058 1058 bpchareq eqsel eqjoinsel ));
+DATA(insert OID = 1055 (  "~"      PGUID 0 b t f  1042  25  16 0 1056  0 0 textregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1056 ( "!~"      PGUID 0 b t f  1042  25  16 0 1055  0 0 textregexne neqsel neqjoinsel ));
+DATA(insert OID = 1057 ( "<>"      PGUID 0 b t f  1042  1042  16 1057 1054  0 0 bpcharne neqsel neqjoinsel ));
+DATA(insert OID = 1058 ( "<"       PGUID 0 b t f  1042  1042  16 1060 1061  0 0 bpcharlt intltsel intltjoinsel ));
+DATA(insert OID = 1059 ( "<="      PGUID 0 b t f  1042  1042  16 1061 1060  0 0 bpcharle intltsel intltjoinsel ));
+DATA(insert OID = 1060 ( ">"       PGUID 0 b t f  1042  1042  16 1058 1059  0 0 bpchargt intltsel intltjoinsel ));
+DATA(insert OID = 1061 ( ">="      PGUID 0 b t f  1042  1042  16 1059 1058  0 0 bpcharge intltsel intltjoinsel ));
+
+/* varchar operators (the regex rows reuse the text regex functions) */
+DATA(insert OID = 1062 ( "="       PGUID 0 b t t  1043  1043  16  1062 1065 1066 1066 varchareq eqsel eqjoinsel ));
+DATA(insert OID = 1063 (  "~"      PGUID 0 b t f  1043  25  16 0 1064  0 0 textregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1064 ( "!~"      PGUID 0 b t f  1043  25  16 0 1063  0 0 textregexne neqsel neqjoinsel ));
+DATA(insert OID = 1065 ( "<>"      PGUID 0 b t f  1043  1043  16 1065 1062  0 0 varcharne neqsel neqjoinsel ));
+DATA(insert OID = 1066 ( "<"       PGUID 0 b t f  1043  1043  16 1068 1069  0 0 varcharlt intltsel intltjoinsel ));
+DATA(insert OID = 1067 ( "<="      PGUID 0 b t f  1043  1043  16 1069 1068  0 0 varcharle intltsel intltjoinsel ));
+DATA(insert OID = 1068 ( ">"       PGUID 0 b t f  1043  1043  16 1066 1067  0 0 varchargt intltsel intltjoinsel ));
+DATA(insert OID = 1069 ( ">="      PGUID 0 b t f  1043  1043  16 1067 1066  0 0 varcharge intltsel intltjoinsel ));
+
+/* date comparison operators; the negator of each ordering operator is the opposite ordering operator of the same type */
+DATA(insert OID = 1093 ( "="       PGUID 0 b t t  1082  1082  16 1093 1094 1095 1095 date_eq eqsel eqjoinsel ));
+DATA(insert OID = 1094 ( "<>"      PGUID 0 b t f  1082  1082  16 1094 1093  0 0 date_ne neqsel neqjoinsel ));
+DATA(insert OID = 1095 ( "<"       PGUID 0 b t f  1082  1082  16 1097 1098  0 0 date_lt intltsel intltjoinsel ));
+DATA(insert OID = 1096 ( "<="      PGUID 0 b t f  1082  1082  16 1098 1097  0 0 date_le intltsel intltjoinsel ));
+DATA(insert OID = 1097 ( ">"       PGUID 0 b t f  1082  1082  16 1095 1096  0 0 date_gt intltsel intltjoinsel ));
+DATA(insert OID = 1098 ( ">="      PGUID 0 b t f  1082  1082  16 1096 1095  0 0 date_ge intltsel intltjoinsel ));
+
+/* time comparison operators */
+DATA(insert OID = 1108 ( "="       PGUID 0 b t t  1083  1083  16 1108 1109 1110 1110 time_eq eqsel eqjoinsel ));
+DATA(insert OID = 1109 ( "<>"      PGUID 0 b t f  1083  1083  16 1109 1108  0 0 time_ne neqsel neqjoinsel ));
+DATA(insert OID = 1110 ( "<"       PGUID 0 b t f  1083  1083  16 1112 1113  0 0 time_lt intltsel intltjoinsel ));
+DATA(insert OID = 1111 ( "<="      PGUID 0 b t f  1083  1083  16 1113 1112  0 0 time_le intltsel intltjoinsel ));
+DATA(insert OID = 1112 ( ">"       PGUID 0 b t f  1083  1083  16 1110 1111  0 0 time_gt intltsel intltjoinsel ));
+DATA(insert OID = 1113 ( ">="      PGUID 0 b t f  1083  1083  16 1111 1110  0 0 time_ge intltsel intltjoinsel ));
+
+/* float48 operators */
+/* operands are (float4, float8), so each commutator is the float84 operator taking (float8, float4) */
+/* NOTE(review): the sort operators on the "=" rows (1122, 1132) are themselves mixed-type operators - confirm they are usable as per-side sort operators */
+DATA(insert OID = 1116 (  "+"       PGUID 0 b t f 700 701 701 1126   0   0   0 float48pl - - ));
+DATA(insert OID = 1117 (  "-"       PGUID 0 b t f 700 701 701   0   0   0   0 float48mi - - ));
+DATA(insert OID = 1118 (  "/"       PGUID 0 b t f 700 701 701   0   0   0   0 float48div - - ));
+DATA(insert OID = 1119 (  "*"       PGUID 0 b t f 700 701 701 1129   0   0   0 float48mul - - ));
+DATA(insert OID = 1120 (  "="       PGUID 0 b t t  700  701  16 1130 1121  1122 1122 float48eq eqsel eqjoinsel ));
+DATA(insert OID = 1121 (  "<>"      PGUID 0 b t f  700  701  16 1131 1120  0 0 float48ne neqsel neqjoinsel ));
+DATA(insert OID = 1122 (  "<"       PGUID 0 b t f  700  701  16 1133 1125  0 0 float48lt intltsel intltjoinsel ));
+DATA(insert OID = 1123 (  ">"       PGUID 0 b t f  700  701  16 1132 1124  0 0 float48gt intgtsel intgtjoinsel ));
+DATA(insert OID = 1124 (  "<="      PGUID 0 b t f  700  701  16 1135 1123  0 0 float48le intltsel intltjoinsel ));
+DATA(insert OID = 1125 (  ">="      PGUID 0 b t f  700  701  16 1134 1122  0 0 float48ge intgtsel intgtjoinsel ));
+
+/* float84 operators */
+/* operands are (float8, float4), so each commutator is the float48 operator taking (float4, float8) */
+DATA(insert OID = 1126 (  "+"       PGUID 0 b t f 701 700 701 1116   0   0   0 float84pl - - ));
+DATA(insert OID = 1127 (  "-"       PGUID 0 b t f 701 700 701   0   0   0   0 float84mi - - ));
+DATA(insert OID = 1128 (  "/"       PGUID 0 b t f 701 700 701   0   0   0   0 float84div - - ));
+DATA(insert OID = 1129 (  "*"       PGUID 0 b t f 701 700 701 1119   0   0   0 float84mul - - ));
+DATA(insert OID = 1130 (  "="       PGUID 0 b t t  701  700  16 1120 1131  1132 1132 float84eq eqsel eqjoinsel ));
+DATA(insert OID = 1131 (  "<>"      PGUID 0 b t f  701  700  16 1121 1130  0 0 float84ne neqsel neqjoinsel ));
+DATA(insert OID = 1132 (  "<"       PGUID 0 b t f  701  700  16 1123 1135  0 0 float84lt intltsel intltjoinsel ));
+DATA(insert OID = 1133 (  ">"       PGUID 0 b t f  701  700  16 1122 1134  0 0 float84gt intgtsel intgtjoinsel ));
+DATA(insert OID = 1134 (  "<="      PGUID 0 b t f  701  700  16 1125 1133  0 0 float84le intltsel intltjoinsel ));
+DATA(insert OID = 1135 (  ">="      PGUID 0 b t f  701  700  16 1124 1132  0 0 float84ge intgtsel intgtjoinsel ));
+
+/* int4 and oid equality */
+/* the two rows are each other's commutator because their operand types are swapped */
+DATA(insert OID = 1136 (  "="       PGUID 0 b t t 23 26 16 1137 0 0 0 int4eqoid eqsel eqjoinsel ));
+DATA(insert OID = 1137 (  "="       PGUID 0 b t t 26 23 16 1136 0 0 0 oideqint4 eqsel eqjoinsel ));
+
+/* LIKE hacks by Keith Parks. */
+/* each pair is a like / not-like operator whose right operand is type 25; the two rows of a pair are each other's negator */
+DATA(insert OID = 1201 (  "~~"    PGUID 0 b t f  409  25  16 0 1202 0 0 char2like eqsel eqjoinsel ));
+DATA(insert OID = 1202 (  "!~~"   PGUID 0 b t f  409  25  16 0 1201 0 0 char2nlike neqsel neqjoinsel ));
+DATA(insert OID = 1203 (  "~~"    PGUID 0 b t f  410  25  16 0 1204 0 0 char4like eqsel eqjoinsel ));
+DATA(insert OID = 1204 (  "!~~"   PGUID 0 b t f  410  25  16 0 1203 0 0 char4nlike neqsel neqjoinsel ));
+DATA(insert OID = 1205 (  "~~"    PGUID 0 b t f  411  25  16 0 1206 0 0 char8like eqsel eqjoinsel ));
+DATA(insert OID = 1206 (  "!~~"   PGUID 0 b t f  411  25  16 0 1205 0 0 char8nlike neqsel neqjoinsel ));
+DATA(insert OID = 1207 (  "~~"    PGUID 0 b t f  19   25  16 0 1208 0 0 namelike eqsel eqjoinsel ));
+DATA(insert OID = 1208 (  "!~~"   PGUID 0 b t f  19   25  16 0 1207 0 0 namenlike neqsel neqjoinsel ));
+DATA(insert OID = 1209 (  "~~"    PGUID 0 b t f  25   25  16 0 1210 0 0 textlike eqsel eqjoinsel ));
+DATA(insert OID = 1210 (  "!~~"   PGUID 0 b t f  25   25  16 0 1209 0 0 textnlike neqsel neqjoinsel ));
+DATA(insert OID = 1211 (  "~~"    PGUID 0 b t f  1042 25  16 0 1212 0 0 textlike eqsel eqjoinsel ));
+DATA(insert OID = 1212 (  "!~~"   PGUID 0 b t f  1042 25  16 0 1211 0 0 textnlike neqsel neqjoinsel ));
+DATA(insert OID = 1213 (  "~~"    PGUID 0 b t f  1043 25  16 0 1214 0 0 textlike eqsel eqjoinsel ));
+DATA(insert OID = 1214 (  "!~~"   PGUID 0 b t f  1043 25  16 0 1213 0 0 textnlike neqsel neqjoinsel ));
+/* NOTE(review): OIDs 1215/1216 repeat the operator names, operand types and functions of OIDs 651/652 earlier in this file - confirm both sets are intended */
+DATA(insert OID = 1215 (  "~~"    PGUID 0 b t f  20   25  16 0 1216 0 0 char16like eqsel eqjoinsel ));
+DATA(insert OID = 1216 (  "!~~"   PGUID 0 b t f  20   25  16 0 1215 0 0 char16nlike neqsel neqjoinsel ));
+
+/* case-insensitive regex match operators */
+/* the match rows use eqsel/eqjoinsel, the non-match rows use neqsel/neqjoinsel */
+DATA(insert OID = 1220 (  "~*"       PGUID 0 b t f  409  25  16 0 1221  0 0 char2icregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1221 (  "!~*"      PGUID 0 b t f  409  25  16 0 1220  0 0 char2icregexne neqsel neqjoinsel ));
+DATA(insert OID = 1222 (  "~*"       PGUID 0 b t f  410  25  16 0 1223  0 0 char4icregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1223 (  "!~*"      PGUID 0 b t f  410  25  16 0 1222  0 0 char4icregexne neqsel neqjoinsel ));
+DATA(insert OID = 1224 (  "~*"       PGUID 0 b t f  411  25  16 0 1225  0 0 char8icregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1225 (  "!~*"      PGUID 0 b t f  411  25  16 0 1224  0 0 char8icregexne neqsel neqjoinsel ));
+DATA(insert OID = 1226 (  "~*"       PGUID 0 b t f  19  25  16 0 1227  0 0 nameicregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1227 (  "!~*"      PGUID 0 b t f  19  25  16 0 1226  0 0 nameicregexne neqsel neqjoinsel ));
+DATA(insert OID = 1228 (  "~*"       PGUID 0 b t f  25  25  16 0 1229  0 0 texticregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1229 (  "!~*"      PGUID 0 b t f  25  25  16 0 1228  0 0 texticregexne neqsel neqjoinsel ));
+DATA(insert OID = 1230 (  "~*"       PGUID 0 b t f  20  25  16 0 1231  0 0 char16icregexeq eqsel eqjoinsel ));
+DATA(insert OID = 1231 (  "!~*"      PGUID 0 b t f  20  25  16 0 1230  0 0 char16icregexne neqsel neqjoinsel ));
+
+
+
+/*
+ * function prototypes
+ */
+/*
+ * OperatorCreate
+ *    Declaration only; the definition is not in this header.
+ *    Every operator, type, procedure and related-operator argument is
+ *    passed by name as a C string.
+ *    NOTE(review): presumably creates a new pg_operator entry from these
+ *    names - confirm against the definition.
+ */
+extern void OperatorCreate(char *operatorName, 
+              char *leftTypeName,
+              char *rightTypeName,
+              char *procedureName,
+              uint16 precedence, 
+              bool isLeftAssociative,
+              char *commutatorName,
+              char *negatorName,
+              char *restrictionName,
+              char *joinName,
+              bool canHash,
+              char *leftSortName,
+              char *rightSortName);
+
+#endif /* PG_OPERATOR_H */
diff --git a/src/backend/catalog/pg_parg.h b/src/backend/catalog/pg_parg.h

new file mode 100644 (file)

index 0000000..aa08827
--- /dev/null
+++ b/src/backend/catalog/pg_parg.h
@@ -0,0 +1,116 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_parg.h--
+ *    definition of the system "parg" relation (pg_parg)
+ *    along with the relation's initial contents.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_parg.h,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ * NOTES
+ *    the genbki.sh script reads this file and generates .bki
+ *    information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PARG_H
+#define PG_PARG_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+
+/* ----------------
+ * pg_parg definition.  cpp turns this into
+ * typedef struct FormData_pg_parg
+ *
+ * Four fixed-length fields per tuple:
+ *    parproid    an Oid (presumably the procedure the row belongs
+ *                to - TODO confirm)
+ *    parnum      an int2; the initial contents number rows 1..n per
+ *                parproid value
+ *    parbound    a char; every initial row stores '-'
+ *    partype     an Oid (presumably the argument's type - TODO confirm)
+ * ----------------
+ */ 
+CATALOG(pg_parg) {
+    Oid    parproid;
+    int2   parnum;
+    char   parbound;
+    Oid    partype;
+} FormData_pg_parg;
+
+/* ----------------
+ * Form_pg_parg corresponds to a pointer to a tuple with
+ * the format of pg_parg relation.
+ * ----------------
+ */
+typedef FormData_pg_parg   *Form_pg_parg;
+
+/* ----------------
+ * compiler constants for pg_parg
+ *
+ * Natts_pg_parg is the number of fields in FormData_pg_parg and
+ * each Anum_pg_parg_* is the 1-based position of the field of the
+ * same name; keep them in step with the struct.
+ * ----------------
+ */
+#define Natts_pg_parg          4
+#define Anum_pg_parg_parproid      1
+#define Anum_pg_parg_parnum        2
+#define Anum_pg_parg_parbound      3
+#define Anum_pg_parg_partype       4
+
+/* ----------------
+ * initial contents of pg_parg
+ *
+ * Each row lists its values in the field order of FormData_pg_parg:
+ *    parproid parnum parbound partype
+ * All rows are inserted with OID = 0.
+ * ----------------
+ */
+
+DATA(insert OID = 0 (  28 1 - 23 ));
+DATA(insert OID = 0 (  29 1 - 16 ));
+DATA(insert OID = 0 (  30 1 - 23 ));
+DATA(insert OID = 0 (  31 1 - 17 ));
+DATA(insert OID = 0 (  32 1 - 23 ));
+DATA(insert OID = 0 (  33 1 - 18 ));
+DATA(insert OID = 0 (  34 1 - 23 ));
+DATA(insert OID = 0 (  35 1 - 19 ));
+DATA(insert OID = 0 (  36 1 - 23 ));
+DATA(insert OID = 0 (  37 1 - 20 ));
+DATA(insert OID = 0 (  38 1 - 23 ));
+DATA(insert OID = 0 (  39 1 - 21 ));
+DATA(insert OID = 0 (  40 1 - 23 ));
+DATA(insert OID = 0 (  41 1 - 22 ));
+DATA(insert OID = 0 (  42 1 - 23 ));
+DATA(insert OID = 0 (  43 1 - 23 ));
+DATA(insert OID = 0 (  44 1 - 23 ));
+DATA(insert OID = 0 (  45 1 - 24 ));
+DATA(insert OID = 0 (  46 1 - 23 ));
+DATA(insert OID = 0 (  47 1 - 25 ));
+DATA(insert OID = 0 (  50 1 - 23 ));
+DATA(insert OID = 0 (  50 2 - 23 ));
+DATA(insert OID = 0 (  50 3 - 23 ));
+DATA(insert OID = 0 (  51 1 - 23 ));
+DATA(insert OID = 0 (  52 1 - 23 ));
+DATA(insert OID = 0 (  52 2 - 23 ));
+DATA(insert OID = 0 (  52 3 - 23 ));
+DATA(insert OID = 0 (  52 4 - 23 ));
+DATA(insert OID = 0 (  53 1 - 23 ));
+DATA(insert OID = 0 (  54 1 - 23 ));
+DATA(insert OID = 0 (  54 2 - 23 ));
+DATA(insert OID = 0 (  55 1 - 23 ));
+DATA(insert OID = 0 (  55 2 - 23 ));
+DATA(insert OID = 0 (  56 1 - 23 ));
+DATA(insert OID = 0 (  56 2 - 23 ));
+DATA(insert OID = 0 (  57 1 - 23 ));
+DATA(insert OID = 0 (  57 2 - 23 ));
+DATA(insert OID = 0 (  57 3 - 23 ));
+DATA(insert OID = 0 (  60 1 - 16 ));
+DATA(insert OID = 0 (  60 2 - 16 ));
+DATA(insert OID = 0 (  61 1 - 18 ));
+DATA(insert OID = 0 (  61 2 - 18 ));
+DATA(insert OID = 0 (  63 1 - 21 ));
+DATA(insert OID = 0 (  63 2 - 21 ));
+DATA(insert OID = 0 (  64 1 - 21 ));
+DATA(insert OID = 0 (  64 2 - 21 ));
+DATA(insert OID = 0 (  65 1 - 23 ));
+DATA(insert OID = 0 (  65 2 - 23 ));
+DATA(insert OID = 0 (  66 1 - 23 ));
+DATA(insert OID = 0 (  66 2 - 23 ));
+DATA(insert OID = 0 (  67 1 - 25 ));
+DATA(insert OID = 0 (  67 2 - 25 ));
+
+#endif /* PG_PARG_H */
diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c

new file mode 100644 (file)

index 0000000..d8273ef
--- /dev/null
+++ b/src/backend/catalog/pg_proc.c
@@ -0,0 +1,265 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_proc.c--
+ *    routines to support manipulation of the pg_proc relation
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *    $Header: /cvsroot/pgsql/src/backend/catalog/pg_proc.c,v 1.1.1.1 1996/07/09 06:21:17 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "utils/rel.h"
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/builtins.h"
+#include "utils/sets.h"
+
+#include "nodes/pg_list.h"
+
+#include "catalog/catname.h"
+#include "utils/syscache.h"
+#include "catalog/pg_proc.h"
+#include "catalog/indexing.h"
+#include "tcop/dest.h"
+#include "parser/parse_query.h"
+#include "tcop/tcopprot.h"
+#include "catalog/pg_type.h"
+#include "parser/catalog_utils.h"
+#include "utils/lsyscache.h"
+#include "optimizer/internal.h"
+#include "optimizer/planner.h"
+
+/* ----------------------------------------------------------------
+ * ProcedureDefine
+ * ----------------------------------------------------------------
+ */
+Oid
+ProcedureCreate(char *procedureName,
+       bool returnsSet,
+       char *returnTypeName,   
+       char *languageName,
+       char *prosrc,
+       char *probin,
+       bool canCache,
+       bool trusted,
+       int32 byte_pct,
+       int32 perbyte_cpu,
+       int32 percall_cpu,
+       int32 outin_ratio,
+       List *argList,
+       CommandDest dest)
+{
+    register   i;
+    Relation   rdesc;
+    HeapTuple  tup;
+    bool        defined;
+    uint16     parameterCount;
+    char   nulls[ Natts_pg_proc ];
+    Datum  values[ Natts_pg_proc ];
+    Oid    languageObjectId;
+    Oid        typeObjectId;
+    List   *x;
+    QueryTreeList *querytree_list;
+    List   *plan_list;
+    Oid        typev[8];
+    Oid    relid;
+    Oid    toid;
+    text   *prosrctext;
+    TupleDesc   tupDesc;
+    
+    /* ----------------
+     * sanity checks
+     * ----------------
+     */
+    Assert(PointerIsValid(prosrc));
+    Assert(PointerIsValid(probin));
+    
+    parameterCount = 0;
+    memset(typev, 0, 8 * sizeof(Oid));
+    foreach (x, argList) {
+   Value *t = lfirst(x);
+   
+   if (parameterCount == 8)
+       elog(WARN, "Procedures cannot take more than 8 arguments");
+   
+   if (strcmp(strVal(t), "opaque") == 0) {
+       if (strcmp(languageName, "sql") == 0) {
+       elog(WARN, "ProcedureDefine: sql functions cannot take type \"opaque\"");
+       }
+       else
+       toid = 0;
+   } else {
+       toid = TypeGet(strVal(t), &defined);
+       
+       if (!OidIsValid(toid)) {
+       elog(WARN, "ProcedureCreate: arg type '%s' is not defined",
+            strVal(t));
+       }
+       
+       if (!defined) {
+       elog(NOTICE, "ProcedureCreate: arg type '%s' is only a shell",
+            strVal(t));
+       }
+   }
+   
+   typev[parameterCount++] = toid;
+    }
+    
+    tup = SearchSysCacheTuple(PRONAME,
+                 PointerGetDatum(procedureName),
+                 UInt16GetDatum(parameterCount),
+                 PointerGetDatum(typev),
+                 0);
+    
+    if (HeapTupleIsValid(tup))
+   elog(WARN, "ProcedureCreate: procedure %s already exists with same arguments",
+        procedureName);
+    
+    if (!strcmp(languageName, "sql"))  {
+   /* If this call is defining a set, check if the set is already
+    * defined by looking to see whether this call's function text
+    * matches a function already in pg_proc.  If so just return the 
+    * OID of the existing set.
+    */
+   if (!strcmp(procedureName, GENERICSETNAME)) {
+       prosrctext = textin(prosrc);
+       tup = SearchSysCacheTuple(PROSRC,
+                     PointerGetDatum(prosrctext),
+                     0,0,0);
+       if (HeapTupleIsValid(tup))
+       return tup->t_oid;
+   }
+    }
+    
+    tup = SearchSysCacheTuple(LANNAME,
+                 PointerGetDatum(languageName),
+                 0,0,0);
+    
+    if (!HeapTupleIsValid(tup))
+   elog(WARN, "ProcedureCreate: no such language %s",
+        languageName);
+    
+    languageObjectId = tup->t_oid;
+    
+    if (strcmp(returnTypeName, "opaque") == 0) {
+   if (strcmp(languageName, "sql") == 0) {
+       elog(WARN, "ProcedureCreate: sql functions cannot return type \"opaque\"");
+   }
+   else
+       typeObjectId = 0;
+    }
+    
+    else {
+   typeObjectId = TypeGet(returnTypeName, &defined);
+   
+   if (!OidIsValid(typeObjectId)) {
+       elog(NOTICE, "ProcedureCreate: type '%s' is not yet defined",
+        returnTypeName);
+#if 0
+       elog(NOTICE, "ProcedureCreate: creating a shell for type '%s'",
+        returnTypeName);
+#endif     
+       typeObjectId = TypeShellMake(returnTypeName);
+       if (!OidIsValid(typeObjectId)) {
+       elog(WARN, "ProcedureCreate: could not create type '%s'",
+            returnTypeName);
+       }
+   }
+   
+   else if (!defined) {
+       elog(NOTICE, "ProcedureCreate: return type '%s' is only a shell",
+        returnTypeName);
+   }
+    }
+    
+    /* don't allow functions of complex types that have the same name as
+       existing attributes of the type */
+    if (parameterCount == 1 && 
+   (toid = TypeGet(strVal(lfirst(argList)), &defined)) &&
+   defined &&
+   (relid = typeid_get_relid(toid)) != 0 &&
+   get_attnum(relid, procedureName) != InvalidAttrNumber)
+   elog(WARN, "method %s already an attribute of type %s",
+        procedureName, strVal(lfirst(argList)));
+    
+    
+    /*
+     *  If this is a postquel procedure, we parse it here in order to
+     *  be sure that it contains no syntax errors.  We should store
+     *  the plan in an Inversion file for use later, but for now, we
+     *  just store the procedure's text in the prosrc attribute.
+     */
+    
+    if (strcmp(languageName, "sql") == 0) {
+   plan_list = pg_plan(prosrc, typev, parameterCount,
+               &querytree_list, dest);
+   
+   /* typecheck return value */
+   pg_checkretval(typeObjectId, querytree_list);
+    }
+    
+    for (i = 0; i < Natts_pg_proc; ++i) {
+   nulls[i] = ' ';
+   values[i] = (Datum)NULL;
+    }
+    
+    i = 0;
+    values[i++] = PointerGetDatum(procedureName);
+    values[i++] =  Int32GetDatum(GetUserId());
+    values[i++] =  ObjectIdGetDatum(languageObjectId);
+    
+    /* XXX isinherited is always false for now */
+    
+    values[i++] = Int8GetDatum((bool) 0);
+    
+    /* XXX istrusted is always false for now */
+    
+    values[i++] =  Int8GetDatum(trusted);
+    values[i++] =  Int8GetDatum(canCache);
+    values[i++] =  UInt16GetDatum(parameterCount);
+    values[i++] =  Int8GetDatum(returnsSet);
+    values[i++] =  ObjectIdGetDatum(typeObjectId);
+    
+    values[i++] = (Datum) typev;
+    /*
+     * The following assignments of constants are made.  The real values
+     * will have to be extracted from the arglist someday soon.
+     */
+    values[i++] =  Int32GetDatum(byte_pct); /* probyte_pct */
+    values[i++] =  Int32GetDatum(perbyte_cpu); /* properbyte_cpu */
+    values[i++] =  Int32GetDatum(percall_cpu); /* propercall_cpu */
+    values[i++] =  Int32GetDatum(outin_ratio); /* prooutin_ratio */
+    
+    values[i++] = (Datum)fmgr(TextInRegProcedure, prosrc); /* prosrc */
+    values[i++] = (Datum)fmgr(TextInRegProcedure, probin);   /* probin */
+    
+    rdesc = heap_openr(ProcedureRelationName);
+    
+    tupDesc = rdesc->rd_att;
+    tup = heap_formtuple(tupDesc,
+            values,
+            nulls);
+    
+    heap_insert(rdesc, tup);
+    
+    if (RelationGetRelationTupleForm(rdesc)->relhasindex)
+   {
+       Relation idescs[Num_pg_proc_indices];
+       
+       CatalogOpenIndices(Num_pg_proc_indices, Name_pg_proc_indices, idescs);
+       CatalogIndexInsert(idescs, Num_pg_proc_indices, rdesc, tup);
+       CatalogCloseIndices(Num_pg_proc_indices, idescs);
+   }
+    heap_close(rdesc);
+    return tup->t_oid;
+}
+
diff --git a/src/backend/catalog/pg_proc.h b/src/backend/catalog/pg_proc.h

new file mode 100644 (file)

index 0000000..f282839
--- /dev/null
+++ b/src/backend/catalog/pg_proc.h
@@ -0,0 +1,769 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_proc.h--
+ *    definition of the system "procedure" relation (pg_proc)
+ *    along with the relation's initial contents.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id: pg_proc.h,v 1.1.1.1 1996/07/09 06:21:18 scrappy Exp $
+ *
+ * NOTES
+ *    The script catalog/genbki.sh reads this file and generates .bki
+ *    information from the DATA() statements.  utils/Gen_fmgrtab.sh 
+ *    generates fmgr.h and fmgrtab.c the same way.
+ *
+ *    XXX do NOT break up DATA() statements into multiple lines!
+ *        the scripts are not as smart as you might think...
+ *    XXX (eg. #if 0 #endif won't do what you think)
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PROC_H
+#define PG_PROC_H
+
+/* ----------------
+ * postgres.h contains the system type definitions and the
+ * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
+ * can be read by both genbki.sh and the C compiler.
+ * ----------------
+ */
+#include "postgres.h"
+#include "nodes/pg_list.h"
+#include "tcop/dest.h"
+
+/* ----------------
+ * pg_proc definition.  cpp turns this into
+ * typedef struct FormData_pg_proc
+ * ----------------
+ */
+CATALOG(pg_proc) BOOTSTRAP {
+    NameData   proname;	/* procedure name */
+    Oid    proowner;	/* ProcedureCreate stores GetUserId() of the creator here */
+    Oid    prolang;	/* object id of the procedure's language */
+    bool   proisinh;	/* "isinherited"; ProcedureCreate always stores false */
+    bool   proistrusted;	/* "trusted" flag as passed to ProcedureCreate */
+    bool   proiscachable;	/* "canCache" flag as passed to ProcedureCreate */
+    int2   pronargs;	/* number of parameters */
+    bool   proretset;	/* true if the procedure returns a set */
+    Oid    prorettype;	/* object id of the return type */
+    oid8        proargtypes;	/* type object ids of the parameters */
+    int4        probyte_pct;	/* NOTE(review): this and the next three look like cost hints -- confirm */
+    int4        properbyte_cpu;
+    int4        propercall_cpu;
+    int4        prooutin_ratio;
+    text   prosrc;     /* VARIABLE LENGTH FIELD */
+    bytea  probin;     /* VARIABLE LENGTH FIELD */
+} FormData_pg_proc;
+
+/* ----------------
+ * Form_pg_proc corresponds to a pointer to a tuple with
+ * the format of pg_proc relation.
+ * ----------------
+ */
+typedef FormData_pg_proc   *Form_pg_proc;
+
+/* ----------------
+ * compiler constants for pg_proc (attribute count, 1-based attribute numbers)
+ * ----------------
+ */
+#define Natts_pg_proc          16
+#define Anum_pg_proc_proname       1
+#define Anum_pg_proc_proowner      2
+#define Anum_pg_proc_prolang       3
+#define Anum_pg_proc_proisinh      4
+#define Anum_pg_proc_proistrusted  5
+#define Anum_pg_proc_proiscachable 6
+#define Anum_pg_proc_pronargs      7
+#define Anum_pg_proc_proretset     8
+#define Anum_pg_proc_prorettype        9
+#define Anum_pg_proc_proargtypes        10
+#define Anum_pg_proc_probyte_pct        11
+#define Anum_pg_proc_properbyte_cpu     12
+#define Anum_pg_proc_propercall_cpu     13
+#define Anum_pg_proc_prooutin_ratio     14 
+#define Anum_pg_proc_prosrc        15
+#define Anum_pg_proc_probin        16
+
+/* ----------------
+ * initial contents of pg_proc
+ * ----------------
+ */
+
+/* keep the following ordered by OID so that later changes can be made easier*/
+
+/* OIDS 1 - 99 */
+DATA(insert OID =  28 (  boolin            PGUID 11 f t f 1 f 16 "0" 100 0 0  100  foo bar ));
+DATA(insert OID =  29 (  boolout           PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  30 (  byteain           PGUID 11 f t f 1 f 17 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  31 (  byteaout          PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  32 (  charin            PGUID 11 f t f 1 f 18 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  33 (  charout           PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  34 (  namein          PGUID 11 f t f 1 f 19 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  35 (  nameout         PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  36 (  char16in          PGUID 11 f t f 1 f 19 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  37 (  char16out         PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  38 (  int2in            PGUID 11 f t f 1 f 21 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  39 (  int2out           PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  40 (  int28in           PGUID 11 f t f 1 f 22 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  41 (  int28out          PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  42 (  int4in            PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  43 (  int4out           PGUID 11 f t f 1 f 19 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  44 (  regprocin         PGUID 11 f t f 1 f 24 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  45 (  regprocout        PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  46 (  textin            PGUID 11 f t f 1 f 25 "0" 100 0 0 100  foo bar ));
+#define TextInRegProcedure 46
+
+DATA(insert OID =  47 (  textout           PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  48 (  tidin             PGUID 11 f t f 1 f 27 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  49 (  tidout            PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  50 (  xidin             PGUID 11 f t f 1 f 28 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  51 (  xidout            PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  52 (  cidin             PGUID 11 f t f 1 f 29 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  53 (  cidout            PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  54 (  oid8in            PGUID 11 f t f 1 f 30 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  55 (  oid8out           PGUID 11 f t f 1 f 23 "0" 100 0 0 100  foo bar ));
+DATA(insert OID =  60 (  booleq            PGUID 11 f t f 2 f 16 "16 16" 100 0 0 100  foo bar ));
+DATA(insert OID =  61 (  chareq            PGUID 11 f t f 2 f 16 "18 18" 100 0 0 100  foo bar ));
+#define       CharacterEqualRegProcedure      61
+
+DATA(insert OID =  62 (  nameeq          PGUID 11 f t f 2 f 16 "19 19" 100 0 0 100  foo bar ));
+#define NameEqualRegProcedure      62
+    
+DATA(insert OID =  63 (  int2eq            PGUID 11 f t f 2 f 16 "21 21" 100 0 0 100  foo bar ));
+#define Integer16EqualRegProcedure 63
+    
+DATA(insert OID =  64 (  int2lt            PGUID 11 f t f 2 f 16 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID =  65 (  int4eq            PGUID 11 f t f 2 f 16 "23 23" 100 0 0 100  foo bar ));
+#define Integer32EqualRegProcedure 65
+    
+DATA(insert OID =  66 (  int4lt            PGUID 11 f t f 2 f 16 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID =  67 (  texteq            PGUID 11 f t f 2 f 16 "25 25" 100 0 0 0  foo bar ));
+#define TextEqualRegProcedure           67
+
+DATA(insert OID =  68 (  xideq             PGUID 11 f t f 2 f 16 "28 28" 100 0 0 100  foo bar ));
+DATA(insert OID =  69 (  cideq             PGUID 11 f t f 2 f 16 "29 29" 100 0 0 100  foo bar ));
+DATA(insert OID =  70 (  charne            PGUID 11 f t f 2 f 16 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  71 (  charlt            PGUID 11 f t f 2 f 16 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  72 (  charle            PGUID 11 f t f 2 f 16 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  73 (  chargt            PGUID 11 f t f 2 f 16 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  74 (  charge            PGUID 11 f t f 2 f 16 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  75 (  charpl            PGUID 11 f t f 2 f 18 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  76 (  charmi            PGUID 11 f t f 2 f 18 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  77 (  charmul           PGUID 11 f t f 2 f 18 "18 18" 100 0 0 100  foo bar ));
+DATA(insert OID =  78 (  chardiv           PGUID 11 f t f 2 f 18 "18 18" 100 0 0 100  foo bar ));
+
+DATA(insert OID =  79 (  nameregexeq     PGUID 11 f t f 2 f 16 "19 25" 100 0 0 100  foo bar ));
+DATA(insert OID =  80 (  nameregexne     PGUID 11 f t f 2 f 16 "19 25" 100 0 0 100  foo bar ));
+DATA(insert OID =  81 (  textregexeq       PGUID 11 f t f 2 f 16 "25 25" 100 0 1 0  foo bar ));
+DATA(insert OID =  82 (  textregexne       PGUID 11 f t f 2 f 16 "25 25" 100 0 1 0  foo bar ));
+DATA(insert OID =  83 (  textcat           PGUID 11 f t f 2 f 25 "25 25" 100 0 1 0  foo bar ));
+DATA(insert OID =  84 (  boolne            PGUID 11 f t f 2 f 16 "16 16" 100 0 0 100  foo bar ));
+
+DATA(insert OID =  97 (  rtsel             PGUID 11 f t f 7 f 701 "26 26 21 0 23 23 26" 100 0 0 100  foo bar ));
+DATA(insert OID =  98 (  rtnpage           PGUID 11 f t f 7 f 701 "26 26 21 0 23 23 26" 100 0 0 100  foo bar ));
+DATA(insert OID =  99 (  btreesel          PGUID 11 f t f 7 f 701 "26 26 21 0 23 23 26" 100 0 0 100  foo bar ));
+
+/* OIDS 100 - 199 */
+
+DATA(insert OID = 100 (  btreenpage        PGUID 11 f t f 7 f 701 "26 26 21 0 23 23 26" 100 0 0 100  foo bar ));
+DATA(insert OID = 101 (  eqsel             PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  foo bar ));
+#define EqualSelectivityProcedure 101
+
+DATA(insert OID = 102 (  neqsel            PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 103 (  intltsel          PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 104 (  intgtsel          PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 105 (  eqjoinsel         PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 106 (  neqjoinsel        PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 107 (  intltjoinsel      PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 108 (  intgtjoinsel      PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100  foo bar ));
+
+
+
+DATA(insert OID = 117 (  point_in          PGUID 11 f t f 1 f 600 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 118 (  point_out         PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 119 (  lseg_in           PGUID 11 f t f 1 f 601 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 120 (  lseg_out          PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 121 (  path_in           PGUID 11 f t f 1 f 602 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 122 (  path_out          PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 123 (  box_in            PGUID 11 f t f 1 f 603 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 124 (  box_out           PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 125 (  box_overlap       PGUID 11 f t f 2 f 16 "603 603" 100 1 0 100  foo bar ));
+DATA(insert OID = 126 (  box_ge            PGUID 11 f t f 2 f 16 "603 603" 100 1 0 100  foo bar ));
+DATA(insert OID = 127 (  box_gt            PGUID 11 f t f 2 f 16 "603 603" 100 1 0 100  foo bar ));
+DATA(insert OID = 128 (  box_eq            PGUID 11 f t f 2 f 16 "603 603" 100 1 0 100  foo bar ));
+DATA(insert OID = 129 (  box_lt            PGUID 11 f t f 2 f 16 "603 603" 100 1 0 100  foo bar ));
+DATA(insert OID = 130 (  box_le            PGUID 11 f t f 2 f 16 "603 603" 100 1 0 100  foo bar ));
+DATA(insert OID = 131 (  point_above       PGUID 11 f t f 2 f 16 "600 600" 100 0 0 100  foo bar ));
+DATA(insert OID = 132 (  point_left        PGUID 11 f t f 2 f 16 "600 600" 100 0 0 100  foo bar ));
+DATA(insert OID = 133 (  point_right       PGUID 11 f t f 2 f 16 "600 600" 100 0 0 100  foo bar ));
+DATA(insert OID = 134 (  point_below       PGUID 11 f t f 2 f 16 "600 600" 100 0 0 100  foo bar ));
+DATA(insert OID = 135 (  point_eq          PGUID 11 f t f 2 f 16 "600 600" 100 0 0 100  foo bar ));
+DATA(insert OID = 136 (  on_pb             PGUID 11 f t f 2 f 16 "600 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 137 (  on_ppath          PGUID 11 f t f 2 f 16 "600 602" 100 0 1 0  foo bar ));
+DATA(insert OID = 138 (  box_center        PGUID 11 f t f 1 f 600 "603" 100 1 0 100  foo bar ));
+DATA(insert OID = 139 (  areasel           PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 140 (  areajoinsel       PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 141 (  int4mul           PGUID 11 f t f 2 f 23 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 142 (  int4fac           PGUID 11 f t f 1 f 23 "23" 100 0 0 100  foo bar ));
+DATA(insert OID = 143 (  pointdist         PGUID 11 f t f 2 f 23 "600 600" 100 0 0 100  foo bar ));
+DATA(insert OID = 144 (  int4ne            PGUID 11 f t f 2 f 16 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 145 (  int2ne            PGUID 11 f t f 2 f 16 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 146 (  int2gt            PGUID 11 f t f 2 f 16 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 147 (  int4gt            PGUID 11 f t f 2 f 16 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 148 (  int2le            PGUID 11 f t f 2 f 16 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 149 (  int4le            PGUID 11 f t f 2 f 16 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 150 (  int4ge            PGUID 11 f t f 2 f 16 "23 23" 100 0 0 100  foo bar ));
+#define INT4GE_PROC_OID 150
+DATA(insert OID = 151 (  int2ge            PGUID 11 f t f 2 f 16 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 152 (  int2mul           PGUID 11 f t f 2 f 21 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 153 (  int2div           PGUID 11 f t f 2 f 21 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 154 (  int4div           PGUID 11 f t f 2 f 23 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 155 (  int2mod           PGUID 11 f t f 2 f 21 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 156 (  int4mod           PGUID 11 f t f 2 f 23 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 157 (  textne            PGUID 11 f t f 2 f 16 "25 25" 100 0 0 0  foo bar ));
+DATA(insert OID = 158 (  int24eq           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 159 (  int42eq           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 160 (  int24lt           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 161 (  int42lt           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 162 (  int24gt           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 163 (  int42gt           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 164 (  int24ne           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 165 (  int42ne           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 166 (  int24le           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 167 (  int42le           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 168 (  int24ge           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 169 (  int42ge           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 170 (  int24mul          PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 171 (  int42mul          PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 172 (  int24div          PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 173 (  int42div          PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 174 (  int24mod          PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 175 (  int42mod          PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 176 (  int2pl            PGUID 11 f t f 2 f 21 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 177 (  int4pl            PGUID 11 f t f 2 f 23 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 178 (  int24pl           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 179 (  int42pl           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 180 (  int2mi            PGUID 11 f t f 2 f 21 "21 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 181 (  int4mi            PGUID 11 f t f 2 f 23 "23 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 182 (  int24mi           PGUID 11 f t f 2 f 23 "21 23" 100 0 0 100  foo bar ));
+DATA(insert OID = 183 (  int42mi           PGUID 11 f t f 2 f 23 "23 21" 100 0 0 100  foo bar ));
+DATA(insert OID = 184 (  oideq             PGUID 11 f t f 2 f 16 "26 26" 100 0 0 100  foo bar ));
+#define ObjectIdEqualRegProcedure  184
+    
+DATA(insert OID = 185 (  oidne             PGUID 11 f t f 2 f 16 "26 26" 100 0 0 100  foo bar ));
+DATA(insert OID = 186 (  box_same          PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 187 (  box_contain       PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 188 (  box_left          PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 189 (  box_overleft      PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 190 (  box_overright     PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 191 (  box_right         PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 192 (  box_contained     PGUID 11 f t f 2 f 16 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 193 (  rt_box_union      PGUID 11 f t f 2 f 603 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 194 (  rt_box_inter      PGUID 11 f t f 2 f 603 "603 603" 100 0 0 100  foo bar ));
+DATA(insert OID = 195 (  rt_box_size       PGUID 11 f t f 2 f 700 "603 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 196 (  rt_bigbox_size    PGUID 11 f t f 2 f 700 "603 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 197 (  rt_poly_union     PGUID 11 f t f 2 f 604 "604 604" 100 0 0 100  foo bar ));
+DATA(insert OID = 198 (  rt_poly_inter     PGUID 11 f t f 2 f 604 "604 604" 100 0 0 100  foo bar ));
+DATA(insert OID = 199 (  rt_poly_size      PGUID 11 f t f 2 f 23 "604 23" 100 0 0 100  foo bar ));
+
+/* OIDS 200 - 299 */
+
+DATA(insert OID = 200 (  float4in          PGUID 11 f t f 1 f 700 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 201 (  float4out         PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 202 (  float4mul         PGUID 11 f t f 2 f 700 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 203 (  float4div         PGUID 11 f t f 2 f 700 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 204 (  float4pl          PGUID 11 f t f 2 f 700 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 205 (  float4mi          PGUID 11 f t f 2 f 700 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 206 (  float4um          PGUID 11 f t f 1 f 700 "700" 100 0 0 100  foo bar ));
+DATA(insert OID = 207 (  float4abs         PGUID 11 f t f 1 f 700 "700" 100 0 0 100  foo bar ));
+DATA(insert OID = 208 (  float4inc         PGUID 11 f t f 1 f 700 "700" 100 0 0 100  foo bar ));
+DATA(insert OID = 209 (  float4larger      PGUID 11 f t f 2 f 700 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 211 (  float4smaller     PGUID 11 f t f 2 f 700 "700 700" 100 0 0 100  foo bar ));
+
+DATA(insert OID = 212 (  int4um            PGUID 11 f t f 1 f 23 "23" 100 0 0 100  foo bar ));
+DATA(insert OID = 213 (  int2um            PGUID 11 f t f 1 f 21 "21" 100 0 0 100  foo bar ));
+    
+DATA(insert OID = 214 (  float8in          PGUID 11 f t f 1 f 701 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 215 (  float8out         PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 216 (  float8mul         PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 217 (  float8div         PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 218 (  float8pl          PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 219 (  float8mi          PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 220 (  float8um          PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 221 (  float8abs         PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 222 (  float8inc         PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 223 (  float8larger      PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 224 (  float8smaller     PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 228 (  dround            PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 229 (  dtrunc            PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 230 (  dsqrt             PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 231 (  dcbrt             PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 232 (  dpow              PGUID 11 f t f 2 f 701 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 233 (  dexp              PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+DATA(insert OID = 234 (  dlog1             PGUID 11 f t f 1 f 701 "701" 100 0 0 100  foo bar ));
+    
+DATA(insert OID = 240 (  nabstimein        PGUID 11 f t f 1 f 702 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 241 (  nabstimeout       PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 242 (  reltimein         PGUID 11 f t f 1 f 703 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 243 (  reltimeout        PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 244 (  timepl            PGUID 11 f t f 2 f 702 "702 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 245 (  timemi            PGUID 11 f t f 2 f 702 "702 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 246 (  tintervalin       PGUID 11 f t f 1 f 704 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 247 (  tintervalout      PGUID 11 f t f 1 f 23  "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 248 (  ininterval        PGUID 11 f t f 2 f 16 "702 704" 100 0 0 100  foo bar ));
+DATA(insert OID = 249 (  intervalrel       PGUID 11 f t f 1 f 703 "704" 100 0 0 100  foo bar ));
+DATA(insert OID = 250 (  timenow           PGUID 11 f t f 0 f 702 "0" 100 0 0 100  foo bar ));
+DATA(insert OID = 251 (  abstimeeq         PGUID 11 f t f 2 f 16 "702 702" 100 0 0 100  foo bar ));
+DATA(insert OID = 252 (  abstimene         PGUID 11 f t f 2 f 16 "702 702" 100 0 0 100  foo bar ));
+DATA(insert OID = 253 (  abstimelt         PGUID 11 f t f 2 f 16 "702 702" 100 0 0 100  foo bar ));
+DATA(insert OID = 254 (  abstimegt         PGUID 11 f t f 2 f 16 "702 702" 100 0 0 100  foo bar ));
+DATA(insert OID = 255 (  abstimele         PGUID 11 f t f 2 f 16 "702 702" 100 0 0 100  foo bar ));
+DATA(insert OID = 256 (  abstimege         PGUID 11 f t f 2 f 16 "702 702" 100 0 0 100  foo bar ));
+DATA(insert OID = 257 (  reltimeeq         PGUID 11 f t f 2 f 16 "703 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 258 (  reltimene         PGUID 11 f t f 2 f 16 "703 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 259 (  reltimelt         PGUID 11 f t f 2 f 16 "703 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 260 (  reltimegt         PGUID 11 f t f 2 f 16 "703 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 261 (  reltimele         PGUID 11 f t f 2 f 16 "703 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 262 (  reltimege         PGUID 11 f t f 2 f 16 "703 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 263 (  intervaleq        PGUID 11 f t f 2 f 16 "704 704" 100 0 0 100  foo bar ));
+DATA(insert OID = 264 (  intervalct        PGUID 11 f t f 2 f 16 "704 704" 100 0 0 100  foo bar ));
+DATA(insert OID = 265 (  intervalov        PGUID 11 f t f 2 f 16 "704 704" 100 0 0 100  foo bar ));
+DATA(insert OID = 266 (  intervalleneq     PGUID 11 f t f 2 f 16 "704 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 267 (  intervallenne     PGUID 11 f t f 2 f 16 "704 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 268 (  intervallenlt     PGUID 11 f t f 2 f 16 "704 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 269 (  intervallengt     PGUID 11 f t f 2 f 16 "704 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 270 (  intervallenle     PGUID 11 f t f 2 f 16 "704 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 271 (  intervallenge     PGUID 11 f t f 2 f 16 "704 703" 100 0 0 100  foo bar ));
+DATA(insert OID = 272 (  intervalstart     PGUID 11 f t f 1 f 702 "704" 100 0 0 100  foo bar ));
+DATA(insert OID = 273 (  intervalend       PGUID 11 f t f 1 f 702 "704" 100 0 0 100  foo bar ));
+DATA(insert OID = 274 (  timeofday         PGUID 11 f t f 0 f 25 "0" 100 0 0 100  foo bar ));
+
+DATA(insert OID = 276 (  int2fac           PGUID 11 f t f 1 f 21 "21" 100 0 0 100  foo bar ));
+DATA(insert OID = 279 (  float48mul        PGUID 11 f t f 2 f 701 "700 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 280 (  float48div        PGUID 11 f t f 2 f 701 "700 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 281 (  float48pl         PGUID 11 f t f 2 f 701 "700 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 282 (  float48mi         PGUID 11 f t f 2 f 701 "700 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 283 (  float84mul        PGUID 11 f t f 2 f 701 "701 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 284 (  float84div        PGUID 11 f t f 2 f 701 "701 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 285 (  float84pl         PGUID 11 f t f 2 f 701 "701 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 286 (  float84mi         PGUID 11 f t f 2 f 701 "701 700" 100 0 0 100  foo bar ));
+
+DATA(insert OID = 287 (  float4eq          PGUID 11 f t f 2 f 16 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 288 (  float4ne          PGUID 11 f t f 2 f 16 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 289 (  float4lt          PGUID 11 f t f 2 f 16 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 290 (  float4le          PGUID 11 f t f 2 f 16 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 291 (  float4gt          PGUID 11 f t f 2 f 16 "700 700" 100 0 0 100  foo bar ));
+DATA(insert OID = 292 (  float4ge          PGUID 11 f t f 2 f 16 "700 700" 100 0 0 100  foo bar ));
+
+DATA(insert OID = 293 (  float8eq          PGUID 11 f t f 2 f 16 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 294 (  float8ne          PGUID 11 f t f 2 f 16 "701 701" 100 0 0 100  foo bar ));
+DATA(insert OID = 295 (  float8lt          PGUID 11 f t f 2 f 16 "701 701"
author	Marc G. Fournier <scrappy@hub.org>
	Tue, 9 Jul 1996 06:22:35 +0000 (06:22 +0000)
committer	Marc G. Fournier <scrappy@hub.org>
	Tue, 9 Jul 1996 06:22:35 +0000 (06:22 +0000)