Add bit_count SQL function
author Peter Eisentraut <peter@eisentraut.org>
Tue, 23 Mar 2021 07:45:51 +0000 (08:45 +0100)
committer Peter Eisentraut <peter@eisentraut.org>
Tue, 23 Mar 2021 09:13:58 +0000 (10:13 +0100)
This function for bit and bytea counts the set bits in the bit or byte
string.  Internally, we use the existing popcount functionality.

For the name, after some discussion, we settled on bit_count, which
also exists with this meaning in MySQL, Java, and Python.

Author: David Fetter <david@fetter.org>
Discussion: https://www.postgresql.org/message-id/flat/20201230105535.GJ13234@fetter.org

doc/src/sgml/func.sgml
src/backend/utils/adt/varbit.c
src/backend/utils/adt/varlena.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.dat
src/test/regress/expected/bit.out
src/test/regress/expected/strings.out
src/test/regress/sql/bit.sql
src/test/regress/sql/strings.sql

index 68fe6a95b494acf95acb66c321c13695f2b96c3a..1d3429fbd9c27439535f2521cf1ed9a697b05ccd 100644 (file)
@@ -4010,6 +4010,28 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
      </thead>
 
      <tbody>
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>bit_count</primary>
+        </indexterm>
+        <indexterm>
+         <primary>popcount</primary>
+         <see>bit_count</see>
+        </indexterm>
+        <function>bit_count</function> ( <parameter>bytes</parameter> <type>bytea</type> )
+        <returnvalue>bigint</returnvalue>
+       </para>
+       <para>
+        Returns the number of bits set in the binary string (also known as
+        <quote>popcount</quote>).
+       </para>
+       <para>
+        <literal>bit_count('\x1234567890'::bytea)</literal>
+        <returnvalue>15</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -4714,6 +4736,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
      </thead>
 
      <tbody>
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>bit_count</primary>
+        </indexterm>
+        <function>bit_count</function> ( <parameter>bits</parameter> <type>bit</type> )
+        <returnvalue>bigint</returnvalue>
+       </para>
+       <para>
+        Returns the number of bits set in the bit string (also known as
+        <quote>popcount</quote>).
+       </para>
+       <para>
+        <literal>bit_count(B'10111')</literal>
+        <returnvalue>4</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
index 2235866244da34e47f28d4c662411f3be9da27ca..0d0c0fd9f3c8bbddd62f044b859200c1e05f4ae9 100644 (file)
@@ -36,6 +36,7 @@
 #include "libpq/pqformat.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
+#include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/varbit.h"
@@ -1201,6 +1202,19 @@ bit_overlay(VarBit *t1, VarBit *t2, int sp, int sl)
    return result;
 }
 
+/*
+ * bit_bit_count - implements SQL bit_count(bit): the number of set bits
+ * in a bit string, as int8.  All data bytes go to pg_popcount, so this
+ * presumably relies on trailing pad bits being zero -- TODO confirm.
+ */
+Datum
+bit_bit_count(PG_FUNCTION_ARGS)
+{
+   VarBit     *arg = PG_GETARG_VARBIT_P(0);
+
+   PG_RETURN_INT64(pg_popcount((char *) VARBITS(arg), VARBITBYTES(arg)));
+}
+
 /*
  * bitlength, bitoctetlength
  * Return the length of a bit string
index 0bc345aa4d397e93666e95734a208479cdfa1927..640e3fd4c04b956cd18d56e5b3acdfdc37c579ab 100644 (file)
@@ -3440,6 +3440,17 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    return result;
 }
 
+/*
+ * bytea_bit_count - SQL bit_count(bytea): number of set bits, as int8
+ */
+Datum
+bytea_bit_count(PG_FUNCTION_ARGS)
+{
+   bytea      *t1 = PG_GETARG_BYTEA_PP(0);
+
+   PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+}
+
 /*
  * byteapos -
  *   Return the position of the specified substring.
index 3cf93fd381b5f3d5d0ec91926f5b84b05762260c..2f18734235ab247e4f6ce5e36f7e26b201d75d19 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202103231
+#define CATALOG_VERSION_NO 202103232
 
 #endif
index b9f4afba050308afe58948fb8db048239ebcef1c..464fa8d614b372468dca5a1a1a16dab3c13b8f42 100644 (file)
 { oid => '752', descr => 'substitute portion of string',
   proname => 'overlay', prorettype => 'bytea',
   proargtypes => 'bytea bytea int4', prosrc => 'byteaoverlay_no_len' },
+{ oid => '8436', descr => 'number of set bits',
+  proname => 'bit_count', prorettype => 'int8', proargtypes => 'bytea',
+  prosrc => 'bytea_bit_count' },
 
 { oid => '725',
   proname => 'dist_pl', prorettype => 'float8', proargtypes => 'point line',
 { oid => '3033', descr => 'set bit',
   proname => 'set_bit', prorettype => 'bit', proargtypes => 'bit int4 int4',
   prosrc => 'bitsetbit' },
+{ oid => '8435', descr => 'number of set bits',
+  proname => 'bit_count', prorettype => 'int8', proargtypes => 'bit',
+  prosrc => 'bit_bit_count' },
 
 # for macaddr type support
 { oid => '436', descr => 'I/O',
index a7f95b846d96cc613e3947cdecb4765b5c44f256..a5aab9c0e355c250b7e6ab57d85b28bb0bf6b7f9 100644 (file)
@@ -710,6 +710,19 @@ SELECT overlay(B'0101011100' placing '001' from 20);
  0101011100001
 (1 row)
 
+-- bit_count
+SELECT bit_count(B'0101011100'::bit(10));
+ bit_count 
+-----------
+         5
+(1 row)
+
+SELECT bit_count(B'1111111111'::bit(10));
+ bit_count 
+-----------
+        10
+(1 row)
+
 -- This table is intentionally left around to exercise pg_dump/pg_upgrade
 CREATE TABLE bit_defaults(
   b1 bit(4) DEFAULT '1001',
index fb4573d85ff7f042b7264b14d8c48f68c79e6629..f751f0ca159d868474ae109e8acae0166da17266 100644 (file)
@@ -2227,3 +2227,9 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5
  Th\000o\x02\x03
 (1 row)
 
+SELECT bit_count('\x1234567890'::bytea);
+ bit_count 
+-----------
+        31
+(1 row)
+
index ea01742c4aa1fb4d45a945f1d701b439c401b349..0a424e796b929c8ff7bd368aaef15e309fb43cd3 100644 (file)
@@ -215,6 +215,10 @@ SELECT overlay(B'0101011100' placing '101' from 6);
 SELECT overlay(B'0101011100' placing '001' from 11);
 SELECT overlay(B'0101011100' placing '001' from 20);
 
+-- bit_count
+SELECT bit_count(B'0101011100'::bit(10));
+SELECT bit_count(B'1111111111'::bit(10));
+
 -- This table is intentionally left around to exercise pg_dump/pg_upgrade
 CREATE TABLE bit_defaults(
   b1 bit(4) DEFAULT '1001',
index 57a48c9d0b08c0de22de6a6c6e127b4bcb695ce9..c043f0254171eac0e48e9014a5218a3d4437bb23 100644 (file)
@@ -742,3 +742,5 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);
 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape');
 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape');
 SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
+
+SELECT bit_count('\x1234567890'::bytea);