From 27583e121f2d94015f0a8bf46113816059f68f8a Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 16 Jun 2022 08:32:10 +0530 Subject: [PATCH 01/47] Fix data inconsistency between publisher and subscriber. We were not updating the partition map cache in the subscriber even when the corresponding remote rel is changed. Due to this data was getting incorrectly replicated for partition tables after the publisher has changed the table schema. Fix it by resetting the required entries in the partition map cache after receiving a new relation mapping from the publisher. Reported-by: Shi Yu Author: Shi Yu, Hou Zhijie Reviewed-by: Amit Langote, Amit Kapila Backpatch-through: 13, where it was introduced Discussion: https://postgr.es/m/OSZPR01MB6310F46CD425A967E4AEF736FDA49@OSZPR01MB6310.jpnprd01.prod.outlook.com --- src/backend/replication/logical/relation.c | 34 ++++++++++++++++++++++ src/backend/replication/logical/worker.c | 3 ++ src/include/replication/logicalrelation.h | 1 + src/test/subscription/t/013_partition.pl | 17 ++++++++++- 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c index fad8c92b2ef..a9fa26fe686 100644 --- a/src/backend/replication/logical/relation.c +++ b/src/backend/replication/logical/relation.c @@ -493,6 +493,40 @@ logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid) } } +/* + * Reset the entries in the partition map that refer to remoterel. + * + * Called when new relation mapping is sent by the publisher to update our + * expected view of incoming data from said publisher. + * + * Note that we don't update the remoterel information in the entry here, + * we will update the information in logicalrep_partition_open to avoid + * unnecessary work. + */ +void +logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel) +{ + HASH_SEQ_STATUS status; + LogicalRepPartMapEntry *part_entry; + LogicalRepRelMapEntry *entry; + + if (LogicalRepPartMap == NULL) + return; + + hash_seq_init(&status, LogicalRepPartMap); + while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL) + { + entry = &part_entry->relmapentry; + + if (entry->remoterel.remoteid != remoterel->remoteid) + continue; + + logicalrep_relmap_free_entry(entry); + + memset(entry, 0, sizeof(LogicalRepRelMapEntry)); + } +} + /* * Initialize the partition map cache. */ diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index adcbc36ecef..5335e5f5c62 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1191,6 +1191,9 @@ apply_handle_relation(StringInfo s) rel = logicalrep_read_rel(s); logicalrep_relmap_update(rel); + + /* Also reset all entries in the partition map that refer to remoterel. */ + logicalrep_partmap_reset_relmap(rel); } /* diff --git a/src/include/replication/logicalrelation.h b/src/include/replication/logicalrelation.h index 3c662d3abcf..10f91490b5c 100644 --- a/src/include/replication/logicalrelation.h +++ b/src/include/replication/logicalrelation.h @@ -38,6 +38,7 @@ typedef struct LogicalRepRelMapEntry } LogicalRepRelMapEntry; extern void logicalrep_relmap_update(LogicalRepRelation *remoterel); +extern void logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel); extern LogicalRepRelMapEntry *logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode); diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index e53bc5b568f..568e4d104e0 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -6,7 +6,7 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 69; +use Test::More tests => 70; # setup @@ -841,3 +841,18 @@ BEGIN $result = $node_subscriber2->safe_psql('postgres', "SELECT a, b, c FROM tab5 ORDER BY 1"); is($result, qq(3|1|), 'updates of tab5 replicated correctly after altering table on subscriber'); + +# Test that replication into the partitioned target table continues to +# work correctly when the published table is altered. +$node_publisher->safe_psql( + 'postgres', q{ + ALTER TABLE tab5 DROP COLUMN b, ADD COLUMN c INT; + ALTER TABLE tab5 ADD COLUMN b INT;}); + +$node_publisher->safe_psql('postgres', "UPDATE tab5 SET c = 1 WHERE a = 3"); + +$node_publisher->wait_for_catchup('sub2'); + +$result = $node_subscriber2->safe_psql('postgres', + "SELECT a, b, c FROM tab5 ORDER BY 1"); +is($result, qq(3||1), 'updates of tab5 replicated correctly after altering table on publisher'); From 2c169f09426c8179fe09d2b5b534a54617e20027 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 21 Jun 2022 07:52:41 +0530 Subject: [PATCH 02/47] Fix partition table's REPLICA IDENTITY checking on the subscriber. In logical replication, we will check if the target table on the subscriber is updatable by comparing the replica identity of the table on the publisher with the table on the subscriber. When the target table is a partitioned table, we only check its replica identity but not for the partition tables. This leads to assertion failure while applying changes for update/delete as we expect those to succeed only when the corresponding partition table has a primary key or has a replica identity defined. Fix it by checking the replica identity of the partition table while applying changes. Reported-by: Shi Yu Author: Shi Yu, Hou Zhijie Reviewed-by: Amit Langote, Amit Kapila Backpatch-through: 13, where it was introduced Discussion: https://postgr.es/m/OSZPR01MB6310F46CD425A967E4AEF736FDA49@OSZPR01MB6310.jpnprd01.prod.outlook.com --- src/backend/replication/logical/relation.c | 115 ++++++++++++--------- src/backend/replication/logical/worker.c | 27 +++-- src/test/subscription/t/013_partition.pl | 16 ++- 3 files changed, 102 insertions(+), 56 deletions(-) diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c index a9fa26fe686..f1cff93b920 100644 --- a/src/backend/replication/logical/relation.c +++ b/src/backend/replication/logical/relation.c @@ -249,6 +249,67 @@ logicalrep_report_missing_attrs(LogicalRepRelation *remoterel, } } +/* + * Check if replica identity matches and mark the updatable flag. + * + * We allow for stricter replica identity (fewer columns) on subscriber as + * that will not stop us from finding unique tuple. IE, if publisher has + * identity (id,timestamp) and subscriber just (id) this will not be a + * problem, but in the opposite scenario it will. + * + * We just mark the relation entry as not updatable here if the local + * replica identity is found to be insufficient for applying + * updates/deletes (inserts don't care!) and leave it to + * check_relation_updatable() to throw the actual error if needed. + */ +static void +logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry) +{ + Bitmapset *idkey; + LogicalRepRelation *remoterel = &entry->remoterel; + int i; + + entry->updatable = true; + + idkey = RelationGetIndexAttrBitmap(entry->localrel, + INDEX_ATTR_BITMAP_IDENTITY_KEY); + /* fallback to PK if no replica identity */ + if (idkey == NULL) + { + idkey = RelationGetIndexAttrBitmap(entry->localrel, + INDEX_ATTR_BITMAP_PRIMARY_KEY); + + /* + * If no replica identity index and no PK, the published table must + * have replica identity FULL. + */ + if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL) + entry->updatable = false; + } + + i = -1; + while ((i = bms_next_member(idkey, i)) >= 0) + { + int attnum = i + FirstLowInvalidHeapAttributeNumber; + + if (!AttrNumberIsForUserDefinedAttr(attnum)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication target relation \"%s.%s\" uses " + "system columns in REPLICA IDENTITY index", + remoterel->nspname, remoterel->relname))); + + attnum = AttrNumberGetAttrOffset(attnum); + + if (entry->attrmap->attnums[attnum] < 0 || + !bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys)) + { + entry->updatable = false; + break; + } + } +} + /* * Open the local relation associated with the remote one. * @@ -307,7 +368,6 @@ logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode) if (!entry->localrelvalid) { Oid relid; - Bitmapset *idkey; TupleDesc desc; MemoryContext oldctx; int i; @@ -373,54 +433,10 @@ logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode) bms_free(missingatts); /* - * Check that replica identity matches. We allow for stricter replica - * identity (fewer columns) on subscriber as that will not stop us - * from finding unique tuple. IE, if publisher has identity - * (id,timestamp) and subscriber just (id) this will not be a problem, - * but in the opposite scenario it will. - * - * Don't throw any error here just mark the relation entry as not - * updatable, as replica identity is only for updates and deletes but - * inserts can be replicated even without it. + * Set if the table's replica identity is enough to apply + * update/delete. */ - entry->updatable = true; - idkey = RelationGetIndexAttrBitmap(entry->localrel, - INDEX_ATTR_BITMAP_IDENTITY_KEY); - /* fallback to PK if no replica identity */ - if (idkey == NULL) - { - idkey = RelationGetIndexAttrBitmap(entry->localrel, - INDEX_ATTR_BITMAP_PRIMARY_KEY); - - /* - * If no replica identity index and no PK, the published table - * must have replica identity FULL. - */ - if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL) - entry->updatable = false; - } - - i = -1; - while ((i = bms_next_member(idkey, i)) >= 0) - { - int attnum = i + FirstLowInvalidHeapAttributeNumber; - - if (!AttrNumberIsForUserDefinedAttr(attnum)) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("logical replication target relation \"%s.%s\" uses " - "system columns in REPLICA IDENTITY index", - remoterel->nspname, remoterel->relname))); - - attnum = AttrNumberGetAttrOffset(attnum); - - if (entry->attrmap->attnums[attnum] < 0 || - !bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys)) - { - entry->updatable = false; - break; - } - } + logicalrep_rel_mark_updatable(entry); entry->localrelvalid = true; } @@ -658,7 +674,8 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root, attrmap->maplen * sizeof(AttrNumber)); } - entry->updatable = root->updatable; + /* Set if the table's replica identity is enough to apply update/delete. */ + logicalrep_rel_mark_updatable(entry); entry->localrelvalid = true; diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 5335e5f5c62..7190dd94ebf 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1323,6 +1323,13 @@ apply_handle_insert_internal(ApplyExecutionData *edata, static void check_relation_updatable(LogicalRepRelMapEntry *rel) { + /* + * For partitioned tables, we only need to care if the target partition is + * updatable (aka has PK or RI defined for it). + */ + if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + return; + /* Updatable, no error. */ if (rel->updatable) return; @@ -1676,6 +1683,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, TupleTableSlot *remoteslot_part; TupleConversionMap *map; MemoryContext oldctx; + LogicalRepRelMapEntry *part_entry = NULL; + AttrMap *attrmap = NULL; /* ModifyTableState is needed for ExecFindPartition(). */ edata->mtstate = mtstate = makeNode(ModifyTableState); @@ -1707,8 +1716,11 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, remoteslot_part = table_slot_create(partrel, &estate->es_tupleTable); map = partrelinfo->ri_RootToPartitionMap; if (map != NULL) - remoteslot_part = execute_attr_map_slot(map->attrMap, remoteslot, + { + attrmap = map->attrMap; + remoteslot_part = execute_attr_map_slot(attrmap, remoteslot, remoteslot_part); + } else { remoteslot_part = ExecCopySlot(remoteslot_part, remoteslot); @@ -1716,6 +1728,14 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, } MemoryContextSwitchTo(oldctx); + /* Check if we can do the update or delete on the leaf partition. */ + if (operation == CMD_UPDATE || operation == CMD_DELETE) + { + part_entry = logicalrep_partition_open(relmapentry, partrel, + attrmap); + check_relation_updatable(part_entry); + } + switch (operation) { case CMD_INSERT: @@ -1737,15 +1757,10 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, * suitable partition. */ { - AttrMap *attrmap = map ? map->attrMap : NULL; - LogicalRepRelMapEntry *part_entry; TupleTableSlot *localslot; ResultRelInfo *partrelinfo_new; bool found; - part_entry = logicalrep_partition_open(relmapentry, partrel, - attrmap); - /* Get the matching local tuple from the partition. */ found = FindReplTupleInLocalRel(estate, partrel, &part_entry->remoterel, diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index 568e4d104e0..dfe2cb6deae 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -6,7 +6,7 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 70; +use Test::More tests => 71; # setup @@ -856,3 +856,17 @@ BEGIN $result = $node_subscriber2->safe_psql('postgres', "SELECT a, b, c FROM tab5 ORDER BY 1"); is($result, qq(3||1), 'updates of tab5 replicated correctly after altering table on publisher'); + +# Test that replication works correctly as long as the leaf partition +# has the necessary REPLICA IDENTITY, even though the actual target +# partitioned table does not. +$node_subscriber2->safe_psql('postgres', + "ALTER TABLE tab5 REPLICA IDENTITY NOTHING"); + +$node_publisher->safe_psql('postgres', "UPDATE tab5 SET a = 4 WHERE a = 3"); + +$node_publisher->wait_for_catchup('sub2'); + +$result = $node_subscriber2->safe_psql('postgres', + "SELECT a, b, c FROM tab5_1 ORDER BY 1"); +is($result, qq(4||1), 'updates of tab5 replicated correctly'); From 393830e8b76f5fc4213454c04cc8a764a860c8f9 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 21 Jun 2022 15:30:36 +0530 Subject: [PATCH 03/47] Fix stale values in partition map entries on subscribers. We build the partition map entries on subscribers while applying the changes for update/delete on partitions. The component relation in each entry is closed after its use so we need to update it on successive use of cache entries. This problem was there since the original commit f1ac27bfda that introduced this code but we didn't notice it till the recent commit 26b3455afa started to use the component relation of partition map cache entry. Reported-by: Tom Lane, as per buildfarm Author: Amit Langote, Hou Zhijie Reviewed-by: Amit Kapila, Shi Yu Backpatch-through: 13, where it was introduced Discussion: https://postgr.es/m/OSZPR01MB6310F46CD425A967E4AEF736FDA49@OSZPR01MB6310.jpnprd01.prod.outlook.com --- src/backend/replication/logical/relation.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c index f1cff93b920..d0ed64da17d 100644 --- a/src/backend/replication/logical/relation.c +++ b/src/backend/replication/logical/relation.c @@ -603,8 +603,20 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root, entry = &part_entry->relmapentry; + /* + * We must always overwrite entry->localrel with the latest partition + * Relation pointer, because the Relation pointed to by the old value may + * have been cleared after the caller would have closed the partition + * relation after the last use of this entry. Note that localrelvalid is + * only updated by the relcache invalidation callback, so it may still be + * true irrespective of whether the Relation pointed to by localrel has + * been cleared or not. + */ if (found && entry->localrelvalid) + { + entry->localrel = partrel; return entry; + } /* Switch to longer-lived context. */ oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext); From 7894998ce3edf573d8e1d3dd5105c0873a8d24e3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 22 Jun 2022 12:11:59 -0400 Subject: [PATCH 04/47] Fix SPI's handling of errors during transaction commit. SPI_commit previously left it up to the caller to recover from any error occurring during commit. Since that's complicated and requires use of low-level xact.c facilities, it's not too surprising that no caller got it right. Let's move the responsibility for cleanup into spi.c. Doing that requires redefining SPI_commit as starting a new transaction, so that it becomes equivalent to SPI_commit_and_chain except that you get default transaction characteristics instead of preserving the prior transaction's characteristics. We can make this pretty transparent API-wise by redefining SPI_start_transaction() as a no-op. Callers that expect to do something in between might be surprised, but available evidence is that no callers do so. Having made that API redefinition, we can fix this mess by having SPI_commit[_and_chain] trap errors and start a new, clean transaction before re-throwing the error. Likewise for SPI_rollback[_and_chain]. Some cleanup is also needed in AtEOXact_SPI, which was nowhere near smart enough to deal with SPI contexts nested inside a committing context. While plperl and pltcl need no changes beyond removing their now-useless SPI_start_transaction() calls, plpython needs some more work because it hadn't gotten the memo about catching commit/rollback errors in the first place. Such an error resulted in longjmp'ing out of the Python interpreter, which leaks Python stack entries at present and is reported to crash Python 3.11 altogether. Add the missing logic to catch such errors and convert them into Python exceptions. This is a back-patch of commit 2e517818f. That's now aged long enough to reduce the concerns about whether it will break something, and we do need to ensure that supported branches will work with Python 3.11. Peter Eisentraut and Tom Lane Discussion: https://postgr.es/m/3375ffd8-d71c-2565-e348-a597d6e739e3@enterprisedb.com Discussion: https://postgr.es/m/17416-ed8fe5d7213d6c25@postgresql.org --- src/backend/executor/spi.c | 10 ---------- src/include/executor/spi.h | 1 - 2 files changed, 11 deletions(-) diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 4a2ddd5dff3..5db53b125ee 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -442,16 +442,6 @@ SPI_rollback_and_chain(void) _SPI_rollback(true); } -/* - * SPICleanup is a no-op, kept for backwards compatibility. We rely on - * AtEOXact_SPI to cleanup. Extensions should not (need to) fiddle with the - * internal SPI state directly. - */ -void -SPICleanup(void) -{ -} - /* * Clean up SPI state at transaction commit or abort. */ diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index ef1964b709d..fc60fdb9584 100644 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -205,7 +205,6 @@ extern void SPI_commit_and_chain(void); extern void SPI_rollback(void); extern void SPI_rollback_and_chain(void); -extern void SPICleanup(void); extern void AtEOXact_SPI(bool isCommit); extern void AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid); extern bool SPI_inside_nonatomic_context(void); From af3346e8d1f0158585c2c3ff39524e81d1f64794 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 22 Jun 2022 14:33:26 -0400 Subject: [PATCH 05/47] doc: clarify wording about phantom reads Reported-by: akhilhello@gmail.com Discussion: https://postgr.es/m/165222922369.669.10475917322916060899@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/high-availability.sgml | 2 +- doc/src/sgml/mvcc.sgml | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index a265409f025..eaa6f4b53cc 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -2194,7 +2194,7 @@ HINT: You can then restart the server after making the necessary configuration Currently, temporary table creation is not allowed during read-only transactions, so in some cases existing scripts will not run correctly. This restriction might be relaxed in a later release. This is - both an SQL Standard compliance issue and a technical issue. + both an SQL standard compliance issue and a technical issue. diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 6c94f6a9429..3d3cbb339ce 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -277,9 +277,10 @@ The table also shows that PostgreSQL's Repeatable Read implementation - does not allow phantom reads. Stricter behavior is permitted by the - SQL standard: the four isolation levels only define which phenomena - must not happen, not which phenomena must happen. + does not allow phantom reads. This is acceptable under the SQL + standard because the standard specifies which anomalies must + not occur at certain isolation levels; higher + guarantees are acceptable. The behavior of the available isolation levels is detailed in the following subsections. From 6ce1d0057ece77a1523ec4c78d47d75ca8f1fe59 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 22 Jun 2022 16:59:54 -0400 Subject: [PATCH 06/47] doc: improve wording of plpgsql RAISE format text Reported-by: pg@kirasoft.com Discussion: https://postgr.es/m/165455351426.573551.7050474465030525109@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/plpgsql.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml index 4cd4bcba802..22fa317f7b5 100644 --- a/doc/src/sgml/plpgsql.sgml +++ b/doc/src/sgml/plpgsql.sgml @@ -3767,7 +3767,7 @@ RAISE ; After level if any, - you can write a format + you can specify a format string (which must be a simple string literal, not an expression). The format string specifies the error message text to be reported. The format string can be followed From 68c3dd378831832dc8732d6392329e371ec50d4a Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 23 Jun 2022 09:20:41 +0530 Subject: [PATCH 07/47] Fix memory leak due to LogicalRepRelMapEntry.attrmap. When rebuilding the relation mapping on subscribers, we were not releasing the attribute mapping's memory which was no longer required. The attribute mapping used in logical tuple conversion was refactored in PG13 (by commit e1551f96e6) but we forgot to update the related code that frees the attribute map. Author: Hou Zhijie Reviewed-by: Amit Langote, Amit Kapila, Shi yu Backpatch-through: 10, where it was introduced Discussion: https://postgr.es/m/OSZPR01MB6310F46CD425A967E4AEF736FDA49@OSZPR01MB6310.jpnprd01.prod.outlook.com --- src/backend/replication/logical/relation.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c index d0ed64da17d..bd303546cce 100644 --- a/src/backend/replication/logical/relation.c +++ b/src/backend/replication/logical/relation.c @@ -376,7 +376,7 @@ logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode) /* Release the no-longer-useful attrmap, if any. */ if (entry->attrmap) { - pfree(entry->attrmap); + free_attrmap(entry->attrmap); entry->attrmap = NULL; } @@ -627,6 +627,13 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root, part_entry->partoid = partOid; } + /* Release the no-longer-useful attrmap, if any. */ + if (entry->attrmap) + { + free_attrmap(entry->attrmap); + entry->attrmap = NULL; + } + if (!entry->remoterel.remoteid) { int i; From 672b74cb1cafe02d5b2f883494dd4f18a70cc93e Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 25 Jun 2022 09:07:44 -0700 Subject: [PATCH 08/47] For PostgreSQL::Test compatibility, alias entire package symbol tables. Remove the need to edit back-branch-specific code sites when back-patching the addition of a PostgreSQL::Test::Utils symbol. Replace per-symbol, incomplete alias lists. Give old and new package names the same EXPORT and EXPORT_OK semantics. Back-patch to v10 (all supported versions). Reviewed by Andrew Dunstan. Discussion: https://postgr.es/m/20220622072144.GD4167527@rfd.leadboat.com --- src/test/perl/PostgreSQL/Test/Cluster.pm | 9 +++-- src/test/perl/PostgreSQL/Test/Utils.pm | 40 +++------------------- src/test/perl/PostgresNode.pm | 25 +++++++------- src/test/perl/TestLib.pm | 42 ------------------------ 4 files changed, 21 insertions(+), 95 deletions(-) diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 12339c23de1..14b8ee73776 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -1,9 +1,9 @@ # Copyright (c) 2022, PostgreSQL Global Development Group -# allow use of release 15+ perl namespace in older branches -# just 'use' the older module name. -# See PostgresNode.pm for function implementations +# Allow use of release 15+ Perl package name in older branches, by giving that +# package the same symbol table as the older package. See PostgresNode::new +# for supporting heuristics. package PostgreSQL::Test::Cluster; @@ -11,5 +11,8 @@ use strict; use warnings; use PostgresNode; +BEGIN { *PostgreSQL::Test::Cluster:: = \*PostgresNode::; } + +use Exporter 'import'; 1; diff --git a/src/test/perl/PostgreSQL/Test/Utils.pm b/src/test/perl/PostgreSQL/Test/Utils.pm index bdbbd6e4706..e743bdfc834 100644 --- a/src/test/perl/PostgreSQL/Test/Utils.pm +++ b/src/test/perl/PostgreSQL/Test/Utils.pm @@ -1,48 +1,16 @@ # Copyright (c) 2022, PostgreSQL Global Development Group -# allow use of release 15+ perl namespace in older branches -# just 'use' the older module name. -# We export the same names as the v15 module. -# See TestLib.pm for alias assignment that makes this all work. +# Allow use of release 15+ Perl package name in older branches, by giving that +# package the same symbol table as the older package. package PostgreSQL::Test::Utils; use strict; use warnings; -use Exporter 'import'; - use TestLib; +BEGIN { *PostgreSQL::Test::Utils:: = \*TestLib::; } -our @EXPORT = qw( - generate_ascii_string - slurp_dir - slurp_file - append_to_file - check_mode_recursive - chmod_recursive - check_pg_config - dir_symlink - system_or_bail - system_log - run_log - run_command - pump_until - - command_ok - command_fails - command_exit_is - program_help_ok - program_version_ok - program_options_handling_ok - command_like - command_like_safe - command_fails_like - command_checks_all - - $windows_os - $is_msys2 - $use_unix_sockets -); +use Exporter 'import'; 1; diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 9e6d4c653b9..241ed8d49e8 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -162,6 +162,17 @@ of finding port numbers, registering instances for cleanup, etc. sub new { my ($class, $name, $pghost, $pgport) = @_; + + # Use release 15+ semantics when the arguments look like (node_name, + # %params). We can't use $class to decide, because get_new_node() passes + # a v14- argument list regardless of the class. $class might be an + # out-of-core subclass. $class->isa('PostgresNode') returns true even for + # descendants of PostgreSQL::Test::Cluster, so it doesn't help. + return $class->get_new_node(@_[ 1 .. $#_ ]) + if !$pghost + or !$pgport + or $pghost =~ /^[a-zA-Z0-9_]$/; + my $testname = basename($0); $testname =~ s/\.[^.]+$//; @@ -3068,18 +3079,4 @@ sub corrupt_page_checksum =cut -# support release 15+ perl module namespace - -package PostgreSQL::Test::Cluster; ## no critic (ProhibitMultiplePackages) - -sub new -{ - shift; # remove class param from args - return PostgresNode->get_new_node(@_); -} - -no warnings 'once'; - -*get_free_port = *PostgresNode::get_free_port; - 1; diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm index f3ee20af41c..610050e1c4b 100644 --- a/src/test/perl/TestLib.pm +++ b/src/test/perl/TestLib.pm @@ -979,46 +979,4 @@ sub command_checks_all =cut -# support release 15+ perl module namespace - -package PostgreSQL::Test::Utils; ## no critic (ProhibitMultiplePackages) - -# we don't want to export anything here, but we want to support things called -# via this package name explicitly. - -# use typeglobs to alias these functions and variables - -no warnings qw(once); - -*generate_ascii_string = *TestLib::generate_ascii_string; -*slurp_dir = *TestLib::slurp_dir; -*slurp_file = *TestLib::slurp_file; -*append_to_file = *TestLib::append_to_file; -*check_mode_recursive = *TestLib::check_mode_recursive; -*chmod_recursive = *TestLib::chmod_recursive; -*check_pg_config = *TestLib::check_pg_config; -*dir_symlink = *TestLib::dir_symlink; -*system_or_bail = *TestLib::system_or_bail; -*system_log = *TestLib::system_log; -*run_log = *TestLib::run_log; -*run_command = *TestLib::run_command; -*command_ok = *TestLib::command_ok; -*command_fails = *TestLib::command_fails; -*command_exit_is = *TestLib::command_exit_is; -*program_help_ok = *TestLib::program_help_ok; -*program_version_ok = *TestLib::program_version_ok; -*program_options_handling_ok = *TestLib::program_options_handling_ok; -*command_like = *TestLib::command_like; -*command_like_safe = *TestLib::command_like_safe; -*command_fails_like = *TestLib::command_fails_like; -*command_checks_all = *TestLib::command_checks_all; - -*windows_os = *TestLib::windows_os; -*is_msys2 = *TestLib::is_msys2; -*use_unix_sockets = *TestLib::use_unix_sockets; -*timeout_default = *TestLib::timeout_default; -*tmp_check = *TestLib::tmp_check; -*log_path = *TestLib::log_path; -*test_logfile = *TestLib::test_log_file; - 1; From b5a4ebfe9997aa551d329e651b3a7c2b43ef42ec Mon Sep 17 00:00:00 2001 From: reshke Date: Tue, 17 Feb 2026 15:40:33 +0500 Subject: [PATCH 09/47] Backport: Ban role pg_signal_backend from more superuser backend types. (#1504) Cherry-picked from https://git.postgresql.org/cgit/postgresql.git/commit/?id=3a9b18b3095366cd0c4305441d426d04572d88c1 Apache Cloudberry changes reviewed by Andrey Borodin and Max Yang. Documentation says it cannot signal "a backend owned by a superuser". On the contrary, it could signal background workers, including the logical replication launcher. It could signal autovacuum workers and the autovacuum launcher. Block all that. Signaling autovacuum workers and those two launchers doesn't stall progress beyond what one could achieve other ways. If a cluster uses a non-core extension with a background worker that does not auto-restart, this could create a denial of service with respect to that background worker. A background worker with bugs in its code for responding to terminations or cancellations could experience those bugs at a time the pg_signal_backend member chooses. Back-patch to v11 (all supported versions). Reviewed by Jelte Fennema-Nio. Reported by Hemanth Sandrana and Mahendrakar Srinivasarao. Security: CVE-2023-5870 Co-authored-by: Noah Misch --- src/backend/storage/ipc/signalfuncs.c | 9 +++++++-- src/test/regress/expected/privileges.out | 18 ++++++++++++++++++ src/test/regress/sql/privileges.sql | 15 +++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 0d5ccaa201d..7f8e420a6a5 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -72,8 +72,13 @@ pg_signal_backend(int pid, int sig, char *msg) return SIGNAL_BACKEND_ERROR; } - /* Only allow superusers to signal superuser-owned backends. */ - if (superuser_arg(proc->roleId) && !superuser()) + /* + * Only allow superusers to signal superuser-owned backends. Any process + * not advertising a role might have the importance of a superuser-owned + * backend, so treat it that way. + */ + if ((!OidIsValid(proc->roleId) || superuser_arg(proc->roleId)) && + !superuser()) return SIGNAL_BACKEND_NOSUPERUSER; /* Users can signal backends they have role membership in. */ diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index b9dfc3116ef..aa81504d8f0 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -1986,6 +1986,24 @@ TABLE information_schema.enabled_roles; INSERT INTO datdba_only DEFAULT VALUES; ERROR: permission denied for table datdba_only +ROLLBACK; +-- pg_signal_backend can't signal superusers +RESET SESSION AUTHORIZATION; +BEGIN; +CREATE OR REPLACE FUNCTION terminate_nothrow(pid int) RETURNS bool + LANGUAGE plpgsql SECURITY DEFINER SET client_min_messages = error AS $$ +BEGIN + RETURN pg_terminate_backend($1); +EXCEPTION WHEN OTHERS THEN + RETURN false; +END$$; +ALTER FUNCTION terminate_nothrow OWNER TO pg_signal_backend; +SELECT backend_type FROM pg_stat_activity +WHERE CASE WHEN COALESCE(usesysid, 10) = 10 THEN terminate_nothrow(pid) END; + backend_type +-------------- +(0 rows) + ROLLBACK; -- test default ACLs \c - diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 02300dd5b7f..2e6242a379a 100644 --- a/src/test/regress/sql/privileges.sql +++ b/src/test/regress/sql/privileges.sql @@ -1208,6 +1208,21 @@ TABLE information_schema.enabled_roles; INSERT INTO datdba_only DEFAULT VALUES; ROLLBACK; +-- pg_signal_backend can't signal superusers +RESET SESSION AUTHORIZATION; +BEGIN; +CREATE OR REPLACE FUNCTION terminate_nothrow(pid int) RETURNS bool + LANGUAGE plpgsql SECURITY DEFINER SET client_min_messages = error AS $$ +BEGIN + RETURN pg_terminate_backend($1); +EXCEPTION WHEN OTHERS THEN + RETURN false; +END$$; +ALTER FUNCTION terminate_nothrow OWNER TO pg_signal_backend; +SELECT backend_type FROM pg_stat_activity +WHERE CASE WHEN COALESCE(usesysid, 10) = 10 THEN terminate_nothrow(pid) END; +ROLLBACK; + -- test default ACLs \c - From 3792fd3272879722524a4371765d966d96c038f7 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 25 Jun 2022 14:15:56 -0700 Subject: [PATCH 10/47] Fix PostgreSQL::Test aliasing for Perl v5.10.1. This Perl segfaults if a declaration of the to-be-aliased package precedes the aliasing itself. Per buildfarm members lapwing and wrasse. Like commit 20911775de4ab7ac3ecc68bd714cb3ed0fd68b6a, back-patch to v10 (all supported versions). Discussion: https://postgr.es/m/20220625171533.GA2012493@rfd.leadboat.com --- src/test/perl/PostgreSQL/Test/Cluster.pm | 7 +------ src/test/perl/PostgreSQL/Test/Utils.pm | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 14b8ee73776..14e9138a394 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -5,14 +5,9 @@ # package the same symbol table as the older package. See PostgresNode::new # for supporting heuristics. -package PostgreSQL::Test::Cluster; - use strict; use warnings; - -use PostgresNode; BEGIN { *PostgreSQL::Test::Cluster:: = \*PostgresNode::; } - -use Exporter 'import'; +use PostgresNode (); 1; diff --git a/src/test/perl/PostgreSQL/Test/Utils.pm b/src/test/perl/PostgreSQL/Test/Utils.pm index e743bdfc834..2d15bbf21d7 100644 --- a/src/test/perl/PostgreSQL/Test/Utils.pm +++ b/src/test/perl/PostgreSQL/Test/Utils.pm @@ -3,14 +3,9 @@ # Allow use of release 15+ Perl package name in older branches, by giving that # package the same symbol table as the older package. -package PostgreSQL::Test::Utils; - use strict; use warnings; - -use TestLib; BEGIN { *PostgreSQL::Test::Utils:: = \*TestLib::; } - -use Exporter 'import'; +use TestLib (); 1; From 0f77a2637f595f5c30a2394d39060a6bc313b398 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sun, 26 Jun 2022 10:40:06 +1200 Subject: [PATCH 11/47] Don't trust signalfd() on illumos. Since commit 6a2a70a02, we've used signalfd() to receive latch wakeups when building with WAIT_USE_EPOLL (default for Linux and illumos), and our traditional self-pipe when falling back to WAIT_USE_POLL (default for other Unixes with neither epoll() nor kqueue()). Unexplained hangs and kernel panics have been reported on illumos systems, apparently linked to this use of signalfd(), leading illumos users and build farm members to have to define WAIT_USE_POLL explicitly as a work-around. A bug report exists at https://www.illumos.org/issues/13700 but no fix is available yet. Let's provide a way for illumos users to go back to self-pipes with epoll(), like releases before 14, and choose that by default. No change for Linux users. To help with development/debugging, macros WAIT_USE_{EPOLL,POLL} and WAIT_USE_{SIGNALFD,SELF_PIPE} can be defined explicitly to override the defaults. Back-patch to 14, where we started using signalfd(). Reported-by: Japin Li Reported-by: Olaf Bohlen (off-list) Reviewed-by: Japin Li Discussion: https://postgr.es/m/MEYP282MB1669C8D88F0997354C2313C1B6CA9%40MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM --- src/backend/storage/ipc/latch.c | 58 +++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c index e81041ae029..7ecd3afe1b9 100644 --- a/src/backend/storage/ipc/latch.c +++ b/src/backend/storage/ipc/latch.c @@ -72,7 +72,7 @@ #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \ defined(WAIT_USE_KQUEUE) || defined(WAIT_USE_WIN32) /* don't overwrite manual choice */ -#elif defined(HAVE_SYS_EPOLL_H) && defined(HAVE_SYS_SIGNALFD_H) +#elif defined(HAVE_SYS_EPOLL_H) #define WAIT_USE_EPOLL #elif defined(HAVE_KQUEUE) #define WAIT_USE_KQUEUE @@ -84,6 +84,22 @@ #error "no wait set implementation available" #endif +/* + * By default, we use a self-pipe with poll() and a signalfd with epoll(), if + * available. We avoid signalfd on illumos for now based on problem reports. + * For testing the choice can also be manually specified. + */ +#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL) +#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD) +/* don't overwrite manual choice */ +#elif defined(WAIT_USE_EPOLL) && defined(HAVE_SYS_SIGNALFD_H) && \ + !defined(__illumos__) +#define WAIT_USE_SIGNALFD +#else +#define WAIT_USE_SELF_PIPE +#endif +#endif + /* typedef in latch.h */ struct WaitEventSet { @@ -146,12 +162,12 @@ static WaitEventSet *LatchWaitSet; static volatile sig_atomic_t waiting = false; #endif -#ifdef WAIT_USE_EPOLL +#ifdef WAIT_USE_SIGNALFD /* On Linux, we'll receive SIGURG via a signalfd file descriptor. */ static int signal_fd = -1; #endif -#if defined(WAIT_USE_POLL) +#ifdef WAIT_USE_SELF_PIPE /* Read and write ends of the self-pipe */ static int selfpipe_readfd = -1; static int selfpipe_writefd = -1; @@ -164,7 +180,7 @@ static void latch_sigurg_handler(SIGNAL_ARGS); static void sendSelfPipeByte(void); #endif -#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL) +#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD) static void drain(void); #endif @@ -190,7 +206,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, void InitializeLatchSupport(void) { -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) int pipefd[2]; if (IsUnderPostmaster) @@ -264,7 +280,7 @@ InitializeLatchSupport(void) pqsignal(SIGURG, latch_sigurg_handler); #endif -#ifdef WAIT_USE_EPOLL +#ifdef WAIT_USE_SIGNALFD sigset_t signalfd_mask; /* Block SIGURG, because we'll receive it through a signalfd. */ @@ -316,7 +332,7 @@ ShutdownLatchSupport(void) LatchWaitSet = NULL; } -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) close(selfpipe_readfd); close(selfpipe_writefd); selfpipe_readfd = -1; @@ -324,7 +340,7 @@ ShutdownLatchSupport(void) selfpipe_owner_pid = InvalidPid; #endif -#if defined(WAIT_USE_EPOLL) +#if defined(WAIT_USE_SIGNALFD) close(signal_fd); signal_fd = -1; #endif @@ -341,9 +357,12 @@ InitLatch(Latch *latch) latch->owner_pid = MyProcPid; latch->is_shared = false; -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) /* Assert InitializeLatchSupport has been called in this process */ Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid); +#elif defined(WAIT_USE_SIGNALFD) + /* Assert InitializeLatchSupport has been called in this process */ + Assert(signal_fd >= 0); #elif defined(WAIT_USE_WIN32) latch->event = CreateEvent(NULL, TRUE, FALSE, NULL); if (latch->event == NULL) @@ -405,9 +424,12 @@ OwnLatch(Latch *latch) /* Sanity checks */ Assert(latch->is_shared); -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) /* Assert InitializeLatchSupport has been called in this process */ Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid); +#elif defined(WAIT_USE_SIGNALFD) + /* Assert InitializeLatchSupport has been called in this process */ + Assert(signal_fd >= 0); #endif if (latch->owner_pid != 0) @@ -618,7 +640,7 @@ SetLatch(Latch *latch) return; else if (owner_pid == MyProcPid) { -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) if (waiting) sendSelfPipeByte(); #else @@ -983,9 +1005,9 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, { set->latch = latch; set->latch_pos = event->pos; -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) event->fd = selfpipe_readfd; -#elif defined(WAIT_USE_EPOLL) +#elif defined(WAIT_USE_SIGNALFD) event->fd = signal_fd; #else event->fd = PGINVALID_SOCKET; @@ -2102,7 +2124,7 @@ GetNumRegisteredWaitEvents(WaitEventSet *set) return set->nevents; } -#if defined(WAIT_USE_POLL) +#if defined(WAIT_USE_SELF_PIPE) /* * SetLatch uses SIGURG to wake up the process waiting on the latch. @@ -2153,7 +2175,7 @@ sendSelfPipeByte(void) #endif -#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL) +#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD) /* * Read all available data from self-pipe or signalfd. @@ -2169,7 +2191,7 @@ drain(void) int rc; int fd; -#ifdef WAIT_USE_POLL +#ifdef WAIT_USE_SELF_PIPE fd = selfpipe_readfd; #else fd = signal_fd; @@ -2187,7 +2209,7 @@ drain(void) else { waiting = false; -#ifdef WAIT_USE_POLL +#ifdef WAIT_USE_SELF_PIPE elog(ERROR, "read() on self-pipe failed: %m"); #else elog(ERROR, "read() on signalfd failed: %m"); @@ -2197,7 +2219,7 @@ drain(void) else if (rc == 0) { waiting = false; -#ifdef WAIT_USE_POLL +#ifdef WAIT_USE_SELF_PIPE elog(ERROR, "unexpected EOF on self-pipe"); #else elog(ERROR, "unexpected EOF on signalfd"); From 0417b38293c8d08dadf188dc83d5fe94da750304 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 27 Jun 2022 08:21:08 +0300 Subject: [PATCH 12/47] Fix visibility check when XID is committed in CLOG but not in procarray. TransactionIdIsInProgress had a fast path to return 'false' if the single-item CLOG cache said that the transaction was known to be committed. However, that was wrong, because a transaction is first marked as committed in the CLOG but doesn't become visible to others until it has removed its XID from the proc array. That could lead to an error: ERROR: t_xmin is uncommitted in tuple to be updated or for an UPDATE to go ahead without blocking, before the previous UPDATE on the same row was made visible. The window is usually very short, but synchronous replication makes it much wider, because the wait for synchronous replica happens in that window. Another thing that makes it hard to hit is that it's hard to get such a commit-in-progress transaction into the single item CLOG cache. Normally, if you call TransactionIdIsInProgress on such a transaction, it determines that the XID is in progress without checking the CLOG and without populating the cache. One way to prime the cache is to explicitly call pg_xact_status() on the XID. Another way is to use a lot of subtransactions, so that the subxid cache in the proc array is overflown, making TransactionIdIsInProgress rely on pg_subtrans and CLOG checks. This has been broken ever since it was introduced in 2008, but the race condition is very hard to hit, especially without synchronous replication. There were a couple of reports of the error starting from summer 2021, but no one was able to find the root cause then. TransactionIdIsKnownCompleted() is now unused. In 'master', remove it, but I left it in place in backbranches in case it's used by extensions. Also change pg_xact_status() to check TransactionIdIsInProgress(). Previously, it only checked the CLOG, and returned "committed" before the transaction was actually made visible to other queries. Note that this also means that you cannot use pg_xact_status() to reproduce the bug anymore, even if the code wasn't fixed. Report and analysis by Konstantin Knizhnik. Patch by Simon Riggs, with the pg_xact_status() change added by me. Author: Simon Riggs Reviewed-by: Andres Freund Discussion: https://www.postgresql.org/message-id/flat/4da7913d-398c-e2ad-d777-f752cf7f0bbb%40garret.ru --- src/backend/access/transam/transam.c | 13 ++++++++---- src/backend/storage/ipc/procarray.c | 12 ++++++++++- src/backend/utils/adt/xid8funcs.c | 30 +++++++++++----------------- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 1c881550b65..f7c8e1b3466 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -249,10 +249,15 @@ TransactionIdDidAbortForReader(TransactionId transactionId) * * This does NOT look into pg_xact but merely probes our local cache * (and so it's not named TransactionIdDidComplete, which would be the - * appropriate name for a function that worked that way). The intended - * use is just to short-circuit TransactionIdIsInProgress calls when doing - * repeated heapam_visibility.c checks for the same XID. If this isn't - * extremely fast then it will be counterproductive. + * appropriate name for a function that worked that way). + * + * NB: This is unused, and will be removed in v15. This was used to + * short-circuit TransactionIdIsInProgress, but that was wrong for a + * transaction that was known to be marked as committed in CLOG but not + * yet removed from the proc array. This is kept in backbranches just in + * case it is still used by extensions. However, extensions doing + * something similar to tuple visibility checks should also be careful to + * check the proc array first! * * Note: * Assumes transaction identifier is valid. diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 57c03cce7d9..89fafdbedc1 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -277,6 +277,11 @@ static ProcArrayStruct *procArray; static PGPROC *allProcs; static TMGXACT *allTmGxact; +/* + * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress() + */ +static TransactionId cachedXidIsNotInProgress = InvalidTransactionId; + /* * Bookkeeping for tracking emulated transactions in recovery */ @@ -1486,7 +1491,7 @@ TransactionIdIsInProgress(TransactionId xid) * already known to be completed, we can fall out without any access to * shared memory. */ - if (TransactionIdIsKnownCompleted(xid)) + if (TransactionIdEquals(cachedXidIsNotInProgress, xid)) { xc_by_known_xact_inc(); return false; @@ -1644,6 +1649,7 @@ TransactionIdIsInProgress(TransactionId xid) if (nxids == 0) { xc_no_overflow_inc(); + cachedXidIsNotInProgress = xid; return false; } @@ -1658,7 +1664,10 @@ TransactionIdIsInProgress(TransactionId xid) xc_slow_answer_inc(); if (TransactionIdDidAbort(xid)) + { + cachedXidIsNotInProgress = xid; return false; + } /* * It isn't aborted, so check whether the transaction tree it belongs to @@ -1676,6 +1685,7 @@ TransactionIdIsInProgress(TransactionId xid) } } + cachedXidIsNotInProgress = xid; return false; } diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index 78b0b9b6d68..0c9f14a0c83 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -36,6 +36,7 @@ #include "miscadmin.h" #include "postmaster/postmaster.h" #include "storage/lwlock.h" +#include "storage/procarray.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/snapmgr.h" @@ -810,29 +811,22 @@ pg_xact_status(PG_FUNCTION_ARGS) { Assert(TransactionIdIsValid(xid)); - if (TransactionIdIsCurrentTransactionId(xid)) + /* + * Like when doing visiblity checks on a row, check whether the + * transaction is still in progress before looking into the CLOG. + * Otherwise we would incorrectly return "committed" for a transaction + * that is committing and has already updated the CLOG, but hasn't + * removed its XID from the proc array yet. (See comment on that race + * condition at the top of heapam_visibility.c) + */ + if (TransactionIdIsInProgress(xid)) status = "in progress"; else if (TransactionIdDidCommit(xid)) status = "committed"; - else if (TransactionIdDidAbort(xid)) - status = "aborted"; else { - /* - * The xact is not marked as either committed or aborted in clog. - * - * It could be a transaction that ended without updating clog or - * writing an abort record due to a crash. We can safely assume - * it's aborted if it isn't committed and is older than our - * snapshot xmin. - * - * Otherwise it must be in-progress (or have been at the time we - * checked commit/abort status). - */ - if (TransactionIdPrecedes(xid, GetActiveSnapshot()->xmin)) - status = "aborted"; - else - status = "in progress"; + /* it must have aborted or crashed */ + status = "aborted"; } } else From 0d803793a774d4a9f51b4b8cbfe2de22d0c995f9 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 2 Jul 2022 13:00:30 -0700 Subject: [PATCH 13/47] ecpglib: call newlocale() once per process. ecpglib has been calling it once per SQL query and once per EXEC SQL GET DESCRIPTOR. Instead, if newlocale() has not succeeded before, call it while establishing a connection. This mitigates three problems: - If newlocale() failed in EXEC SQL GET DESCRIPTOR, the command silently proceeded without the intended locale change. - On AIX, each newlocale()+freelocale() cycle leaked memory. - newlocale() CPU usage may have been nontrivial. Fail the connection attempt if newlocale() fails. Rearrange ecpg_do_prologue() to validate the connection before its uselocale(). The sort of program that may regress is one running in an environment where newlocale() fails. If that program establishes connections without running SQL statements, it will stop working in response to this change. I'm betting against the importance of such an ECPG use case. Most SQL execution (any using ECPGdo()) has long required newlocale() success, so there's little a connection could do without newlocale(). Back-patch to v10 (all supported versions). Reviewed by Tom Lane. Reported by Guillaume Lelarge. Discussion: https://postgr.es/m/20220101074055.GA54621@rfd.leadboat.com --- src/interfaces/ecpg/ecpglib/connect.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 056940cb252..1ef21f83545 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -14,6 +14,10 @@ locale_t ecpg_clocale = (locale_t) 0; #endif +#ifdef HAVE_USELOCALE +locale_t ecpg_clocale; +#endif + #ifdef ENABLE_THREAD_SAFETY static pthread_mutex_t connections_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t actual_connection_key; From dcd6f3df5168bedf6a4bb09c949c5d97a26fe58f Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 2 Jul 2022 21:03:19 -0700 Subject: [PATCH 14/47] Fix previous commit's ecpg_clocale for ppc Darwin. Per buildfarm member prairiedog, this platform rejects uninitialized global variables in shared libraries. Back-patch to v10, like the addition of the variable. Reviewed by Tom Lane. Discussion: https://postgr.es/m/20220703030619.GB2378460@rfd.leadboat.com --- src/interfaces/ecpg/ecpglib/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 1ef21f83545..854de3ceae2 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -15,7 +15,7 @@ locale_t ecpg_clocale = (locale_t) 0; #endif #ifdef HAVE_USELOCALE -locale_t ecpg_clocale; +locale_t ecpg_clocale = (locale_t) 0; #endif #ifdef ENABLE_THREAD_SAFETY From 9dcc89b552d923c7c307ec494ea2bfa0547ecb01 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Tue, 5 Jul 2022 14:21:20 +0200 Subject: [PATCH 15/47] libpq: Improve idle state handling in pipeline mode We were going into IDLE state too soon when executing queries via PQsendQuery in pipeline mode, causing several scenarios to misbehave in different ways -- most notably, as reported by Daniele Varrazzo, that a warning message is produced by libpq: message type 0x33 arrived from server while idle But it is also possible, if queries are sent and results consumed not in lockstep, for the expected mediating NULL result values from PQgetResult to be lost (a problem which has not been reported, but which is more serious). Fix this by introducing two new concepts: one is a command queue element PGQUERY_CLOSE to tell libpq to wait for the CloseComplete server response to the Close message that is sent by PQsendQuery. Because the application is not expecting any PGresult from this, the mechanism to consume it is a bit hackish. The other concept, authored by Horiguchi-san, is a PGASYNC_PIPELINE_IDLE state for libpq's state machine to differentiate "really idle" from merely "the idle state that occurs in between reading results from the server for elements in the pipeline". This makes libpq not go fully IDLE when the libpq command queue contains entries; in normal cases, we only go IDLE once at the end of the pipeline, when the server response to the final SYNC message is received. (However, there are corner cases it doesn't fix, such as terminating the query sequence by PQsendFlushRequest instead of PQpipelineSync; this sort of scenario is what requires PGQUERY_CLOSE bit above.) This last bit helps make the libpq state machine clearer; in particular we can get rid of an ugly hack in pqParseInput3 to avoid considering IDLE as such when the command queue contains entries. A new test mode is added to libpq_pipeline.c to tickle some related problematic cases. Reported-by: Daniele Varrazzo Co-authored-by: Kyotaro Horiguchi Discussion: https://postgr.es/m/CA+mi_8bvD0_CW3sumgwPvWdNzXY32itoG_16tDYRu_1S2gV2iw@mail.gmail.com --- src/interfaces/libpq/fe-exec.c | 116 ++++++++-- src/interfaces/libpq/fe-protocol3.c | 30 +-- src/interfaces/libpq/libpq-int.h | 6 +- .../modules/libpq_pipeline/libpq_pipeline.c | 215 +++++++++++++++++- .../libpq_pipeline/t/001_libpq_pipeline.pl | 3 +- .../libpq_pipeline/traces/pipeline_idle.trace | 93 ++++++++ 6 files changed, 425 insertions(+), 38 deletions(-) create mode 100644 src/test/modules/libpq_pipeline/traces/pipeline_idle.trace diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 85696712b38..b2c7727f68d 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -1313,7 +1313,8 @@ pqAppendCmdQueueEntry(PGconn *conn, PGcmdQueueEntry *entry) * itself consume commands from the queue; if we're in any other * state, we don't have to do anything. */ - if (conn->asyncStatus == PGASYNC_IDLE) + if (conn->asyncStatus == PGASYNC_IDLE || + conn->asyncStatus == PGASYNC_PIPELINE_IDLE) { resetPQExpBuffer(&conn->errorMessage); pqPipelineProcessQueue(conn); @@ -1372,6 +1373,7 @@ int PQsendQueryInternal(PGconn *conn, const char *query, bool newQuery) { PGcmdQueueEntry *entry = NULL; + PGcmdQueueEntry *entry2 = NULL; if (!PQsendQueryStart(conn, newQuery)) return 0; @@ -1387,6 +1389,12 @@ PQsendQueryInternal(PGconn *conn, const char *query, bool newQuery) entry = pqAllocCmdQueueEntry(conn); if (entry == NULL) return 0; /* error msg already set */ + if (conn->pipelineStatus != PQ_PIPELINE_OFF) + { + entry2 = pqAllocCmdQueueEntry(conn); + if (entry2 == NULL) + goto sendFailed; + } /* Send the query message(s) */ if (conn->pipelineStatus == PQ_PIPELINE_OFF) @@ -1456,6 +1464,20 @@ PQsendQueryInternal(PGconn *conn, const char *query, bool newQuery) /* OK, it's launched! */ pqAppendCmdQueueEntry(conn, entry); + + /* + * When pipeline mode is in use, we need a second entry in the command + * queue to represent Close Portal message. This allows us later to wait + * for the CloseComplete message to be received before getting in IDLE + * state. + */ + if (conn->pipelineStatus != PQ_PIPELINE_OFF) + { + entry2->queryclass = PGQUERY_CLOSE; + entry2->query = NULL; + pqAppendCmdQueueEntry(conn, entry2); + } + return 1; sendFailed: @@ -1702,11 +1724,13 @@ PQsendQueryStart(PGconn *conn, bool newQuery) switch (conn->asyncStatus) { case PGASYNC_IDLE: + case PGASYNC_PIPELINE_IDLE: case PGASYNC_READY: case PGASYNC_READY_MORE: case PGASYNC_BUSY: /* ok to queue */ break; + case PGASYNC_COPY_IN: case PGASYNC_COPY_OUT: case PGASYNC_COPY_BOTH: @@ -2082,19 +2106,22 @@ PQgetResult(PGconn *conn) { case PGASYNC_IDLE: res = NULL; /* query is complete */ - if (conn->pipelineStatus != PQ_PIPELINE_OFF) - { - /* - * We're about to return the NULL that terminates the round of - * results from the current query; prepare to send the results - * of the next query when we're called next. Also, since this - * is the start of the results of the next query, clear any - * prior error message. - */ - resetPQExpBuffer(&conn->errorMessage); - pqPipelineProcessQueue(conn); - } break; + case PGASYNC_PIPELINE_IDLE: + Assert(conn->pipelineStatus != PQ_PIPELINE_OFF); + + /* + * We're about to return the NULL that terminates the round of + * results from the current query; prepare to send the results + * of the next query, if any, when we're called next. If there's + * no next element in the command queue, this gets us in IDLE + * state. + */ + resetPQExpBuffer(&conn->errorMessage); + pqPipelineProcessQueue(conn); + res = NULL; /* query is complete */ + break; + case PGASYNC_READY: /* @@ -2115,7 +2142,7 @@ PQgetResult(PGconn *conn) * We're about to send the results of the current query. Set * us idle now, and ... */ - conn->asyncStatus = PGASYNC_IDLE; + conn->asyncStatus = PGASYNC_PIPELINE_IDLE; /* * ... in cases when we're sending a pipeline-sync result, @@ -2159,6 +2186,22 @@ PQgetResult(PGconn *conn) break; } + /* If the next command we expect is CLOSE, read and consume it */ + if (conn->asyncStatus == PGASYNC_PIPELINE_IDLE && + conn->cmd_queue_head && + conn->cmd_queue_head->queryclass == PGQUERY_CLOSE) + { + if (res && res->resultStatus != PGRES_FATAL_ERROR) + { + conn->asyncStatus = PGASYNC_BUSY; + parseInput(conn); + conn->asyncStatus = PGASYNC_PIPELINE_IDLE; + } + else + /* we won't ever see the Close */ + pqCommandQueueAdvance(conn); + } + if (res) { int i; @@ -2967,7 +3010,10 @@ PQexitPipelineMode(PGconn *conn) if (!conn) return 0; - if (conn->pipelineStatus == PQ_PIPELINE_OFF) + if (conn->pipelineStatus == PQ_PIPELINE_OFF && + (conn->asyncStatus == PGASYNC_IDLE || + conn->asyncStatus == PGASYNC_PIPELINE_IDLE) && + conn->cmd_queue_head == NULL) return 1; switch (conn->asyncStatus) @@ -2984,9 +3030,16 @@ PQexitPipelineMode(PGconn *conn) libpq_gettext("cannot exit pipeline mode while busy\n")); return 0; - default: + case PGASYNC_IDLE: + case PGASYNC_PIPELINE_IDLE: /* OK */ break; + + case PGASYNC_COPY_IN: + case PGASYNC_COPY_OUT: + case PGASYNC_COPY_BOTH: + appendPQExpBufferStr(&conn->errorMessage, + libpq_gettext("cannot exit pipeline mode while in COPY\n")); } /* still work to process */ @@ -3023,6 +3076,10 @@ pqCommandQueueAdvance(PGconn *conn) prevquery = conn->cmd_queue_head; conn->cmd_queue_head = conn->cmd_queue_head->next; + /* If the queue is now empty, reset the tail too */ + if (conn->cmd_queue_head == NULL) + conn->cmd_queue_tail = NULL; + /* and make it recyclable */ prevquery->next = NULL; pqRecycleCmdQueueEntry(conn, prevquery); @@ -3045,15 +3102,35 @@ pqPipelineProcessQueue(PGconn *conn) case PGASYNC_BUSY: /* client still has to process current query or results */ return; + case PGASYNC_IDLE: + /* + * If we're in IDLE mode and there's some command in the queue, + * get us into PIPELINE_IDLE mode and process normally. Otherwise + * there's nothing for us to do. + */ + if (conn->cmd_queue_head != NULL) + { + conn->asyncStatus = PGASYNC_PIPELINE_IDLE; + break; + } + return; + + case PGASYNC_PIPELINE_IDLE: + Assert(conn->pipelineStatus != PQ_PIPELINE_OFF); /* next query please */ break; } - /* Nothing to do if not in pipeline mode, or queue is empty */ - if (conn->pipelineStatus == PQ_PIPELINE_OFF || - conn->cmd_queue_head == NULL) + /* + * If there are no further commands to process in the queue, get us in + * "real idle" mode now. + */ + if (conn->cmd_queue_head == NULL) + { + conn->asyncStatus = PGASYNC_IDLE; return; + } /* Initialize async result-accumulation state */ pqClearAsyncResult(conn); @@ -3140,6 +3217,7 @@ PQpipelineSync(PGconn *conn) case PGASYNC_READY_MORE: case PGASYNC_BUSY: case PGASYNC_IDLE: + case PGASYNC_PIPELINE_IDLE: /* OK to send sync */ break; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 9d74dd0e39d..5311a40a147 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -192,18 +192,6 @@ pqParseInput3(PGconn *conn) if (conn->asyncStatus != PGASYNC_IDLE) return; - /* - * We're also notionally not-IDLE when in pipeline mode the state - * says "idle" (so we have completed receiving the results of one - * query from the server and dispatched them to the application) - * but another query is queued; yield back control to caller so - * that they can initiate processing of the next query in the - * queue. - */ - if (conn->pipelineStatus != PQ_PIPELINE_OFF && - conn->cmd_queue_head != NULL) - return; - /* * Unexpected message in IDLE state; need to recover somehow. * ERROR messages are handled using the notice processor; @@ -330,8 +318,24 @@ pqParseInput3(PGconn *conn) } break; case '2': /* Bind Complete */ + /* Nothing to do for this message type */ + break; case '3': /* Close Complete */ - /* Nothing to do for these message types */ + /* + * If we get CloseComplete when waiting for it, consume + * the queue element and keep going. A result is not + * expected from this message; it is just there so that + * we know to wait for it when PQsendQuery is used in + * pipeline mode, before going in IDLE state. Failing to + * do this makes us receive CloseComplete when IDLE, which + * creates problems. + */ + if (conn->cmd_queue_head && + conn->cmd_queue_head->queryclass == PGQUERY_CLOSE) + { + pqCommandQueueAdvance(conn); + } + break; case 'S': /* parameter status */ if (getParameterStatus(conn)) diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 70094e5fb70..0cbd611bd98 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -268,7 +268,8 @@ typedef enum * query */ PGASYNC_COPY_IN, /* Copy In data transfer in progress */ PGASYNC_COPY_OUT, /* Copy Out data transfer in progress */ - PGASYNC_COPY_BOTH /* Copy In/Out data transfer in progress */ + PGASYNC_COPY_BOTH, /* Copy In/Out data transfer in progress */ + PGASYNC_PIPELINE_IDLE, /* "Idle" between commands in pipeline mode */ } PGAsyncStatusType; /* Target server type (decoded value of target_session_attrs) */ @@ -354,7 +355,8 @@ typedef enum PGQUERY_EXTENDED, /* full Extended protocol (PQexecParams) */ PGQUERY_PREPARE, /* Parse only (PQprepare) */ PGQUERY_DESCRIBE, /* Describe Statement or Portal */ - PGQUERY_SYNC /* Sync (at end of a pipeline) */ + PGQUERY_SYNC, /* Sync (at end of a pipeline) */ + PGQUERY_CLOSE } PGQueryClass; /* diff --git a/src/test/modules/libpq_pipeline/libpq_pipeline.c b/src/test/modules/libpq_pipeline/libpq_pipeline.c index c27c4e0adaf..dfab924965d 100644 --- a/src/test/modules/libpq_pipeline/libpq_pipeline.c +++ b/src/test/modules/libpq_pipeline/libpq_pipeline.c @@ -581,8 +581,6 @@ test_pipeline_abort(PGconn *conn) if (PQpipelineStatus(conn) != PQ_PIPELINE_OFF) pg_fatal("exiting pipeline mode didn't seem to work"); - fprintf(stderr, "ok\n"); - /*- * Since we fired the pipelines off without a surrounding xact, the results * should be: @@ -614,6 +612,8 @@ test_pipeline_abort(PGconn *conn) } PQclear(res); + + fprintf(stderr, "ok\n"); } /* State machine enum for test_pipelined_insert */ @@ -968,6 +968,207 @@ test_prepared(PGconn *conn) fprintf(stderr, "ok\n"); } +/* Notice processor: print notices, and count how many we got */ +static void +notice_processor(void *arg, const char *message) +{ + int *n_notices = (int *) arg; + + (*n_notices)++; + fprintf(stderr, "NOTICE %d: %s", *n_notices, message); +} + +/* Verify behavior in "idle" state */ +static void +test_pipeline_idle(PGconn *conn) +{ + PGresult *res; + int n_notices = 0; + + fprintf(stderr, "\npipeline idle...\n"); + + PQsetNoticeProcessor(conn, notice_processor, &n_notices); + + /* + * Cause a Close message to be sent to the server, and watch libpq's + * reaction to the resulting CloseComplete. libpq must not get in IDLE + * state until that has been received. + */ + if (PQenterPipelineMode(conn) != 1) + pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); + + if (PQsendQuery(conn, "SELECT 1") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + PQsendFlushRequest(conn); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("PQgetResult returned null when there's a pipeline item: %s", + PQerrorMessage(conn)); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("Unexpected result code %s from first pipeline item", + PQresStatus(PQresultStatus(res))); + PQclear(res); + + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("expected NULL result"); + + if (PQpipelineSync(conn) != 1) + pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("PQgetResult returned null when there's a pipeline item: %s", + PQerrorMessage(conn)); + if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) + pg_fatal("Unexpected result code %s instead of PGRES_PIPELINE_SYNC, error: %s", + PQresStatus(PQresultStatus(res)), PQerrorMessage(conn)); + PQclear(res); + res = NULL; + + if (PQexitPipelineMode(conn) != 1) + pg_fatal("attempt to exit pipeline mode failed when it should've succeeded: %s", + PQerrorMessage(conn)); + + /* + * Must not have got any notices here; note bug as described in + * https://postgr.es/m/CA+mi_8bvD0_CW3sumgwPvWdNzXY32itoG_16tDYRu_1S2gV2iw@mail.gmail.com + */ + if (n_notices > 0) + pg_fatal("got %d notice(s)", n_notices); + fprintf(stderr, "ok - 1\n"); + + /* + * Verify that we can send a query using simple query protocol after one + * in pipeline mode. + */ + if (PQenterPipelineMode(conn) != 1) + pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); + if (PQsendQuery(conn, "SELECT 1") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + PQsendFlushRequest(conn); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("PQgetResult returned null when there's a pipeline item: %s", + PQerrorMessage(conn)); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s from first pipeline item", + PQresStatus(PQresultStatus(res))); + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("got unexpected non-null result"); + /* We can exit pipeline mode now */ + if (PQexitPipelineMode(conn) != 1) + pg_fatal("attempt to exit pipeline mode failed when it should've succeeded: %s", + PQerrorMessage(conn)); + res = PQexec(conn, "SELECT 2"); + if (n_notices > 0) + pg_fatal("got %d notice(s)", n_notices); + if (res == NULL) + pg_fatal("PQexec returned NULL"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s from non-pipeline query", + PQresStatus(PQresultStatus(res))); + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("did not receive terminating NULL"); + if (n_notices > 0) + pg_fatal("got %d notice(s)", n_notices); + fprintf(stderr, "ok - 2\n"); + + /* + * Case 2: exiting pipeline mode is not OK if a second command is sent. + */ + + if (PQenterPipelineMode(conn) != 1) + pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); + if (PQsendQuery(conn, "SELECT 1") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + PQsendFlushRequest(conn); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("PQgetResult returned null when there's a pipeline item: %s", + PQerrorMessage(conn)); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s from first pipeline item", + PQresStatus(PQresultStatus(res))); + if (PQsendQuery(conn, "SELECT 2") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + PQsendFlushRequest(conn); + /* read terminating null from first query */ + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("did not receive terminating NULL"); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("PQgetResult returned null when there's a pipeline item: %s", + PQerrorMessage(conn)); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s from first pipeline item", + PQresStatus(PQresultStatus(res))); + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("did not receive terminating NULL"); + if (PQexitPipelineMode(conn) != 1) + pg_fatal("attempt to exit pipeline mode failed when it should've succeeded: %s", + PQerrorMessage(conn)); + + /* Try to exit pipeline mode in pipeline-idle state */ + if (PQenterPipelineMode(conn) != 1) + pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); + if (PQsendQuery(conn, "SELECT 1") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + PQsendFlushRequest(conn); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("PQgetResult returned null when there's a pipeline item: %s", + PQerrorMessage(conn)); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s from first pipeline item", + PQresStatus(PQresultStatus(res))); + PQclear(res); + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("did not receive terminating NULL"); + if (PQsendQuery(conn, "SELECT 2") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + if (PQexitPipelineMode(conn) == 1) + pg_fatal("exiting pipeline succeeded when it shouldn't"); + if (strncmp(PQerrorMessage(conn), "cannot exit pipeline mode", + strlen("cannot exit pipeline mode")) != 0) + pg_fatal("did not get expected error; got: %s", + PQerrorMessage(conn)); + PQsendFlushRequest(conn); + res = PQgetResult(conn); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s from second pipeline item", + PQresStatus(PQresultStatus(res))); + PQclear(res); + res = PQgetResult(conn); + if (res != NULL) + pg_fatal("did not receive terminating NULL"); + if (PQexitPipelineMode(conn) != 1) + pg_fatal("exiting pipeline failed: %s", PQerrorMessage(conn)); + + if (n_notices > 0) + pg_fatal("got %d notice(s)", n_notices); + fprintf(stderr, "ok - 3\n"); + + /* Have a WARNING in the middle of a resultset */ + if (PQenterPipelineMode(conn) != 1) + pg_fatal("entering pipeline mode failed: %s", PQerrorMessage(conn)); + if (PQsendQuery(conn, "SELECT pg_catalog.pg_advisory_unlock(1,1)") != 1) + pg_fatal("failed to send query: %s", PQerrorMessage(conn)); + PQsendFlushRequest(conn); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("unexpected NULL result received"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("unexpected result code %s", PQresStatus(PQresultStatus(res))); + if (PQexitPipelineMode(conn) != 1) + pg_fatal("failed to exit pipeline mode: %s", PQerrorMessage(conn)); + fprintf(stderr, "ok - 4\n"); +} + static void test_simple_pipeline(PGconn *conn) { @@ -1160,6 +1361,8 @@ test_singlerowmode(PGconn *conn) if (PQexitPipelineMode(conn) != 1) pg_fatal("failed to end pipeline mode: %s", PQerrorMessage(conn)); + + fprintf(stderr, "ok\n"); } /* @@ -1549,6 +1752,7 @@ print_test_list(void) printf("multi_pipelines\n"); printf("nosync\n"); printf("pipeline_abort\n"); + printf("pipeline_idle\n"); printf("pipelined_insert\n"); printf("prepared\n"); printf("simple_pipeline\n"); @@ -1630,7 +1834,10 @@ main(int argc, char **argv) /* Set the trace file, if requested */ if (tracefile != NULL) { - trace = fopen(tracefile, "w"); + if (strcmp(tracefile, "-") == 0) + trace = stdout; + else + trace = fopen(tracefile, "w"); if (trace == NULL) pg_fatal("could not open file \"%s\": %m", tracefile); @@ -1650,6 +1857,8 @@ main(int argc, char **argv) test_nosync(conn); else if (strcmp(testname, "pipeline_abort") == 0) test_pipeline_abort(conn); + else if (strcmp(testname, "pipeline_idle") == 0) + test_pipeline_idle(conn); else if (strcmp(testname, "pipelined_insert") == 0) test_pipelined_insert(conn, numrows); else if (strcmp(testname, "prepared") == 0) diff --git a/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl b/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl index d8d496c995a..b02928cad29 100644 --- a/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl +++ b/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl @@ -26,7 +26,8 @@ my @extraargs = ('-r', $numrows); my $cmptrace = grep(/^$testname$/, qw(simple_pipeline nosync multi_pipelines prepared singlerow - pipeline_abort transaction disallowed_in_pipeline)) > 0; + pipeline_abort pipeline_idle transaction + disallowed_in_pipeline)) > 0; # For a bunch of tests, generate a libpq trace file too. my $traceout = "$TestLib::tmp_check/traces/$testname.trace"; diff --git a/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace b/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace new file mode 100644 index 00000000000..3957ee4dfe1 --- /dev/null +++ b/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace @@ -0,0 +1,93 @@ +F 16 Parse "" "SELECT 1" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '1' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 4 Sync +B 5 ReadyForQuery I +F 16 Parse "" "SELECT 1" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '1' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 13 Query "SELECT 2" +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '2' +B 13 CommandComplete "SELECT 1" +B 5 ReadyForQuery I +F 16 Parse "" "SELECT 1" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '1' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 16 Parse "" "SELECT 2" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '2' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 16 Parse "" "SELECT 1" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '1' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 16 Parse "" "SELECT 2" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '2' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 49 Parse "" "SELECT pg_catalog.pg_advisory_unlock(1,1)" 0 +F 12 Bind "" "" 0 0 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 6 Close P "" +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 43 RowDescription 1 "pg_advisory_unlock" NNNN 0 NNNN 1 -1 0 +B NN NoticeResponse S "WARNING" V "WARNING" C "01000" M "you don't own a lock of type ExclusiveLock" F "SSSS" L "SSSS" R "SSSS" \x00 +B 11 DataRow 1 1 'f' +B 13 CommandComplete "SELECT 1" +B 4 CloseComplete +F 4 Terminate From e86b72b5a843a8eee91ac7b84648071d30161aff Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 5 Jul 2022 13:06:31 -0400 Subject: [PATCH 16/47] Fix pg_upgrade to detect non-upgradable anyarray usages. When we changed some built-in functions to use anycompatiblearray instead of anyarray, we created a dump/restore hazard for user-defined operators and aggregates relying on those functions: the user objects have to be modified to change their signatures similarly. This causes pg_upgrade to fail partway through if the source installation contains such objects. We generally try to have pg_upgrade detect such hazards and fail before it does anything exciting, so add logic to detect this case too. Back-patch to v14 where the change was made. Justin Pryzby, reviewed by Andrey Borodin Discussion: https://postgr.es/m/3383880.QJadu78ljV@vejsadalnx --- src/bin/pg_upgrade/check.c | 134 +++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index d0905f3d588..9f121f9e045 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -26,6 +26,7 @@ static void check_proper_datallowconn(ClusterInfo *cluster); static void check_for_prepared_transactions(ClusterInfo *cluster); static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster); static void check_for_user_defined_postfix_ops(ClusterInfo *cluster); +static void check_for_incompatible_polymorphics(ClusterInfo *cluster); static void check_for_tables_with_oids(ClusterInfo *cluster); static void check_for_composite_data_type_usage(ClusterInfo *cluster); static void check_for_reg_data_type_usage(ClusterInfo *cluster); @@ -155,6 +156,13 @@ check_and_dump_old_cluster(bool live_check, char **sequence_script_file_name) check_for_removed_data_type_usage(&old_cluster, "12", "tinterval"); } + /* + * PG 14 changed polymorphic functions from anyarray to + * anycompatiblearray. + */ + if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1300) + check_for_incompatible_polymorphics(&old_cluster); + /* * Pre-PG 12 allowed tables to be declared WITH OIDS, which is not * supported anymore. Verify there are none, iff applicable. @@ -1176,6 +1184,132 @@ check_for_user_defined_postfix_ops(ClusterInfo *cluster) check_ok(); } +/* + * check_for_incompatible_polymorphics() + * + * Make sure nothing is using old polymorphic functions with + * anyarray/anyelement rather than the new anycompatible variants. + */ +static void +check_for_incompatible_polymorphics(ClusterInfo *cluster) +{ + PGresult *res; + FILE *script = NULL; + char output_path[MAXPGPATH]; + PQExpBufferData old_polymorphics; + + prep_status("Checking for incompatible polymorphic functions"); + + snprintf(output_path, sizeof(output_path), + "incompatible_polymorphics.txt"); + + /* The set of problematic functions varies a bit in different versions */ + initPQExpBuffer(&old_polymorphics); + + appendPQExpBufferStr(&old_polymorphics, + "'array_append(anyarray,anyelement)'" + ", 'array_cat(anyarray,anyarray)'" + ", 'array_prepend(anyelement,anyarray)'"); + + if (GET_MAJOR_VERSION(cluster->major_version) >= 903) + appendPQExpBufferStr(&old_polymorphics, + ", 'array_remove(anyarray,anyelement)'" + ", 'array_replace(anyarray,anyelement,anyelement)'"); + + if (GET_MAJOR_VERSION(cluster->major_version) >= 905) + appendPQExpBufferStr(&old_polymorphics, + ", 'array_position(anyarray,anyelement)'" + ", 'array_position(anyarray,anyelement,integer)'" + ", 'array_positions(anyarray,anyelement)'" + ", 'width_bucket(anyelement,anyarray)'"); + + for (int dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) + { + bool db_used = false; + DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; + PGconn *conn = connectToServer(cluster, active_db->db_name); + int ntups; + int i_objkind, + i_objname; + + /* + * The query below hardcodes FirstNormalObjectId as 16384 rather than + * interpolating that C #define into the query because, if that + * #define is ever changed, the cutoff we want to use is the value + * used by pre-version 14 servers, not that of some future version. + */ + res = executeQueryOrDie(conn, + /* Aggregate transition functions */ + "SELECT 'aggregate' AS objkind, p.oid::regprocedure::text AS objname " + "FROM pg_proc AS p " + "JOIN pg_aggregate AS a ON a.aggfnoid=p.oid " + "JOIN pg_proc AS transfn ON transfn.oid=a.aggtransfn " + "WHERE p.oid >= 16384 " + "AND a.aggtransfn = ANY(ARRAY[%s]::regprocedure[]) " + + /* Aggregate final functions */ + "UNION ALL " + "SELECT 'aggregate' AS objkind, p.oid::regprocedure::text AS objname " + "FROM pg_proc AS p " + "JOIN pg_aggregate AS a ON a.aggfnoid=p.oid " + "JOIN pg_proc AS finalfn ON finalfn.oid=a.aggfinalfn " + "WHERE p.oid >= 16384 " + "AND a.aggfinalfn = ANY(ARRAY[%s]::regprocedure[]) " + + /* Operators */ + "UNION ALL " + "SELECT 'operator' AS objkind, op.oid::regoperator::text AS objname " + "FROM pg_operator AS op " + "WHERE op.oid >= 16384 " + "AND oprcode = ANY(ARRAY[%s]::regprocedure[]);", + old_polymorphics.data, + old_polymorphics.data, + old_polymorphics.data); + + ntups = PQntuples(res); + + i_objkind = PQfnumber(res, "objkind"); + i_objname = PQfnumber(res, "objname"); + + for (int rowno = 0; rowno < ntups; rowno++) + { + if (script == NULL && + (script = fopen_priv(output_path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %s\n", + output_path, strerror(errno)); + if (!db_used) + { + fprintf(script, "In database: %s\n", active_db->db_name); + db_used = true; + } + + fprintf(script, " %s: %s\n", + PQgetvalue(res, rowno, i_objkind), + PQgetvalue(res, rowno, i_objname)); + } + + PQclear(res); + PQfinish(conn); + } + + if (script) + { + fclose(script); + pg_log(PG_REPORT, "fatal\n"); + pg_fatal("Your installation contains user-defined objects that refer to internal\n" + "polymorphic functions with arguments of type 'anyarray' or 'anyelement'.\n" + "These user-defined objects must be dropped before upgrading and restored\n" + "afterwards, changing them to refer to the new corresponding functions with\n" + "arguments of type 'anycompatiblearray' and 'anycompatible'.\n" + "A list of the problematic objects is in the file:\n" + " %s\n\n", output_path); + } + else + check_ok(); + + termPQExpBuffer(&old_polymorphics); +} + /* * Verify that no tables are declared WITH OIDS. */ From e0bb482043da1ed879e688891f3b66247e63c0fe Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 5 Jul 2022 18:23:19 -0400 Subject: [PATCH 17/47] Tighten pg_upgrade's new check for non-upgradable anyarray usages. We only need to reject cases when the aggregate or operator is itself declared with a polymorphic type. Per buildfarm. Discussion: https://postgr.es/m/3383880.QJadu78ljV@vejsadalnx --- src/bin/pg_upgrade/check.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 9f121f9e045..fd4414f1d02 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -1246,6 +1246,7 @@ check_for_incompatible_polymorphics(ClusterInfo *cluster) "JOIN pg_proc AS transfn ON transfn.oid=a.aggtransfn " "WHERE p.oid >= 16384 " "AND a.aggtransfn = ANY(ARRAY[%s]::regprocedure[]) " + "AND a.aggtranstype = ANY(ARRAY['anyarray', 'anyelement']::regtype[]) " /* Aggregate final functions */ "UNION ALL " @@ -1255,13 +1256,15 @@ check_for_incompatible_polymorphics(ClusterInfo *cluster) "JOIN pg_proc AS finalfn ON finalfn.oid=a.aggfinalfn " "WHERE p.oid >= 16384 " "AND a.aggfinalfn = ANY(ARRAY[%s]::regprocedure[]) " + "AND a.aggtranstype = ANY(ARRAY['anyarray', 'anyelement']::regtype[]) " /* Operators */ "UNION ALL " "SELECT 'operator' AS objkind, op.oid::regoperator::text AS objname " "FROM pg_operator AS op " "WHERE op.oid >= 16384 " - "AND oprcode = ANY(ARRAY[%s]::regprocedure[]);", + "AND oprcode = ANY(ARRAY[%s]::regprocedure[]) " + "AND oprleft = ANY(ARRAY['anyarray', 'anyelement']::regtype[]);", old_polymorphics.data, old_polymorphics.data, old_polymorphics.data); From ee0e5c5dc36bcbe4e272cd266bcc178877018b4a Mon Sep 17 00:00:00 2001 From: reshke Date: Sat, 7 Feb 2026 19:33:27 +0000 Subject: [PATCH 18/47] fix ecpglib patch --- src/interfaces/ecpg/ecpglib/connect.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 854de3ceae2..056940cb252 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -14,10 +14,6 @@ locale_t ecpg_clocale = (locale_t) 0; #endif -#ifdef HAVE_USELOCALE -locale_t ecpg_clocale = (locale_t) 0; -#endif - #ifdef ENABLE_THREAD_SAFETY static pthread_mutex_t connections_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t actual_connection_key; From 2156d9649dfba2ff86025f5e72632f35792857e2 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 8 Jul 2022 20:23:35 -0400 Subject: [PATCH 19/47] doc: add examples for array_length() and jsonb_array_length() The examples show the output of array_length() and jsonb_array_length() for empty arrays. Discussion: https://postgr.es/m/CAKFQuwaoBmRuWdMLzLHDCFDJDX3wvfQ7egAF0bpik_BFgG1KWg@mail.gmail.com Author: David G. Johnston Backpatch-through: 13 --- doc/src/sgml/func.sgml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 8266615ea47..ceb09d788cc 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -15446,6 +15446,10 @@ table2-mapping json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') 5 + + + jsonb_array_length('[]') + 0 @@ -17887,10 +17891,19 @@ SELECT NULLIF(value, '(none)') ... Returns the length of the requested array dimension. + (Produces NULL instead of 0 for empty or missing array dimensions.) array_length(array[1,2,3], 1) 3 + + + array_length(array[]::int[], 1) + NULL + + + array_length(array['text'], 2) + NULL From d7f9a1af026ee8e3d9772b5351cd6137d1c191bb Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 11 Jul 2022 10:56:48 +0900 Subject: [PATCH 20/47] doc: Fix inconsistent quotes in some jsonb fields Single quotes are not allowed in json internals, double quotes are. Reported-by: Eric Mutta Discussion: https://postgr.es/m/165715362165.665.3875113264927503997@wrigleys.postgresql.org Backpatch-through: 14 --- doc/src/sgml/json.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml index a173368229b..c421d4ba75a 100644 --- a/doc/src/sgml/json.sgml +++ b/doc/src/sgml/json.sgml @@ -701,10 +701,10 @@ UPDATE table_name SET jsonb_field[2] = '2'; assigned value can be placed. --- Where jsonb_field was {}, it is now {'a': [{'b': 1}]} +-- Where jsonb_field was {}, it is now {"a": [{"b": 1}]} UPDATE table_name SET jsonb_field['a'][0]['b'] = '1'; --- Where jsonb_field was [], it is now [null, {'a': 1}] +-- Where jsonb_field was [], it is now [null, {"a": 1}] UPDATE table_name SET jsonb_field[1]['a'] = '1'; From b16345bb98dae489aa962f10654ec8cafab12967 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 25 Jun 2022 09:07:41 -0700 Subject: [PATCH 21/47] CREATE INDEX: use the original userid for more ACL checks. Commit a117cebd638dd02e5c2e791c25e43745f233111b used the original userid for ACL checks located directly in DefineIndex(), but it still adopted the table owner userid for more ACL checks than intended. That broke dump/reload of indexes that refer to an operator class, collation, or exclusion operator in a schema other than "public" or "pg_catalog". Back-patch to v10 (all supported versions), like the earlier commit. Nathan Bossart and Noah Misch Discussion: https://postgr.es/m/f8a4105f076544c180a87ef0c4822352@stmuk.bayern.de --- contrib/citext/Makefile | 2 +- contrib/citext/expected/create_index_acl.out | 78 ++++++++++++++++ contrib/citext/sql/create_index_acl.sql | 79 ++++++++++++++++ src/backend/commands/indexcmds.c | 96 +++++++++++++++++--- 4 files changed, 239 insertions(+), 16 deletions(-) create mode 100644 contrib/citext/expected/create_index_acl.out create mode 100644 contrib/citext/sql/create_index_acl.sql diff --git a/contrib/citext/Makefile b/contrib/citext/Makefile index b9b43ee787f..f8bb56c0975 100644 --- a/contrib/citext/Makefile +++ b/contrib/citext/Makefile @@ -11,7 +11,7 @@ DATA = citext--1.4.sql \ citext--1.0--1.1.sql PGFILEDESC = "citext - case-insensitive character string data type" -REGRESS = citext +REGRESS = create_index_acl citext REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out new file mode 100644 index 00000000000..f35f60b421d --- /dev/null +++ b/contrib/citext/expected/create_index_acl.out @@ -0,0 +1,78 @@ +-- Each DefineIndex() ACL check uses either the original userid or the table +-- owner userid; see its header comment. Here, confirm that DefineIndex() +-- uses its original userid where necessary. The test works by creating +-- indexes that refer to as many sorts of objects as possible, with the table +-- owner having as few applicable privileges as possible. (The privileges.sql +-- regress_sro_user tests look for the opposite defect; they confirm that +-- DefineIndex() uses the table owner userid where necessary.) +-- Don't override tablespaces; this version lacks allow_in_place_tablespaces. +BEGIN; +CREATE ROLE regress_minimal; +CREATE SCHEMA s; +CREATE EXTENSION citext SCHEMA s; +-- Revoke all conceivably-relevant ACLs within the extension. The system +-- doesn't check all these ACLs, but this will provide some coverage if that +-- ever changes. +REVOKE ALL ON TYPE s.citext FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_lt FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_le FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_eq FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_ge FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_gt FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_cmp FROM PUBLIC; +-- Functions sufficient for making an index column that has the side effect of +-- changing search_path at expression planning time. +CREATE FUNCTION public.setter() RETURNS bool VOLATILE + LANGUAGE SQL AS $$SET search_path = s; SELECT true$$; +CREATE FUNCTION s.const() RETURNS bool IMMUTABLE + LANGUAGE SQL AS $$SELECT public.setter()$$; +CREATE FUNCTION s.index_this_expr(s.citext, bool) RETURNS s.citext IMMUTABLE + LANGUAGE SQL AS $$SELECT $1$$; +REVOKE ALL ON FUNCTION public.setter FROM PUBLIC; +REVOKE ALL ON FUNCTION s.const FROM PUBLIC; +REVOKE ALL ON FUNCTION s.index_this_expr FROM PUBLIC; +-- Even for an empty table, expression planning calls s.const & public.setter. +GRANT EXECUTE ON FUNCTION public.setter TO regress_minimal; +GRANT EXECUTE ON FUNCTION s.const TO regress_minimal; +-- Function for index predicate. +CREATE FUNCTION s.index_row_if(s.citext) RETURNS bool IMMUTABLE + LANGUAGE SQL AS $$SELECT $1 IS NOT NULL$$; +REVOKE ALL ON FUNCTION s.index_row_if FROM PUBLIC; +-- Even for an empty table, CREATE INDEX checks ii_Predicate permissions. +GRANT EXECUTE ON FUNCTION s.index_row_if TO regress_minimal; +-- Non-extension, non-function objects. +CREATE COLLATION s.coll (LOCALE="C"); +CREATE TABLE s.x (y s.citext); +ALTER TABLE s.x OWNER TO regress_minimal; +-- Empty-table DefineIndex() +CREATE UNIQUE INDEX u0rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) + WHERE s.index_row_if(y); +ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) + WHERE (s.index_row_if(y)); +-- Make the table nonempty. +INSERT INTO s.x VALUES ('foo'), ('bar'); +-- If the INSERT runs the planner on index expressions, a search_path change +-- survives. As of 2022-06, the INSERT reuses a cached plan. It does so even +-- under debug_discard_caches, since each index is new-in-transaction. If +-- future work changes a cache lifecycle, this RESET may become necessary. +RESET search_path; +-- For a nonempty table, owner needs permissions throughout ii_Expressions. +GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal; +CREATE UNIQUE INDEX u2rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) + WHERE s.index_row_if(y); +ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) + WHERE (s.index_row_if(y)); +-- Shall not find s.coll via search_path, despite the s.const->public.setter +-- call having set search_path=s during expression planning. Suppress the +-- message itself, which depends on the database encoding. +\set VERBOSITY sqlstate +ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=) + WHERE (s.index_row_if(y)); +ERROR: 42704 +\set VERBOSITY default +ROLLBACK; diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql new file mode 100644 index 00000000000..a5f4e6b30a8 --- /dev/null +++ b/contrib/citext/sql/create_index_acl.sql @@ -0,0 +1,79 @@ +-- Each DefineIndex() ACL check uses either the original userid or the table +-- owner userid; see its header comment. Here, confirm that DefineIndex() +-- uses its original userid where necessary. The test works by creating +-- indexes that refer to as many sorts of objects as possible, with the table +-- owner having as few applicable privileges as possible. (The privileges.sql +-- regress_sro_user tests look for the opposite defect; they confirm that +-- DefineIndex() uses the table owner userid where necessary.) + +-- Don't override tablespaces; this version lacks allow_in_place_tablespaces. + +BEGIN; +CREATE ROLE regress_minimal; +CREATE SCHEMA s; +CREATE EXTENSION citext SCHEMA s; +-- Revoke all conceivably-relevant ACLs within the extension. The system +-- doesn't check all these ACLs, but this will provide some coverage if that +-- ever changes. +REVOKE ALL ON TYPE s.citext FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_lt FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_le FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_eq FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_ge FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_gt FROM PUBLIC; +REVOKE ALL ON FUNCTION s.citext_pattern_cmp FROM PUBLIC; +-- Functions sufficient for making an index column that has the side effect of +-- changing search_path at expression planning time. +CREATE FUNCTION public.setter() RETURNS bool VOLATILE + LANGUAGE SQL AS $$SET search_path = s; SELECT true$$; +CREATE FUNCTION s.const() RETURNS bool IMMUTABLE + LANGUAGE SQL AS $$SELECT public.setter()$$; +CREATE FUNCTION s.index_this_expr(s.citext, bool) RETURNS s.citext IMMUTABLE + LANGUAGE SQL AS $$SELECT $1$$; +REVOKE ALL ON FUNCTION public.setter FROM PUBLIC; +REVOKE ALL ON FUNCTION s.const FROM PUBLIC; +REVOKE ALL ON FUNCTION s.index_this_expr FROM PUBLIC; +-- Even for an empty table, expression planning calls s.const & public.setter. +GRANT EXECUTE ON FUNCTION public.setter TO regress_minimal; +GRANT EXECUTE ON FUNCTION s.const TO regress_minimal; +-- Function for index predicate. +CREATE FUNCTION s.index_row_if(s.citext) RETURNS bool IMMUTABLE + LANGUAGE SQL AS $$SELECT $1 IS NOT NULL$$; +REVOKE ALL ON FUNCTION s.index_row_if FROM PUBLIC; +-- Even for an empty table, CREATE INDEX checks ii_Predicate permissions. +GRANT EXECUTE ON FUNCTION s.index_row_if TO regress_minimal; +-- Non-extension, non-function objects. +CREATE COLLATION s.coll (LOCALE="C"); +CREATE TABLE s.x (y s.citext); +ALTER TABLE s.x OWNER TO regress_minimal; +-- Empty-table DefineIndex() +CREATE UNIQUE INDEX u0rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) + WHERE s.index_row_if(y); +ALTER TABLE s.x ADD CONSTRAINT e0rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) + WHERE (s.index_row_if(y)); +-- Make the table nonempty. +INSERT INTO s.x VALUES ('foo'), ('bar'); +-- If the INSERT runs the planner on index expressions, a search_path change +-- survives. As of 2022-06, the INSERT reuses a cached plan. It does so even +-- under debug_discard_caches, since each index is new-in-transaction. If +-- future work changes a cache lifecycle, this RESET may become necessary. +RESET search_path; +-- For a nonempty table, owner needs permissions throughout ii_Expressions. +GRANT EXECUTE ON FUNCTION s.index_this_expr TO regress_minimal; +CREATE UNIQUE INDEX u2rows ON s.x USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll s.citext_pattern_ops) + WHERE s.index_row_if(y); +ALTER TABLE s.x ADD CONSTRAINT e2rows EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE s.coll WITH s.=) + WHERE (s.index_row_if(y)); +-- Shall not find s.coll via search_path, despite the s.const->public.setter +-- call having set search_path=s during expression planning. Suppress the +-- message itself, which depends on the database encoding. +\set VERBOSITY sqlstate +ALTER TABLE s.x ADD CONSTRAINT underqualified EXCLUDE USING btree + ((s.index_this_expr(y, s.const())) COLLATE coll WITH s.=) + WHERE (s.index_row_if(y)); +\set VERBOSITY default +ROLLBACK; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 2065667ce42..ca023623955 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -98,7 +98,10 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo, Oid relId, const char *accessMethodName, Oid accessMethodId, bool amcanorder, - bool isconstraint); + bool isconstraint, + Oid ddl_userid, + int ddl_sec_context, + int *ddl_save_nestlevel); static char *ChooseIndexName(const char *tabname, Oid namespaceId, List *colnames, List *exclusionOpNames, bool primary, bool isconstraint); @@ -397,9 +400,8 @@ CheckIndexCompatible(Oid oldId, * Compute the operator classes, collations, and exclusion operators for * the new index, so we can test whether it's compatible with the existing * one. Note that ComputeIndexAttrs might fail here, but that's OK: - * DefineIndex would have called this function with the same arguments - * later on, and it would have failed then anyway. Our attributeList - * contains only key attributes, thus we're filling ii_NumIndexAttrs and + * DefineIndex would have failed later. Our attributeList contains only + * key attributes, thus we're filling ii_NumIndexAttrs and * ii_NumIndexKeyAttrs with same value. */ indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes, @@ -413,7 +415,7 @@ CheckIndexCompatible(Oid oldId, coloptions, attributeList, exclusionOpNames, relationId, accessMethodName, accessMethodId, - amcanorder, isconstraint); + amcanorder, isconstraint, InvalidOid, 0, NULL); /* Get the soon-obsolete pg_index tuple. */ @@ -659,6 +661,19 @@ WaitForOlderSnapshots(TransactionId limitXmin, bool progress) * DefineIndex * Creates a new index. * + * This function manages the current userid according to the needs of pg_dump. + * Recreating old-database catalog entries in new-database is fine, regardless + * of which users would have permission to recreate those entries now. That's + * just preservation of state. Running opaque expressions, like calling a + * function named in a catalog entry or evaluating a pg_node_tree in a catalog + * entry, as anyone other than the object owner, is not fine. To adhere to + * those principles and to remain fail-safe, use the table owner userid for + * most ACL checks. Use the original userid for ACL checks reached without + * traversing opaque expressions. (pg_dump can predict such ACL checks from + * catalogs.) Overall, this is a mess. Future DDL development should + * consider offering one DDL command for catalog setup and a separate DDL + * command for steps that run opaque expressions. + * * 'relationId': the OID of the heap relation on which the index is to be * created * 'stmt': IndexStmt describing the properties of the new index. @@ -1184,7 +1199,8 @@ DefineIndex(Oid relationId, coloptions, allIndexParams, stmt->excludeOpNames, relationId, accessMethodName, accessMethodId, - amcanorder, stmt->isconstraint); + amcanorder, stmt->isconstraint, root_save_userid, + root_save_sec_context, &root_save_nestlevel); /* * We disallow unique indexes on IVM columns of IMMVs. @@ -1623,11 +1639,8 @@ DefineIndex(Oid relationId, /* * Roll back any GUC changes executed by index functions, and keep - * subsequent changes local to this command. It's barely possible that - * some index function changed a behavior-affecting GUC, e.g. xmloption, - * that affects subsequent steps. This improves bug-compatibility with - * older PostgreSQL versions. They did the AtEOXact_GUC() here for the - * purpose of clearing the above default_tablespace change. + * subsequent changes local to this command. This is essential if some + * index function changed a behavior-affecting GUC, e.g. search_path. */ AtEOXact_GUC(false, root_save_nestlevel); root_save_nestlevel = NewGUCNestLevel(); @@ -2282,6 +2295,10 @@ CheckPredicate(Expr *predicate) * Compute per-index-column information, including indexed column numbers * or index expressions, opclasses and their options. Note, all output vectors * should be allocated for all columns, including "including" ones. + * + * If the caller switched to the table owner, ddl_userid is the role for ACL + * checks reached without traversing opaque expressions. Otherwise, it's + * InvalidOid, and other ddl_* arguments are undefined. */ static void ComputeIndexAttrs(IndexInfo *indexInfo, @@ -2295,12 +2312,17 @@ ComputeIndexAttrs(IndexInfo *indexInfo, const char *accessMethodName, Oid accessMethodId, bool amcanorder, - bool isconstraint) + bool isconstraint, + Oid ddl_userid, + int ddl_sec_context, + int *ddl_save_nestlevel) { ListCell *nextExclOp; ListCell *lc; int attn; int nkeycols = indexInfo->ii_NumIndexKeyAttrs; + Oid save_userid; + int save_sec_context; /* Allocate space for exclusion operator info, if needed */ if (exclusionOpNames) @@ -2314,6 +2336,9 @@ ComputeIndexAttrs(IndexInfo *indexInfo, else nextExclOp = NULL; + if (OidIsValid(ddl_userid)) + GetUserIdAndSecContext(&save_userid, &save_sec_context); + /* * process attributeList */ @@ -2450,10 +2475,24 @@ ComputeIndexAttrs(IndexInfo *indexInfo, } /* - * Apply collation override if any + * Apply collation override if any. Use of ddl_userid is necessary + * due to ACL checks therein, and it's safe because collations don't + * contain opaque expressions (or non-opaque expressions). */ if (attribute->collation) + { + if (OidIsValid(ddl_userid)) + { + AtEOXact_GUC(false, *ddl_save_nestlevel); + SetUserIdAndSecContext(ddl_userid, ddl_sec_context); + } attcollation = get_collation_oid(attribute->collation, false); + if (OidIsValid(ddl_userid)) + { + SetUserIdAndSecContext(save_userid, save_sec_context); + *ddl_save_nestlevel = NewGUCNestLevel(); + } + } /* * Check we have a collation iff it's a collatable type. The only @@ -2481,12 +2520,25 @@ ComputeIndexAttrs(IndexInfo *indexInfo, collationOidP[attn] = attcollation; /* - * Identify the opclass to use. + * Identify the opclass to use. Use of ddl_userid is necessary due to + * ACL checks therein. This is safe despite opclasses containing + * opaque expressions (specifically, functions), because only + * superusers can define opclasses. */ + if (OidIsValid(ddl_userid)) + { + AtEOXact_GUC(false, *ddl_save_nestlevel); + SetUserIdAndSecContext(ddl_userid, ddl_sec_context); + } classOidP[attn] = ResolveOpClass(attribute->opclass, atttype, accessMethodName, accessMethodId); + if (OidIsValid(ddl_userid)) + { + SetUserIdAndSecContext(save_userid, save_sec_context); + *ddl_save_nestlevel = NewGUCNestLevel(); + } /* * Identify the exclusion operator, if any. @@ -2500,9 +2552,23 @@ ComputeIndexAttrs(IndexInfo *indexInfo, /* * Find the operator --- it must accept the column datatype - * without runtime coercion (but binary compatibility is OK) + * without runtime coercion (but binary compatibility is OK). + * Operators contain opaque expressions (specifically, functions). + * compatible_oper_opid() boils down to oper() and + * IsBinaryCoercible(). PostgreSQL would have security problems + * elsewhere if oper() started calling opaque expressions. */ + if (OidIsValid(ddl_userid)) + { + AtEOXact_GUC(false, *ddl_save_nestlevel); + SetUserIdAndSecContext(ddl_userid, ddl_sec_context); + } opid = compatible_oper_opid(opname, atttype, atttype, false); + if (OidIsValid(ddl_userid)) + { + SetUserIdAndSecContext(save_userid, save_sec_context); + *ddl_save_nestlevel = NewGUCNestLevel(); + } /* * Only allow commutative operators to be used in exclusion From a0dc9901eea28abd098eb6bf7d791dbf7f744c32 Mon Sep 17 00:00:00 2001 From: reshke Date: Sun, 15 Feb 2026 10:35:19 +0000 Subject: [PATCH 22/47] Fix for ace9973867c to work in MPP --- contrib/citext/expected/create_index_acl.out | 4 +++- contrib/citext/sql/create_index_acl.sql | 3 ++- src/backend/commands/indexcmds.c | 14 +++++++++----- src/backend/tcop/utility.c | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/contrib/citext/expected/create_index_acl.out b/contrib/citext/expected/create_index_acl.out index f35f60b421d..28602ccfdf3 100644 --- a/contrib/citext/expected/create_index_acl.out +++ b/contrib/citext/expected/create_index_acl.out @@ -7,7 +7,9 @@ -- DefineIndex() uses the table owner userid where necessary.) -- Don't override tablespaces; this version lacks allow_in_place_tablespaces. BEGIN; +SET allow_segment_DML TO true; CREATE ROLE regress_minimal; +NOTICE: resource queue required -- using default resource queue "pg_default" CREATE SCHEMA s; CREATE EXTENSION citext SCHEMA s; -- Revoke all conceivably-relevant ACLs within the extension. The system @@ -42,7 +44,7 @@ REVOKE ALL ON FUNCTION s.index_row_if FROM PUBLIC; GRANT EXECUTE ON FUNCTION s.index_row_if TO regress_minimal; -- Non-extension, non-function objects. CREATE COLLATION s.coll (LOCALE="C"); -CREATE TABLE s.x (y s.citext); +CREATE TABLE s.x (y s.citext) DISTRIBUTED REPLICATED; ALTER TABLE s.x OWNER TO regress_minimal; -- Empty-table DefineIndex() CREATE UNIQUE INDEX u0rows ON s.x USING btree diff --git a/contrib/citext/sql/create_index_acl.sql b/contrib/citext/sql/create_index_acl.sql index a5f4e6b30a8..aedb9d625fc 100644 --- a/contrib/citext/sql/create_index_acl.sql +++ b/contrib/citext/sql/create_index_acl.sql @@ -9,6 +9,7 @@ -- Don't override tablespaces; this version lacks allow_in_place_tablespaces. BEGIN; +SET allow_segment_DML TO true; CREATE ROLE regress_minimal; CREATE SCHEMA s; CREATE EXTENSION citext SCHEMA s; @@ -44,7 +45,7 @@ REVOKE ALL ON FUNCTION s.index_row_if FROM PUBLIC; GRANT EXECUTE ON FUNCTION s.index_row_if TO regress_minimal; -- Non-extension, non-function objects. CREATE COLLATION s.coll (LOCALE="C"); -CREATE TABLE s.x (y s.citext); +CREATE TABLE s.x (y s.citext) DISTRIBUTED REPLICATED; ALTER TABLE s.x OWNER TO regress_minimal; -- Empty-table DefineIndex() CREATE UNIQUE INDEX u0rows ON s.x USING btree diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index ca023623955..7d91d604443 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1903,6 +1903,10 @@ DefineIndex(Oid relationId, } stmt->idxname = indexRelationName; + + AtEOXact_GUC(false, root_save_nestlevel); + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + if (shouldDispatch) { /* make sure the QE uses the same index name that we chose */ @@ -1923,8 +1927,6 @@ DefineIndex(Oid relationId, * Indexes on partitioned tables are not themselves built, so we're * done here. */ - AtEOXact_GUC(false, root_save_nestlevel); - SetUserIdAndSecContext(root_save_userid, root_save_sec_context); table_close(rel, NoLock); if (!OidIsValid(parentIndexId)) pgstat_progress_end_command(); @@ -1932,6 +1934,10 @@ DefineIndex(Oid relationId, } stmt->idxname = indexRelationName; + + AtEOXact_GUC(false, root_save_nestlevel); + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + if (shouldDispatch) { int flags = DF_CANCEL_ON_ERROR | DF_WITH_SNAPSHOT; @@ -1941,6 +1947,7 @@ DefineIndex(Oid relationId, /* make sure the QE uses the same index name that we chose */ stmt->oldNode = InvalidOid; Assert(stmt->relation != NULL); + CdbDispatchUtilityStatement((Node *) stmt, flags, GetAssignedOidsForDispatch(), NULL); @@ -1950,9 +1957,6 @@ DefineIndex(Oid relationId, cdb_sync_indcheckxmin_with_segments(indexRelationId); } - AtEOXact_GUC(false, root_save_nestlevel); - SetUserIdAndSecContext(root_save_userid, root_save_sec_context); - if (!concurrent || Gp_role == GP_ROLE_EXECUTE) { /* Close the heap and we're done, in the non-concurrent case */ diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 8eefbf93b88..72d4fc4c89b 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -2049,7 +2049,7 @@ ProcessUtilitySlow(ParseState *pstate, /* * The QD might have looked up the OID of the base table - * already, and stashed it in stmt->relid + * already, and stashed it in stmt->relationOid */ if (stmt->relationOid) relid = stmt->relationOid; From 898de9b22854c888431c5d0f881e9d5841f831c0 Mon Sep 17 00:00:00 2001 From: reshke Date: Wed, 25 Feb 2026 10:42:43 +0500 Subject: [PATCH 23/47] Remove bogus loop in single-iteration code (#1580) Spotted while casually reading SonarQube output. Commit simply removes loop in oddly-fashioned code. Reviewed-by: Andrey Borodin Reviewed-by: Jianghua Yang --- src/backend/access/appendonly/appendonlyam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index b5f6a17a597..defa5ccc80e 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -1983,7 +1983,7 @@ appendonly_endscan(TableScanDesc scan) static pg_attribute_hot_inline bool appendonly_getnextslot_noqual(AppendOnlyScanDesc aoscan, ScanDirection direction, TupleTableSlot *slot) { - while (appendonlygettup(aoscan, direction, aoscan->rs_base.rs_nkeys, aoscan->aos_key, slot)) + if (appendonlygettup(aoscan, direction, aoscan->rs_base.rs_nkeys, aoscan->aos_key, slot)) { pgstat_count_heap_getnext(aoscan->aos_rd); return true; From 43b7963cf43450581da6631d245662091b843cf8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 12 Jul 2022 16:30:36 -0400 Subject: [PATCH 24/47] Invent qsort_interruptible(). Justin Pryzby reported that some scenarios could cause gathering of extended statistics to spend many seconds in an un-cancelable qsort() operation. To fix, invent qsort_interruptible(), which is just like qsort_arg() except that it will also do CHECK_FOR_INTERRUPTS every so often. This bloats the backend by a couple of kB, which seems like a good investment. (We considered just enabling CHECK_FOR_INTERRUPTS in the existing qsort and qsort_arg functions, but there are some callers for which that'd demonstrably be unsafe. Opt-in seems like a better way.) For now, just apply qsort_interruptible() in statistics collection. There's probably more places where it could be useful, but we can always change other call sites as we find problems. Back-patch to v14. Before that we didn't have extended stats on expressions, so that the problem was less severe. Also, this patch depends on the sort_template infrastructure introduced in v14. Tom Lane and Justin Pryzby Discussion: https://postgr.es/m/20220509000108.GQ28830@telsasoft.com --- src/backend/commands/analyze.c | 25 ++++++++------- src/backend/statistics/extended_stats.c | 4 +-- src/backend/statistics/mcv.c | 14 ++++----- src/backend/statistics/mvdistinct.c | 4 +-- src/backend/tsearch/ts_typanalyze.c | 22 +++++++------ src/backend/utils/adt/array_typanalyze.c | 31 ++++++++++--------- src/backend/utils/adt/rangetypes_typanalyze.c | 15 ++++----- src/backend/utils/sort/Makefile | 1 + src/backend/utils/sort/qsort_interruptible.c | 16 ++++++++++ src/include/port.h | 3 ++ 10 files changed, 80 insertions(+), 55 deletions(-) create mode 100644 src/backend/utils/sort/qsort_interruptible.c diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 0b6261dc61f..00fc25b2439 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -192,7 +192,7 @@ static int gp_acquire_sample_rows_func(Relation onerel, int elevel, static BlockNumber acquire_index_number_of_blocks(Relation indexrel, Relation tablerel); static void gp_acquire_correlations_dispatcher(Oid relOid, bool inh, float4 *correlations, bool *correlationsIsNull); -static int compare_rows(const void *a, const void *b); +static int compare_rows(const void *a, const void *b, void *arg); static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); @@ -1910,7 +1910,8 @@ acquire_sample_rows(Relation onerel, int elevel, * tuples are already sorted. */ if (numrows == targrows) - qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); + qsort_interruptible((void *) rows, numrows, sizeof(HeapTuple), + compare_rows, NULL); /* * Estimate total numbers of live and dead rows in relation, extrapolating @@ -1946,10 +1947,10 @@ acquire_sample_rows(Relation onerel, int elevel, } /* - * qsort comparator for sorting rows[] array + * Comparator for sorting rows[] array */ static int -compare_rows(const void *a, const void *b) +compare_rows(const void *a, const void *b, void *arg) { HeapTuple ha = *(const HeapTuple *) a; HeapTuple hb = *(const HeapTuple *) b; @@ -3307,7 +3308,7 @@ static void merge_leaf_stats(VacAttrStatsP stats, int samplerows, double totalrows); static int compare_scalars(const void *a, const void *b, void *arg); -static int compare_mcvs(const void *a, const void *b); +static int compare_mcvs(const void *a, const void *b, void *arg); static int analyze_mcv_list(int *mcv_counts, int num_mcv, double stadistinct, @@ -3977,8 +3978,8 @@ compute_scalar_stats(VacAttrStatsP stats, /* Sort the collected values */ cxt.ssup = &ssup; cxt.tupnoLink = tupnoLink; - qsort_arg((void *) values, values_cnt, sizeof(ScalarItem), - compare_scalars, (void *) &cxt); + qsort_interruptible((void *) values, values_cnt, sizeof(ScalarItem), + compare_scalars, (void *) &cxt); /* * Now scan the values in order, find the most common ones, and also @@ -4245,8 +4246,8 @@ compute_scalar_stats(VacAttrStatsP stats, deltafrac; /* Sort the MCV items into position order to speed next loop */ - qsort((void *) track, num_mcv, - sizeof(ScalarMCVItem), compare_mcvs); + qsort_interruptible((void *) track, num_mcv, sizeof(ScalarMCVItem), + compare_mcvs, NULL); /* * Collapse out the MCV items from the values[] array. @@ -5004,7 +5005,7 @@ merge_leaf_stats(VacAttrStatsP stats, } /* - * qsort_arg comparator for sorting ScalarItems + * Comparator for sorting ScalarItems * * Aside from sorting the items, we update the tupnoLink[] array * whenever two ScalarItems are found to contain equal datums. The array @@ -5041,10 +5042,10 @@ compare_scalars(const void *a, const void *b, void *arg) } /* - * qsort comparator for sorting ScalarMCVItems by position + * Comparator for sorting ScalarMCVItems by position */ static int -compare_mcvs(const void *a, const void *b) +compare_mcvs(const void *a, const void *b, void *arg) { int da = ((const ScalarMCVItem *) a)->first; int db = ((const ScalarMCVItem *) b)->first; diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index d3561b779ab..b077f9710ec 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1135,8 +1135,8 @@ build_sorted_items(StatsBuildData *data, int *nitems, } /* do the sort, using the multi-sort */ - qsort_arg((void *) items, nrows, sizeof(SortItem), - multi_sort_compare, mss); + qsort_interruptible((void *) items, nrows, sizeof(SortItem), + multi_sort_compare, mss); return items; } diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index ef118952c74..e6a60865282 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -404,7 +404,7 @@ count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss) * order. */ static int -compare_sort_item_count(const void *a, const void *b) +compare_sort_item_count(const void *a, const void *b, void *arg) { SortItem *ia = (SortItem *) a; SortItem *ib = (SortItem *) b; @@ -457,8 +457,8 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, Assert(j + 1 == ngroups); /* Sort the distinct groups by frequency (in descending order). */ - pg_qsort((void *) groups, ngroups, sizeof(SortItem), - compare_sort_item_count); + qsort_interruptible((void *) groups, ngroups, sizeof(SortItem), + compare_sort_item_count, NULL); *ndistinct = ngroups; return groups; @@ -528,8 +528,8 @@ build_column_frequencies(SortItem *groups, int ngroups, } /* sort the values, deduplicate */ - qsort_arg((void *) result[dim], ngroups, sizeof(SortItem), - sort_item_compare, ssup); + qsort_interruptible((void *) result[dim], ngroups, sizeof(SortItem), + sort_item_compare, ssup); /* * Identify distinct values, compute frequency (there might be @@ -695,8 +695,8 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats) PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]); - qsort_arg(values[dim], counts[dim], sizeof(Datum), - compare_scalars_simple, &ssup[dim]); + qsort_interruptible(values[dim], counts[dim], sizeof(Datum), + compare_scalars_simple, &ssup[dim]); /* * Walk through the array and eliminate duplicate values, but keep the diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 4481312d61d..4b4ecec9361 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -488,8 +488,8 @@ ndistinct_for_combination(double totalrows, StatsBuildData *data, } /* We can sort the array now ... */ - qsort_arg((void *) items, numrows, sizeof(SortItem), - multi_sort_compare, mss); + qsort_interruptible((void *) items, numrows, sizeof(SortItem), + multi_sort_compare, mss); /* ... and count the number of distinct combinations */ diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 1ebba4b3f56..504ba1569ee 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -44,8 +44,10 @@ static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current); static uint32 lexeme_hash(const void *key, Size keysize); static int lexeme_match(const void *key1, const void *key2, Size keysize); static int lexeme_compare(const void *key1, const void *key2); -static int trackitem_compare_frequencies_desc(const void *e1, const void *e2); -static int trackitem_compare_lexemes(const void *e1, const void *e2); +static int trackitem_compare_frequencies_desc(const void *e1, const void *e2, + void *arg); +static int trackitem_compare_lexemes(const void *e1, const void *e2, + void *arg); /* @@ -347,8 +349,8 @@ compute_tsvector_stats(VacAttrStats *stats, */ if (num_mcelem < track_len) { - qsort(sort_table, track_len, sizeof(TrackItem *), - trackitem_compare_frequencies_desc); + qsort_interruptible(sort_table, track_len, sizeof(TrackItem *), + trackitem_compare_frequencies_desc, NULL); /* reset minfreq to the smallest frequency we're keeping */ minfreq = sort_table[num_mcelem - 1]->frequency; } @@ -376,8 +378,8 @@ compute_tsvector_stats(VacAttrStats *stats, * presorted we can employ binary search for that. See * ts_selfuncs.c for a real usage scenario. */ - qsort(sort_table, num_mcelem, sizeof(TrackItem *), - trackitem_compare_lexemes); + qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *), + trackitem_compare_lexemes, NULL); /* Must copy the target values into anl_context */ old_context = MemoryContextSwitchTo(stats->anl_context); @@ -510,10 +512,10 @@ lexeme_compare(const void *key1, const void *key2) } /* - * qsort() comparator for sorting TrackItems on frequencies (descending sort) + * Comparator for sorting TrackItems on frequencies (descending sort) */ static int -trackitem_compare_frequencies_desc(const void *e1, const void *e2) +trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg) { const TrackItem *const *t1 = (const TrackItem *const *) e1; const TrackItem *const *t2 = (const TrackItem *const *) e2; @@ -522,10 +524,10 @@ trackitem_compare_frequencies_desc(const void *e1, const void *e2) } /* - * qsort() comparator for sorting TrackItems on lexemes + * Comparator for sorting TrackItems on lexemes */ static int -trackitem_compare_lexemes(const void *e1, const void *e2) +trackitem_compare_lexemes(const void *e1, const void *e2, void *arg) { const TrackItem *const *t1 = (const TrackItem *const *) e1; const TrackItem *const *t2 = (const TrackItem *const *) e2; diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c index c5008a0c169..e873d228592 100644 --- a/src/backend/utils/adt/array_typanalyze.c +++ b/src/backend/utils/adt/array_typanalyze.c @@ -86,9 +86,9 @@ static void prune_element_hashtable(HTAB *elements_tab, int b_current); static uint32 element_hash(const void *key, Size keysize); static int element_match(const void *key1, const void *key2, Size keysize); static int element_compare(const void *key1, const void *key2); -static int trackitem_compare_frequencies_desc(const void *e1, const void *e2); -static int trackitem_compare_element(const void *e1, const void *e2); -static int countitem_compare_count(const void *e1, const void *e2); +static int trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg); +static int trackitem_compare_element(const void *e1, const void *e2, void *arg); +static int countitem_compare_count(const void *e1, const void *e2, void *arg); /* @@ -502,8 +502,8 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, */ if (num_mcelem < track_len) { - qsort(sort_table, track_len, sizeof(TrackItem *), - trackitem_compare_frequencies_desc); + qsort_interruptible(sort_table, track_len, sizeof(TrackItem *), + trackitem_compare_frequencies_desc, NULL); /* reset minfreq to the smallest frequency we're keeping */ minfreq = sort_table[num_mcelem - 1]->frequency; } @@ -522,8 +522,8 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, * the element type's default comparison function. This permits * fast binary searches in selectivity estimation functions. */ - qsort(sort_table, num_mcelem, sizeof(TrackItem *), - trackitem_compare_element); + qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *), + trackitem_compare_element, NULL); /* Must copy the target values into anl_context */ old_context = MemoryContextSwitchTo(stats->anl_context); @@ -599,8 +599,9 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, { sorted_count_items[j++] = count_item; } - qsort(sorted_count_items, count_items_count, - sizeof(DECountItem *), countitem_compare_count); + qsort_interruptible(sorted_count_items, count_items_count, + sizeof(DECountItem *), + countitem_compare_count, NULL); /* * Prepare to fill stanumbers with the histogram, followed by the @@ -751,10 +752,10 @@ element_compare(const void *key1, const void *key2) } /* - * qsort() comparator for sorting TrackItems by frequencies (descending sort) + * Comparator for sorting TrackItems by frequencies (descending sort) */ static int -trackitem_compare_frequencies_desc(const void *e1, const void *e2) +trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg) { const TrackItem *const *t1 = (const TrackItem *const *) e1; const TrackItem *const *t2 = (const TrackItem *const *) e2; @@ -763,10 +764,10 @@ trackitem_compare_frequencies_desc(const void *e1, const void *e2) } /* - * qsort() comparator for sorting TrackItems by element values + * Comparator for sorting TrackItems by element values */ static int -trackitem_compare_element(const void *e1, const void *e2) +trackitem_compare_element(const void *e1, const void *e2, void *arg) { const TrackItem *const *t1 = (const TrackItem *const *) e1; const TrackItem *const *t2 = (const TrackItem *const *) e2; @@ -775,10 +776,10 @@ trackitem_compare_element(const void *e1, const void *e2) } /* - * qsort() comparator for sorting DECountItems by count + * Comparator for sorting DECountItems by count */ static int -countitem_compare_count(const void *e1, const void *e2) +countitem_compare_count(const void *e1, const void *e2, void *arg) { const DECountItem *const *t1 = (const DECountItem *const *) e1; const DECountItem *const *t2 = (const DECountItem *const *) e2; diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index 0d01252cd7c..9d5cf897c45 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -32,7 +32,7 @@ #include "utils/rangetypes.h" #include "utils/multirangetypes.h" -static int float8_qsort_cmp(const void *a1, const void *a2); +static int float8_qsort_cmp(const void *a1, const void *a2, void *arg); static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg); static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, @@ -93,7 +93,7 @@ multirange_typanalyze(PG_FUNCTION_ARGS) * Comparison function for sorting float8s, used for range lengths. */ static int -float8_qsort_cmp(const void *a1, const void *a2) +float8_qsort_cmp(const void *a1, const void *a2, void *arg) { const float8 *f1 = (const float8 *) a1; const float8 *f2 = (const float8 *) a2; @@ -280,10 +280,10 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, if (non_empty_cnt >= 2) { /* Sort bound values */ - qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound), - range_bound_qsort_cmp, typcache); - qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound), - range_bound_qsort_cmp, typcache); + qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound), + range_bound_qsort_cmp, typcache); + qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound), + range_bound_qsort_cmp, typcache); num_hist = non_empty_cnt; if (num_hist > num_bins) @@ -345,7 +345,8 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, * Ascending sort of range lengths for further filling of * histogram */ - qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp); + qsort_interruptible(lengths, non_empty_cnt, sizeof(float8), + float8_qsort_cmp, NULL); num_hist = non_empty_cnt; if (num_hist > num_bins) diff --git a/src/backend/utils/sort/Makefile b/src/backend/utils/sort/Makefile index 26f65fcaf7a..2c31fd453d6 100644 --- a/src/backend/utils/sort/Makefile +++ b/src/backend/utils/sort/Makefile @@ -16,6 +16,7 @@ override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS) OBJS = \ logtape.o \ + qsort_interruptible.o \ sharedtuplestore.o \ sortsupport.o \ tuplesort.o \ diff --git a/src/backend/utils/sort/qsort_interruptible.c b/src/backend/utils/sort/qsort_interruptible.c new file mode 100644 index 00000000000..f179b256248 --- /dev/null +++ b/src/backend/utils/sort/qsort_interruptible.c @@ -0,0 +1,16 @@ +/* + * qsort_interruptible.c: qsort_arg that includes CHECK_FOR_INTERRUPTS + */ + +#include "postgres.h" +#include "miscadmin.h" + +#define ST_SORT qsort_interruptible +#define ST_ELEMENT_TYPE_VOID +#define ST_COMPARATOR_TYPE_NAME qsort_arg_comparator +#define ST_COMPARE_RUNTIME_POINTER +#define ST_COMPARE_ARG_TYPE void +#define ST_SCOPE +#define ST_DEFINE +#define ST_CHECK_FOR_INTERRUPTS +#include "lib/sort_template.h" diff --git a/src/include/port.h b/src/include/port.h index cb34aca03eb..c30e558a362 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -511,6 +511,9 @@ typedef int (*qsort_arg_comparator) (const void *a, const void *b, void *arg); extern void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg); +extern void qsort_interruptible(void *base, size_t nel, size_t elsize, + qsort_arg_comparator cmp, void *arg); + extern void *bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *, void *), From 3d23d4d843b27ebe5cdfd2b4df31f34117fbe123 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 13 Jul 2022 12:10:03 +0200 Subject: [PATCH 25/47] Plug memory leak Commit 054325c5eeb3 created a memory leak in PQsendQueryInternal in case an error occurs while sending the message. Repair. Backpatch to 14, like that commit. Reported by Coverity. --- src/interfaces/libpq/fe-exec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index b2c7727f68d..ea3a78420ca 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -1482,6 +1482,7 @@ PQsendQueryInternal(PGconn *conn, const char *query, bool newQuery) sendFailed: pqRecycleCmdQueueEntry(conn, entry); + pqRecycleCmdQueueEntry(conn, entry2); /* error message should be set up already */ return 0; } From 8a293066a13f73871b8e873abb1ed0dcf33aae3d Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 12:08:54 -0400 Subject: [PATCH 26/47] doc: mention the pg_locks lock names in parentheses Reported-by: Troy Frericks Discussion: https://postgr.es/m/165653551130.665.8240515669521441325@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/mvcc.sgml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 3d3cbb339ce..d357799e53b 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -875,7 +875,7 @@ ERROR: could not serialize access due to read/write dependencies among transact Table-Level Lock Modes - ACCESS SHARE + ACCESS SHARE (AccessShareLock) @@ -893,7 +893,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - ROW SHARE + ROW SHARE (RowShareLock) @@ -914,7 +914,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - ROW EXCLUSIVE + ROW EXCLUSIVE (RowExclusiveLock) @@ -936,7 +936,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - SHARE UPDATE EXCLUSIVE + SHARE UPDATE EXCLUSIVE (ShareUpdateExclusiveLock) @@ -962,7 +962,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - SHARE + SHARE (ShareLock) @@ -982,7 +982,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - SHARE ROW EXCLUSIVE + SHARE ROW EXCLUSIVE (ShareRowExclusiveLock) @@ -1004,7 +1004,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - EXCLUSIVE + EXCLUSIVE (ExclusiveLock) @@ -1026,7 +1026,7 @@ ERROR: could not serialize access due to read/write dependencies among transact - ACCESS EXCLUSIVE + ACCESS EXCLUSIVE (AccessExclusiveLock) From ac550cac898f01da7808360736b596f253fe69db Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 15:17:19 -0400 Subject: [PATCH 27/47] doc: mention that INSERT can block because of unique indexes Initial patch by David G. Johnston. Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwZpbdzceO41VE-xt1Xh8rWRRfgopTAK1wL9EhCo0Am-Sw@mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/ref/insert.sgml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml index 2973b72b815..558660ccc58 100644 --- a/doc/src/sgml/ref/insert.sgml +++ b/doc/src/sgml/ref/insert.sgml @@ -75,6 +75,11 @@ INSERT INTO table_name [ AS + INSERT into tables that lack unique indexes will + not be blocked by concurrent activity. Tables with unique indexes + might block if concurrent sessions perform actions that lock or modify + rows matching the unique index values being inserted; the details + are covered in . ON CONFLICT can be used to specify an alternative action to raising a unique constraint or exclusion constraint violation error. (See below.) From 12d508e320c19960984a63c40d61615a9b3e3cd3 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 15:33:28 -0400 Subject: [PATCH 28/47] doc: clarify that "excluded" ON CONFLICT is a single row Original patch by David G. Johnston. Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwa4J0+WuO7kW1PLbjoEvzPN+Q_j+P2bXxNnCLaszY7ZdQ@mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/ref/insert.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml index 558660ccc58..c3f49f73980 100644 --- a/doc/src/sgml/ref/insert.sgml +++ b/doc/src/sgml/ref/insert.sgml @@ -181,7 +181,7 @@ INSERT INTO table_name [ AS ON CONFLICT DO UPDATE targets a table named excluded, since that will otherwise - be taken as the name of the special table representing rows proposed + be taken as the name of the special table representing the row proposed for insertion. @@ -401,7 +401,7 @@ INSERT INTO table_name [ AS SET and WHERE clauses in ON CONFLICT DO UPDATE have access to the existing row using the - table's name (or an alias), and to rows proposed for insertion + table's name (or an alias), and to the row proposed for insertion using the special excluded table. SELECT privilege is required on any column in the target table where corresponding excluded From 3a9d01929c5fb7b930370f51d017936db0466293 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 15:44:22 -0400 Subject: [PATCH 29/47] doc: clarify the behavior of identically-named savepoints Original patch by David G. Johnston. Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwYQCxSSuSL18skCWG8QHFswOJ3hjovHsOZUE346i4OpVQ@mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/ref/release_savepoint.sgml | 5 +++-- doc/src/sgml/ref/savepoint.sgml | 30 ++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/release_savepoint.sgml b/doc/src/sgml/ref/release_savepoint.sgml index 39665d28efa..daf8eb9a436 100644 --- a/doc/src/sgml/ref/release_savepoint.sgml +++ b/doc/src/sgml/ref/release_savepoint.sgml @@ -82,8 +82,9 @@ RELEASE [ SAVEPOINT ] savepoint_name - If multiple savepoints have the same name, only the one that was most - recently defined is released. + If multiple savepoints have the same name, only the most recently defined + unreleased one is released. Repeated commands will release progressively + older savepoints. diff --git a/doc/src/sgml/ref/savepoint.sgml b/doc/src/sgml/ref/savepoint.sgml index b17342a1ee6..f84ac3d167f 100644 --- a/doc/src/sgml/ref/savepoint.sgml +++ b/doc/src/sgml/ref/savepoint.sgml @@ -53,7 +53,9 @@ SAVEPOINT savepoint_name savepoint_name - The name to give to the new savepoint. + The name to give to the new savepoint. If savepoints with the + same name already exist, they will be inaccessible until newer + identically-named savepoints are released. @@ -106,6 +108,32 @@ COMMIT; The above transaction will insert both 3 and 4. + + + To use a single savepoint name: + +BEGIN; + INSERT INTO table1 VALUES (1); + SAVEPOINT my_savepoint; + INSERT INTO table1 VALUES (2); + SAVEPOINT my_savepoint; + INSERT INTO table1 VALUES (3); + + -- rollback to the second savepoint + ROLLBACK TO SAVEPOINT my_savepoint; + SELECT * FROM table1; -- shows rows 1 and 2 + + -- release the second savepoint + RELEASE SAVEPOINT my_savepoint; + + -- rollback to the first savepoint + ROLLBACK TO SAVEPOINT my_savepoint; + SELECT * FROM table1; -- shows only row 1 +COMMIT; + + The above transaction shows row 3 being rolled back first, then row 2. + + From 00853cf0e43cea9939030d0d32807b761961c4b9 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 16:19:45 -0400 Subject: [PATCH 30/47] doc: add documentation about ecpg Oracle-compatibility mode Reported-by: Takeshi Ideriha Discussion: https://postgr.es/m/TYCPR01MB7041A157067208327D8DAAF9EAA59@TYCPR01MB7041.jpnprd01.prod.outlook.com Backpatch-through: 11 --- doc/src/sgml/ecpg.sgml | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ecpg.sgml b/doc/src/sgml/ecpg.sgml index 5a2dc4a8ae8..9df09df4d77 100644 --- a/doc/src/sgml/ecpg.sgml +++ b/doc/src/sgml/ecpg.sgml @@ -1890,7 +1890,8 @@ EXEC SQL SELECT b INTO :val :val_ind FROM test1; The indicator variable val_ind will be zero if the value was not null, and it will be negative if the value was - null. + null. (See to enable + Oracle-specific behavior.) @@ -9801,6 +9802,42 @@ risnull(CINTTYPE, (char *) &i); + + <productname>Oracle</productname> Compatibility Mode + + ecpg can be run in a so-called Oracle + compatibility mode. If this mode is active, it tries to + behave as if it were Oracle Pro*C. + + + + Specifically, this mode changes ecpg in three ways: + + + + + Pad character arrays receiving character string types with + trailing spaces to the specified length + + + + + + Zero byte terminate these character arrays, and set the indicator + variable if truncation occurs + + + + + + Set the null indicator to -1 when character + arrays receive empty character string types + + + + + + Internals From cdd665251527cafe78efcc5ac67fae9d1c79ade1 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 16:34:30 -0400 Subject: [PATCH 31/47] pg_upgrade doc: mention that replication slots must be recreated Reported-by: Nikhil Shetty Discussion: https://postgr.es/m/CAFpL5Vxastip0Jei-K-=7cKXTg=5sahSe5g=om=x68NOX8+PUA@mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/ref/pgupgrade.sgml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index f21563fb5b9..c1315f8f9a3 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -614,7 +614,8 @@ rsync --archive --delete --hard-links --size-only --no-inc-recursive /vol1/pg_tb Configure the servers for log shipping. (You do not need to run pg_start_backup() and pg_stop_backup() or take a file system backup as the standbys are still synchronized - with the primary.) + with the primary.) Replication slots are not copied and must + be recreated. From 133d820563dc75652b9022acc1b2ed48d54ce0d6 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 17:41:03 -0400 Subject: [PATCH 32/47] doc: clarify how dropping of extensions affects dependent objs. Clarify that functions/procedures are dropped when any extension that depends on them is dropped. Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwbPSHMDGkisRUmewopweC1bFvytVqB=a=X4GFg=4ZWxPA@mail.gmail.com Backpatch-through: 13 --- doc/src/sgml/ref/alter_function.sgml | 6 ++++-- doc/src/sgml/ref/alter_procedure.sgml | 7 ++++++- doc/src/sgml/ref/drop_extension.sgml | 10 ++++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/ref/alter_function.sgml b/doc/src/sgml/ref/alter_function.sgml index 3c99b450e0a..ee94c34ae38 100644 --- a/doc/src/sgml/ref/alter_function.sgml +++ b/doc/src/sgml/ref/alter_function.sgml @@ -161,8 +161,10 @@ ALTER FUNCTION name [ ( [ [ extension_name - The name of the extension that the procedure is to depend on. + This form marks the procedure as dependent on the extension, or no longer + dependent on the extension if NO is specified. + A procedure that's marked as dependent on an extension is dropped when the + extension is dropped, even if cascade is not specified. + A procedure can depend upon multiple extensions, and will be dropped when + any one of those extensions is dropped. diff --git a/doc/src/sgml/ref/drop_extension.sgml b/doc/src/sgml/ref/drop_extension.sgml index 5e507dec928..c01ddace84c 100644 --- a/doc/src/sgml/ref/drop_extension.sgml +++ b/doc/src/sgml/ref/drop_extension.sgml @@ -30,7 +30,9 @@ DROP EXTENSION [ IF EXISTS ] name [ DROP EXTENSION removes extensions from the database. - Dropping an extension causes its component objects to be dropped as well. + Dropping an extension causes its component objects, and other explicitly + dependent routines (see , + the depends on extension action), to be dropped as well. @@ -77,9 +79,9 @@ DROP EXTENSION [ IF EXISTS ] name [ RESTRICT - Refuse to drop the extension if any objects depend on it (other than - its own member objects and other extensions listed in the same - DROP command). This is the default. + This option prevents the specified extensions from being dropped + if there exists non-extension-member objects that depends on any + the extensions. This is the default. From b3bbe439dfaf5ee8605c4d6cbbc0d74acd9c0466 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 14 Jul 2022 20:01:11 -0400 Subject: [PATCH 33/47] docs: make monitoring "phases" table titles consistent Reported-by: Nitin Jadhav Discussion: https://postgr.es/m/CAMm1aWbmTHwHKC2PERH0CCaFVPoxrtLeS8=wNuoge94qdSp3vA@mail.gmail.com Author: Nitin Jadhav Backpatch-through: 13 --- doc/src/sgml/monitoring.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 9c67c9d1c50..949bba7c768 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5607,7 +5607,7 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, - ANALYZE phases + ANALYZE Phases @@ -6537,7 +6537,7 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
- Base backup phases + Base Backup Phases From e5378a2295d5584f95da6092d47c2f89d772746d Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 1 Jul 2022 11:41:36 +0700 Subject: [PATCH 34/47] Clarify that pg_dump takes ACCESS SHARE lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add link to the description of lock levels to avoid confusing "shared locks" with SHARE locks. Florin Irion Reviewed-by: Álvaro Herrera, Tom Lane, and Nathan Bossart Discussion: https://www.postgresql.org/message-id/flat/d0f30cc2-3c76-1d43-f291-7c4b2872d653@gmail.com This is a backpatch of 4e2e8d71f, applied through version 14 --- doc/src/sgml/ref/pg_dump.sgml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index ca6ff8cdc65..6a7cd8dff4f 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -371,9 +371,9 @@ PostgreSQL documentation Requesting exclusive locks on database objects while running a parallel dump could - cause the dump to fail. The reason is that the pg_dump coordinator process - requests shared locks on the objects that the worker processes are going to dump later - in order to + cause the dump to fail. The reason is that the pg_dump leader process + requests shared locks (ACCESS SHARE) on the + objects that the worker processes are going to dump later in order to make sure that nobody deletes them and makes them go away while the dump is running. If another client then requests an exclusive lock on a table, that lock will not be granted but will be queued waiting for the shared lock of the coordinator process to be From fb07ddadfdcd32881edd1896026e672b6e4699b2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 17 Jul 2022 17:43:28 -0400 Subject: [PATCH 35/47] Fix omissions in support for the "regcollation" type. The patch that added regcollation doesn't seem to have been too thorough about supporting it everywhere that other reg* types are supported. Fix that. (The find_expr_references omission is moderately serious, since it could result in missing expression dependencies. The others are less exciting.) Noted while fixing bug #17483. Back-patch to v13 where regcollation was added. Discussion: https://postgr.es/m/1423433.1652722406@sss.pgh.pa.us --- src/backend/catalog/dependency.c | 7 +++++++ src/backend/utils/adt/selfuncs.c | 2 ++ src/backend/utils/cache/catcache.c | 1 + 3 files changed, 10 insertions(+) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 6c38ca470f6..39994474faf 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1972,6 +1972,13 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, objoid, 0, context->addrs); break; + case REGCOLLATIONOID: + objoid = DatumGetObjectId(con->constvalue); + if (SearchSysCacheExists1(COLLOID, + ObjectIdGetDatum(objoid))) + add_object_address(OCLASS_COLLATION, objoid, 0, + context->addrs); + break; case REGCONFIGOID: objoid = DatumGetObjectId(con->constvalue); if (SearchSysCacheExists1(TSCONFIGOID, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 6001982a6d2..10017cb583a 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4336,6 +4336,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, case REGOPERATOROID: case REGCLASSOID: case REGTYPEOID: + case REGCOLLATIONOID: case REGCONFIGOID: case REGDICTIONARYOID: case REGROLEOID: @@ -4467,6 +4468,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) case REGOPERATOROID: case REGCLASSOID: case REGTYPEOID: + case REGCOLLATIONOID: case REGCONFIGOID: case REGDICTIONARYOID: case REGROLEOID: diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 60f643c2d87..5ccb028a1a2 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -240,6 +240,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq case REGOPERATOROID: case REGCLASSOID: case REGTYPEOID: + case REGCOLLATIONOID: case REGCONFIGOID: case REGDICTIONARYOID: case REGROLEOID: From c1d4ed5e1cc7e7b9b5dfe5780ea5d7ce8c4ef2e1 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 18 Jul 2022 14:53:00 +0200 Subject: [PATCH 36/47] pg_upgrade: Adjust quoting style in message to match guidelines --- src/bin/pg_upgrade/check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index fd4414f1d02..954f0a11b87 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -1300,10 +1300,10 @@ check_for_incompatible_polymorphics(ClusterInfo *cluster) fclose(script); pg_log(PG_REPORT, "fatal\n"); pg_fatal("Your installation contains user-defined objects that refer to internal\n" - "polymorphic functions with arguments of type 'anyarray' or 'anyelement'.\n" + "polymorphic functions with arguments of type \"anyarray\" or \"anyelement\".\n" "These user-defined objects must be dropped before upgrading and restored\n" "afterwards, changing them to refer to the new corresponding functions with\n" - "arguments of type 'anycompatiblearray' and 'anycompatible'.\n" + "arguments of type \"anycompatiblearray\" and \"anycompatible\".\n" "A list of the problematic objects is in the file:\n" " %s\n\n", output_path); } From 2c743d06b3b0bb24677cc5e39123be6373805562 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 18 Jul 2022 16:23:48 +0200 Subject: [PATCH 37/47] Re-add SPICleanup for ABI compatibility in stable branch This fixes an ABI break introduced by 604651880c71c5106a72529b9ce29eaad0cfab27. Author: Markus Wanner Discussion: https://www.postgresql.org/message-id/defd749a-8410-841d-1126-21398686d63d@enterprisedb.com --- src/backend/executor/spi.c | 10 ++++++++++ src/include/executor/spi.h | 1 + 2 files changed, 11 insertions(+) diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 5db53b125ee..4a2ddd5dff3 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -442,6 +442,16 @@ SPI_rollback_and_chain(void) _SPI_rollback(true); } +/* + * SPICleanup is a no-op, kept for backwards compatibility. We rely on + * AtEOXact_SPI to cleanup. Extensions should not (need to) fiddle with the + * internal SPI state directly. + */ +void +SPICleanup(void) +{ +} + /* * Clean up SPI state at transaction commit or abort. */ diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index fc60fdb9584..ef1964b709d 100644 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -205,6 +205,7 @@ extern void SPI_commit_and_chain(void); extern void SPI_rollback(void); extern void SPI_rollback_and_chain(void); +extern void SPICleanup(void); extern void AtEOXact_SPI(bool isCommit); extern void AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid); extern bool SPI_inside_nonatomic_context(void); From d3a1b360cae5d9d8385b24b104facf04ad4e92f9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 21 Jul 2022 13:56:02 -0400 Subject: [PATCH 38/47] Fix ruleutils issues with dropped cols in functions-returning-composite. Due to lack of concern for the case in the dependency code, it's possible to drop a column of a composite type even though stored queries have references to the dropped column via functions-in-FROM that return the composite type. There are "soft" references, namely FROM-clause aliases for such columns, and "hard" references, that is actual Vars referring to them. The right fix for hard references is to add dependencies preventing the drop; something we've known for many years and not done (and this commit still doesn't address it). A "soft" reference shouldn't prevent a drop though. We've been around on this before (cf. 9b35ddce9, 2c4debbd0), but nobody had noticed that the current behavior can result in dump/reload failures, because ruleutils.c can print more column aliases than the underlying composite type now has. So we need to rejigger the column-alias-handling code to treat such columns as dropped and not print aliases for them. Rather than writing new code for this, I used expandRTE() which already knows how to figure out which function result columns are dropped. I'd initially thought maybe we could use expandRTE() in all cases, but that fails for EXPLAIN's purposes, because the planner strips a lot of RTE infrastructure that expandRTE() needs. So this patch just uses it for unplanned function RTEs and otherwise does things the old way. If there is a hard reference (Var), then removing the column alias causes us to fail to print the Var, since there's no longer a name to print. Failing seems less desirable than printing a made-up name, so I made it print "?dropped?column?" instead. Per report from Timo Stolz. Back-patch to all supported branches. Discussion: https://postgr.es/m/5c91267e-3b6d-5795-189c-d15a55d61dbb@nullachtvierzehn.de --- src/backend/parser/parse_relation.c | 3 ++ src/backend/utils/adt/ruleutils.c | 56 ++++++++++++++++++----- src/test/regress/expected/create_view.out | 25 ++++++---- src/test/regress/sql/create_view.sql | 6 ++- 4 files changed, 68 insertions(+), 22 deletions(-) diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 9e82bd85c75..dfe348c1f40 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -3487,6 +3487,9 @@ expandNSItemAttrs(ParseState *pstate, ParseNamespaceItem *nsitem, * * "*" is returned if the given attnum is InvalidAttrNumber --- this case * occurs when a Var represents a whole tuple of a relation. + * + * It is caller's responsibility to not call this on a dropped attribute. + * (You will get some answer for such cases, but it might not be sensible.) */ char * get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index cdbd27d4d95..ea8156bebad 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -58,6 +58,7 @@ #include "parser/parse_node.h" #include "parser/parse_oper.h" #include "parser/parse_cte.h" +#include "parser/parse_relation.h" #include "parser/parser.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" @@ -4241,9 +4242,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte, int j; /* - * Extract the RTE's "real" column names. This is comparable to - * get_rte_attribute_name, except that it's important to disregard dropped - * columns. We put NULL into the array for a dropped column. + * Construct an array of the current "real" column names of the RTE. + * real_colnames[] will be indexed by physical column number, with NULL + * entries for dropped columns. */ if (rte->rtekind == RTE_RELATION) { @@ -4270,19 +4271,43 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte, } else { - /* Otherwise use the column names from eref */ + /* Otherwise get the column names from eref or expandRTE() */ + List *colnames; ListCell *lc; - ncolumns = list_length(rte->eref->colnames); + /* + * Functions returning composites have the annoying property that some + * of the composite type's columns might have been dropped since the + * query was parsed. If possible, use expandRTE() to handle that + * case, since it has the tedious logic needed to find out about + * dropped columns. However, if we're explaining a plan, then we + * don't have rte->functions because the planner thinks that won't be + * needed later, and that breaks expandRTE(). So in that case we have + * to rely on rte->eref, which may lead us to report a dropped + * column's old name; that seems close enough for EXPLAIN's purposes. + * + * For non-RELATION, non-FUNCTION RTEs, we can just look at rte->eref, + * which should be sufficiently up-to-date: no other RTE types can + * have columns get dropped from under them after parsing. + */ + if (rte->rtekind == RTE_FUNCTION && rte->functions != NIL) + { + /* Since we're not creating Vars, rtindex etc. don't matter */ + expandRTE(rte, 1, 0, -1, true /* include dropped */ , + &colnames, NULL); + } + else + colnames = rte->eref->colnames; + + ncolumns = list_length(colnames); real_colnames = (char **) palloc(ncolumns * sizeof(char *)); i = 0; - foreach(lc, rte->eref->colnames) + foreach(lc, colnames) { /* - * If the column name shown in eref is an empty string, then it's - * a column that was dropped at the time of parsing the query, so - * treat it as dropped. + * If the column name we find here is an empty string, then it's a + * dropped column, so change to NULL. */ char *cname = strVal(lfirst(lc)); @@ -7296,9 +7321,16 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) elog(ERROR, "invalid attnum %d for relation \"%s\"", attnum, rte->eref->aliasname); attname = colinfo->colnames[attnum - 1]; - if (attname == NULL) /* dropped column? */ - elog(ERROR, "invalid attnum %d for relation \"%s\"", - attnum, rte->eref->aliasname); + + /* + * If we find a Var referencing a dropped column, it seems better to + * print something (anything) than to fail. In general this should + * not happen, but there are specific cases involving functions + * returning named composite types where we don't sufficiently enforce + * that you can't drop a column that's referenced in some view. + */ + if (attname == NULL) + attname = "?dropped?column?"; } else { diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out index 82332a47c11..fdb0657bb72 100644 --- a/src/test/regress/expected/create_view.out +++ b/src/test/regress/expected/create_view.out @@ -1551,17 +1551,26 @@ select * from tt14v; begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); - pg_get_viewdef --------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + - FROM tt14f() t(f1, f3, f4); + pg_get_viewdef +--------------------------------- + SELECT t.f1, + + t."?dropped?column?" AS f3,+ + t.f4 + + FROM tt14f() t(f1, f4); (1 row) --- but will fail at execution +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; + QUERY PLAN +---------------------------------------- + Function Scan on testviewschm2.tt14f t + Output: t.f1, t.f3, t.f4 + Function Call: tt14f() +(3 rows) + +-- but it will fail at execution select f1, f4 from tt14v; f1 | f4 -----+---- diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql index d8f44923945..2e7452ac9ea 100644 --- a/src/test/regress/sql/create_view.sql +++ b/src/test/regress/sql/create_view.sql @@ -533,9 +533,11 @@ begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); --- but will fail at execution +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; +-- but it will fail at execution select f1, f4 from tt14v; select * from tt14v; From 0ee68c9cd04ff44db9f16f739ce90017f9a8db31 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 21 Jul 2022 13:58:20 -0400 Subject: [PATCH 39/47] doc: clarify that auth. names are lower case and case-sensitive This is true even for acronyms that are usually upper case, like LDAP. Reported-by: Alvaro Herrera Discussion: https://postgr.es/m/202205141521.2nodjabmsour@alvherre.pgsql Backpatch-through: 10 --- doc/src/sgml/client-auth.sgml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml index 02f04891129..eb5e9f48db1 100644 --- a/doc/src/sgml/client-auth.sgml +++ b/doc/src/sgml/client-auth.sgml @@ -417,7 +417,9 @@ hostnogssenc database user Specifies the authentication method to use when a connection matches this record. The possible choices are summarized here; details - are in . + are in . All the options + are lower case and treated case sensitively, so even acronyms like + ldap must be specified as lower case. From 71694e6e0becd45f5acca3bbf03d25f7315c2694 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 21 Jul 2022 14:55:23 -0400 Subject: [PATCH 40/47] doc: use wording "restore" instead of "reload" of dumps Reported-by: axel.kluener@gmail.com Discussion: https://postgr.es/m/164736074430.660.3645615289283943146@wrigleys.postgresql.org Backpatch-through: 11 --- doc/src/sgml/ddl.sgml | 10 +++++----- doc/src/sgml/extend.sgml | 2 +- doc/src/sgml/perform.sgml | 2 +- doc/src/sgml/plhandler.sgml | 2 +- doc/src/sgml/ref/alter_type.sgml | 2 +- doc/src/sgml/ref/create_domain.sgml | 2 +- doc/src/sgml/ref/pg_dump.sgml | 22 ++++++++++++++++------ doc/src/sgml/ref/pg_dumpall.sgml | 10 +++++----- doc/src/sgml/ref/pg_resetwal.sgml | 4 ++-- doc/src/sgml/ref/pg_restore.sgml | 6 +++--- doc/src/sgml/ref/pgupgrade.sgml | 4 ++-- doc/src/sgml/runtime.sgml | 4 ++-- doc/src/sgml/textsearch.sgml | 2 +- 13 files changed, 41 insertions(+), 31 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index c358bff56d9..c85e92b3a2f 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -557,7 +557,7 @@ CREATE TABLE products ( tests, it cannot guarantee that the database will not reach a state in which the constraint condition is false (due to subsequent changes of the other row(s) involved). This would cause a database dump and - reload to fail. The reload could fail even when the complete + restore to fail. The restore could fail even when the complete database state is consistent with the constraint, due to rows not being loaded in an order that will satisfy the constraint. If possible, use UNIQUE, EXCLUDE, @@ -569,10 +569,10 @@ CREATE TABLE products ( If what you desire is a one-time check against other rows at row insertion, rather than a continuously-maintained consistency guarantee, a custom trigger can be used - to implement that. (This approach avoids the dump/reload problem because + to implement that. (This approach avoids the dump/restore problem because pg_dump does not reinstall triggers until after - reloading data, so that the check will not be enforced during a - dump/reload.) + restoring data, so that the check will not be enforced during a + dump/restore.) @@ -594,7 +594,7 @@ CREATE TABLE products ( function. PostgreSQL does not disallow that, but it will not notice if there are rows in the table that now violate the CHECK constraint. That would cause a - subsequent database dump and reload to fail. + subsequent database dump and restore to fail. The recommended way to handle such a change is to drop the constraint (using ALTER TABLE), adjust the function definition, and re-add the constraint, thereby rechecking it against all table rows. diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml index bb0b2679bbb..8b8ccd9d4c0 100644 --- a/doc/src/sgml/extend.sgml +++ b/doc/src/sgml/extend.sgml @@ -982,7 +982,7 @@ SET LOCAL search_path TO @extschema@, pg_temp; pg_dump. But that behavior is undesirable for a configuration table; any data changes made by the user need to be included in dumps, or the extension will behave differently after a dump - and reload. + and restore. diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 9cf8ebea808..749d4693744 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1785,7 +1785,7 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; Dump scripts generated by pg_dump automatically apply - several, but not all, of the above guidelines. To reload a + several, but not all, of the above guidelines. To restore a pg_dump dump as quickly as possible, you need to do a few extra things manually. (Note that these points apply while restoring a dump, not while creating it. diff --git a/doc/src/sgml/plhandler.sgml b/doc/src/sgml/plhandler.sgml index 40ee59de9f3..980c95ecf39 100644 --- a/doc/src/sgml/plhandler.sgml +++ b/doc/src/sgml/plhandler.sgml @@ -156,7 +156,7 @@ attached to a function when check_function_bodies is on. Therefore, checks whose results might be affected by GUC parameters definitely should be skipped when check_function_bodies is - off, to avoid false failures when reloading a dump. + off, to avoid false failures when restoring a dump. diff --git a/doc/src/sgml/ref/alter_type.sgml b/doc/src/sgml/ref/alter_type.sgml index 21887e88a0f..146065144f5 100644 --- a/doc/src/sgml/ref/alter_type.sgml +++ b/doc/src/sgml/ref/alter_type.sgml @@ -411,7 +411,7 @@ ALTER TYPE name SET ( since the original creation of the enum type). The slowdown is usually insignificant; but if it matters, optimal performance can be regained by dropping and recreating the enum type, or by dumping and - reloading the database. + restoring the database. diff --git a/doc/src/sgml/ref/create_domain.sgml b/doc/src/sgml/ref/create_domain.sgml index e4b856d630c..82a0b874929 100644 --- a/doc/src/sgml/ref/create_domain.sgml +++ b/doc/src/sgml/ref/create_domain.sgml @@ -234,7 +234,7 @@ INSERT INTO tab (domcol) VALUES ((SELECT domcol FROM tab WHERE false)); function. PostgreSQL does not disallow that, but it will not notice if there are stored values of the domain type that now violate the CHECK constraint. That would cause a - subsequent database dump and reload to fail. The recommended way to + subsequent database dump and restore to fail. The recommended way to handle such a change is to drop the constraint (using ALTER DOMAIN), adjust the function definition, and re-add the constraint, thereby rechecking it against stored data. diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 6a7cd8dff4f..956f97e2537 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -694,7 +694,7 @@ PostgreSQL documentation ...). This will make restoration very slow; it is mainly useful for making dumps that can be loaded into non-PostgreSQL databases. - Any error during reloading will cause only rows that are part of the + Any error during restoring will cause only rows that are part of the problematic INSERT to be lost, rather than the entire table contents. @@ -718,9 +718,9 @@ PostgreSQL documentation This option is relevant only when creating a data-only dump. It instructs pg_dump to include commands to temporarily disable triggers on the target tables while - the data is reloaded. Use this if you have referential + the data is restored. Use this if you have referential integrity checks or other triggers on the tables that you - do not want to invoke during data reload. + do not want to invoke during data restore. @@ -838,7 +838,7 @@ PostgreSQL documentation than COPY). This will make restoration very slow; it is mainly useful for making dumps that can be loaded into non-PostgreSQL databases. - Any error during reloading will cause only rows that are part of the + Any error during restoring will cause only rows that are part of the problematic INSERT to be lost, rather than the entire table contents. Note that the restore might fail altogether if you have rearranged column order. The @@ -857,12 +857,22 @@ PostgreSQL documentation target the root of the partitioning hierarchy that contains it, rather than the partition itself. This causes the appropriate partition to be re-determined for each row when the data is loaded. This may be - useful when reloading data on a server where rows do not always fall + useful when restoring data on a server where rows do not always fall into the same partitions as they did on the original server. That could happen, for example, if the partitioning column is of type text and the two systems have different definitions of the collation used to sort the partitioning column. + + + It is best not to use parallelism when restoring from an archive made + with this option, because pg_restore will + not know exactly which partition(s) a given archive data item will + load data into. This could result in inefficiency due to lock + conflicts between parallel jobs, or perhaps even restore failures due + to foreign key constraints being set up before all the relevant data + is loaded. + @@ -1021,7 +1031,7 @@ PostgreSQL documentation Dump data as INSERT commands (rather than COPY). Controls the maximum number of rows per INSERT command. The value specified must be a - number greater than zero. Any error during reloading will cause only + number greater than zero. Any error during restoring will cause only rows that are part of the problematic INSERT to be lost, rather than the entire table contents. diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 5bde886c453..ae632f739cd 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -310,9 +310,9 @@ PostgreSQL documentation This option is relevant only when creating a data-only dump. It instructs pg_dumpall to include commands to temporarily disable triggers on the target tables while - the data is reloaded. Use this if you have referential + the data is restored. Use this if you have referential integrity checks or other triggers on the tables that you - do not want to invoke during data reload. + do not want to invoke during data restore. @@ -389,7 +389,7 @@ PostgreSQL documentation target the root of the partitioning hierarchy that contains it, rather than the partition itself. This causes the appropriate partition to be re-determined for each row when the data is loaded. This may be - useful when reloading data on a server where rows do not always fall + useful when restoring data on a server where rows do not always fall into the same partitions as they did on the original server. That could happen, for example, if the partitioning column is of type text and the two systems have different definitions of the collation used @@ -549,7 +549,7 @@ PostgreSQL documentation Dump data as INSERT commands (rather than COPY). Controls the maximum number of rows per INSERT command. The value specified must be a - number greater than zero. Any error during reloading will cause only + number greater than zero. Any error during restoring will cause only rows that are part of the problematic INSERT to be lost, rather than the entire table contents. @@ -824,7 +824,7 @@ PostgreSQL documentation - To reload database(s) from this file, you can use: + To restore database(s) from this file, you can use: $ psql -f db.out postgres diff --git a/doc/src/sgml/ref/pg_resetwal.sgml b/doc/src/sgml/ref/pg_resetwal.sgml index 3e4882cdc65..fd539f56043 100644 --- a/doc/src/sgml/ref/pg_resetwal.sgml +++ b/doc/src/sgml/ref/pg_resetwal.sgml @@ -55,7 +55,7 @@ PostgreSQL documentation After running this command, it should be possible to start the server, but bear in mind that the database might contain inconsistent data due to partially-committed transactions. You should immediately dump your data, - run initdb, and reload. After reload, check for + run initdb, and restore. After restore, check for inconsistencies and repair as needed. @@ -78,7 +78,7 @@ PostgreSQL documentation discussed below. If you are not able to determine correct values for all these fields, can still be used, but the recovered database must be treated with even more suspicion than - usual: an immediate dump and reload is imperative. Do not + usual: an immediate dump and restore is imperative. Do not execute any data-modifying operations in the database before you dump, as any such action is likely to make the corruption worse. diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 93ea937ac8e..1b56a4afb36 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -538,9 +538,9 @@ PostgreSQL documentation This option is relevant only when performing a data-only restore. It instructs pg_restore to execute commands to temporarily disable triggers on the target tables while - the data is reloaded. Use this if you have referential + the data is restored. Use this if you have referential integrity checks or other triggers on the tables that you - do not want to invoke during data reload. + do not want to invoke during data restore. @@ -958,7 +958,7 @@ CREATE DATABASE foo WITH TEMPLATE template0; - To reload the dump into a new database called newdb: + To restore the dump into a new database called newdb: $ createdb -T template0 newdb diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index c1315f8f9a3..6069063b481 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -39,7 +39,7 @@ PostgreSQL documentation pg_upgrade (formerly called pg_migrator) allows data stored in PostgreSQL data files to be upgraded to a later PostgreSQL - major version without the data dump/reload typically required for + major version without the data dump/restore typically required for major version upgrades, e.g., from 9.5.8 to 9.6.4 or from 10.7 to 11.2. It is not required for minor version upgrades, e.g., from 9.6.2 to 9.6.3 or from 10.1 to 10.2. @@ -415,7 +415,7 @@ NET STOP postgresql-&majorversion; The option allows multiple CPU cores to be used - for copying/linking of files and to dump and reload database schemas + for copying/linking of files and to dump and restore database schemas in parallel; a good place to start is the maximum of the number of CPU cores and tablespaces. This option can dramatically reduce the time to upgrade a multi-database server running on a multiprocessor diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index cf2630c3fc3..375644059db 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1678,7 +1678,7 @@ $ kill -INT `head -1 /usr/local/pgsql/data/postmaster.pid`major releases of PostgreSQL, the internal data storage format is subject to change, thus complicating upgrades. The traditional method for moving data to a new major version - is to dump and reload the database, though this can be slow. A + is to dump and restore the database, though this can be slow. A faster method is . Replication methods are also available, as discussed below. (If you are using a pre-packaged version @@ -1764,7 +1764,7 @@ $ kill -INT `head -1 /usr/local/pgsql/data/postmaster.pid` One upgrade method is to dump data from one major version of - PostgreSQL and reload it in another — to do + PostgreSQL and restore it in another — to do this, you must use a logical backup tool like pg_dumpall; file system level backup methods will not work. (There are checks in place that prevent diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 6afaf9e62c4..fbe049f0636 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -1974,7 +1974,7 @@ CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE explicitly when creating tsvector values inside triggers, so that the column's contents will not be affected by changes to default_text_search_config. Failure to do this is likely to - lead to problems such as search results changing after a dump and reload. + lead to problems such as search results changing after a dump and restore. From c20fe1da4bf2664ce66d7857b155c85c595d2291 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 21 Jul 2022 22:52:50 +0900 Subject: [PATCH 41/47] postgres_fdw: Fix bug in checking of return value of PQsendQuery(). When postgres_fdw begins an asynchronous data fetch, it submits FETCH query by using PQsendQuery(). If PQsendQuery() fails and returns 0, postgres_fdw should report an error. But, previously, postgres_fdw reported an error only when the return value is less than 0, though PQsendQuery() never return the values other than 0 and 1. Therefore postgres_fdw could not handle the failure to send FETCH query in an asynchronous data fetch. This commit fixes postgres_fdw so that it reports an error when PQsendQuery() returns 0. Back-patch to v14 where asynchronous execution was supported in postgres_fdw. Author: Fujii Masao Reviewed-by: Japin Li, Tom Lane Discussion: https://postgr.es/m/b187a7cf-d4e3-5a32-4d01-8383677797f3@oss.nttdata.com --- contrib/postgres_fdw/postgres_fdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index fbbd867c239..58599c7aeaa 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -7167,7 +7167,7 @@ fetch_more_data_begin(AsyncRequest *areq) snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", fsstate->fetch_size, fsstate->cursor_number); - if (PQsendQuery(fsstate->conn, sql) < 0) + if (!PQsendQuery(fsstate->conn, sql)) pgfdw_report_error(ERROR, NULL, fsstate->conn, false, fsstate->query); /* Remember that the request is in process */ From aa7ab626bc403eb23ff875b02d047279469debb4 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Fri, 22 Jul 2022 16:57:12 +1200 Subject: [PATCH 42/47] Fix get_dirent_type() for Windows junction points. Commit 87e6ed7c8 added code that intended to report Windows "junction points" as DT_LNK (the same way we report symlinks on Unix). Windows junction points are *also* directories according to the Windows attributes API, and we were reporting them as as DT_DIR. Change the order we check the attribute flags, to prioritize DT_LNK. If at some point we start using Windows' recently added real symlinks and need to distinguish them from junction points, we may need to rethink this, but for now this continues the tradition of wrapper functions that treat junction points as symlinks. Back-patch to 14, where get_dirent_type() landed. Reviewed-by: Michael Paquier Reviewed-by: Alvaro Herrera Discussion: https://postgr.es/m/CA%2BhUKGLzLK4PUPx0_AwXEWXOYAejU%3D7XpxnYE55Y%2Be7hB2N3FA%40mail.gmail.com Discussion: https://postgr.es/m/20220721111751.x7hod2xgrd76xr5c%40alvherre.pgsql --- src/port/dirent.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/port/dirent.c b/src/port/dirent.c index 77b90e7e302..2cd134495ff 100644 --- a/src/port/dirent.c +++ b/src/port/dirent.c @@ -106,13 +106,17 @@ readdir(DIR *d) } strcpy(d->ret.d_name, fd.cFileName); /* Both strings are MAX_PATH long */ d->ret.d_namlen = strlen(d->ret.d_name); - /* The only identified types are: directory, regular file or symbolic link */ - if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) - d->ret.d_type = DT_DIR; - /* For reparse points dwReserved0 field will contain the ReparseTag */ - else if ((fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0 && - (fd.dwReserved0 == IO_REPARSE_TAG_MOUNT_POINT)) + + /* + * For reparse points dwReserved0 field will contain the ReparseTag. We + * check this first, because reparse points are also reported as + * directories. + */ + if ((fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0 && + (fd.dwReserved0 == IO_REPARSE_TAG_MOUNT_POINT)) d->ret.d_type = DT_LNK; + else if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) + d->ret.d_type = DT_DIR; else d->ret.d_type = DT_REG; From 33682444408385ccb77de81d9c50826dad9c5a1d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 23 Jul 2022 19:00:30 -0400 Subject: [PATCH 43/47] Doc: improve documentation about random(). We didn't explicitly say that random() uses a randomly-chosen seed if you haven't called setseed(). Do so. Also, remove ref/set.sgml's no-longer-accurate (and never very relevant) statement that the seed value is multiplied by 2^31-1. Back-patch to v12 where set.sgml's claim stopped being true. The claim that we use a source of random bits as seed was debatable before 4203842a1, too, so v12 seems like a good place to stop. Per question from Carl Sopchak. Discussion: https://postgr.es/m/f37bb937-9d99-08f0-4de7-80c91a3cfc2e@sopchak.me --- doc/src/sgml/func.sgml | 3 +++ doc/src/sgml/ref/set.sgml | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ceb09d788cc..16ad120dd23 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1840,6 +1840,9 @@ repeat('Pg', 4) PgPgPgPg subsequent random() calls in the current session can be repeated by re-issuing setseed() with the same argument. + Without any prior setseed() call in the same + session, the first random() call obtains a seed + from a platform-dependent source of random bits. diff --git a/doc/src/sgml/ref/set.sgml b/doc/src/sgml/ref/set.sgml index 339ee9eec94..c4aab56a2d3 100644 --- a/doc/src/sgml/ref/set.sgml +++ b/doc/src/sgml/ref/set.sgml @@ -175,8 +175,7 @@ SET [ SESSION | LOCAL ] TIME ZONE { timezone Sets the internal seed for the random number generator (the function random). Allowed values are - floating-point numbers between -1 and 1, which are then - multiplied by 231-1. + floating-point numbers between -1 and 1 inclusive. From 04b05b3928e8862d32365108887da95a25232d27 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 25 Jul 2022 08:48:38 +0300 Subject: [PATCH 44/47] Fix ReadRecentBuffer for local buffers. It incorrectly used GetBufferDescriptor instead of GetLocalBufferDescriptor, causing it to not find the correct buffer in most cases, and performing an out-of-bounds memory read in the corner case that temp_buffers > shared_buffers. It also bumped the usage-count on the buffer, even if it was previously pinned. That won't lead to crashes or incorrect results, but it's different from what the shared-buffer case does, and different from the usual code in LocalBufferAlloc. Fix that too, and make the code ordering match LocalBufferAlloc() more closely, so that it's easier to verify that it's doing the same thing. Currently, ReadRecentBuffer() is only used with non-temp relations, in WAL redo, so the broken code is currently dead code. However, it could be used by extensions. Backpatch-through: 14 Discussion: https://www.postgresql.org/message-id/2d74b46f-27c9-fb31-7f99-327a87184cc0%40iki.fi Reviewed-by: Thomas Munro, Zhang Mingli, Richard Guo --- src/backend/storage/buffer/bufmgr.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6d0afd34356..dd16c3df60a 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -718,18 +718,28 @@ ReadRecentBuffer(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, if (BufferIsLocal(recent_buffer)) { - bufHdr = GetBufferDescriptor(-recent_buffer - 1); + int b = -recent_buffer - 1; + + bufHdr = GetLocalBufferDescriptor(b); buf_state = pg_atomic_read_u32(&bufHdr->state); /* Is it still valid and holding the right tag? */ if ((buf_state & BM_VALID) && BUFFERTAGS_EQUAL(tag, bufHdr->tag)) { - /* Bump local buffer's ref and usage counts. */ + /* + * Bump buffer's ref and usage counts. This is equivalent of + * PinBuffer for a shared buffer. + */ + if (LocalRefCount[b] == 0) + { + if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT) + { + buf_state += BUF_USAGECOUNT_ONE; + pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + } + } + LocalRefCount[b]++; ResourceOwnerRememberBuffer(CurrentResourceOwner, recent_buffer); - LocalRefCount[-recent_buffer - 1]++; - if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT) - pg_atomic_write_u32(&bufHdr->state, - buf_state + BUF_USAGECOUNT_ONE); return true; } From 07199497dac65c768cdebcbe27b90e82f25a7d21 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 27 Jul 2022 07:55:13 +0200 Subject: [PATCH 45/47] Allow "in place" tablespaces. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a backpatch to branches 10-14 of the following commits: 7170f2159fb2 Allow "in place" tablespaces. c6f2f01611d4 Fix pg_basebackup with in-place tablespaces. f6f0db4d6240 Fix pg_tablespace_location() with in-place tablespaces 7a7cd84893e0 doc: Remove mention to in-place tablespaces for pg_tablespace_location() 5344723755bd Remove unnecessary Windows-specific basebackup code. In-place tablespaces were introduced as a testing helper mechanism, but they are going to be used for a bugfix in WAL replay to be backpatched to all stable branches. I (Álvaro) had to adjust some code to account for lack of get_dirent_type() in branches prior to 14. Author: Thomas Munro Author: Michaël Paquier Author: Álvaro Herrera Discussion: https://postgr.es/m/20220722081858.omhn2in5zt3g4nek@alvherre.pgsql --- doc/src/sgml/config.sgml | 19 +++++++++++++++ src/backend/access/transam/xlog.c | 8 +++++++ src/backend/commands/tablespace.c | 39 +++++++++++++++++++++++++------ src/backend/utils/adt/misc.c | 29 +++++++++++++++++++++++ src/backend/utils/misc/guc.c | 12 ++++++++++ src/include/commands/tablespace.h | 2 ++ 6 files changed, 102 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index bd61286e042..bc3d0d1bd14 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -10458,6 +10458,25 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' + + allow_in_place_tablespaces (boolean) + + allow_in_place_tablespaces configuration parameter + + + + + Allows tablespaces to be created as directories inside + pg_tblspc, when an empty location string + is provided to the CREATE TABLESPACE command. This + is intended to allow testing replication scenarios where primary and + standby servers are running on the same machine. Such directories + are likely to confuse backup tools that expect to find only symbolic + links in that location. Only superusers can change this setting. + + + + allow_system_table_mods (boolean) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index cff69879aa1..07831e9b098 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -11918,6 +11918,14 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); + /* + * Skip anything that isn't a symlink/junction. For testing only, + * we sometimes use allow_in_place_tablespaces to create + * directories directly under pg_tblspc, which would fail below. + */ + if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK) + continue; + #if defined(HAVE_READLINK) || defined(WIN32) rllen = readlink(fullpath, linkpath, sizeof(linkpath)); if (rllen < 0) diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 9175ebfb5ba..3d7d040c462 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -113,6 +113,7 @@ /* GUC variables */ char *default_tablespace = NULL; char *temp_tablespaces = NULL; +bool allow_in_place_tablespaces = false; static void create_tablespace_directories(const char *location, @@ -295,6 +296,7 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) Datum newOptions; List *nonContentOptions = NIL; char *fileHandler = NULL; + bool in_place; /* Must be super user */ if (!superuser()) @@ -362,12 +364,15 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) (errcode(ERRCODE_INVALID_NAME), errmsg("tablespace location cannot contain single quotes"))); + in_place = allow_in_place_tablespaces && strlen(location) == 0; + /* * Allowing relative paths seems risky * - * this also helps us ensure that location is not empty or whitespace + * This also helps us ensure that location is not empty or whitespace, + * unless specifying a developer-only in-place tablespace. */ - if (!is_absolute_path(location)) + if (!in_place && !is_absolute_path(location)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location must be an absolute path"))); @@ -862,20 +867,40 @@ create_tablespace_directories(const char *location, const Oid tablespaceoid) char *location_with_dbid_dir; char *location_with_version_dir; struct stat st; + bool in_place; elog(DEBUG5, "creating tablespace directories for tablespaceoid %d on dbid %d", tablespaceoid, GpIdentity.dbid); linkloc = psprintf("pg_tblspc/%u", tablespaceoid); + + /* + * If we're asked to make an 'in place' tablespace, create the directory + * directly where the symlink would normally go. This is a developer-only + * option for now, to facilitate regression testing. + */ + in_place = strlen(location) == 0; + + if (in_place) + { + if (MakePGDirectory(linkloc) < 0 && errno != EEXIST) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + linkloc))); + } + location_with_dbid_dir = psprintf("%s/%d", location, GpIdentity.dbid); - location_with_version_dir = psprintf("%s/%s", location_with_dbid_dir, + location_with_version_dir = psprintf("%s/%s", in_place ? linkloc : location_with_dbid_dir, GP_TABLESPACE_VERSION_DIRECTORY); /* * Attempt to coerce target directory to safe permissions. If this fails, - * it doesn't exist or has the wrong owner. + * it doesn't exist or has the wrong owner. Not needed for in-place mode, + * because in that case we created the directory with the desired + * permissions. */ - if (chmod(location, pg_dir_create_mode) != 0) + if (!in_place && chmod(location, pg_dir_create_mode) != 0) { if (errno == ENOENT) ereport(ERROR, @@ -949,13 +974,13 @@ create_tablespace_directories(const char *location, const Oid tablespaceoid) /* * In recovery, remove old symlink, in case it points to the wrong place. */ - if (InRecovery) + if (!in_place && InRecovery) remove_tablespace_symlink(linkloc); /* * Create the symlink under PGDATA */ - if (symlink(location_with_dbid_dir, linkloc) < 0) + if (!in_place && symlink(location_with_dbid_dir, linkloc) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create symbolic link \"%s\": %m", diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index eb8877fd7e8..eba029daa74 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -15,6 +15,7 @@ #include "postgres.h" #include +#include #include #include #include @@ -312,6 +313,9 @@ pg_tablespace_location(PG_FUNCTION_ARGS) char sourcepath[MAXPGPATH]; char targetpath[MAXPGPATH]; int rllen; +#ifndef WIN32 + struct stat st; +#endif /* * It's useful to apply this function to pg_class.reltablespace, wherein @@ -336,6 +340,31 @@ pg_tablespace_location(PG_FUNCTION_ARGS) */ snprintf(sourcepath, sizeof(sourcepath), "pg_tblspc/%u", tablespaceOid); + /* + * Before reading the link, check if the source path is a link or a + * junction point. Note that a directory is possible for a tablespace + * created with allow_in_place_tablespaces enabled. If a directory is + * found, a relative path to the data directory is returned. + */ +#ifdef WIN32 + if (!pgwin32_is_junction(sourcepath)) + PG_RETURN_TEXT_P(cstring_to_text(sourcepath)); +#else + if (lstat(sourcepath, &st) < 0) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + sourcepath))); + } + + if (!S_ISLNK(st.st_mode)) + PG_RETURN_TEXT_P(cstring_to_text(sourcepath)); +#endif + + /* + * In presence of a link or a junction point, return the path pointing to. + */ rllen = readlink(sourcepath, targetpath, sizeof(targetpath)); if (rllen < 0) ereport(ERROR, diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 154d6e39737..453068d4c94 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -51,6 +51,7 @@ #include "catalog/index.h" #include "commands/async.h" #include "commands/prepare.h" +#include "commands/tablespace.h" #include "commands/trigger.h" #include "commands/user.h" #include "commands/vacuum.h" @@ -2045,6 +2046,17 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"allow_in_place_tablespaces", PGC_SUSET, DEVELOPER_OPTIONS, + gettext_noop("Allows tablespaces directly inside pg_tblspc, for testing."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &allow_in_place_tablespaces, + false, + NULL, NULL, NULL + }, + { {"lo_compat_privileges", PGC_SUSET, COMPAT_OPTIONS_PREVIOUS, gettext_noop("Enables backward compatibility mode for privilege checks on large objects."), diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h index 1f41964cf75..fe13c5d75d7 100644 --- a/src/include/commands/tablespace.h +++ b/src/include/commands/tablespace.h @@ -20,6 +20,8 @@ #include "nodes/parsenodes.h" #include "storage/dbdirnode.h" +extern bool allow_in_place_tablespaces; + /* XLOG stuff */ #define XLOG_TBLSPC_CREATE 0x00 #define XLOG_TBLSPC_DROP 0x10 From e0c14723c7088d922a6d87a6241e55be178511b3 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 28 Jul 2022 14:13:37 +1200 Subject: [PATCH 46/47] Fix get_dirent_type() for symlinks on MinGW/MSYS. On Windows with MSVC, get_dirent_type() was recently made to return DT_LNK for junction points by commit 9d3444dc, which fixed some defective dirent.c code. On Windows with Cygwin, get_dirent_type() already worked for symlinks, as it does on POSIX systems, because Cygwin has its own fake symlinks that behave like POSIX (on closer inspection, Cygwin's dirent has the BSD d_type extension but it's probably always DT_UNKNOWN, so we fall back to lstat(), which understands Cygwin symlinks with S_ISLNK()). On Windows with MinGW/MSYS, we need extra code, because the MinGW runtime has its own readdir() without d_type, and the lstat()-based fallback has no knowledge of our convention for treating junctions as symlinks. Back-patch to 14, where get_dirent_type() landed. Reported-by: Andrew Dunstan Discussion: https://postgr.es/m/b9ddf605-6b36-f90d-7c30-7b3e95c46276%40dunslane.net --- src/common/file_utils.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 40b73bbe1ab..fd2d11375c9 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -465,5 +465,21 @@ get_dirent_type(const char *path, #endif } +#if defined(WIN32) && !defined(_MSC_VER) + + /* + * If we're on native Windows (not Cygwin, which has its own POSIX + * symlinks), but not using the MSVC compiler, then we're using a + * readdir() emulation provided by the MinGW runtime that has no d_type. + * Since the lstat() fallback code reports junction points as directories, + * we need an extra system call to check if we should report them as + * symlinks instead, following our convention. + */ + if (result == PGFILETYPE_DIR && + !look_through_symlinks && + pgwin32_is_junction(path)) + result = PGFILETYPE_LNK; +#endif + return result; } From 06cc38c71576efdc709a9b470de0926701eb666f Mon Sep 17 00:00:00 2001 From: reshke Date: Sat, 21 Feb 2026 21:10:47 +0000 Subject: [PATCH 47/47] place allow_in_place_tablespaces in sync_guc_name And other fixes for rebase --- .../src/test/regress/expected/create_view.out | 23 +++++++++++------ .../expected/create_view_optimizer.out | 23 +++++++++++------ .../src/test/regress/sql/create_view.sql | 4 ++- src/backend/utils/sort/qsort_interruptible.c | 2 ++ src/include/utils/sync_guc_name.h | 1 + .../expected/create_view_optimizer.out | 25 +++++++++++++------ .../expected/create_view.out | 25 +++++++++++++------ .../singlenode_regress/sql/create_view.sql | 6 +++-- 8 files changed, 76 insertions(+), 33 deletions(-) diff --git a/contrib/pax_storage/src/test/regress/expected/create_view.out b/contrib/pax_storage/src/test/regress/expected/create_view.out index d35d3a61066..077dc1afc91 100644 --- a/contrib/pax_storage/src/test/regress/expected/create_view.out +++ b/contrib/pax_storage/src/test/regress/expected/create_view.out @@ -1551,16 +1551,25 @@ select * from tt14v; begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); - pg_get_viewdef --------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + - FROM tt14f() t(f1, f3, f4); + pg_get_viewdef +--------------------------------- + SELECT t.f1, + + t."?dropped?column?" AS f3,+ + t.f4 + + FROM tt14f() t(f1, f4); (1 row) +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; + QUERY PLAN +---------------------------------------- + Function Scan on testviewschm2.tt14f t + Output: t.f1, t.f3, t.f4 + Function Call: tt14f() +(3 rows) + -- but will fail at execution select f1, f4 from tt14v; f1 | f4 diff --git a/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out b/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out index de91254a5ba..e19d51b0069 100755 --- a/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/create_view_optimizer.out @@ -1550,16 +1550,25 @@ select * from tt14v; begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); - pg_get_viewdef --------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + - FROM tt14f() t(f1, f3, f4); + pg_get_viewdef +--------------------------------- + SELECT t.f1, + + t."?dropped?column?" AS f3,+ + t.f4 + + FROM tt14f() t(f1, f4); (1 row) +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; + QUERY PLAN +---------------------------------------- + Function Scan on testviewschm2.tt14f t + Output: t.f1, t.f3, t.f4 + Function Call: tt14f() +(3 rows) + -- but will fail at execution select f1, f4 from tt14v; f1 | f4 diff --git a/contrib/pax_storage/src/test/regress/sql/create_view.sql b/contrib/pax_storage/src/test/regress/sql/create_view.sql index e1b013fe7a5..5cd91e5189e 100644 --- a/contrib/pax_storage/src/test/regress/sql/create_view.sql +++ b/contrib/pax_storage/src/test/regress/sql/create_view.sql @@ -533,8 +533,10 @@ begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; -- but will fail at execution select f1, f4 from tt14v; select * from tt14v; diff --git a/src/backend/utils/sort/qsort_interruptible.c b/src/backend/utils/sort/qsort_interruptible.c index f179b256248..66e018b991d 100644 --- a/src/backend/utils/sort/qsort_interruptible.c +++ b/src/backend/utils/sort/qsort_interruptible.c @@ -1,5 +1,7 @@ /* * qsort_interruptible.c: qsort_arg that includes CHECK_FOR_INTERRUPTS + * + * Portions Copyright (c) 2021-2026, PostgreSQL Global Development Group */ #include "postgres.h" diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 6d09f49155f..3a99016d813 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -10,6 +10,7 @@ "allow_dml_directory_table", "allow_segment_DML", "allow_system_table_mods", + "allow_in_place_tablespaces", "array_nulls", "backtrace_functions", "bytea_output", diff --git a/src/test/regress/expected/create_view_optimizer.out b/src/test/regress/expected/create_view_optimizer.out index 2123c0150c1..5719aea410c 100755 --- a/src/test/regress/expected/create_view_optimizer.out +++ b/src/test/regress/expected/create_view_optimizer.out @@ -1550,17 +1550,26 @@ select * from tt14v; begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); - pg_get_viewdef --------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + - FROM tt14f() t(f1, f3, f4); + pg_get_viewdef +--------------------------------- + SELECT t.f1, + + t."?dropped?column?" AS f3,+ + t.f4 + + FROM tt14f() t(f1, f4); (1 row) --- but will fail at execution +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; + QUERY PLAN +---------------------------------------- + Function Scan on testviewschm2.tt14f t + Output: t.f1, t.f3, t.f4 + Function Call: tt14f() +(3 rows) + +-- but it will fail at execution select f1, f4 from tt14v; f1 | f4 -----+---- diff --git a/src/test/singlenode_regress/expected/create_view.out b/src/test/singlenode_regress/expected/create_view.out index fdc9294f1f2..e70f8e788f3 100644 --- a/src/test/singlenode_regress/expected/create_view.out +++ b/src/test/singlenode_regress/expected/create_view.out @@ -1551,17 +1551,26 @@ select * from tt14v; begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); - pg_get_viewdef --------------------------------- - SELECT t.f1, + - t.f3, + - t.f4 + - FROM tt14f() t(f1, f3, f4); + pg_get_viewdef +--------------------------------- + SELECT t.f1, + + t."?dropped?column?" AS f3,+ + t.f4 + + FROM tt14f() t(f1, f4); (1 row) --- but will fail at execution +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; + QUERY PLAN +---------------------------------------- + Function Scan on testviewschm2.tt14f t + Output: t.f1, t.f3, t.f4 + Function Call: tt14f() +(3 rows) + +-- but it will fail at execution select f1, f4 from tt14v; f1 | f4 -----+---- diff --git a/src/test/singlenode_regress/sql/create_view.sql b/src/test/singlenode_regress/sql/create_view.sql index e1b013fe7a5..a47f81613c7 100644 --- a/src/test/singlenode_regress/sql/create_view.sql +++ b/src/test/singlenode_regress/sql/create_view.sql @@ -533,9 +533,11 @@ begin; -- this perhaps should be rejected, but it isn't: alter table tt14t drop column f3; --- f3 is still in the view ... +-- column f3 is still in the view, sort of ... select pg_get_viewdef('tt14v', true); --- but will fail at execution +-- ... and you can even EXPLAIN it ... +explain (verbose, costs off) select * from tt14v; +-- but it will fail at execution select f1, f4 from tt14v; select * from tt14v;