# Copyright (c) 2022-2025, PostgreSQL Global Development Group

# Set of tests for pg_upgrade, including cross-version checks.
use strict;
use warnings FATAL => 'all';

use Cwd            qw(abs_path);
use File::Basename qw(dirname);
use File::Find     qw(find);
use File::Path     qw(rmtree);

use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use PostgreSQL::Test::AdjustDump;
use PostgreSQL::Test::AdjustUpgrade;
use Test::More;

# Can be changed to test the other modes.
my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';

my $tempdir = PostgreSQL::Test::Utils::tempdir;

# Generate a database with a name made of a range of ASCII characters.
sub generate_db
{
	my ($node, $prefix, $from_char, $to_char, $suffix) = @_;

	my $dbname = $prefix;
	for my $i ($from_char .. $to_char)
	{
		next if $i == 7 || $i == 10 || $i == 13;    # skip BEL, LF, and CR
		$dbname = $dbname . sprintf('%c', $i);
	}

	$dbname .= $suffix;
	$node->command_ok(
		[ 'createdb', $dbname ],
		"created database with ASCII characters from $from_char to $to_char");
}

# Filter the contents of a dump before its use in a content comparison for
# upgrade testing. This returns the path to the filtered dump.
sub filter_dump
{
	my ($is_old, $old_version, $dump_file) = @_;
	my $dump_contents = slurp_file($dump_file);

	if ($is_old)
	{
		$dump_contents = adjust_old_dumpfile($old_version, $dump_contents);
	}
	else
	{
		$dump_contents = adjust_new_dumpfile($old_version, $dump_contents);
	}

	my $dump_file_filtered = "${dump_file}_filtered";
	open(my $dh, '>', $dump_file_filtered)
	  || die "opening $dump_file_filtered";
	print $dh $dump_contents;
	close($dh);

	return $dump_file_filtered;
}

# Dump database db from the given node in plain format and adjust it for
# comparing dumps from the original and the restored database.
#
# file_prefix is used to create unique names for all dump files so that they
# remain available for debugging in case the test fails.
#
# adjust_child_columns is passed to adjust_regress_dumpfile() which actually
# adjusts the dump output.
#
# The name of the file containing adjusted dump is returned.
sub get_dump_for_comparison
{
	my ($node, $db, $file_prefix, $adjust_child_columns) = @_;

	my $dumpfile = $tempdir . '/' . $file_prefix . '.sql';
	my $dump_adjusted = "${dumpfile}_adjusted";

	open(my $dh, '>', $dump_adjusted)
	  || die "could not open $dump_adjusted for writing $!";

	$node->run_log(
		[
			'pg_dump', '--no-sync',
			'-d' => $node->connstr($db),
			'-f' => $dumpfile
		]);

	print $dh adjust_regress_dumpfile(slurp_file($dumpfile),
		$adjust_child_columns);
	close($dh);

	return $dump_adjusted;
}

# The test of pg_upgrade requires two clusters, an old one and a new one
# that gets upgraded.  Before running the upgrade, a logical dump of the
# old cluster is taken, and a second logical dump of the new one is taken
# after the upgrade.  The upgrade test passes if there are no differences
# (after filtering) in these two dumps.

# Testing upgrades with an older version of PostgreSQL requires setting up
# two environment variables, as of:
# - "olddump", to point to a dump file that will be used to set up the old
#   instance to upgrade from.
# - "oldinstall", to point to the installation path of the old cluster.
if (   (defined($ENV{olddump}) && !defined($ENV{oldinstall}))
	|| (!defined($ENV{olddump}) && defined($ENV{oldinstall})))
{
	# Not all variables are defined, so leave and die if test is
	# done with an older installation.
	die "olddump or oldinstall is undefined";
}

# Paths to the dumps taken during the tests.
my $dump1_file = "$tempdir/dump1.sql";
my $dump2_file = "$tempdir/dump2.sql";

note "testing using transfer mode $mode";

# Initialize node to upgrade
my $oldnode =
  PostgreSQL::Test::Cluster->new('old_node',
	install_path => $ENV{oldinstall});

my %old_node_params = ();

# To increase coverage of non-standard segment size and group access without
# increasing test runtime, run these tests with a custom setting.
# --allow-group-access and --wal-segsize have been added in v11.
my @custom_opts = ();
if ($oldnode->pg_version >= 11)
{
	push @custom_opts, ('--wal-segsize', '1');
	push @custom_opts, '--allow-group-access';
}

my $old_provider_field;
my $old_datlocale_field;

# account for field additions and changes
if ($oldnode->pg_version >= 15)
{
	$old_provider_field = "datlocprovider";
	if ($oldnode->pg_version >= '17devel')
	{
		$old_datlocale_field = "datlocale";
	}
	else
	{
		$old_datlocale_field = "daticulocale AS datlocale";
	}
}
else
{
	$old_provider_field = "'c' AS datlocprovider";
	$old_datlocale_field = "NULL AS datlocale";
}

# Set up the locale settings for the original cluster, so that we
# can test that pg_upgrade copies the locale settings of template0
# from the old to the new cluster.

my $original_enc_name;
my $original_provider;
my $original_datcollate = "C";
my $original_datctype = "C";
my $original_datlocale;

if ($oldnode->pg_version >= '17devel')
{
	$original_enc_name = "UTF-8";
	$original_provider = "b";
	$original_datlocale = "C.UTF-8";
}
elsif ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
{
	$original_enc_name = "UTF-8";
	$original_provider = "i";
	$original_datlocale = "fr-CA";
}
else
{
	$original_enc_name = "SQL_ASCII";
	$original_provider = "c";
	$original_datlocale = "";
}

my %encodings = ('UTF-8' => 6, 'SQL_ASCII' => 0);
my $original_encoding = $encodings{$original_enc_name};

my @old_initdb_params = @custom_opts;

push @old_initdb_params, ('--encoding', $original_enc_name);
push @old_initdb_params, ('--lc-collate', $original_datcollate);
push @old_initdb_params, ('--lc-ctype', $original_datctype);

# add --locale-provider, if supported
my %provider_name = ('b' => 'builtin', 'i' => 'icu', 'c' => 'libc');
if ($oldnode->pg_version >= 15)
{
	push @old_initdb_params,
	  ('--locale-provider', $provider_name{$original_provider});
	if ($original_provider eq 'b')
	{
		push @old_initdb_params, ('--builtin-locale', $original_datlocale);
	}
	elsif ($original_provider eq 'i')
	{
		push @old_initdb_params, ('--icu-locale', $original_datlocale);
	}
}

# Since checksums are now enabled by default, and weren't before 18,
# pass '-k' to initdb on old versions so that upgrades work.
push @old_initdb_params, '-k' if $oldnode->pg_version < 18;

$old_node_params{extra} = \@old_initdb_params;
$oldnode->init(%old_node_params);
# Override log_statement=all set by Cluster.pm.  This avoids large amounts
# of log traffic that slow this test down even more when run under valgrind.
$oldnode->append_conf('postgresql.conf', 'log_statement = none');
$oldnode->start;

my $result;
$result = $oldnode->safe_psql(
	'postgres',
	"SELECT encoding, $old_provider_field, datcollate, datctype, $old_datlocale_field
                 FROM pg_database WHERE datname='template0'");
is( $result,
	"$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale",
	"check locales in original cluster");

# The default location of the source code is the root of this directory.
my $srcdir = abs_path("../../..");

# Set up the data of the old instance with a dump or pg_regress.
if (defined($ENV{olddump}))
{
	# Use the dump specified.
	my $olddumpfile = $ENV{olddump};
	die "no dump file found!" unless -e $olddumpfile;

	# Load the dump using the "postgres" database as "regression" does
	# not exist yet, and we are done here.
	$oldnode->command_ok(
		[ 'psql', '--no-psqlrc', '--file' => $olddumpfile, 'postgres' ],
		'loaded old dump file');
}
else
{
	# Default is to use pg_regress to set up the old instance.

	# Create databases with names covering most ASCII bytes.  The
	# first name exercises backslashes adjacent to double quotes, a
	# Windows special case.
	generate_db($oldnode, 'regression\\"\\', 1, 45, '\\\\"\\\\\\');
	generate_db($oldnode, 'regression', 46, 90, '');
	generate_db($oldnode, 'regression', 91, 127, '');

	# Grab any regression options that may be passed down by caller.
	my $extra_opts = $ENV{EXTRA_REGRESS_OPTS} || "";

	# --dlpath is needed to be able to find the location of regress.so
	# and any libraries the regression tests require.
	my $dlpath = dirname($ENV{REGRESS_SHLIB});

	# --outputdir points to the path where to place the output files.
	my $outputdir = $PostgreSQL::Test::Utils::tmp_check;

	# --inputdir points to the path of the input files.
	my $inputdir = "$srcdir/src/test/regress";

	note 'running regression tests in old instance';
	my $rc =
	  system($ENV{PG_REGRESS}
		  . " $extra_opts "
		  . "--dlpath=\"$dlpath\" "
		  . "--bindir= "
		  . "--host="
		  . $oldnode->host . " "
		  . "--port="
		  . $oldnode->port . " "
		  . "--schedule=$srcdir/src/test/regress/parallel_schedule "
		  . "--max-concurrent-tests=20 "
		  . "--inputdir=\"$inputdir\" "
		  . "--outputdir=\"$outputdir\"");
	if ($rc != 0)
	{
		# Dump out the regression diffs file, if there is one
		my $diffs = "$outputdir/regression.diffs";
		if (-e $diffs)
		{
			print "=== dumping $diffs ===\n";
			print slurp_file($diffs);
			print "=== EOF ===\n";
		}
	}
	is($rc, 0, 'regression tests pass');
}

# Initialize a new node for the upgrade.
my $newnode = PostgreSQL::Test::Cluster->new('new_node');

# The new cluster will be initialized with different locale settings,
# but these settings will be overwritten with those of the original
# cluster.
my %new_node_params = %old_node_params;
my @new_initdb_params = @custom_opts;
push @new_initdb_params, ('--encoding', 'SQL_ASCII');
push @new_initdb_params, ('--locale-provider', 'libc');
$new_node_params{extra} = \@new_initdb_params;
$newnode->init(%new_node_params);
# Avoid unnecessary log noise
$newnode->append_conf('postgresql.conf', 'log_statement = none');

# Stabilize stats for comparison.
$newnode->append_conf('postgresql.conf', 'autovacuum = off');

my $newbindir = $newnode->config_data('--bindir');
my $oldbindir = $oldnode->config_data('--bindir');

# Before dumping, get rid of objects not existing or not supported in later
# versions. This depends on the version of the old server used, and matters
# only if different major versions are used for the dump.
if (defined($ENV{oldinstall}))
{
	# Consult AdjustUpgrade to find out what we need to do.
	my $dbnames =
	  $oldnode->safe_psql('postgres', qq(SELECT datname FROM pg_database));
	my %dbnames;
	do { $dbnames{$_} = 1; }
	  foreach split /\s+/s, $dbnames;
	my $adjust_cmds =
	  adjust_database_contents($oldnode->pg_version, %dbnames);

	foreach my $updb (keys %$adjust_cmds)
	{
		my @command_args = ();
		for my $upcmd (@{ $adjust_cmds->{$updb} })
		{
			push @command_args, '--command' => $upcmd;
		}

		# For simplicity, use the newer version's psql to issue the commands.
		$newnode->command_ok(
			[
				'psql', '--no-psqlrc',
				'--set' => 'ON_ERROR_STOP=1',
				'--dbname' => $oldnode->connstr($updb),
				@command_args,
			],
			"ran version adaptation commands for database $updb");
	}
}

# Stabilize stats before pg_dump / pg_dumpall. Doing it after initializing
# the new node gives enough time for autovacuum to update statistics on the
# old node.
$oldnode->append_conf('postgresql.conf', 'autovacuum = off');
$oldnode->restart;

# Test that dump/restore of the regression database roundtrips cleanly.  This
# doesn't work well when the nodes are different versions, so skip it in that
# case.  Note that this isn't a pg_upgrade test, but it's convenient to do it
# here because we've gone to the trouble of creating the regression database.
#
# Do this while the old cluster is running before it is shut down by the
# upgrade test but after turning its autovacuum off for stable statistics.
SKIP:
{
	my $dstnode = PostgreSQL::Test::Cluster->new('dst_node');

	skip "different Postgres versions"
	  if ($oldnode->pg_version != $dstnode->pg_version);
	skip "source node not using default install"
	  if (defined $oldnode->install_path);

	# Setup destination database cluster with the same configuration as the
	# source cluster to avoid any differences between dumps taken from both the
	# clusters caused by differences in their configurations.
	$dstnode->init(%old_node_params);
	# Avoid unnecessary log noise
	$dstnode->append_conf('postgresql.conf', 'log_statement = none');
	# Stabilize stats for comparison.
	$dstnode->append_conf('postgresql.conf', 'autovacuum = off');
	$dstnode->start;

	# Use --create in dump and restore commands so that the restored
	# database has the same configurable variable settings as the original
	# database so that the dumps taken from both databases taken do not
	# differ because of locale changes. Additionally this provides test
	# coverage for --create option.
	#
	# Use directory format so that we can use parallel dump/restore.
	my $dump_file = "$tempdir/regression.dump";
	$oldnode->command_ok(
		[
			'pg_dump', '-Fd', '-j2', '--no-sync',
			'-d' => $oldnode->connstr('regression'),
			'--create', '-f' => $dump_file
		],
		'pg_dump on source instance');

	$dstnode->command_ok(
		[ 'pg_restore', '--create', '-j2', '-d' => 'postgres', $dump_file ],
		'pg_restore to destination instance');

	# Dump original and restored database for comparison.
	my $src_dump =
	  get_dump_for_comparison($oldnode, 'regression', 'src_dump', 1);
	my $dst_dump =
	  get_dump_for_comparison($dstnode, 'regression', 'dest_dump', 0);

	compare_files($src_dump, $dst_dump,
		'dump outputs from original and restored regression databases match');
}

# Take a dump before performing the upgrade as a base comparison. Note
# that we need to use pg_dumpall from the new node here.
my @dump_command = (
	'pg_dumpall', '--no-sync',
	'--dbname' => $oldnode->connstr('postgres'),
	'--file' => $dump1_file);
# --extra-float-digits is needed when upgrading from a version older than 11.
push(@dump_command, '--extra-float-digits', '0')
  if ($oldnode->pg_version < 12);
$newnode->command_ok(\@dump_command, 'dump before running pg_upgrade');

# After dumping, update references to the old source tree's regress.so
# to point to the new tree.
if (defined($ENV{oldinstall}))
{
	# First, fetch all the references to libraries that are not part
	# of the default path $libdir.
	my $output = $oldnode->safe_psql('regression',
		"SELECT DISTINCT probin::text FROM pg_proc WHERE probin NOT LIKE '\$libdir%';"
	);
	chomp($output);
	my @libpaths = split("\n", $output);

	my $dump_data = slurp_file($dump1_file);

	my $newregresssrc = dirname($ENV{REGRESS_SHLIB});
	foreach (@libpaths)
	{
		my $libpath = $_;
		$libpath = dirname($libpath);
		$dump_data =~ s/$libpath/$newregresssrc/g;
	}

	open my $fh, ">", $dump1_file or die "could not open dump file";
	print $fh $dump_data;
	close $fh;

	# This replaces any references to the old tree's regress.so
	# the new tree's regress.so.  Any references that do *not*
	# match $libdir are switched so as this request does not
	# depend on the path of the old source tree.  This is useful
	# when using an old dump.  Do the operation on all the databases
	# that allow connections so as this includes the regression
	# database and anything the user has set up.
	$output = $oldnode->safe_psql('postgres',
		"SELECT datname FROM pg_database WHERE datallowconn;");
	chomp($output);
	my @datnames = split("\n", $output);
	foreach (@datnames)
	{
		my $datname = $_;
		$oldnode->safe_psql(
			$datname, "UPDATE pg_proc SET probin =
		  regexp_replace(probin, '.*/', '$newregresssrc/')
		  WHERE probin NOT LIKE '\$libdir/%'");
	}
}

# Create an invalid database, will be deleted below
$oldnode->safe_psql(
	'postgres', qq(
  CREATE DATABASE regression_invalid;
  UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
));

# In a VPATH build, we'll be started in the source directory, but we want
# to run pg_upgrade in the build directory so that any files generated finish
# in it, like delete_old_cluster.{sh,bat}.
chdir ${PostgreSQL::Test::Utils::tmp_check};

# Upgrade the instance.
$oldnode->stop;

# Cause a failure at the start of pg_upgrade, this should create the logging
# directory pg_upgrade_output.d but leave it around.  Keep --check for an
# early exit.
command_checks_all(
	[
		'pg_upgrade', '--no-sync',
		'--old-datadir' => $oldnode->data_dir,
		'--new-datadir' => $newnode->data_dir,
		'--old-bindir' => $oldbindir . '/does/not/exist/',
		'--new-bindir' => $newbindir,
		'--socketdir' => $newnode->host,
		'--old-port' => $oldnode->port,
		'--new-port' => $newnode->port,
		$mode, '--check',
	],
	1,
	[qr{check for ".*?does/not/exist" failed}],
	[],
	'run of pg_upgrade --check for new instance with incorrect binary path');
ok(-d $newnode->data_dir . "/pg_upgrade_output.d",
	"pg_upgrade_output.d/ not removed after pg_upgrade failure");
rmtree($newnode->data_dir . "/pg_upgrade_output.d");

# Check that pg_upgrade aborts when encountering an invalid database
# (However, versions that were out of support by commit c66a7d75e652 don't
# know how to do this, so skip this test there.)
SKIP:
{
	skip "database invalidation not implemented", 1
	  if $oldnode->pg_version < 11;

	command_checks_all(
		[
			'pg_upgrade', '--no-sync',
			'--old-datadir' => $oldnode->data_dir,
			'--new-datadir' => $newnode->data_dir,
			'--old-bindir' => $oldbindir,
			'--new-bindir' => $newbindir,
			'--socketdir' => $newnode->host,
			'--old-port' => $oldnode->port,
			'--new-port' => $newnode->port,
			$mode, '--check',
		],
		1,
		[qr/datconnlimit/],
		[qr/^$/],
		'invalid database causes failure');
	rmtree($newnode->data_dir . "/pg_upgrade_output.d");
}

# And drop it, so we can continue
$oldnode->start;
$oldnode->safe_psql('postgres', 'DROP DATABASE regression_invalid');
$oldnode->stop;

# --check command works here, cleans up pg_upgrade_output.d.
command_ok(
	[
		'pg_upgrade', '--no-sync',
		'--old-datadir' => $oldnode->data_dir,
		'--new-datadir' => $newnode->data_dir,
		'--old-bindir' => $oldbindir,
		'--new-bindir' => $newbindir,
		'--socketdir' => $newnode->host,
		'--old-port' => $oldnode->port,
		'--new-port' => $newnode->port,
		$mode, '--check',
	],
	'run of pg_upgrade --check for new instance');
ok(!-d $newnode->data_dir . "/pg_upgrade_output.d",
	"pg_upgrade_output.d/ removed after pg_upgrade --check success");

# Actual run, pg_upgrade_output.d is removed at the end.
command_ok(
	[
		'pg_upgrade', '--no-sync',
		'--old-datadir' => $oldnode->data_dir,
		'--new-datadir' => $newnode->data_dir,
		'--old-bindir' => $oldbindir,
		'--new-bindir' => $newbindir,
		'--socketdir' => $newnode->host,
		'--old-port' => $oldnode->port,
		'--new-port' => $newnode->port,
		$mode,
	],
	'run of pg_upgrade for new instance');
ok( !-d $newnode->data_dir . "/pg_upgrade_output.d",
	"pg_upgrade_output.d/ removed after pg_upgrade success");

$newnode->start;

# Check if there are any logs coming from pg_upgrade, that would only be
# retained on failure.
my $log_path = $newnode->data_dir . "/pg_upgrade_output.d";
if (-d $log_path)
{
	my @log_files;
	find(
		sub {
			push @log_files, $File::Find::name
			  if $File::Find::name =~ m/.*\.log/;
		},
		$newnode->data_dir . "/pg_upgrade_output.d");

	my $test_logfile = $PostgreSQL::Test::Utils::test_logfile;

	note "=== pg_upgrade logs found - appending to $test_logfile ===\n";
	foreach my $log (@log_files)
	{
		note "=== appending $log ===\n";
		print "=== contents of $log ===\n";
		print slurp_file($log);
		print "=== EOF ===\n";
	}
}

# Test that upgraded cluster has original locale settings.
$result = $newnode->safe_psql(
	'postgres',
	"SELECT encoding, datlocprovider, datcollate, datctype, datlocale
                 FROM pg_database WHERE datname='template0'");
is( $result,
	"$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale",
	"check that locales in new cluster match original cluster");

# Second dump from the upgraded instance.
@dump_command = (
	'pg_dumpall', '--no-sync',
	'--dbname' => $newnode->connstr('postgres'),
	'--file' => $dump2_file);
# --extra-float-digits is needed when upgrading from a version older than 11.
push(@dump_command, '--extra-float-digits' => '0')
  if ($oldnode->pg_version < 12);
$newnode->command_ok(\@dump_command, 'dump after running pg_upgrade');

# Filter the contents of the dumps.
my $dump1_filtered = filter_dump(1, $oldnode->pg_version, $dump1_file);
my $dump2_filtered = filter_dump(0, $oldnode->pg_version, $dump2_file);

# Compare the two dumps, there should be no differences.
compare_files($dump1_filtered, $dump2_filtered,
	'old and new dumps match after pg_upgrade');

done_testing();
