
# Copyright (c) 2024-2025, PostgreSQL Global Development Group

use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Time::HiRes qw(usleep);
use Test::More;

##################################################
# Test race condition when a restart point is running during a promotion,
# checking that WAL segments are correctly removed in the restart point
# while the promotion finishes.
#
# This test relies on an injection point that causes the checkpointer to
# wait in the middle of a restart point on a standby.  The checkpointer
# is woken up to finish its restart point only once the promotion of the
# standby is completed, and the node should be able to restart properly.
##################################################

# Skip the whole test unless the build advertises injection point support
# through the enable_injection_points environment variable.  The variable
# may be absent from the environment altogether; as this script enables
# "use warnings FATAL => 'all'", comparing an undefined value with "ne"
# would abort the script instead of skipping it, hence the defined-or
# fallback to an empty string.
if (($ENV{enable_injection_points} // '') ne 'yes')
{
	plan skip_all => 'Injection points not supported by this build';
}

# Set up the primary node with streaming enabled.  log_checkpoints is
# turned on because later steps look for checkpoint and restart point
# messages in the server logs.
my $node_primary = PostgreSQL::Test::Cluster->new('master');
$node_primary->init(allows_streaming => 1);
$node_primary->append_conf('postgresql.conf',
	"\nlog_checkpoints = on\nrestart_after_crash = on\n");
$node_primary->start;

# The injection_points extension may be missing, for example when this
# script is run with installcheck, where the module would not be
# installed by default.  Skip the test in that case.
plan skip_all => 'Extension injection_points not installed'
  unless $node_primary->check_extension('injection_points');

# Take a base backup of the primary; the standby is created from it.
my $backup_name = 'my_backup';
$node_primary->backup($backup_name);

# Create a streaming standby from the backup and start it.
my $node_standby = PostgreSQL::Test::Cluster->new('standby1');
$node_standby->init_from_backup(
	$node_primary, $backup_name,
	has_streaming => 1);
$node_standby->start;

# Statements run on the primary, in this order: a checkpoint, the dummy
# table used by the rest of the test, and the injection_points extension
# whose functions are later called on the standby.
foreach my $setup_sql (
	'checkpoint',
	'CREATE TABLE prim_tab (a int);',
	'CREATE EXTENSION injection_points;')
{
	$node_primary->safe_psql('postgres', $setup_sql);
}

# Wait until the standby has replayed everything above, so that the
# extension exists there too.
$node_primary->wait_for_replay_catchup($node_standby);

# Attach the injection point 'create-restart-point' on the standby with
# the 'wait' action.  Note that from this point the checkpointer will wait
# in the middle of a restart point on the standby.
$node_standby->safe_psql('postgres',
	"SELECT injection_points_attach('create-restart-point', 'wait');");

# Execute a restart point on the standby, that we will now be waiting on.
# This needs to be in the background, as the CHECKPOINT command cannot
# finish while the checkpointer is held at the injection point.
#
# The current size of the standby's log file is saved first; it is passed
# to log_contains() later on as the position to start looking from.
my $logstart = -s $node_standby->logfile;
my $psql_session =
  $node_standby->background_psql('postgres', on_error_stop => 0);
# The \echo marker is what query_until() is told to look for; the
# CHECKPOINT that follows it in the same script is the command expected to
# block.
$psql_session->query_until(
	qr/starting_checkpoint/, q(
   \echo starting_checkpoint
   CHECKPOINT;
));

# Generate some WAL activity and switch one WAL segment on the primary, to
# make the previous restart point remove the segment once the restart
# point completes.  Then wait for the standby to replay these changes.
$node_primary->safe_psql('postgres', 'INSERT INTO prim_tab VALUES (1);');
$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal();');
$node_primary->wait_for_replay_catchup($node_standby);

# Wait until the checkpointer is in the middle of the restart point
# processing, i.e. reported as waiting on the 'create-restart-point'
# injection point.
$node_standby->wait_for_event('checkpointer', 'create-restart-point');

# Check the logs that the restart point has started on standby, looking
# only from the position saved in $logstart.  This is optional, but let's
# be sure.
ok( $node_standby->log_contains(
		"restartpoint starting: immediate wait", $logstart),
	"restartpoint has started");

# Trigger promotion during the restart point: the primary is stopped and
# the standby is promoted while the checkpointer is still waiting on the
# injection point.
$node_primary->stop;
$node_standby->promote;

# Update the start position before waking up the checkpointer!  This way,
# the follow-up log check only considers messages written after the
# promotion.
$logstart = -s $node_standby->logfile;

# Now wake up the checkpointer, letting the restart point that was held at
# 'create-restart-point' continue.
$node_standby->safe_psql('postgres',
	"SELECT injection_points_wakeup('create-restart-point');");

# Poll the logs of the newly-promoted standby until the restart point
# that was held back is reported as complete.  The log is checked every
# 100ms, for a number of attempts derived from the default test timeout.
my $checkpoint_complete = 0;
my $polls_left = int(10 * $PostgreSQL::Test::Utils::timeout_default) + 1;
until ($checkpoint_complete || $polls_left-- <= 0)
{
	if ($node_standby->log_contains("restartpoint complete", $logstart))
	{
		$checkpoint_complete = 1;
	}
	else
	{
		# Not there yet, sleep a bit before looking again.
		usleep(100_000);
	}
}
is($checkpoint_complete, 1, 'restart point has completed');

# Kill with SIGKILL, forcing all the backends to restart.
#
# A dedicated psql session is started through IPC::Run so that its backend
# can be killed and the loss of connection observed on its stderr.  The
# timer guarding this session uses the default test timeout, like the
# polling loops of this script, rather than a hard-coded value.
my $psql_timeout =
  IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
my $killme = IPC::Run::start(
	[
		'psql', '--no-psqlrc', '--no-align', '--tuples-only', '--quiet',
		'--set' => 'ON_ERROR_STOP=1',
		'--file' => '-',
		'--dbname' => $node_standby->connstr('postgres')
	],
	'<' => \$killme_stdin,
	'>' => \$killme_stdout,
	'2>' => \$killme_stderr,
	$psql_timeout);

# Ask the session for the PID of its backend.  pump_until() is used here,
# as for the other wait on this session below, so that an expired timer or
# a psql that exited early is reported as a test failure.
$killme_stdin .= q[
SELECT pg_backend_pid();
];
ok( pump_until(
		$killme, $psql_timeout,
		\$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
	'acquired pid for SIGKILL');
my $pid = $killme_stdout;
# The pattern above accepts both \r and \n as line terminators, so strip
# both of them from the PID.
$pid =~ s/[\r\n]+\z//;
$killme_stdout = '';
$killme_stderr = '';

my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
is($ret, 0, 'killed process with KILL');

# Wait until the server restarts, finish consuming output.  The query sent
# here is expected to fail with one of the connection-loss messages, as
# the backend of this session has just been killed.
$killme_stdin .= q[
SELECT 1;
];
ok( pump_until(
		$killme,
		$psql_timeout,
		\$killme_stderr,
		qr/server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
	),
	"psql query died successfully after SIGKILL");
$killme->finish;

# Wait until the server accepts connections again after the restart
# caused by the SIGKILL.
$node_standby->poll_query_until('postgres', undef, '');

# Once recovery is done, a plain query has to succeed on the node.
my ($psql_rc, $psql_out, $psql_err) =
  $node_standby->psql('postgres', 'select 1');
is($psql_rc, 0, "psql connect success");
is($psql_out, 1, "psql select 1");

done_testing();
