/*-------------------------------------------------------------------------
 *
 * astreamer_verify.c
 *
 * Archive streamer for verification of a tar format backup (including
 * compressed tar format backups).
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 *
 * src/bin/pg_verifybackup/astreamer_verify.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres_fe.h"

#include "access/xlog_internal.h"
#include "catalog/pg_control.h"
#include "pg_verifybackup.h"

/*
 * Private state of the verifying archive streamer.
 *
 * "base" has to stay the first member: the callbacks in this file receive a
 * plain astreamer pointer and cast it back to astreamer_verify.
 */
typedef struct astreamer_verify
{
	/* These fields don't change once initialized. */
	astreamer	base;			/* generic streamer header */
	verifier_context *context;	/* manifest, options, error reporting */
	char	   *archive_name;	/* archive name, used in error messages */
	Oid			tblspc_oid;		/* if valid, member paths are looked up under
								 * pg_tblspc/<oid>/ */

	/* These fields change for each archive member. */
	manifest_file *mfile;		/* manifest entry matched by current member,
								 * or NULL if none */
	bool		verify_checksum;	/* checksum the current member? */
	bool		verify_control_data;	/* current member gets the extra
										 * control file checks? */
	pg_checksum_context *checksum_ctx;	/* allocated only when checksums are
										 * not being skipped */
	uint64		checksum_bytes; /* bytes fed to the checksum so far */
	ControlFileData control_file;	/* leading part of pg_control, as read */
	uint64		control_file_bytes; /* bytes of pg_control seen so far,
									 * including any not buffered */
} astreamer_verify;

/* Callbacks installed in astreamer_verify_ops. */
static void astreamer_verify_content(astreamer *streamer,
									 astreamer_member *member,
									 const char *data, int len,
									 astreamer_archive_context context);
static void astreamer_verify_finalize(astreamer *streamer);
static void astreamer_verify_free(astreamer *streamer);

/* Per-member helpers, driven from astreamer_verify_content(). */
static void member_verify_header(astreamer *streamer, astreamer_member *member);
static void member_compute_checksum(astreamer *streamer,
									astreamer_member *member,
									const char *data, int len);
static void member_verify_checksum(astreamer *streamer);
static void member_copy_control_data(astreamer *streamer,
									 astreamer_member *member,
									 const char *data, int len);
static void member_verify_control_data(astreamer *streamer);
static void member_reset_info(astreamer *streamer);

/* Callback table shared by every streamer created in this file. */
static const astreamer_ops astreamer_verify_ops = {
	.content = astreamer_verify_content,
	.finalize = astreamer_verify_finalize,
	.free = astreamer_verify_free
};

/*
 * Build a new astreamer that verifies the members of a tar archive against
 * the backup manifest.
 *
 * "next" is stored as the successor streamer, "context" supplies the manifest
 * and the verification options, "archive_name" is remembered (not copied) for
 * use in error messages, and "tblspc_oid", when valid, identifies the
 * tablespace whose files the archive contains.
 *
 * The result is a pointer to the embedded generic astreamer header.
 */
astreamer *
astreamer_verify_content_new(astreamer *next, verifier_context *context,
							 char *archive_name, Oid tblspc_oid)
{
	astreamer_verify *verifier = palloc0(sizeof(astreamer_verify));
	const astreamer_ops **ops_slot;

	/* Hook up our callback table. */
	ops_slot = (const astreamer_ops **) &verifier->base.bbs_ops;
	*ops_slot = &astreamer_verify_ops;

	/* Fill in the fields that stay fixed for the life of the streamer. */
	verifier->tblspc_oid = tblspc_oid;
	verifier->archive_name = archive_name;
	verifier->context = context;
	verifier->base.bbs_next = next;

	/*
	 * A checksum context is needed only if checksums will be verified;
	 * otherwise the pointer stays NULL, courtesy of palloc0.
	 */
	if (!context->skip_checksums)
		verifier->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));

	return &verifier->base;
}

/*
 * Content callback: dispatch each chunk handed to us by the archive parser
 * according to which part of the archive it belongs to.
 *
 * A member header selects the checks to run, member contents feed those
 * checks incrementally, and the member trailer completes them and clears the
 * per-member state.
 */
static void
astreamer_verify_content(astreamer *streamer, astreamer_member *member,
						 const char *data, int len,
						 astreamer_archive_context context)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;

	Assert(context != ASTREAMER_UNKNOWN);

	switch (context)
	{
		case ASTREAMER_ARCHIVE_TRAILER:
			/* Nothing to verify at the end of the archive. */
			break;

		case ASTREAMER_MEMBER_HEADER:
			{
				/* Look the member up and decide what to check. */
				member_verify_header(streamer, member);
				break;
			}

		case ASTREAMER_MEMBER_CONTENTS:
			{
				/* Accumulate whatever the selected checks need. */
				if (verifier->verify_checksum)
				{
					member_compute_checksum(streamer, member, data, len);
				}
				if (verifier->verify_control_data)
				{
					member_copy_control_data(streamer, member, data, len);
				}
				break;
			}

		case ASTREAMER_MEMBER_TRAILER:
			{
				/* The whole member has been seen; finish the checks. */
				if (verifier->verify_checksum)
				{
					member_verify_checksum(streamer);
				}
				if (verifier->verify_control_data)
				{
					member_verify_control_data(streamer);
				}

				/* Start from a clean slate for the following member. */
				member_reset_info(streamer);
				break;
			}

		default:
			/* Any other context is unexpected here. */
			pg_fatal("unexpected state while parsing tar archive");
	}
}

/*
 * End-of-stream processing for an astreamer_verify stream.
 *
 * There is nothing to flush; the only action is an assertion that no
 * successor streamer is attached to this one.
 */
static void
astreamer_verify_finalize(astreamer *streamer)
{
	Assert(streamer->bbs_next == NULL);
}

/*
 * Free memory associated with an astreamer_verify stream.
 *
 * The checksum context exists only when checksum verification was enabled at
 * creation time, so it is released conditionally; the streamer itself is
 * always released.
 */
static void
astreamer_verify_free(astreamer *streamer)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;
	pg_checksum_context *ctx = verifier->checksum_ctx;

	if (ctx != NULL)
		pfree(ctx);

	pfree(verifier);
}

/*
 * Handle the header of an archive member: find its manifest entry, check its
 * size, and decide which content checks to run on it.
 *
 * On return, mfile points at the matching manifest entry (if one was found)
 * and verify_checksum / verify_control_data say which checks the content and
 * trailer callbacks should perform. Both flags stay false if we bail out
 * early.
 */
static void
member_verify_header(astreamer *streamer, astreamer_member *member)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;
	verifier_context *vctx = verifier->context;
	manifest_file *entry;
	char		relpath[MAXPGPATH];

	/* Directories and links are of no interest; only plain files are. */
	if (member->is_directory || member->is_link)
		return;

	/*
	 * Work out the name under which the manifest would list this member.
	 *
	 * For files belonging to a tablespace the manifest records a path
	 * relative to the base directory, whereas the tablespace's tar archive
	 * omits that leading part, so add it back here.
	 *
	 * Otherwise prepend "./": the name from the tar file might be absolute,
	 * and we want it treated as relative. Either way the name may contain
	 * '.' or '..' components, which canonicalize_path() removes.
	 */
	if (!OidIsValid(verifier->tblspc_oid))
		snprintf(relpath, MAXPGPATH, "./%s", member->pathname);
	else
		snprintf(relpath, MAXPGPATH, "%s/%u/%s",
				 "pg_tblspc", verifier->tblspc_oid, member->pathname);
	canonicalize_path(relpath);

	/* Skip anything on the ignore list. */
	if (should_ignore_relpath(vctx, relpath))
		return;

	/* The member must be listed in the manifest. */
	entry = manifest_files_lookup(vctx->manifest->files, relpath);
	if (entry == NULL)
	{
		report_backup_error(vctx,
							"file \"%s\" is present in archive \"%s\" but not in the manifest",
							member->pathname, verifier->archive_name);
		return;
	}

	/* Remember the entry and note that an archive contained this file. */
	verifier->mfile = entry;
	entry->matched = true;

	/* A size mismatch marks the entry bad and rules out further checks. */
	if (entry->size != member->size)
	{
		report_backup_error(vctx,
							"file \"%s\" has size %llu in archive \"%s\" but size %" PRIu64 " in the manifest",
							member->pathname,
							(unsigned long long) member->size,
							verifier->archive_name,
							entry->size);
		entry->bad = true;
		return;
	}

	/* Checksums are verified unless disabled globally or for this entry. */
	verifier->verify_checksum =
		!vctx->skip_checksums && should_verify_checksum(entry);

	/*
	 * The control file gets extra validation, but only when the manifest
	 * version is something other than 1 and the entry is not already bad.
	 */
	verifier->verify_control_data = false;
	if (vctx->manifest->version != 1 &&
		!entry->bad &&
		strcmp(entry->pathname, XLOG_CONTROL_FILE) == 0)
		verifier->verify_control_data = true;

	/* No checksum verification means no checksum context to set up. */
	if (!verifier->verify_checksum)
		return;

	/* Initialize the checksum context for this entry's checksum type. */
	if (pg_checksum_init(verifier->checksum_ctx, entry->checksum_type) < 0)
	{
		report_backup_error(vctx,
							"%s: could not initialize checksum of file \"%s\"",
							verifier->archive_name, entry->pathname);

		/* Without a usable context the checksum cannot be verified. */
		verifier->verify_checksum = false;
	}
}

/*
 * Fold one chunk of member content into the running checksum.
 *
 * The checksum context must already have been initialized for the current
 * member. If the update fails, the error is reported and checksum
 * verification is switched off for the rest of this member.
 */
static void
member_compute_checksum(astreamer *streamer, astreamer_member *member,
						const char *data, int len)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;
	manifest_file *entry = verifier->mfile;
	pg_checksum_context *ctx = verifier->checksum_ctx;

	Assert(verifier->verify_checksum);
	Assert(entry->checksum_type == ctx->type);

	/*
	 * Track how many bytes have been checksummed; the total is compared with
	 * the manifest's file size once the member is complete.
	 */
	verifier->checksum_bytes += len;

	/* Hand the bytes to the checksum machinery; done if that succeeds. */
	if (pg_checksum_update(ctx, (uint8 *) data, len) >= 0)
		return;

	report_backup_error(verifier->context,
						"could not update checksum of file \"%s\"",
						entry->pathname);
	verifier->verify_checksum = false;
}

/*
 * Finish the checksum of the current member and compare it with the value
 * recorded in the manifest.
 *
 * Called once all of the member's content has been processed. Each problem
 * found is reported through report_backup_error(); the first one ends the
 * check.
 */
static void
member_verify_checksum(astreamer *streamer)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;
	verifier_context *vctx = verifier->context;
	manifest_file *entry = verifier->mfile;
	uint8		digest[PG_CHECKSUM_MAX_LENGTH];
	int			digest_len;

	Assert(verifier->verify_checksum);

	/*
	 * The byte count should always agree with the manifest at this point,
	 * but verify that rather than assume it.
	 */
	if (verifier->checksum_bytes != entry->size)
	{
		report_backup_error(vctx,
							"file \"%s\" in archive \"%s\" should contain %" PRIu64 " bytes, but %" PRIu64 " bytes were read",
							entry->pathname, verifier->archive_name,
							entry->size,
							verifier->checksum_bytes);
		return;
	}

	/* Produce the final checksum value. */
	digest_len = pg_checksum_final(verifier->checksum_ctx, digest);
	if (digest_len < 0)
	{
		report_backup_error(vctx,
							"could not finalize checksum of file \"%s\"",
							entry->pathname);
		return;
	}

	/*
	 * Compare lengths first. In the message, the length recorded in the
	 * manifest is printed first and the length just computed second.
	 */
	if (digest_len != entry->checksum_length)
	{
		report_backup_error(vctx,
							"file \"%s\" in archive \"%s\" has checksum of length %d, but expected %d",
							entry->pathname, verifier->archive_name,
							entry->checksum_length, digest_len);
		return;
	}

	/* Lengths agree, so compare the checksum bytes themselves. */
	if (memcmp(digest, entry->checksum_payload, digest_len) != 0)
	{
		report_backup_error(vctx,
							"checksum mismatch for file \"%s\" in archive \"%s\"",
							entry->pathname, verifier->archive_name);
	}
}

/*
 * Accumulate pg_control content in the streamer's ControlFileData buffer so
 * that the complete structure is available for verification later.
 */
static void
member_copy_control_data(astreamer *streamer, astreamer_member *member,
						 const char *data, int len)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;
	uint64		seen = verifier->control_file_bytes;

	/* Only the control file should bring us here. */
	Assert(verifier->verify_control_data);

	/*
	 * Buffer as much of this chunk as still fits. The file on disk is
	 * expected to be PG_CONTROL_FILE_SIZE bytes long, but only its first
	 * sizeof(ControlFileData) bytes fit in our buffer, so anything beyond
	 * that is dropped rather than written past the end of the buffer.
	 */
	if (seen < sizeof(ControlFileData))
	{
		size_t		room = sizeof(ControlFileData) - seen;
		size_t		ncopy = Min((size_t) len, room);
		char	   *dest = ((char *) &verifier->control_file) + seen;

		memcpy(dest, data, ncopy);
	}

	/* Count every byte seen, whether or not it was buffered. */
	verifier->control_file_bytes = seen + len;
}

/*
 * Validate the buffered pg_control data: overall length, CRC, control file
 * version, and system identifier (which must match the manifest's).
 *
 * Every failure here goes through report_fatal_error().
 */
static void
member_verify_control_data(astreamer *streamer)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;
	ControlFileData *cf = &verifier->control_file;
	manifest_data *manifest = verifier->context->manifest;
	pg_crc32c	computed_crc;

	/* Only the control file should bring us here. */
	Assert(strcmp(verifier->mfile->pathname, XLOG_CONTROL_FILE) == 0);
	Assert(verifier->verify_control_data);

	/*
	 * A control file of the wrong length is a serious problem.
	 *
	 * NB: The cast to int could in theory overflow. That is unlikely to
	 * matter in practice, and it lets us use the same format string that
	 * pg_rewind uses for this case. Arguably both programs should report an
	 * unsigned 64-bit value instead, but we don't worry about that for now.
	 */
	if (verifier->control_file_bytes != PG_CONTROL_FILE_SIZE)
		report_fatal_error("unexpected control file size %d, expected %d",
						   (int) verifier->control_file_bytes,
						   PG_CONTROL_FILE_SIZE);

	/* Compute the CRC over everything that precedes the crc field. */
	INIT_CRC32C(computed_crc);
	COMP_CRC32C(computed_crc, cf, offsetof(ControlFileData, crc));
	FIN_CRC32C(computed_crc);

	/* With a bad CRC, the remaining contents cannot be trusted. */
	if (!EQ_CRC32C(computed_crc, cf->crc))
		report_fatal_error("%s: %s: CRC is incorrect",
						   verifier->archive_name,
						   verifier->mfile->pathname);

	/* Only the current control file version can be interpreted. */
	if (cf->pg_control_version != PG_CONTROL_VERSION)
		report_fatal_error("%s: %s: unexpected control file version",
						   verifier->archive_name,
						   verifier->mfile->pathname);

	/* Manifest and control file must agree on the system identifier. */
	if (manifest->system_identifier != cf->system_identifier)
		report_fatal_error("%s: %s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64,
						   verifier->archive_name,
						   verifier->mfile->pathname,
						   manifest->system_identifier,
						   cf->system_identifier);
}

/*
 * Clear the per-member verification state ahead of the next archive member.
 *
 * Only flags, counters and the manifest entry pointer are reset. Nothing is
 * freed here: the checksum context is left in place, and the control file
 * buffer is not cleared.
 */
static void
member_reset_info(astreamer *streamer)
{
	astreamer_verify *verifier = (astreamer_verify *) streamer;

	/* No checks are selected until the next member header says so. */
	verifier->verify_control_data = false;
	verifier->verify_checksum = false;

	/* Byte counters start over for the next member. */
	verifier->control_file_bytes = 0;
	verifier->checksum_bytes = 0;

	/* No manifest entry is associated with us any more. */
	verifier->mfile = NULL;
}
