#----------------------------------------------------------------------
#
# gen_tabcomplete.pl
#	Perl script that transforms tab-complete.in.c to tab-complete.c.
#
# This script converts a C else-if chain into a switch statement.
# The else-if statements to be processed must appear at single-tab-stop
# indentation between lines reading
#	/* BEGIN GEN_TABCOMPLETE */
#	/* END GEN_TABCOMPLETE */
# The first clause in each if-condition must be a call of one of the
# functions Matches, HeadMatches, TailMatches, MatchesCS, HeadMatchesCS,
# or TailMatchesCS.  Its argument(s) must be string literals or macros
# that expand to string literals or NULL.  These clauses are removed from
# the code and replaced by "break; case N:", where N is a unique number
# for each such case label.
# The BEGIN GEN_TABCOMPLETE and END GEN_TABCOMPLETE lines are replaced
# by "switch (pattern_id) {" and "}" wrapping to make a valid switch.
# The remainder of the code is copied verbatim.
#
# An if-condition can also be an OR ("||") of several *Matches function
# calls, or it can be an AND ("&&") of a *Matches call with some other
# condition.  For example,
#
#	else if (HeadMatches("DROP", "DATABASE") && ends_with(prev_wd, '('))
#
# will be transformed to
#
#		break;
#	case N:
#		if (ends_with(prev_wd, '('))
#
# In addition, there must be one input line that reads
#	/* Insert tab-completion pattern data here. */
# This line is replaced in the output file by macro calls, one for each
# replaced match condition.  The output for the above example would be
#	TCPAT(N, HeadMatch, "DROP", "DATABASE"),
# where N is the replacement case label, "HeadMatch" is the original
# function name minus "es", and the rest are the function arguments.
# The tab-completion data line must appear before BEGIN GEN_TABCOMPLETE.
#
#
# Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/bin/psql/gen_tabcomplete.pl
#
#----------------------------------------------------------------------

use strict;
use warnings FATAL => 'all';
use Getopt::Long;

# Path given with --outfile; empty means "write to stdout".
my $outfile = '';

# On a bad option Getopt::Long has already complained on STDERR; follow up
# with the usage summary instead of a bare "wrong arguments".
GetOptions('outfile=s' => \$outfile) or usage();

# The input file is the first non-option argument and is mandatory.
# Check for it explicitly: otherwise the undefined value would only be
# noticed when it is interpolated or opened below, with a confusing message.
my $infile = $ARGV[0];
usage() if !defined $infile;

open my $infh, '<', $infile
  or die "$0: could not open input file '$infile': $!\n";

# Compare against the empty string rather than testing truthiness, so that
# an output file literally named "0" is honored.
my $outfh;
if ($outfile ne '')
{
	open $outfh, '>', $outfile
	  or die "$0: could not open output file '$outfile': $!\n";
}
else
{
	$outfh = *STDOUT;
}

# Opening boilerplate for output file.
# This must be print, not printf: the here-document is literal output, and
# it interpolates the input file name, so any '%' in that name must not be
# interpreted as a format directive.
print $outfh <<EOM;
/*-------------------------------------------------------------------------
 *
 * tab-complete.c
 *    Preprocessed tab-completion code.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES
 *  ******************************
 *  *** DO NOT EDIT THIS FILE! ***
 *  ******************************
 *
 *  It has been GENERATED by src/bin/psql/gen_tabcomplete.pl
 *
 *-------------------------------------------------------------------------
 */

#define SWITCH_CONVERSION_APPLIED

#line 1 "${infile}"
EOM

# Scan input file until we find the data-replacement label line.
# Dump what we scan directly into the output file.
# The label line itself is consumed and not copied; the table data is
# written in its place later.
my $found_data_label = 0;
while (<$infh>)
{
	chomp;
	if (m|^\s*/\* Insert tab-completion pattern data here\. \*/\s*$|)
	{
		$found_data_label = 1;
		last;
	}
	print $outfh "$_\n";
}

# Hitting EOF here means the required label line is absent.  Fail now
# rather than emit an output file that cannot compile.
die "$0: could not find the pattern-data insertion line in '$infile'\n"
  if !$found_data_label;

# $table_data collects what we will substitute for the "pattern data" line.
my $table_data = '';
# $output_code collects code that we can't emit till after $table_data.
my $output_code = '';
# last case label assigned
my $last_case_label = 0;

# We emit #line directives to keep the output file's line numbering in sync
# with the line numbering of the original, to simplify compiler error message
# reading and debugging.
# $. is the number of the input line just read (the label line), so the
# code that follows starts on the next line.
my $next_line_no = $. + 1;
$output_code .= "#line ${next_line_no} \"${infile}\"\n";

# Scan until we find the BEGIN GEN_TABCOMPLETE line.
# Add the scanned code to $output_code verbatim.
my $found_begin = 0;
while (<$infh>)
{
	chomp;
	if (m|^\s*/\* BEGIN GEN_TABCOMPLETE \*/\s*$|)
	{
		$found_begin = 1;
		last;
	}
	$output_code .= $_ . "\n";
}

# Hitting EOF here means there is no else-if chain to convert.  Fail now
# rather than append a stray switch statement to the end of the output.
die "$0: could not find the BEGIN GEN_TABCOMPLETE line in '$infile'\n"
  if !$found_begin;

# Emit the switch-starting lines.
$output_code .= "\tswitch (pattern_id)\n";
$output_code .= "\t{\n";

# Keep line numbering in sync.
$next_line_no = $. + 1;
$output_code .= "#line ${next_line_no} \"${infile}\"\n";

# Scan input file, collecting outer-level else-if conditions
# to pass to process_else_if.
# Lines that aren't else-if conditions go to $output_code verbatim.

# True if we're handling a multiline else-if condition
my $in_else_if = 0;
# The accumulated line: all physical lines of the condition joined with
# single spaces, continuation lines having their leading whitespace removed
my $else_if_line;
# Input line number on which the condition being accumulated started
my $else_if_lineno;
# True once the END GEN_TABCOMPLETE line has been seen
my $found_end = 0;

while (<$infh>)
{
	chomp;
	if (m|^\s*/\* END GEN_TABCOMPLETE \*/\s*$|)
	{
		$found_end = 1;
		last;
	}
	if ($in_else_if)
	{
		my $rest = $_;
		# collapse leading whitespace
		$rest =~ s/^\s+//;
		$else_if_line .= ' ' . $rest;
		# Double right paren is currently sufficient to detect completion
		if ($else_if_line =~ m/\)\)$/)
		{
			process_else_if($else_if_line, $else_if_lineno, $.);
			$in_else_if = 0;
		}
	}
	elsif (m/^\telse if \(/)
	{
		# Only conditions at exactly one tab stop of indentation are
		# candidates; more deeply nested ones are copied verbatim below.
		$else_if_line = $_;
		$else_if_lineno = $.;
		# Double right paren is currently sufficient to detect completion
		if ($else_if_line =~ m/\)\)$/)
		{
			process_else_if($else_if_line, $else_if_lineno, $.);
		}
		else
		{
			$in_else_if = 1;
		}
	}
	else
	{
		$output_code .= $_ . "\n";
	}
}

die "unfinished else-if" if $in_else_if;

# Hitting EOF without the END line means the switch would be closed at the
# very end of the file.  Fail now rather than emit uncompilable output.
die "$0: could not find the END GEN_TABCOMPLETE line in '$infile'\n"
  if !$found_end;

# Emit the switch-ending lines.
# The leading "break;" terminates the code of the last generated case.
$output_code .= "\tbreak;\n";
$output_code .= "\tdefault:\n";
$output_code .= "\t\tAssert(false);\n";
$output_code .= "\t\tbreak;\n";
$output_code .= "\t}\n";

# Keep line numbering in sync.
$next_line_no = $. + 1;
$output_code .= "#line ${next_line_no} \"${infile}\"\n";

# Scan the rest, adding it to $output_code verbatim.
while (<$infh>)
{
	chomp;
	$output_code .= $_ . "\n";
}

# Dump out the table data.
# It goes first because the line it replaces precedes all the code that
# was held back in $output_code.
print $outfh $table_data;

# Dump out the modified code, and we're done!
print $outfh $output_code;

close($infh);

# Output is buffered, so a write failure (e.g. a full disk) may only be
# reported when the handle is closed.  Check it, so that a truncated
# output file is never mistaken for a successful run.
close($outfh)
  or die "$0: could not write output: $!\n";

# Disassemble an else-if condition.
# Add the generated table-contents macro(s) to $table_data,
# and add the replacement case label(s) to $output_code.
#
# Arguments:
#	$else_if_line: the whole condition joined onto one line, still
#		beginning with "\telse if ("
#	$else_if_lineno: input line number of the condition's first line
#	$end_lineno: input line number of the condition's last line
#
# Conditions that cannot be parsed are reported with warn and copied to
# $output_code as an "else if".
sub process_else_if
{
	my ($else_if_line, $else_if_lineno, $end_lineno) = @_;

	# Pattern for one *Matches call including its argument list.
	# Captures: $1 is "Head", "Tail" or empty; $2 is "CS" or empty;
	# $3 is the argument list (string literals, MatchAnyExcept("...")
	# calls, and words made of letters, separated by commas/whitespace).
	# It is shared by all three parsing steps below so that they cannot
	# drift apart.
	my $match_call =
	  qr/(Head|Tail|)Matches(CS|)\((("[^"]*"|MatchAnyExcept\("[^"]*"\)|[A-Za-z,\s])+)\)/;

	# Strip the initial "else if (", which we know is there
	$else_if_line =~ s/^\telse if \(//;

	# Handle OR'd conditions: peel off each leading "XMatches(...) ||".
	# Only the first one gets a case label; the rest share it.
	my $isfirst = 1;
	while ($else_if_line =~ s/^${match_call}\s*\|\|\s*//)
	{
		my ($typ, $cs, $args) = ($1, $2, $3);
		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
		$isfirst = 0;
	}

	# Check for AND'd condition
	if ($else_if_line =~ s/^${match_call}\s*&&\s*//)
	{
		my ($typ, $cs, $args) = ($1, $2, $3);
		# A preceding OR'd call means the condition mixes || and &&,
		# which this conversion does not handle; complain but carry on.
		warn
		  "could not process OR/ANDed if condition at line $else_if_lineno\n"
		  if !$isfirst;
		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
		$isfirst = 0;
		# approximate line positioning of AND'd condition
		$output_code .= "#line ${end_lineno} \"${infile}\"\n";
		# What is left is the remaining condition plus the closing paren
		# of the original "if".
		$output_code .= "\tif ($else_if_line\n";
	}
	elsif ($else_if_line =~ s/^${match_call}\)$//)
	{
		# Final (or only) *Matches call, followed just by the closing
		# paren of the "if": nothing remains to be emitted as code.
		my ($typ, $cs, $args) = ($1, $2, $3);
		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
		$isfirst = 0;
	}
	else
	{
		warn
		  "could not process if condition at line $else_if_lineno: the rest looks like $else_if_line\n";
		$output_code .= "\telse if ($else_if_line\n";
	}

	# Keep line numbering in sync.
	# A multiline condition was collapsed into fewer output lines, so
	# tell the compiler where the following input line really is.
	if ($end_lineno != $else_if_lineno)
	{
		my $next_lineno = $end_lineno + 1;
		$output_code .= "#line ${next_lineno} \"${infile}\"\n";
	}
}

# Record one *Matches call taken from an else-if condition.
#
# Arguments:
#	$typ: "Head", "Tail" or empty string
#	$cs: "CS" or empty string
#	$args: the call's argument list, copied into the table entry as-is
#	$lineno: input line number of the condition (currently unused here)
#	$isfirst: true for the first call of a condition; only then is a new
#		case label allocated and written to $output_code
#
# Every call appends one TCPAT() entry to $table_data, using the current
# case label.
sub process_match
{
	my ($typ, $cs, $args, $lineno, $isfirst) = @_;

	if ($isfirst)
	{
		# Start a new case.
		$last_case_label++;

		# "break;" and "case" deliberately share one output line, so both
		# carry the line number of the original "else if"'s first line;
		# that avoids misleading displays in, e.g., lcov.  The very first
		# case has no preceding code to break out of.
		my $leader = ($last_case_label > 1) ? "break; " : "";
		$output_code .= "\t" . $leader . "case $last_case_label:\n";
	}

	$table_data .= join('',
		"\tTCPAT(", $last_case_label, ", ",
		$typ, "Match", $cs, ", ",
		$args, "),\n");
}


# Abort the script with a usage summary.
# The text ends in a newline, so die prints it as-is without appending
# the script name and line number.
sub usage
{
	my $help_text = <<EOM;
Usage: gen_tabcomplete.pl [--outfile/-o <path>] input_file
    --outfile       Output file (default is stdout)

gen_tabcomplete.pl transforms tab-complete.in.c to tab-complete.c.
EOM

	die $help_text;
}
