#!/usr/bin/env perl

use strict;
use warnings;

use Data::Dumper;
use Getopt::Long;
use File::Path qw(make_path);
use Pod::Usage;
use FindBin;
use lib "$FindBin::Bin/../lib";

use App::Test::Generator::SchemaExtractor;

=head1 NAME

extract-schemas - Extract test schemas from Perl modules

=head1 SYNOPSIS

    extract-schemas [options] <module.pm>

    Options:
      --output-dir DIR    Output directory for schema files (default: schemas/)
      --strict-pod LEVEL  POD/code consistency check: off, warn (default) or fatal
      --verbose           Show detailed analysis
      --help              Show this help message
      --man               Show full documentation

    Examples:
      extract-schemas lib/MyModule.pm
      extract-schemas --output-dir my_schemas --verbose lib/MyModule.pm
      extract-schemas --help

=head1 DESCRIPTION

This tool analyzes a Perl module and generates YAML schema files for each
method, suitable for use with App::Test::Generator.

The extractor uses three sources of information:

=over 4

=item 1. POD Documentation

Parses parameter descriptions from POD to extract types and constraints.

=item 2. Code Analysis

Analyzes validation patterns in the code (ref checks, length checks, etc.)

=item 3. Method Signatures

Extracts parameter names from method signatures.

=back

The tool assigns a confidence level (high/medium/low) to each schema based
on how much information it could infer.

=cut

# Parse command line options

# Flags that only control this script's own help output
my %cli_opts = (
	help => 0,
	man => 0,
);

# Settings that are passed on to the schema extractor's constructor
my %extractor_opts = (
	output_dir => 'schemas',
	strict_pod => 'warn',
	verbose => 0,
);

# On an unknown or malformed option, print the usage text and exit with status 2
GetOptions(
	'output-dir|o=s' => \$extractor_opts{output_dir},
	'strict-pod|s=s' => \$extractor_opts{strict_pod},
	'verbose|v' => \$extractor_opts{verbose},
	'help|h' => \$cli_opts{help},
	'man|m' => \$cli_opts{man},
) or pod2usage(2);

pod2usage(-exitval => 0, -verbose => 1) if($cli_opts{help});
pod2usage(-exitval => 0, -verbose => 2) if $cli_opts{man};

# Anchor with \A and \z rather than ^ and $: '$' also matches just before a
# trailing newline, which would let a value such as "warn\n" slip through
if($extractor_opts{strict_pod} !~ /\A(?:off|warn|fatal)\z/) {
	die "Invalid --strict-pod value '$extractor_opts{strict_pod}'. Expected off, warn, or fatal";
}

# Get input file.
# Test for defined/non-empty rather than truth so that a file literally
# named "0" is not mistaken for a missing argument.
my $input_file = shift @ARGV;
pod2usage('Error: No input file specified') unless defined($input_file) && length($input_file);

die "Error: File not found: $input_file" unless -f $input_file;

# Tell the user what is about to be processed and where the results will go
print "Extracting schemas from: $input_file\n",
	"Output directory: $extractor_opts{output_dir}\n\n";

# Create the output directory (and any missing parents) if it is not there yet
if(!-d $extractor_opts{output_dir}) {
	make_path($extractor_opts{output_dir});
}

# Run the extractor: the input file plus every extractor option collected
# from the command line are handed to the constructor
my @constructor_args = (
	input_file => $input_file,
	%extractor_opts,
);
my $extractor = App::Test::Generator::SchemaExtractor->new(@constructor_args);

my $schemas = $extractor->extract_all();

# Summary report
print "\n", '=' x 70, "\n",
	"EXTRACTION SUMMARY\n",
	'=' x 70, "\n\n";

# Levels that are always listed in the breakdown, in display order
my @standard_levels = qw(high medium low);
my %is_standard = map { $_ => 1 } @standard_levels;

my %input_confidence_counts = (high => 0, medium => 0, low => 0);
my %output_confidence_counts = (high => 0, medium => 0, low => 0);

# Look up the confidence level for one direction ('input' or 'output').
# Every step is checked so that a schema without confidence data is not
# autovivified (which would add empty hashes to the --verbose dump) and a
# non-hash value does not abort the report. Such schemas are counted as 'low'.
my $confidence_level = sub {
	my ($schema, $direction) = @_;

	my $confidence = $schema->{_confidence};
	return 'low' unless ref($confidence) eq 'HASH';

	my $details = $confidence->{$direction};
	return 'low' unless ref($details) eq 'HASH';

	return $details->{level} // 'low';
};

# Set as soon as any method is rated below 'high' for its input or its output
my $needs_review = 0;

foreach my $method (sort keys %$schemas) {
	my $schema = $schemas->{$method};
	my $iconf = $confidence_level->($schema, 'input');
	my $oconf = $confidence_level->($schema, 'output');
	$input_confidence_counts{$iconf}++;
	$output_confidence_counts{$oconf}++;
	$needs_review = 1 if(($iconf ne 'high') || ($oconf ne 'high'));

	# Filter out underscore keys which could be used in the future for metadata.
	# A missing or non-hash input section counts as zero parameters instead of
	# being autovivified or dying under strict refs.
	my $input = $schema->{input};
	my $param_count = (ref($input) eq 'HASH')
		? scalar(grep { $_ !~ /^_/ } keys %{$input})
		: 0;

	printf "%-30s %d params  [%s input confidence] [%s output confidence]\n",
		$method, $param_count, uc($iconf), uc($oconf);
}

print "\n";
print 'Total methods: ', (scalar keys %$schemas), "\n";

foreach my $section (['Input', \%input_confidence_counts], ['Output', \%output_confidence_counts]) {
	my ($title, $counts) = @{$section};

	# Standard levels first, followed by any other level that was reported,
	# so that the breakdown always adds up to the total number of methods
	my @extra_levels = grep { !$is_standard{$_} } keys %{$counts};
	my @levels = (@standard_levels, sort @extra_levels);

	print "  $title:\n";
	foreach my $level (@levels) {
		# %-18s pads the label to the width of "Medium confidence:" so the
		# counts line up in a column
		printf "    %-18s %s\n", ucfirst($level) . ' confidence:', $counts->{$level};
	}
}
print "\n";

# Output confidence is reported next to input confidence above, so a weak
# rating on either side triggers the recommendation
if($needs_review) {
	print "RECOMMENDATION:\n",
		"Review the generated schemas in $extractor_opts{output_dir}/\n",
		"Focus on methods with medium/low confidence ratings.\n\n";
}

# With --verbose, dump the complete extracted data structure
if($extractor_opts{'verbose'}) {
	print "Schemas:\n\t", Dumper($schemas);
}

print "Schema files written to: $extractor_opts{output_dir}/\n";

__END__

=head1 SCHEMA FORMAT

The generated YAML files have the following structure:

    method: method_name
    confidence: high|medium|low
    notes:
      - Any warnings or suggestions
    input:
      param_name:
        type: string|integer|number|boolean|arrayref|hashref|object
        min: 5
        max: 100
        optional: 0
        matches: /pattern/

=head1 CONFIDENCE LEVELS

=over 4

=item B<high>

Strong evidence from POD and code analysis. Schema should be accurate.

=item B<medium>

Partial information available. Review recommended.

=item B<low>

Limited information. Manual review required.

=back

=head1 EXAMPLES

=head2 Basic Usage

    extract-schemas lib/MyModule.pm

Extracts schemas to ./schemas/ directory.

=head2 Custom Output Directory

    extract-schemas --output-dir test_schemas lib/MyModule.pm

=head2 Verbose Mode

    extract-schemas --verbose lib/MyModule.pm

Shows detailed analysis of each method.

=head2 Pod Checking

  --strict-pod=LEVEL
    Enforce POD/code consistency.
    LEVEL may be:
      off    - do not validate POD
      warn   - warn on mismatches (default)
      fatal  - abort on mismatches

=head1 NEXT STEPS

After extracting schemas:

=over 4

=item 1. Review the generated YAML files, especially those marked low confidence

=item 2. Edit the schemas to add missing information or correct errors

=item 3. Use the schemas with App::Test::Generator:

    test-generator --schema schemas/my_method.yaml

=back

=head1 SEE ALSO

L<App::Test::Generator>, L<PPI>, L<Pod::Simple>

=head1 AUTHOR

Nigel Horne

=cut
