#! /usr/bin/perl
use warnings;
use strict;
use integer;
use FindBin;
use lib $FindBin::RealBin;
use Def;
use Cmdsel;

# This helper script lists commands and packages for the body of
# cmdsel-debs.txt.  It requires the path to the big Contents.gz file.
# It optionally also accepts the names of one or more files containing
# lists of selected binary packages, one name per line; if so, the
# script outputs no package not listed.

our $cmd_zcat       = 'zcat';
our $mark_contents  = qr/^FILE\s+LOCATION\s*$/;
our $width          = 24;
our $file_cmdsel    = $Def::cmdsel_txt;
our $file_underride = "${FindBin::RealBin}/cmdsel-debs.data";

our $usage = <<END;
usage: $0 {-t cmdsel.txt} {-d cmdsel-debs.data} [-Dh] \\
[Contents.gz file] {[selections file]...}
    -t cmdsel.txt
    -d cmdsel-debs.data
           use alternate data files (these options must appear first)
    -D ignore cmdsel-debs.data
    -h print this usage message
END

our %f = (
  '1'     => [
    { a => 'usr/bin/'            , b => ''       },
    { a => 'bin/'                , b => ''       },
    { a => 'usr/share/man/man1/' , b => '.1.gz'  },
    { a => 'usr/X11R6/bin/'      , b => ''       },
    { a => 'usr/X11R6/man/man1/' , b => '.1x.gz' },
    { a => 'usr/share/man/man1/' , b => '.1x.gz' },
    { a => 'usr/X11R6/man/man1/' , b => '.1.gz'  },
  ],
  '1x'    => [
    { a => 'usr/X11R6/bin/'      , b => ''       },
    { a => 'usr/bin/'            , b => ''       },
    { a => 'bin/'                , b => ''       },
    { a => 'usr/X11R6/man/man1/' , b => '.1x.gz' },
    { a => 'usr/share/man/man1/' , b => '.1.gz'  },
    { a => 'usr/share/man/man1/' , b => '.1x.gz' },
    { a => 'usr/X11R6/man/man1/' , b => '.1.gz'  },
  ],
  '8'     => [
    { a => 'usr/sbin/'           , b => ''       },
    { a => 'usr/bin/'            , b => ''       },
    { a => 'bin/'                , b => ''       },
    { a => 'sbin/'               , b => ''       },
    { a => 'usr/share/man/man8/' , b => '.8.gz'  },
  ],
);
for my $s ( qw( 4 5 7 ) ) {
  my $x = "${s}x";
  $f{$s} = [
    { a => "usr/share/man/man$s/", b => ".$s.gz" },
    { a => "usr/X11R6/man/man$s/", b => ".$x.gz" },
    { a => "usr/share/man/man$s/", b => ".$x.gz" },
    { a => "usr/X11R6/man/man$s/", b => ".$s.gz" },
  ];
  $f{$x} = [
    { a => "usr/X11R6/man/man$s/", b => ".$x.gz" },
    { a => "usr/share/man/man$s/", b => ".$s.gz" },
    { a => "usr/share/man/man$s/", b => ".$x.gz" },
    { a => "usr/X11R6/man/man$s/", b => ".$s.gz" },
  ];
}

my $width1 = $width - 1;

# Read command-line arguments and options.
while ( @ARGV >= 2 && ( $ARGV[0] eq '-t' || $ARGV[0] eq '-m' ) ) {
  my( $opt, $file ) = splice @ARGV, 0, 2;
  if ( $opt eq '-t' ) { $file_cmdsel    = $file }
  if ( $opt eq '-d' ) { $file_underride = $file }
}
my @opt;
my @arg;
push @{ /^-\S/ ? \@opt : \@arg }, $_ for @ARGV;
my %opt = map {
  my $o = $_;
  map { substr( $o, $_, 1 ) => 1 } 1 .. length($o)-1
} @opt;
if ( @arg < 1 || $opt{'?'} || $opt{h} ) {
  print $usage;
  exit 0;
}
my $file_contents = shift(@arg);
# (At this point, @arg contains the names of the selections files,
# if any.)

# Subroutine: eliminate duplicates from a list, without otherwise
# affecting the list ordering.
sub elimdup (@) {
  my @ret;
  my %already;
  for ( @_ ) {
    push @ret, $_ unless $already{$_};
    ++$already{$_};
  }
  return @ret;
}

# Read in and process cmdsel.txt.
my $parse;
open  F, '<', $file_cmdsel;
  $parse = Cmdsel::parse <F>;
close F;

# Read in the selections.
my %sel;
for ( @arg ) {
  open  F, '<', $_;
    while (<F>) {
      chomp;
      $sel{$_} = 1 if /\S/ && !/^#/;
    }
  close F;
}
print "$_\n" for keys %sel;

# Read in Contents.gz.
my %pkg;
open  F, "-|", "$cmd_zcat $file_contents";
  1 until <F> =~ $mark_contents;
  while (<F>) {
    my( $file, $pkgs ) = /^(\S+)\s+(\S+)\s*$/
      or  die "$0: badly formatted Contents line\n$_\n";
    exists $pkg{$file}
      and die "$0: file $file listed twice\n";
    $pkg{$file} = $pkgs eq '->'
      ? [] : [ elimdup split /\s*,\s*/, $pkgs ];
    $pkg{$file}[$_] =~ /\// or splice @{ $pkg{$file} }, $_, 1
      for reverse 0 .. $#{ $pkg{$file} };
    s/^.*\/// or die "$0: impossible" for @{ $pkg{$file} };
    !@arg || $sel{ $pkg{$file}[$_] } or splice @{ $pkg{$file} }, $_, 1
      for reverse 0 .. $#{ $pkg{$file} };
  }
close F;

# Read in underrides.
my %underride;
if ( !$opt{D} && -e $file_underride ) {
  open  F, '<', $file_underride;
    while (<F>) {
      my( $cmdsect, $pkg ) = /^(\S+)\s+(\S+)$/;
      $underride{$cmdsect} = $pkg;
    }
  close F;
}

# Create an output record for each appropriate cmdsel.txt entry.
my @out0;
for my $ram ( keys %$parse ) {
  my $ram1 = $parse->{$ram};
  my $x    = $ram1->{x};
  for my $cmd ( keys %$x ) {
    next if $cmd eq '...';
    my $cmd1    = $x->{$cmd};
    my $c       = $cmd;
    $c =~ s/^.*\///;
    $c =~ s/\s+-.*$//;
    my $sect    = $cmd1->{sect};
    my $isx     = $cmd1->{isx };
    next if $sect eq $Cmdsel::shell_sect;
    my $sx      = $sect . $isx;
    my $cmdsect = "$cmd($sx)";
    my $f;
    {
      # Find the best candidate file.  A file is a candidate if listed
      # in %pkg.  It is a better candidate if it is also provided by at
      # least one selected package.  It is the best candidate if
      # provided by exactly one selected package.
      my $quality = 0;
      for my $i ( 0 .. $#{ $f{$sx} } ) {
        my $fa = $f{$sx}[$i]{a};
        my $fb = $f{$sx}[$i]{b};
        my $f0 = "$fa$c$fb";
        $pkg{$f0} or next;
        my $q0 = 1;
        my $n0 = @{ $pkg{$f0} };
        if ( $n0 ) {
          ++$q0;
          ++$q0 if $n0 == 1;
        }
        if ( $q0 > $quality ) {
          $f       = $f0;
          $quality = $q0;
        }
        last if $quality >= 3;
      }
    }
    my $pkgs  ; $pkgs   = $pkg{$f} if defined $f;
    my $pkgstr;
    if    ( defined($pkgs) && @$pkgs == 1 ) {
      $pkgstr = $pkgs->[0];
      $underride{$cmdsect}
        and warn "$0: underride for $cmdsect ignored\n";
    }
    elsif ( defined($underride{$cmdsect}) ) {
      $pkgstr = $underride{$cmdsect};
    }
    else {
      $pkgstr = defined($pkgs) && @$pkgs
        ? join( ',', @$pkgs ) : '?';
      warn "$0: missing underride for $cmdsect\n";
    }
    push @out0, {
      sx      => $sx     ,
      cmd     => $cmd    ,
      cmdsect => $cmdsect,
      pkgs    => $pkgs   ,
      pkgstr  => $pkgstr ,
    };
  }
}

my @out = sort {
  $a->{sx     } cmp $b->{sx     } or
  $a->{cmd    } cmp $b->{cmd    } or
  $a->{cmdsect} cmp $b->{cmdsect} or
  $a->{pkgstr } cmp $b->{pkgstr }
} @out0;

printf "%-${width1}s %s\n", "$_->{cmd}($_->{sx})", $_->{pkgstr}
  for @out;

