#!/usr/bin/perl -w
# $Id: SPprep.pl,v 1.14 2007/03/08 14:49:52 rw Exp $

use strict;

$| = 1;
$[ = 0;
my $C = $0; $C =~ s%^.*/%%;

$ENV{"LC_ALL"} = "C";
my $GTv = "1.1.0";
my $rpmO3 = "";
my $rpmO4 = "--nosignature --nodigest";
my $rpmO = ( `rpm -q --qf '%{VERSION}' rpm` =~ /^4/ ) ? $rpmO4 : $rpmO3;

my $debug = 0;

my $prod = "";
my $SO = "-";
my $WD = "";
my $OD = "./data/raw";
my @WDs = (
  "./data/input",
	);

sub Date($) {
  use POSIX qw(strftime);
  strftime $_[0], localtime;
}

my $archRE = q(alpha|i386|ia64|ppc|s390x?|sparc|x86_64);
my $ArchRE = $archRE;
$ArchRE .= q(|IPF|amd64|x86-64|ppc64|i486|i586|i686);
$ArchRE .= q(|noarch|\\\(none\\\));
# fixme: build $ArchRE from $archRE + %iArch

my $distRE = qr(^
    ([^0-9]+)- # Product/Distribution-Names contain no digits
    ([0-9.]+)- # Product/Distribution-Versions contain only digits (& dots)
    (?:(.*?)-)?? # optional string (e.g. "Int", "Developer")
    ($archRE) # known architectures (fixpoint!)
    (?:-(SP[^-]+))? # optional ServicePack identifier (e.g. "SP1" or "SP2a")
    (?:-([^-]+))??$ # optional release id (e.g. "RC" or "beta")
    $ )xo;

my $editionRE = qr(^
    (?:([^:]+):)?	# optional Epoch
    ([^-]+)-		# Version followed by '-'
    ([^-]+)		# Release
    (?:\.$archRE)?	# optionally followed by '.' and Architecture
    $ )xo;

my $ignoreRE = qr((?:SPident|release-notes|gpg-pubkey))x;

my $QF =  q();
my $QP = qr();
$QF .=  q(%{NAME});
$QP .=  q(^(.+) );
$QF .=  q(\t%|EPOCH?{%{EPOCH}:}:{}|%{VERSION}-%{RELEASE});
$QP .=  q([\t-]([^-]+)-(.+?) );
$QF .=  q(\t%|ARCH?{%{ARCH}}:{noarch}|);
$QP .= qq([.\t]($ArchRE) );
$QF .=  q(\t%{BUILDTIME}\t"%{VENDOR}");
$QP .=  q((?:\s+([0-9]+) (?:\s+\" ( (?>[^"]+) | \"[^"]*\" )+ \")?)? );
$QF .=  q(\t);
$QP .=  q(\s*$ );

$QP = qr($QP)xo;
$ArchRE = qr($ArchRE)xo;
$archRE = qr($archRE)xo;

# ( Version comparison "a la RPM" is brought to us by <mls@suse.de> !-)
sub _verscmp($$) {
  my ($s1, $s2) = @_;
  if ( !defined($s1) ) {
    return ( defined($s2) ? -1 : 0 );
  }
  return ( 1 ) if !defined $s2;
  return ( 0 ) if $s1 eq $s2;
  while ( 1 ) {
    $s1 =~ s/^[^a-zA-Z0-9]+//;
    $s2 =~ s/^[^a-zA-Z0-9]+//;
    my ($x1, $x2, $r);
    if ($s1 =~ /^([0-9]+)(.*?)$/) {
      ($x1, $s1) = ($1, $2);
      ($x2, $s2) = $s2 =~ /^([0-9]*)(.*?)$/;
      return ( 1 ) if $x2 eq '';
      $x1 =~ s/^0+//;
      $x2 =~ s/^0+//;
      $r = length($x1) - length($x2) || $x1 cmp $x2;
    } else {
      ($x1, $s1) = $s1 =~ /^([a-zA-Z]*)(.*?)$/;
      ($x2, $s2) = $s2 =~ /^([a-zA-Z]*)(.*?)$/;
      return ( -1 ) if $x1 eq '' || $x2 eq '';
      $r = $x1 cmp $x2;
    }
    return ( $r ) if $r;
    if ($s1 eq '') {
      return ( ($s2 eq '') ? 0 : -1 );
    }
    return ( 1 ) if $s2 eq ''
  }
}

sub CMPversions($$) {
  my ($evr1, $evr2) = @_;
  my $r;

  my ($e1, $v1, $r1) = $evr1 =~ /^(?:(\d*):)?(.*?)(?:-([^-]*))?$/;
  my ($e2, $v2, $r2) = $evr2 =~ /^(?:(\d*):)?(.*?)(?:-([^-]*))?$/;
  $e1 = "0" if defined($e1) && $e1 eq '';
  $e2 = "0" if defined($e2) && $e2 eq '';
  if ( defined($e1) || defined($e2) ) {
    $r = _verscmp(defined($e1) ? $e1 : "0", defined($e2) ? $e2 : "0");
    return ( $r ) if $r;
  }
  $r = _verscmp($v1, $v2);
  return ( $r ) if $r;
  return ( _verscmp($r1, $r2) );
}

sub breakD($) {
  my($aD, $aV, $aP, $aA, $aS, $aR);
  if ( ($aD, $aV, $aP, $aA, $aS, $aR) = ($_[0] =~ /$distRE/o) ) {
    $aP ||= "";
    $aS ||= "";
    $aR ||= "zzzz";
    $aR = "zzzy" . $aR if ( $aR =~ /^(GA|FCS)$/ );
    $aR = "zzzx" . $aR if ( $aR =~ /^RC/ );
    return ( ($aD, $aV, $aP, $aA, $aS, $aR) );
  } else {
    return undef;
  }
}

sub checkD($$) {
  my($aD, $aV, $aP, $aA, $aS, $aR) = breakD($_[0]);
  my $d = $_[1];
  my $aRp;
  if ( defined($aD) ) {
    $aRp = substr( $aR, 0, 1);
    $aR = substr( $aR, 1);
    if ($d ) {
      printf( STDERR
	      "Dist=\t\t$aD\n" .
	      " Version=\t$aV\n" .
	      " Add=\t\t$aP\n" .
	      " Arch=\t\t$aA\n" .
	      " SP=\t\t$aS\n" .
	      " Release=\t$aR (ord=%d)\n" .
	      "", ord($aRp));
    }
    return ( 1 );
  } else {
    print STDERR "Warning! Ignoring <$_[0]> ...\n";
    return ( 0 );
  }
}

sub dist2float($$$$);
my $d2fmsg = "d2f(";

sub dist2float($$$$){
  my($n, $v, $p, $s) = @_;
  my $r = 0;
  my $so = 100;
  my $spe = 0;
  my %match = (
    "SLES-9-NLPOS-9-"		=>  ["SLES", "9", "", "SP1"],
    "SLES-9-OES-9-"		=>  ["SLES", "9", "", "SP1"],
  );
  my %order = (
    "UnitedLinux"	=>  7.0 - (1 / ($so ** 2)),
    "CORE"		=>  0.0 - (1 / ($so ** 2)),
  );
  $d2fmsg .= "$n-$v-$p-$s)";
  if ( "$p" && exists( $match{"$n-$v-$p-$s"}) ) {
    my ($bn, $bv, $bp, $bs) = @{ $match{"$n-$v-$p-$s"} };
    $d2fmsg .= " == (";
    return ( dist2float( $bn, $bv, $bp, $bs) );
  } elsif ( "$p" && exists( $match{"$n-$v-$p-"}) ) {
    my $d = $debug;
    my ($bn, $bv, $bp, $bs) = @{ $match{"$n-$v-$p-"} };
    $debug = 0;
    $d2fmsg .= " => <(";
    $r = dist2float( $bn, $bv, $bp, $bs);
    $debug = $d;
    $d2fmsg .= ">";
  } else {
    my @S = split( /[[:^alnum:]]/, $v);
    my $sn = $S[0];
    $sn =~ s{^([0-9]*).*}{$1};
    if ( exists( $order{"$n-$S[0]"}) ) {
      $r = $order{"$n-$S[0]"};
    } elsif ( exists( $order{"$n"})) {
      $r = $order{"$n"} + $sn;
    } else {
      $r = $sn;
    }
    for my $i ( 1..$#S ) {
      $r += ($S[$i] / ($so ** $i));
    }
    $spe = $#S;
    $d2fmsg .= " [$spe]";
  }
  if ( $s =~ m{^SP([0-9]*)([a-z]?)$} ) {
    my ($sv, $sa) = ($1, $2);
    if ( defined( $sa) && "$sa" ) {
      $sa = (ord($sa) - ord('a') + 1) / 10;
    } else {
      $sa = 0.0;
    }
    if ( ! defined( $sv) || ! "$sv" ) {
      $sv = 1.0;
    }
    $r += ( $sv + $sa ) / ($so ** ($spe + 1));
  }
  if ( $debug ) {
    printf STDERR "%-56s = %.14f\n", $d2fmsg, $r;
    $d2fmsg = "d2f(";
  } else {
    $d2fmsg .= sprintf " = %.2f", $r;
  }
  return ( $r );
}


sub IsOK($) {
  my( $f) = @_;
  my( $v);
  if ( -r $f ) {
    open( CHECK, "< $f") || die("open($f): $!\n");
    $v = <CHECK>;
    chomp $v;
    if ( $v !~ /^\#\# / ) {
      print STDERR "$f: prehistoric: re-new!\n";
      return 0;
    } elsif ( $v =~ /^\#\# V([0-9.]+) --/ && CMPversions($1, $GTv) < 0 ) {
      print STDERR "$f V$1: outdated (< $GTv): re-fresh!\n";
      return 0;
    } else {
      my $fv = $1;
      my $s = "$f ";
      if ( ! defined( $fv) ) {
	$s .= "'$v': unknown format (expected V$GTv)";
      } elsif ( CMPversions($fv, $GTv) == 0 ) {
	$s .= "V$fv == $GTv: up-to-date";
      } else {
	$s .= "V$fv: newer?! (compared to V$GTv)";
      }
      print STDERR "$s: keep!\n";
      return 1;
    }
  }
}

sub process($$) {
  my ($d, $O) = @_;
  my $c = 0;
  my $ignPatchTree;
  my ($s, $o) = ($d, $O);
  my %P = ();

  if ( "$O" =~ m{^([-/])(.+)$} ) {
    if ( $1 eq "-" ) {
      $O = "-";
    } else {
      $O = "$2";
    }
    $s = $2;
  }
  $ignPatchTree = ($O !~ /^SLES-7/);
  if ( $prod ) {
    $s = $prod;
  }
  open( FIND, "find $d/. -type f -name '*.rpm' -not -name '*src.rpm' " .
	"-printf '\%P\n' |") || die( "open(find rpms|): $!\n");
  if ( "$O" eq "-" ) {
    $o = "<stdout>";
    open( OUT, ">& STDOUT") || die( "re-open($o): $!\n");
  } else {
    $o = "$OD/$O";
    open( OUT, "> $o") || die( "open($o): $!\n");
  }

  $s =~ s,/(sles|core)[0-9]*-($ArchRE)$,,o;
  $s =~ s,^.*/,,;
  print OUT "## V$GTv -- $s\n" || die( "write($o): $!\n");
  while ( <FIND> ) {
    chomp;
    my $p = "$d/$_";
    next unless ( -r $p );
    if ( ! m(^[^/]+/(C|DV)D[0-9]/($ArchRE/update|suse))xo &&
	 ! m(^yast2-update/)xo &&
	 ! m(^(?:(C|DV)D\d+/)?($ArchRE/update|suse|UnitedLinux))xo ) {
      print STDERR "   discarding: $_\n";
      next;
    } elsif ( m(^.*/$ignoreRE)xo ) {
      #print STDERR "   ignoring: $_\n";
      next;
    }

    $o = `rpm -qp $rpmO --qf '$QF' $p`;
    if ( $o !~ m($QP)o ) {
      print STDERR "$p: invalid input ignored\n";
      next;
    }
    if ( exists( $P{$o}) ) {
      $P{$o} .= ",$_";
    } else {
      $P{$o} = "$_";
    }
  }
  foreach $o ( sort( keys( %P)) ) {
    print OUT "$o$P{$o}\n";
    $c++;
  }
  close( OUT);
  if ( $c ) {
    print STDERR "$c packages processed\n";
  } else {
    print STDERR "no packages processed  =>  deleted...\n";
    unlink( "$OD/$O") || die( "unlink($OD/$O): $!\n");
  }
}


sub recurse(@) {
  my( @wds ) = @_;


  foreach my $s ( @_ ) {
    $s .= "/" if ( -d $s );

    foreach my $d ( <$s*> ) {
      next if ( ! -d $d || ! -x $d || $d eq "iso" );
      my( $P, $p) = ( $d =~ m:^(.+)/([^/]+)$: );
      next unless checkD( $p, $debug & 1);

      print STDERR Date( '%Y-%m-%dT%T') . ":  $p\n";
      next if ( IsOK( "$OD/$p") );
      process( $d, $p);
    }
  }
  print STDERR Date( '%Y-%m-%dT%T') . ": *** fin ***\n";
}

{
  use Getopt::Long;
  use Pod::Usage;
  $Getopt::Long::debug = 0;
  $Getopt::Long::ignorecase = 0;
  $Getopt::Long::bundling = 1;
  $Getopt::Long::passthrough = 0;
  my %Opt = ();

  pod2usage(2)  unless ( GetOptions( \%Opt,
     'help|h', 'dir|D=s', 'product|P=s', "db-dir|d=s") && ! $Opt{'help'} );

  pod2usage(1) if ( $Opt{'help'} );
  $WD = $Opt{'dir'} if ( $Opt{'dir'} );
  if ( $Opt{'product'} ) {
    $prod = $Opt{'product'};
    if ( $Opt{'db-dir'} ) {
      $OD = $Opt{'db-dir'};
      $SO = "/" . $prod;
    } else {
      $SO = "-" . $prod;
    }
  } elsif ( $Opt{'db-dir'} ) {
    pod2usage(2);
  }
}

if ( $WD ) {
  print STDERR "$C: process( '$WD', '$SO')\n";
  process( $WD, $SO);

} else {
  @WDs = @ARGV if $#ARGV >= $[;

  print STDERR "$C: recurse( ", join( ", ", @WDs), ")\n";
  recurse( @WDs);
}
__END__

=head1 NAME

SPprep - Create Database Files for SPident

=head1 SYNOPSIS

SPprep [--dir=single-directory [--product=name [--db-dir=dir]]]
       [directory containing "products" ...]

 Options:
   -h --help            brief help message
   -D --dir=single      create database for a single product only
                        with output going to STDOUT
   -P --product=name    explicitely name product (instead of deriving from
                        the directory name)
   -d --db-dir=dir      drop output in "dir/name" (instead of STDOUT)

=cut
