# Arch Perl library, Copyright (C) 2004 Mikhael Goikhman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use 5.005;
use strict;

package AXP::Command::revlib::prune;
use base 'AXP::Command::revlib';

use Arch::Util qw(run_tla run_cmd);
use Arch::Library;
use Arch::SharedIndex;

sub infoline ($) {
	return "remove revlib revisions using one or another algo";
}

sub options ($) {
	(
		number  => { sh => 'n', desc => "leave ARG revisions, prune all others", type => '=i' },
		size    => { sh => 's', desc => "leave ARG bytes, prune other revisions", type => '=s' },
		all     => { sh => 'a', desc => "prune all revisions" },
		test    => { sh => 't', desc => "do not actually remove anything" },
		verbose => { sh => 'v', desc => "be more verbose", type => '+' },
		quiet   => { sh => 'q', desc => "be quiet" },
		sort    => { sh => 'o', desc => "sort by ARG, see below", type => '=s' },
		reverse => { sh => 'r', desc => "reverse sorted revision list" },
	);
}

sub helptext ($) {
	q{
		Revision libraries are very handy, but occupy disk space.
		It may be useful to keep the library of the constant size.

		One of the options --number, --size, --all should be given.

		Names of the revision fields to be used in the weighted sort:
			default|quasi-time, time|mtime, ctime, atime,
			btime (cache beginning time), name, size, random

		Examples:
			# keep the size of revision library at 50Mb at most 
			axp revlib prune --size 50MB

			# keep at most 100 recent revisions in revision library
			axp revlib prune --number 100 --quiet

			# don't actually remove, list revisions to be pruned
			axp revlib prune --number 40 --test --verbose

			# a fancy way to report the number of library revisions
			axp revlib prune --test --all --sort random -v -v

		More examples:
			axp revlib prune --size 2GB --sort name --reverse
			axp revlib prune -s 2GB -o time -v -v

		This command may be run hourly or daily from a cronjob, like:
			14 0/3 * * * /path/to/axp revlib prune -q -n 2000
			14 1/3 * * * /path/to/axp revlib prune -q -s 5GB
			14 2/3 * * * /path/to/axp revlib cleanup -q

		The exit status is 0 if any directories to be cleaned up are
		found, and 1 otherwise.
	};
}

# optimized, don't evaluate something twice, or something that is not requested
sub sort ($$) {
	my $self = shift;
	my $index = shift;
	my %opt = %{$self->{options}};

	my $sort_name = $opt{sort} || "default";
	my %used_attributes = ();
	my $weight_sub = undef;
	my $cmp_sub = sub { $_[1] <=> $_[0] };

	if ($opt{size}) {
		$used_attributes{dir} = 1;
		$used_attributes{size} = 1;
	}
	if ($sort_name =~ /^d|default|q|quasi|quasi-time$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{mtime} = 1;
		$weight_sub = sub { $_[1]->{mtime} + rand(4 * 24 * 60 * 60) };
	}
	elsif ($sort_name =~ /^t|time|m|mtime$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{mtime} = 1;
		$weight_sub = sub { $_[1]->{mtime} };
	}
	elsif ($sort_name =~ /^c|ctime$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{ctime} = 1;
		$weight_sub = sub { $_[1]->{ctime} };
	}
	elsif ($sort_name =~ /^a|atime$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{atime} = 1;
		$weight_sub = sub { $_[1]->{atime} };
	}
	elsif ($sort_name =~ /^b|btime$/i) {
		$weight_sub = sub { $_[1]->{btime} };
	}
	elsif ($sort_name =~ /^s|size$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{size} = 1;
		$weight_sub = sub { $_[1]->{size} };
		$cmp_sub = sub { $_[0] <=> $_[1] };
	}
	elsif ($sort_name =~ /^n|name$/i) {
		$weight_sub = sub { $_[0] };
		$cmp_sub = sub { $_[0] cmp $_[1] };
	}
	elsif ($sort_name =~ /^r|random$/i) {
		$weight_sub = sub { rand(100000) };
	}
	else {
		die "Unrecognized sort name ($sort_name), supported:\n\t" .
			"default/quasi-time, time/mtime, ctime, atime, btime, name, size, random\n";
	}
	my $update_sub = sub {
		my $revision = shift;
		my $attributes = shift;
		if ($used_attributes{dir} && !$attributes->{dir}) {
			($attributes->{dir}) = run_tla("library-find", $revision);
			die "Internal error: no reported library revision $revision\n"
				unless $attributes->{dir};
		}
		if ($used_attributes{size} && !$attributes->{size}) {
			my $du_output = run_cmd("du", "-sk", $attributes->{dir});
			die "Unexpected output ($du_output) of [du -sk $attributes->{dir}]\n"
				unless $du_output =~ /^(\d+)/;
			$attributes->{size} = $1;
		}
		if ($used_attributes{atime}) {
			$attributes->{atime} = $^T - 24 * 60 * 60 * (-A $attributes->{dir});
		}
		if ($used_attributes{mtime} && !$attributes->{mtime}) {
			$attributes->{mtime} = $^T - 24 * 60 * 60 * (-M $attributes->{dir});
		}
		if ($used_attributes{ctime} && !$attributes->{ctime}) {
			$attributes->{ctime} = $^T - 24 * 60 * 60 * (-C $attributes->{dir});
		}
		return $attributes;
	};
	$index->update(
		$update_sub,
		sub {
			foreach (keys %used_attributes) {
				return 1 unless defined $_[1]->{$_} && $_ ne "atime";
			}
			return 0;
		}
	);

	my $revision_attributes = $index->hash;
	my %weights = ();
	while (my ($revision, $attributes) = each %$revision_attributes) {
		$weights{$revision} = $weight_sub->($revision, $attributes);
	}

	my @revisions = sort { &$cmp_sub($weights{$a}, $weights{$b}) } keys %$revision_attributes;
	@revisions = reverse @revisions if $opt{reverse};
	return \@revisions;
}

sub execute ($) {
	my $self = shift;
	my %opt = %{$self->{options}};

	if ($opt{size}) {
		$opt{size} =~ /^(\d+)(|kb|mb|gb)$/i or
			$self->show_usage(error => "Invalid size $opt{size}");
		my $factor = {"" => 1, kb => 1024, mb => 1 << 20, gb => 1 << 30}
			->{lc($2)};
		$opt{size} = $1 * $factor;
	}

	unless ($opt{number} || $opt{size} || $opt{all}) {
		$self->show_usage(error => "None of the --number, --size, --all options given");
	}

	my $dir = $self->setup_config_dir;
	my $cache_file = "$dir/revlib-cache";
	my $index = Arch::SharedIndex->new(
		file => $cache_file,
		perl_data => 1,
	);

	my $library = Arch::Library->new;
	my $all_revisions = $library->expanded_revisions;
	my %all_revisions = map { $_ => 1 } @$all_revisions;
	$index->filter(sub { !$all_revisions{$_[0]} });
	$index->fetch_store(
		{ btime => time() }, $all_revisions
	);
	$all_revisions = $self->sort($index);

	my $limit;
	if ($opt{number}) {
		$limit = $opt{number};
	}
	if ($opt{size}) {
		my $revision_attributes = $index->hash;
		my $ndx = 0;
		my $used_size = 0;
		while ($ndx < @$all_revisions) {
			my $revision = $all_revisions->[$ndx];
			my $rev_size = $revision_attributes->{$revision}->{size} * 1024;
			last unless $used_size + $rev_size <= $opt{size};
			$used_size += $rev_size;
			$ndx++;
		}
		$limit = $ndx;
	}
	if ($opt{all}) {
		$limit = 0;
	}
	my $revisions_to_prune = [ @$all_revisions[ $limit .. @$all_revisions - 1 ] ];

	print "* test, no real revisions are removed\n"
		if $opt{test} && !$opt{quiet};
	print "* going to prune ", scalar @$revisions_to_prune, " out of ",
		scalar @$all_revisions, " revisions\n" unless $opt{quiet};
	if ($opt{verbose} >= 2) {
		print "** keeping $_\n" foreach @$all_revisions[0 .. $limit - 1];
	}
	foreach my $revision (@$revisions_to_prune) {
		print "** removing $revision\n" if $opt{verbose};
		run_tla("library-remove", $revision) unless $opt{test};
	}
	$index->delete($revisions_to_prune) unless $opt{test};
	print "* done, ", @$all_revisions - @$revisions_to_prune,
		" revisions are left in the library\n" unless $opt{quiet};
}

1;
