package Lire::Time;

use strict;

use Time::Local;
use Time::Timezone;

# Package globals read by Exporter; declared through "use vars" so that
# they can be used under "use strict".
use vars qw/ @ISA @EXPORT/;
BEGIN {
    # Load Exporter and fill in the export list at compile time.
    require Exporter;
    @ISA = qw/ Exporter /;
    # Every function listed here is exported to the caller by default.
    @EXPORT = qw/ date2cal syslog2cal clf2cal cal2rfc cal2ymdhms getMonthName/;
}

=pod

=head1 NAME

Lire::Time - parses and prints dates in formats common to many log files.

=head1 SYNOPSIS

    use Lire::Time qw/ syslog2cal /;

    my @ltime = localtime;

    while ( <LOGFILE> ) {
	#...
	my $time = syslog2cal( $m, $d, $t, \@ltime );
    }

=head1 DESCRIPTION

This module supplies many functions to parse dates in formats that you
are likely to encounter in log files. It also offers several functions to
format epoch time in useful formats.

=head2 NOTE ABOUT FUNCTION EXPORT

Although all documented functions are exported by default to the caller
namespace, you should explicitly import the functions you require
since exporting by default isn't recommended by the perl modules
guidelines.

=cut

# Lookup table: English three-letter month abbreviation => localtime(3)
# tm_mon value (0 for Jan up to 11 for Dec).
my %monthnumbers;
@monthnumbers{ qw/ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec / }
  = ( 0 .. 11 );

# Lookup table: localtime(3) tm_mon value (0 .. 11) => English
# three-letter month abbreviation.
my %monthnames;
@monthnames{ 0 .. 11 }
  = qw/ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec /;



# Lookup table: localtime(3) tm_wday value (0 is Sunday) => English
# three-letter day abbreviation.
my %daynames;
@daynames{ 0 .. 6 } = qw/ Sun Mon Tue Wed Thu Fri Sat /;

#
# guess_tm_year( $month, $ltime )
#
# Make an educated guess about the year of a log entry whose timestamp
# does not record it (syslog, for instance).
#
#   $month  month of the log entry in tm_mon format: 0 <= month <= 11
#   $ltime  reference to an array as returned by localtime(), giving the
#           current time; only elements 4 (tm_mon) and 5 (tm_year) are
#           used
#
# Returns the guessed year as a number of years since 1900 (tm_year
# format).  Dies when called with invalid arguments.
#
sub guess_tm_year
{
    my $sub = 'guess_tm_year';

    die "$sub give 2 args"
      unless @_ == 2;

    my ( $month, $ltime ) = @_;

    die "$sub month '$month' should be in [0..11]"
      unless $month =~ /^\d+$/ && $month >= 0 && $month <= 11;

    # Test for definedness, not truth: a tm_year of 0 (the year 1900) is a
    # legal value and is accepted by the range check further down.
    die "$sub second arg should pointer to array as returned by localtime()"
      unless ref $ltime eq 'ARRAY' && defined $ltime->[5];

    my $nowmonth = $ltime->[4];
    die "$sub ltime's month '$nowmonth' should be in [0..11]"
      unless $nowmonth =~ /^\d+$/ && $nowmonth >= 0 && $nowmonth <= 11;

    my $nowyear = $ltime->[5]; # number of years since 1900
    die "$sub ltime's year '$nowyear' should be >= 0"
      unless $nowyear =~ /^\d+$/ && $nowyear >= 0;

    # Both months are in 0 .. 11, so -11 <= $diffmonth <= 11.
    my $diffmonth = $nowmonth - $month;

    #  $diffmonth   meaning                                    guessed year
    #  -11 .. -6    log month is 6 or more months ahead of     previous year
    #               the current month: it must be an old entry
    #   -5 .. 10    log month is at most 5 months ahead or     current year
    #               at most 10 months behind
    #         11    current month is December, log month is    next year
    #               January: a log "from the future", e.g.
    #               written in a more eastern timezone
    if ( $diffmonth <= -6 ) {
        return $nowyear - 1;
    } elsif ( $diffmonth <= 10 ) {
        return $nowyear;
    } else {
        return $nowyear + 1;
    }
}

#
# syslog2tm( $month, $day, $time, $tm )
#
# Convert the pieces of a timestamp like "Mar 20 09:13:32" to numeric
# fields.
#
#   $month  English month abbreviation (e.g. May); case is ignored, so
#           e.g. jan is tolerated
#   $day    day of the month, possibly 0 padded
#   $time   time as hh:mm:ss or hh:mm:ss.sss
#   $tm     reference to an array which receives the result:
#           ( $tm_sec, $tm_min, $tm_hour, $tm_day, $tm_month )
#
# Returns 1.  Dies when the time or the month name cannot be parsed.
#
sub syslog2tm
{
    my $sub = 'syslog2tm';

    die "$sub give 4 args"
      unless (@_ == 4);

    my ( $month, $day, $time, $tm ) = @_;

    # An optional fractional part of the seconds is accepted but ignored.
    my ( $hour, $min, $sec ) = $time =~ /^(\d\d):(\d\d):(\d\d)(?:\.\d+)?$/
      or die "$sub time '$time' should be hh:mm:ss";

    # The keys of %monthnumbers are capitalised ("Jan"): normalise the
    # name the same way, so that a single hash lookup is case-insensitive.
    my $monthkey = ucfirst lc $month;
    die("$sub cannot get monthnumber from monthname '$month'")
      unless exists $monthnumbers{$monthkey};

    # Adding 0 converts the strings to numbers, which gets rid of leading
    # zeros.
    @{ $tm } = (
        $sec + 0,
        $min + 0,
        $hour + 0,
        $day + 0,
        $monthnumbers{$monthkey},
    );

    return 1;
}

# Convert a time zone difference as described in iso 8601:1988 5.3.3.1 to
# a signed number of seconds, see
# http://doc.mdcc.cx/~joostvb/doc/iso-8601:1988-representation_of_dates_and_times.pdf
#
# Accepted forms are +hh:mm, +hhmm and +hh (e.g. +0100 or -09:00); a
# missing sign means '+'.  Dies on anything else.
sub tzdiff2sec
{
    @_ == 1
      or die "tzdiff2sec needs 1 arg\n";

    my ($tzdiff) = @_;

    my @fields = $tzdiff =~ /^([+-])?(\d\d):?(\d\d)?$/;
    die "invalid tzdiff format: $tzdiff. It must looks like +0100 or -01:00\n"
      unless @fields;

    my ( $sign, $hours, $minutes ) = @fields;

    # The sign and the minutes are optional parts of the format.
    my $direction = ( defined $sign && $sign eq '-' ) ? -1 : 1;
    $minutes = 0 unless defined $minutes;

    return $direction * ( $hours * 60 * 60 + $minutes * 60 );
}

=pod

=head1 DATE PARSING FUNCTIONS

This module includes several functions that convert between a more
human readable date format and UNIX epoch time. All parsing functions
will return the number of seconds since Jan 1 1970 00:00:00 UTC and
will die() when passed invalid arguments.

=head2 date2cal()

    my $time = date2cal( $year, $month, $day, $time, [$timezone] );
    my $time = date2cal( "2001 Mar 20 09:32:29 +0100" );

This function will convert a date in the date(1) default output format
to UNIX epoch time. The function accepts the date either as a single
string or already split on whitespace. If the timezone component is
omitted, the local timezone is assumed (usually based on the value of
the TZ environment variable).

=cut

# Tue, 20 Mar 2001 09:46:40 +0100   is  Tue Mar 20 08:46:40 UTC 2001
#
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html says:
#
# 12:00 UTC (aka 12:00Z) = 13:00+01:00 = 0700-0500
#
# There exists no international standard that specifies abbreviations for
# civil time zones like CET, EST, etc.
#
# nice to handle apache stuff like [18/Mar/2001:16:00:26 +0100]
#
# date2cal( $year, $month, $day, $time, [$tzdiff] )
# date2cal( "2001 Mar 20 09:32:29 +0100" )
#
#   $year    four digit year
#   $month   English month abbreviation (e.g. Mar)
#   $day     day of the month
#   $time    time as hh:mm:ss
#   $tzdiff  optional difference with UTC: +hh:mm, +hhmm or +hh
#            (e.g. +0100 or -0900)
#
# Returns the number of seconds since the epoch.  Dies on invalid
# arguments.
sub date2cal
{
    my $sub = 'date2cal';

    # A single argument holds the whole date.  split ' ' (unlike
    # split /\s+/) doesn't produce an empty first field when the string
    # starts with whitespace.
    @_ = split ' ', $_[0] if @_ == 1;

    die "$sub give 1, 4 or 5 args" if @_ < 4;

    my ( $year, $month, $day, $time, $tzdiff ) = @_;

    my $tzsec = defined $tzdiff ? tzdiff2sec( $tzdiff ) : undef;

    # ( $tm_sec, $tm_min, $tm_hour, $tm_day, $tm_month, $tm_year )
    my @tm;
    syslog2tm( $month, $day, $time, \@tm );
    push @tm, $year - 1900;

    # No explicit timezone: the fields are local time.  timelocal() applies
    # the offset that is valid on that date; subtracting the offset which
    # is in effect right now would be wrong for a date in the other
    # daylight saving period.
    return timelocal( @tm )
      unless defined $tzsec;

    # The fields are $tzsec seconds ahead of UTC: read them as UTC and
    # correct for the difference.
    return timegm( @tm ) - $tzsec;
}

=pod

=head2 syslog2cal()

    my $time = syslog2cal( $month, $day, $time, $local_tm_ref );
    my $time = syslog2cal( "Mar 11 13:21:00", $local_tm_ref );

This function will convert a date in the syslog default output format
to UNIX epoch time. The function accepts the date either as a single
string or already split on whitespace.

Since the syslog format doesn't contain timezone information, the
local timezone is assumed (usually determined by the TZ environment
variable).

The last argument is a reference to an array returned by localtime().

    my $local_tm_ref = [localtime()];

It is used to determine the year.

=cut

# syslog2cal( $month, $day, $time, $ltime )
# syslog2cal( "Mar 11 13:21:00", $ltime )
#
# Convert a syslog timestamp, interpreted as local time, to seconds since
# the epoch.  $ltime is a reference to an array as returned by localtime();
# it is handed to guess_tm_year() to find the year, which the syslog
# format lacks.
sub syslog2cal
{
    my $sub = 'syslog2cal';

    # Two argument form: explode the date string in month, day and time.
    my @args = @_ == 2 ? ( split( /\s+/, $_[0] ), $_[1] ) : @_;

    die("$sub give 4 args") unless @args == 4;

    my ( $month, $day, $time, $ltime ) = @args;

    # @fields gets ( sec, min, hour, day, month ); month is in tm_mon format
    my @fields;
    syslog2tm( $month, $day, $time, \@fields );

    # years since 1900
    my $guessed_year = guess_tm_year( $fields[4], $ltime );

    return timelocal( @fields, $guessed_year );
}

=pod

=head2 clf2cal()

    my $time = clf2cal( "[18/Mar/2001:15:59:30 +0100]" );

This function will convert a date as found in Common Log Format to
UNIX epoch time.

=cut

# clf2cal( "[18/Mar/2001:15:59:30 +0100]" )
#
# Take a Common Log Format timestamp apart and let date2cal() convert it.
# Dies when the string doesn't have the expected layout.
sub clf2cal
{
    my $sub = 'clf2cal';

    die "$sub take 1 arg\n"
      unless (@_ == 1);

    my $stamp = $_[0];

    if ( $stamp =~ /^\[(\d+)\/(\w+)\/(\d{4}):(\d{2}:\d{2}:\d{2})\s+([-+][:\d]+)\]$/ ) {
        my ( $dom, $month, $year, $hms, $tzdiff ) = ( $1, $2, $3, $4, $5 );

        return date2cal( $year, $month, $dom, $hms, $tzdiff );
    }

    die "$sub time '$stamp' should be something like [18/Mar/2001:15:59:30 +0100]\n";
}

=pod

=head1 DATE FORMATTING FUNCTIONS

This module includes some functions to convert date in UNIX epoch time to
some more human readable output. All functions will die() when passed
invalid arguments.

=head2 cal2rfc()

    print cal2rfc( $time );

This function will convert a date in UNIX epoch time to the RFC822 format
(used in email, for example). A RFC822 date looks like

    Wed, 30 May 2001 12:45:13 +0000

The timezone offset specification will correspond to the local
timezone (usually determined by the TZ environment variable).

=cut

# cal2rfc( $time )
#
# Format $time (seconds since the epoch) as a RFC822 date in the local
# timezone, e.g. "Wed, 30 May 2001 12:45:13 +0000".  Dies unless called
# with exactly one argument.
sub cal2rfc
{
    my $sub = 'cal2rfc';

    die("$sub give 1 arg")
      unless @_ == 1;

    my $time = $_[0];

    my ( $tm_sec, $tm_min, $tm_hour, $tm_mday, $tm_mon, $tm_year, $tm_wday )
      = localtime $time ;

    # Ask for the offset that applies at $time, so that the zone matches
    # the fields above even when $time lies in another daylight saving
    # period than the current time.
    # NOTE(review): relies on Time::Timezone's tz_local_offset() taking an
    # optional epoch time -- confirm against the installed version.
    my $off = tz_local_offset( $time );

    # Split the offset in a sign, whole hours and the remaining minutes.
    # Working on the absolute value keeps int() and % consistent for zones
    # west of UTC and for zones which aren't a whole number of hours away
    # (e.g. +0530, -0330).
    my $abs_off = abs $off;
    my $tzoff = sprintf( "%s%02d%02d",
                         ( $off < 0 ? '-' : '+' ),
                         int( $abs_off / 3600 ),
                         int( ( $abs_off % 3600 ) / 60 ) );

    return sprintf( "%s, %d %s %d %02d:%02d:%02d %s",
                    $daynames{$tm_wday}, $tm_mday, $monthnames{$tm_mon},
                    $tm_year + 1900, $tm_hour, $tm_min, $tm_sec, $tzoff );
}

=pod

=head2 cal2ymdhms()

    print cal2ymdhms( $time );

This function converts a date in UNIX epoch time to a string of the form:

    YYYYMMDDHHMMSS

This representation corresponds to the time in the local timezone (usually
determined by the TZ environment variable).

=cut

# cal2ymdhms( $time )
#
# Format $time (seconds since the epoch) as YYYYMMDDHHMMSS in the local
# timezone.  Dies unless called with exactly one argument.
sub cal2ymdhms
{
    die "cal2ymdhms give 1 arg"
      unless @_ == 1;

    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime $_[0];

    # localtime() counts months from 0; every field after the year is
    # padded to two digits.
    my @padded = map { $_ < 10 ? "0$_" : "$_" }
      ( $mon + 1, $mday, $hour, $min, $sec );

    # localtime() counts years from 1900.
    return join '', $year + 1900, @padded;
}


=pod

=head2 getMonthName()

    print getMonthName( 0 ); # Gives 'Jan'

This function takes as parameter a number (0-11) representing the
month (as returned by localtime() for example) and will return the
English abbreviated name of that month ( Jan, Feb, etc. ).

=cut

# getMonthName( $month )
#
# Return the English abbreviated name (Jan, Feb, ...) of the month whose
# number (0 .. 11, as in localtime()'s tm_mon) is given.  Dies unless
# called with exactly one argument which is an integer in that range.
sub getMonthName {
    my $sub = 'getMonthName';

    die "$sub take 1 arg\n"
      unless (@_ == 1);

    my ( $month ) = @_;

    # Insist on a plain non-negative integer.  A numeric range test alone
    # lets values like 1.5, "abc" or undef through, for which the lookup
    # below would silently return undef.
    die "$sub month should be between 0 and 11"
      unless defined $month && $month =~ /^\d+$/ && $month <= 11;

    # Adding 0 turns forms like "03" into the hash key "3".
    return $monthnames{ $month + 0 };
}


# keep perl happy
1;

__END__

=pod

=head1 VERSION

$Id: Time.pm,v 1.37 2002/02/09 20:41:13 flacoste Exp $

=head1 COPYRIGHT

Copyright (C) 2000-2002 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Joost van Baal <joostvb@logreport.org>

=cut


