package Data::Password::zxcvbn::Match::Date;
use Moo;
with 'Data::Password::zxcvbn::Match';
use List::AllUtils 0.14 qw(max min_by);
our $VERSION = '1.1.3'; # VERSION
# ABSTRACT: match class for digit sequences that look like dates


my $MIN_YEAR_SPACE = 20;
my $REFERENCE_YEAR = 2017;

has year => ( is => 'ro', required => 1 );
has separator => ( is => 'ro', default => '' );


sub estimate_guesses {
    my ($self, $min_guesses) = @_;

    # base guesses: (year distance from REFERENCE_YEAR) * num_days * num_years
    my $year_space = max(abs($self->year - $REFERENCE_YEAR),$MIN_YEAR_SPACE);
    my $guesses = $year_space * 365;
    # add factor of 4 for separator selection (one of ~4 choices)
    $guesses *=4 if $self->separator;

    return $guesses;
}


my $MAYBE_DATE_NO_SEP_RE = qr{\A ([0-9]{4,8}) \z}x;
my $MAYBE_DATE_WITH_SEP_RE = qr{\A ([0-9]{1,4}) ([\s/\\_.-]) ([0-9]{1,2}) \2 ([0-9]{1,4}) \z}x;
my $MAX_YEAR = 2050;
my $MIN_YEAR = 1000;
my %SPLITS = (
    4 => [  # for length-4 strings, eg 1191 or 9111, two ways to split:
        [1, 2],  # 1 1 91 (2nd split starts at index 1, 3rd at index 2)
        [2, 3],  # 91 1 1
    ],
    5 => [
        [1, 3],  # 1 11 91
        [2, 3],  # 11 1 91
    ],
    6 => [
        [1, 2],  # 1 1 1991
        [2, 4],  # 11 11 91
        [4, 5],  # 1991 1 1
    ],
    7 => [
        [1, 3],  # 1 11 1991
        [2, 3],  # 11 1 1991
        [4, 5],  # 1991 1 11
        [4, 6],  # 1991 11 1
    ],
    8 => [
        [2, 4],  # 11 11 1991
        [4, 6],  # 1991 11 11
    ],
);

sub make {
    my ($class, $password) = @_;
    # a "date" is recognized as:
    # * any 3-tuple that starts or ends with a 2- or 4-digit year,
    # * with 2 or 0 separator chars (1.1.91 or 1191),
    # * maybe zero-padded (01-01-91 vs 1-1-91),
    # * a month between 1 and 12,
    # * a day between 1 and 31.
    #
    # note: this isn't true date parsing in that "feb 31st" is allowed,
    # this doesn't check for leap years, etc.
    #
    # recipe:
    #
    # start with regex to find maybe-dates, then attempt to map the
    # integers onto month-day-year to filter the maybe-dates into
    # dates.
    #
    # finally, remove matches that are substrings of other matches to
    # reduce noise.
    #
    # note: instead of using a lazy or greedy regex to find many dates
    # over the full string, this uses a ^...$ regex against every
    # substring of the password -- less performant but leads to every
    # possible date match.

    my $length = length($password);
    # dates without separators are between length 4 '1191' and 8 '11111991'
    return [] if $length < 4;

    my @matches;

    for my $i (0..$length-3) {
        for my $j ($i+3 .. $i+8) {
            last if $j >= $length;

            my $token = substr($password,$i,$j-$i+1);
            next unless $token =~ $MAYBE_DATE_NO_SEP_RE;

            my @candidates;
            for my $split (@{ $SPLITS{length($token)} || [] }) {
                my ($k,$l) = @{$split};

                my $year = $class->_map_ints_to_year(
                    substr($token,0,$k),
                    substr($token,$k,$l-$k),
                    substr($token,$l),
                ) or next;

                push @candidates,$year;
            }
            next unless @candidates;

            # at this point: different possible year mappings for the
            # same i,j substring. match the candidate date that likely
            # takes the fewest guesses: a year closest to
            # 2017. ($REFERENCE_YEAR).
            #
            # ie, considering '111504', prefer 11-15-04 to 1-1-1504
            # (interpreting '04' as 2004)
            my $best_candidate = min_by { abs($_ - $REFERENCE_YEAR) } @candidates;
            push @matches, $class->new({
                token => $token,
                i => $i, j => $j,
                separator => '',
                year => $best_candidate,
            });
        }
    }

    # dates with separators are between length 6 '1/1/91' and 10 '11/11/1991'
    for my $i (0..$length-5) {
        for my $j ($i+5 .. $i+10) {
            last if $j >= $length;

            my $token = substr($password,$i,$j-$i+1);
            my @pieces = $token =~ $MAYBE_DATE_WITH_SEP_RE
                or next;

            my $year = $class->_map_ints_to_year(
                $pieces[0],
                $pieces[2],
                $pieces[3]
            ) or next;

            push @matches, $class->new({
                token => $token,
                i => $i, j => $j,
                separator => $pieces[1],
                year => $year,
            });
        }
    }

    # matches now contains all valid date strings in a way that is
    # tricky to capture with regexes only. while thorough, it will
    # contain some unintuitive noise:
    #
    # '2015_06_04', in addition to matching 2015_06_04, will also
    # contain 5(!) other date matches: 15_06_04, 5_06_04, ..., even
    # 2015 (matched as 5/1/2020)
    #
    # to reduce noise, remove date matches that are strict substrings
    # of others

    @matches = grep {
        my $match = $_;
        my $is_submatch = grep {
            $_ == $match
                ? 0
                : $_->i <= $match->i && $_->j >= $match->j
                ? 1
                : 0
            } @matches;
        !$is_submatch;
    } @matches;

    @matches = sort @matches;
    return \@matches;
}

sub _map_ints_to_year {
    my ($class,@ints) = @_;

    ## no critic (ProhibitBooleanGrep)

    # given a 3-tuple, discard if:
    #   middle int is over 31 (for all dmy formats, years are never allowed in
    #   the middle)
    #   middle int is zero
    return undef if $ints[1] > 31 or $ints[1] <= 0;
    #   any int is over the max allowable year
    #   any int is over two digits but under the min allowable year
    return undef if grep { $_ > $MAX_YEAR ||
                               ( $_ > 99 && $_ < $MIN_YEAR ) } @ints;
    #   2 ints are over 31, the max allowable day
    return undef if grep { $_ > 31 } @ints >= 2;
    #   2 ints are zero
    return undef if grep { $_ == 0 } @ints >= 2;
    #   all ints are over 12, the max allowable month
    return undef if grep { $_ > 12 } @ints == 3;

    # first look for a four digit year: yyyy + daymonth or daymonth + yyyy
    my @possible_four_digit_splits = (
        [ $ints[2], $ints[0], $ints[1] ],
        [ $ints[0], $ints[1], $ints[2] ],
    );
    for my $split (@possible_four_digit_splits) {
        my ($year,@rest) = @{$split};
        if ( $year >= $MIN_YEAR && $year <= $MAX_YEAR) {
            # for a candidate that includes a four-digit year,
            # when the remaining ints don't match to a day and month,
            # it is not a date.
            if ($class->_map_ints_to_dm(@rest)) {
                return $year;
            }
            else {
                return undef;
            }
        }
    }

    # given no four-digit year, two digit years are the most flexible
    # int to match, so try to parse a day-month out of @ints[0,1] or
    # @ints[1,0]
    for my $split (@possible_four_digit_splits) {
        my ($year,@rest) = @{$split};
        if ($class->_map_ints_to_dm(@rest)) {
            $year = $class->_two_to_four_digit_year($year);
            return $year;
        }
    }

    return undef;
}

sub _map_ints_to_dm {
    my ($class,@ints) = @_;
    for my $case ([@ints],[reverse @ints]) {
        my ($d,$m) = @{$case};
        if ( $d >= 1 && $d <= 31 && $m >= 1 && $m <= 12) {
            return 1
        }
    }
    return undef;
}

sub _two_to_four_digit_year {
    my ($class, $year) = @_;
    return $year if $year > 99;
    return 1900 + $year if $year > 50;
    return 2000 + $year;
}


sub feedback_warning {
    my ($self) = @_;

    return 'Dates are often easy to guess';
}

sub feedback_suggestions {
    return [ 'Avoid dates and years that are associated with you' ];
}


around fields_for_json => sub {
    my ($orig,$self) = @_;
    ( $self->$orig(), qw(year separator) )
};

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Data::Password::zxcvbn::Match::Date - match class for digit sequences that look like dates

=head1 VERSION

version 1.1.3

=head1 DESCRIPTION

This class represents the guess that a certain substring of a
password, consisting of digits and maybe separators, can be guessed by
scanning dates in the recent past (like birthdays, or recent events).

=head1 ATTRIBUTES

=head2 C<year>

Integer, the year extracted from the token.

=head2 C<separator>

String, possibly empty: the separator used between digits in the
token.

=head1 METHODS

=head2 C<estimate_guesses>

The number of guesses is the number of days between the extracted
L</year> and a reference year (currently 2017), multiplied by the
possible separators.

=head2 C<make>

  my @matches = @{ Data::Password::zxcvbn::Match::Date->make(
    $password,
  ) };

Scans the C<$password> for sequences of digits and separators that
look like dates. Some examples:

=over 4

=item *

1/1/91

=item *

1191

=item *

1991-01-01

=item *

910101

=back

=head2 C<feedback_warning>

=head2 C<feedback_suggestions>

This class suggests not using dates.

=head2 C<fields_for_json>

The JSON serialisation for matches of this class will contain C<token
i j guesses guesses_log10 year separator>.

=head1 AUTHOR

Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
