#!/usr/bin/env perl

package App::ccdiff;

use 5.014000;
use warnings;
use charnames ();
use File::Find;
use Encode          qw( encode                      );
use List::Util      qw( first  max                  );
use Term::ANSIColor qw(:constants color             );
use Getopt::Long    qw(:config bundling noignorecase);

# Release version, reported by -V / --version
our $VERSION = "0.34";
# Name this program was invoked as, with everything up to the last "/" removed
our $CMD     = $0 =~ s{.*/}{}r;

# Print the command-line synopsis and exit.
#   usage ($err)
# $err is used as the exit status. When it is true, the text is written
# to STDERR instead of the currently selected handle.
sub usage {
    my $err = shift;
    $err and select STDERR;
    say for
	"usage: $CMD [options] file1 [file2]",
	"       $CMD [options] dir1   dir2",
	"       $CMD --man | --info",
	"	file1 or file2 can be - (but not both)",
	"   -V     --version      Show version and exit",
	"   -v[1]  --verbose[=1]  Set verbosity",
	"  Diff options:",
	"   -U     --utf-8                 Input is in UTF-8",
	"   -u[3]  --unified=3             Show a unified diff",
	"          --no-header             Skip header with file names/stamps",
	"   -I     --index                 Add indices to the change chunks",
	"   -I n   --index=4               Only show chunk n",
	"   -w     --ignore-all-space      Ignore all whitespace",
	"   -b     --ignore-space-change   Ignore horizontal whitespace changes",
	"   -Z     --ignore-trailing-space Ignore whitespace at line ending",
	"   -B     --ignore-blank-lines    Ignore changes where lines are all blank",
	"   -i     --ignore-case           Ignore case changes",
	"   --dc=C --diff-class=C          Select Algorithm::Diff (AD or PP)",
	"                                      or Algorithm::Diff::XS (XS = default)",
	"  Other options:",
	"   -t n   --threshold=2  Horizontal line diff threshold",
	"   -h n   --heuristics=n Horizontal char diff threshold",
	"   -e n   --ellipsis=n   Compress horizontal equal sections",
	"   -m     --markers      Use markers to indicate change positions",
	"   -a     --ascii        Use ASCII instead of Unicode indicators",
	"          --no-color     Reset all colors to none.",
	"          --list-colors  List available colors and exit",
	"          --old=red      Color to indicate removed content",
	"          --new=green    Color to indicate added   content",
	"          --bg=white     Background color for colored indicators",
	"   -p     --pink         Shortcut for --old=magenta",
	"   -r     --reverse      Reverse/invert the colors of the indicators",
	"   -s     --swap         Swap old/new color indicators",
	"          --settings     Show default settings (after reading rc)";
    exit $err;
    } # usage

# Built-in defaults. read_rc () below overlays these with the values found
# in the user's configuration files; most entries then seed the matching
# command-line option variable.
my %rc = (
    ascii	=> 0,			# default for -a / --ascii
    bg		=> "white",		# default for --bg
    chr_cml	=> "\x{21b1}",		# marker left  of a horizontal change (verbose >= 2)
    chr_cmr	=> "\x{21b0}",		# marker right of a horizontal change (verbose >= 2)
    chr_eli	=> "\x{2508}",		# replacement text for --ellipsis
    chr_eli_v	=> "\x{21a4}\x{21a6}",	# replacement text for --ellipsis when verbose >= 2
    chr_eql	=> " ",			# filler in the marker line for unchanged characters
    chr_new	=> "\x{25b2}",		# marker under added   characters (--markers)
    chr_old	=> "\x{25bc}",		# marker under removed characters (--markers)
    ellipsis	=> 0,			# default for -e / --ellipsis
    emacs	=> 0,			# default for --emacs
    header	=> 1,	# A color name is allowed
    heuristics	=> 0,			# default for -h / --heuristics
    index	=> 0,			# default for -I / --index
    iwbzusepp	=> 0,			# true: use Algorithm::Diff when -i, -b or -Z is in effect
    markers	=> 0,			# default for -m / --markers
    new		=> "green",		# default for --new
    old		=> "red",		# default for --old
    reverse	=> 0,			# default for -r / --reverse
    swap	=> 0,			# default for -s / --swap
    threshold	=> 2,			# default for -t / --threshold
    utf8	=> 0,			# default for -U / --utf-8
    verbose	=> "cyan",		# color used for verbose output and index tags
    );
read_rc ();

# Option state. Each variable is named after its short command-line switch
# and starts out with the (possibly rc-file provided) default from %rc.
# Variables without an initial value are undefined unless set by an option.
my $opt_a = $rc{ascii};
my $opt_b;
my $opt_B;
#y $opt_c;
my $opt_E;
my $opt_h = $rc{heuristics};
my $opt_H = $rc{header};
my $opt_i;
my $opt_I = $rc{index};
my $opt_m = $rc{markers};
my $opt_r = $rc{reverse};
my $opt_s = $rc{swap};
my $opt_t = $rc{threshold};
my $opt_e = $rc{ellipsis};
my $opt_u = $rc{unified};	# not in the built-in defaults: undefined unless set in an rc file
my $opt_U = $rc{utf8};
my $opt_v = 0;
my $opt_w;
my $opt_Z;
my $emacs     = $rc{emacs};
my $old_color = $rc{old};
my $new_color = $rc{new};
my $rev_color = $rc{bg};
my $cli_color = $ENV{CLICOLOR};	# https://bixense.com/clicolors/
my $no_colors = $ENV{NO_COLOR}; # https://no-color.org
my $list_colors;
my $diff_class;

# Apply the NO_COLOR / CLICOLOR / CLICOLOR_FORCE environment conventions:
# a true $NO_COLOR disables colors unless $CLICOLOR_FORCE is true; otherwise
# a defined $CLICOLOR disables colors when it is false or when the output
# does not go to a terminal.
if ($no_colors) {
    $ENV{CLICOLOR_FORCE}	and $no_colors = 0;
    }
elsif (defined $cli_color) {
    # true $cli_color is the default for ccdiff
    # The colors are written to STDOUT, so that is the handle that has to
    # be a terminal. A bare -t would test STDIN instead, which is wrong
    # whenever one of the inputs is piped in or the output is piped out.
    !$cli_color || !-t STDOUT	and $no_colors = 1;
    }

# Parse the command line, but only when run as a script (not when this file
# is loaded as a module).
unless (caller) {
    # Options from the environment go in front, so that the real command
    # line can still overrule them. split " " (awk mode) skips leading
    # whitespace; a regex split would yield an empty first field for a
    # value like " -u", and that empty string would be taken as file1.
    $ENV{CCDIFF_OPTIONS} and unshift @ARGV, split " ", $ENV{CCDIFF_OPTIONS};
    GetOptions (
	"help|?"	=> sub { usage (0); },
	"V|version"	=> sub { say "$CMD [$VERSION]"; exit 0; },
	  "man"		=> sub { pod_nroff (); },
	  "info"	=> sub { pod_text  (); },

	"U|utf-8!"		=> \$opt_U,
	  "dc|diff-class=s"	=> \$diff_class,
	  "pp!"			=> sub { $diff_class = "PP" },

    #   "c|context:3"		=> \$opt_c,	# implement context-diff?
	"u|unified:3"		=> \$opt_u,
	"I|idx|index:-1"	=> \$opt_I,
	"t|threshold=i"		=> \$opt_t,
	"H|header!"		=> \$opt_H,
	  "HC|header-color=s"	=> \$opt_H,
	"h|heuristics=i"	=> \$opt_h,
	"e|ellipsis=i"		=> \$opt_e,
	  "emacs!"		=> \$emacs,

	# These toggle, so they invert a default set in a configuration file
	"a|ascii"		=> sub { $opt_a ^= 1 },
	"m|markers"		=> sub { $opt_m ^= 1 },
	"r|reverse|invert"	=> sub { $opt_r ^= 1 },
	"s|swap!"		=> sub { $opt_s ^= 1 },

	"i|ignore-case!"			=> \$opt_i,
	"w|ignore-all-space!"			=> \$opt_w,
	"b|ignore-ws|ignore-space-change!"	=> \$opt_b,
	"Z|ignore-trailing-space!"		=> \$opt_Z,
	"E|ignore-tab-expansion!"		=> \$opt_E, # NYI
	"B|ignore-blank-lines!"			=> \$opt_B, # Partly implemented

	"p|pink!"		=> sub { $old_color = "magenta" },
	  "old=s"		=> \$old_color,
	  "new=s"		=> \$new_color,
	  "bg=s"		=> \$rev_color,
	  "no-colors"		=> \$no_colors,
	  "list-colors!"	=> \$list_colors,
	  "settings|defaults"	=> sub {
		binmode STDOUT, ":encoding(utf-8)";
		printf "%-10s : %s\n", $_, $rc{$_} // "<undef>" for sort keys %rc;
		exit 0;
		},

	"v|verbose:1"	=> \$opt_v,
	) or usage (1);
    }

# -w implies all other white-space related options
$opt_w      and $opt_b = $opt_Z = $opt_E = $opt_B = 1;
# A heuristics value of 1 or more is a percentage: convert it to a fraction
$opt_h >= 1 and $opt_h /= 100;

# Render the POD of this script as plain text and exit.
# The output is colored unless colors have been disabled.
sub pod_text {
    require Pod::Text::Color;
    my $class  = $no_colors ? "Pod::Text" : "Pod::Text::Color";
    my $parser = $class->new ();

    # Collect the formatted text in memory before printing it
    my $text;
    open my $mem, ">", \$text;
    $parser->parse_from_file ($0, $mem);
    close $mem;

    print $text;
    exit 0;
    } # pod_text

# Render the POD of this script as a manual page through nroff and exit.
# Falls back to pod_text () - which exits itself - when no executable nroff
# is found in $PATH or when nroff cannot be started.
sub pod_nroff {
    # $PATH might be unset: treat that as "no nroff available"
    first { -x "$_/nroff" } grep { -d } split m/:+/ => $ENV{PATH} // "" or pod_text ();

    require Pod::Man;
    my $p = Pod::Man->new ();
    # The list form of open does not involve a shell
    open my $fh, "|-", "nroff", "-man" or pod_text ();
    $p->parse_from_file ($0, $fh);
    close $fh;
    exit 0;
    } # pod_nroff

# Color initialization
# Normalize "red bold", "red_bold" and "bold_red" to "bold red"
for ($old_color, $new_color, $rev_color) {
    s/^(.*)[ _]bold$/bold $1/i;
    s/^bold_/bold /i;
    }
# Map color names to their escape sequences. For each of the eight basic
# colors there is a foreground, an "on_" background and a "bold" entry.
my %clr = map { $_ => color (s{^(.*)[ _]bold$}{bold $1}ir =~
                             s{^bold[ _]}{bold }ir) }
	  map {( $_, "on_$_", "bold $_" )}
    qw( red green blue black white cyan magenta yellow );
# Add every other name returned by tac_colors ()
$clr{$_} //= color ($_) for tac_colors ();
# With colors disabled every name maps to the empty string
$no_colors and $clr{$_} = "" for keys %clr;
$clr{none} = $clr{on_none} = "";

# Display settings shared by ccdiff () and subdiff (). Only $reset gets a
# value here, the others are assigned in set_options ().
my ($reset,   $bg_new,  $bg_old,
    $chr_cml, $chr_cmr, $chr_ctx, $chr_eli, $chr_eql, $chr_lft,
    $chr_new, $chr_old, $chr_rgt,
    $clr_dbg, $clr_grn, $clr_new, $clr_old, $clr_red, $clr_rev,
    $cmp_sub) = (RESET);

# --list-colors: show all known color names, four per line, each in its
# own color, and exit
if ($list_colors) {
    my @clr = map { sprintf "%s%-18s%s", $clr{$_}, $_, $reset } sort keys %clr;
    while (@clr) {
	say join "  " => map { $_ // "" } splice @clr, 0, 4;
	}
    exit;
    }

# Derive the display settings (colors, indicator characters, line prefixes
# and the line comparison key generator) from the current option values.
# Takes no arguments and is called at the start of every ccdiff () call.
sub set_options {
    # Fall back to the configured default for any color that is not known
    for ([ \$old_color, $rc{old} ], [ \$new_color, $rc{new} ], [ \$rev_color, $rc{bg} ]) {
	my ($c, $def) = @$_;
	$$c && exists $clr{$$c} and next;
	warn "color ", $$c // "(undefined)", " is unknown, using $def instead\n";
	$$c = $def;
	}
    $clr_red = $clr{$old_color};
    $clr_grn = $clr{$new_color};
    $clr_rev = $clr{$rev_color};
    $clr_dbg = $opt_r && exists $clr{"on_$rc{verbose}"} ? $clr{"on_$rc{verbose}"} : $clr{$rc{verbose}};
    $reset   = $no_colors ? "" : RESET;

    # Background colors cannot be bold: strip that from the color name
    $bg_old = $clr{$rc{bg_old} || ($opt_r ? "on_$old_color" =~ s/bold //ir :
					       "on_$rev_color" =~ s/bold //ir)};
    $bg_new = $clr{$rc{bg_new} || ($opt_r ? "on_$new_color" =~ s/bold //ir :
					       "on_$rev_color" =~ s/bold //ir)};
    $clr_old = $opt_r ? $clr_rev . $bg_old : $clr_red . $bg_old;
    $clr_new = $opt_r ? $clr_rev . $bg_new : $clr_grn . $bg_new;
    $opt_s and ($clr_new, $clr_old) = ($clr_old, $clr_new);

    # Indicators
    # Work on a copy of the configured characters. This sub runs once for
    # every ccdiff () call; converting %rc itself in place would encode the
    # characters again on each call (a directory diff) and would make an
    # ASCII override permanent.
    my @ind = qw( chr_old chr_new chr_cml chr_cmr chr_eli chr_eli_v );
    my %chr;
    @chr{@ind} = @rc{@ind};
    if ($opt_a) {
	@chr{@ind} = qw( ^ ^ > < - <> );
	}
    elsif (!$opt_U) {
	# Output has no encoding layer: emit the characters as UTF-8 bytes
	$chr{$_} = encode ("utf-8", $chr{$_}) for @ind;
	}
    $chr_old = $clr_old . $chr{chr_old} . $reset;
    $chr_new = $clr_new . $chr{chr_new} . $reset;
    $chr_cml = $clr_dbg . $chr{chr_cml} . $reset;
    $chr_cmr = $clr_dbg . $chr{chr_cmr} . $reset;
    $chr_eql =            $rc{chr_eql};
    $chr_lft = $clr_old . (defined $opt_u ? "-" : "< ") . $reset;
    $chr_rgt = $clr_new . (defined $opt_u ? "+" : "> ") . $reset;
    $chr_ctx =             defined $opt_u ? " " : "  ";
    $chr_eli = $opt_v >= 2 ? $chr{chr_eli_v} : $chr{chr_eli};
    # A multi-character ellipsis at verbose level 2 and up disables markers
    $opt_m && $opt_v > 1 && length ($chr_eli) > 1 and $opt_m = 0;

    if ($opt_i || $opt_b || $opt_Z) {
	$rc{iwbzusepp} and $diff_class = "Algorithm::Diff";
	# Lines are compared by the key generated here; the options are
	# evaluated at the moment the key is generated
	$cmp_sub = {
	    keyGen => sub {
		my $line = shift;
		$opt_i and $line = lc $line;
		$opt_Z and $line =~ s/[ \t\r]+$//g;
		$opt_b and $line =~ s/[ \t]+/ /g;
		return $line;
		}
	    };
	}
    } # set_options

# Resolve the diff implementation. A class requested by the user is mapped
# to its module name and loaded; without a request the XS version is tried
# first and the pure-perl version second. $diff_class ends up undefined
# when the module cannot be loaded.
if ($diff_class) {
    if ($diff_class =~ m/^(?:ad|pp|algorithm(?:-|::)diff(?:(?:-|::)pp)?)$/i) {
	$diff_class =
	    eval { require Algorithm::Diff;     "Algorithm::Diff";     };
	}
    elsif ($diff_class =~ m/^(?:adx|xs|algorithm(?:-|::)diff(?:-|::)xs)$/i) {
	$diff_class =
	    eval { require Algorithm::Diff::XS; "Algorithm::Diff::XS"; };
	}
    else {
	die "$diff_class is an unsupported Diff class\n";
	}
    }
else {
    $diff_class =
	eval { require Algorithm::Diff::XS; "Algorithm::Diff::XS"; }
     || eval { require Algorithm::Diff;     "Algorithm::Diff";     };
    }
# $@ holds the reason the last attempted require failed
$diff_class or die "Cannot load Algorithm::Diff: $@";

# Main program, only when run as a script. Two directory arguments start a
# recursive directory diff, anything else is passed to ccdiff () as is.
unless (caller) {
    if (@ARGV == 2 && -d $ARGV[0] && -d $ARGV[1]) {
	my ($d_from, $d_to) = @ARGV;
	my %fn;	# relative file names found in either directory
	my @f;	# relative file names found per directory
	foreach my $idx (0, 1) {
	    my $dir = $ARGV[$idx];
	    find (sub {
		# Only non-empty plain files take part
		-f && -s or return;
		# Make the name relative to the directory. The directory
		# name is quoted: it is a literal string, not a pattern
		my $fn = $File::Find::name =~ s{^\Q$dir\E/*}{}r;
		$fn{$fn}++;
		$f[$idx]{$fn}++;
		}, $dir);
	    }
	foreach my $fn (sort keys %fn) {
	    if ($f[0]{$fn} && $f[1]{$fn}) {
		ccdiff ("$d_from/$fn", "$d_to/$fn");
		}
	    elsif ($f[0]{$fn}) {
		say "Only in $d_from: $fn";
		}
	    else {
		say "Only in $d_to: $fn";
		}
	    delete $f[0]{$fn};
	    }
	}
    else {
	ccdiff (@ARGV);
	}
    exit 0;
    }

# Print the colored character diff between two inputs.
#
#   ccdiff ($f1 [, $f2 [, \%options ]])
#
# $f1, $f2 : a file name, "-" for STDIN (not for both), or a reference to
#            an array of lines. $f2 defaults to "-".
# %options : optional overrides for the current settings, keyed by the
#            names tested below. The key "out" names a file the diff is
#            written to instead of the currently selected handle.
#
# Calls usage (1) when $f1 is missing or when both inputs are "-". Dies on
# block/character devices, on directories and on files that cannot be
# opened.
sub ccdiff {
    my $f1 = shift or usage (1);
    my $f2 = $_[0] // "-";

    -b $f1 || -c $f1 || -b $f2 || -c $f2 and
	die "Character and block devices are not supported\n";
    -d $f1 || -d $f2 and
	die "$CMD does not support directory diff\n";

    my $fh;		# output file, if the "out" option is used
    my $prev_fh;	# handle that was selected before switching to $fh

    if (@_ > 1 && ref $_[1]) { # optional hash with overruling arguments
	my %opt = %{$_[1]};
	foreach my $o (keys %opt) {
	    my $v = $opt{$o};
	    $o eq "ascii"			and $opt_a = $v;
	    $o eq "bg"				and $rev_color = $v;
#	    $o eq "context"			and $opt_c = $v;
	    $o eq "ellipsis"			and $opt_e = $v;
	    $o eq "emacs"			and $emacs = $v;
	    $o eq "header"			and $opt_H = $v;
	    $o eq "heuristics"			and $opt_h = $v;
	    $o eq "ignore-all-space"		and $opt_w = $v;
	    $o eq "ignore-blank-lines"		and $opt_B = $v;
	    $o eq "ignore-case"			and $opt_i = $v;
	    $o eq "ignore-space-change"		and $opt_b = $v;
	    $o eq "ignore-tab-expansion"	and $opt_E = $v;
	    $o eq "ignore-trailing-space"	and $opt_Z = $v;
	    $o eq "index"			and $opt_I = $v;
	    $o eq "list-colors"			and $list_colors = $v;
	    $o eq "markers"			and $opt_m = $v;
	    $o eq "new"				and $new_color = $v;
	    $o eq "old"				and $old_color = $v;
	    $o eq "reverse"			and $opt_r = $v;
	    $o eq "swap"			and $opt_s = $v;
	    $o eq "threshold"			and $opt_t = $v;
	    $o eq "unified"			and $opt_u = $v;
	    $o eq "utf-8"			and $opt_U = $v;
	    $o eq "verbose"			and $opt_v = $v;

	    if ($o eq "out") {
		open   $fh, ">", $v or die "Cannot select out: $!\n";
		# Remember what was selected, so it can be restored
		$prev_fh = select $fh;
		}
	    }
	}

    set_options ();
    # In emacs mode a single file is compared to its backup (file~)
    $emacs and @_ == 0 && -f $f1 && -f "$f1~" and ($f1, $f2) = ("$f1~", $f1);

    $f1 eq "-" && $f2 eq "-" and usage (1);

    binmode STDERR, ":encoding(utf-8)";

    if ($opt_U) {
	binmode STDIN,  ":encoding(utf-8)";
	binmode STDOUT, ":encoding(utf-8)";
	}

    my @d1 = ref $f1 eq "ARRAY" ? @$f1 : $f1 eq "-" ? <STDIN> : do {
	open my $fh, "<", $f1 or die "$f1: $!\n";
	$opt_U and binmode $fh, ":encoding(utf-8)";
	<$fh>;
	};
    my @d2 = ref $f2 eq "ARRAY" ? @$f2 : $f2 eq "-" ? <STDIN> : do {
	open my $fh, "<", $f2 or die "$f2: $!\n";
	$opt_U and binmode $fh, ":encoding(utf-8)";
	<$fh>;
	};
    if ($opt_H) {
	# The header value is either just true or the name of a color,
	# optionally with a background ("blue_on_white")
	my $hc = "";
	if ($opt_H =~ m/^\w\w+/) {
	    my ($hfg, $hbg) = split m/_?(?=on_)/ => lc $opt_H =~ s/\s+/_/gr;
	    $hfg && defined $clr{$hfg} and $hc .= $clr{$hfg};
	    $hbg && defined $clr{$hbg} and $hc .= $clr{$hbg};
	    }
	my $nl = max length $f1, length $f2, 7;
	# A colored header is padded to the width of the terminal
	my $sl = $hc ? ($ENV{COLUMNS} || 80) - 4 - $nl : 1;
	# Anything that is not a plain file is reported as *STDIN with the
	# current time as stamp
	my @h = map { -f $_
	    ? { tag   => "",
		name  => $_,
		stamp => scalar localtime ((stat $_)[9]),
		}
	    : { tag   => "",
		name  => "*STDIN",
		stamp => scalar localtime,
		}
	    } $f1, $f2;
	if (defined $opt_u) {
	    ($h[0]{tag}, $h[1]{tag}) = ("---", "+++");
	    $sl -= 2;
	    printf "%s%s %-*s %-*s%s\n", $hc, $_->{tag},
		$nl, $_->{name}, $sl, $_->{stamp}, $clr{reset} for @h;
	    }
	#elsif ($opt_c) { # diff -c also provides (ugly) headers, but opt_c is NYI
	#   }
	else {
	    ($h[0]{tag}, $h[1]{tag}) = ("<", ">");
	    printf "%s%s %-*s %-*s%s\n", $hc, $_->{tag},
		$nl, $_->{name}, $sl, $_->{stamp}, $clr{reset} for @h;
	    }
	}

    my $diff = $diff_class->new (\@d1, \@d2, $cmp_sub);
    $diff->Base (1);

    # $N counts all hunks (equal ones included), $idx counts the changes,
    # @s holds the lines of the most recent equal hunk (unified context)
    my ($N, $idx, @s) = (0, 0);
    while ($diff->Next) {
	$N++;
	if ($diff->Same) {
	    if (defined $opt_u) {
		@s = $diff->Items (1);
		# Trailing context for the change that precedes this hunk
		$N > 1 and print "$chr_ctx$_" for grep { defined } @s[0..($opt_u - 1)];
		# Pad, so leading context can be indexed from the end
		unshift @s, undef while @s < $opt_u;
		}
	    next;
	    }
	my $sep = "";
	my @d  = map {[ $diff->Items ($_) ]} 1, 2;
	my @do = @{$d[0]};
	my @dn = @{$d[1]};

	if ($opt_B and "@do" !~ m/\S/ && "@dn" !~ m/\S/) {
	    # Modify @s for -u?
	    next;
	    }
	if ($opt_I) {
	    $idx++;
	    # A positive index selects just that one change
	    $opt_I > 0 && $idx != $opt_I and next;
	    printf "%s[%03d]%s ", ${clr_dbg}, $idx, $reset;
	    }

	if (!@dn) {
	    # Lines deleted
	    printf "%d,%dd%d\n", $diff->Get (qw( Min1 Max1 Max2 ));
	    $_ = $clr_old . (s/$/$reset/r) for @do;
	    }
	elsif (!@do) {
	    # Lines added
	    printf "%da%d,%d\n", $diff->Get (qw( Max1 Min2 Max2 ));
	    $_ = $clr_new . (s/$/$reset/r) for @dn;
	    }
	else {
	    # Lines changed
	    $sep = "---\n" unless defined $opt_u;
	    printf "%d,%dc%d,%d\n", $diff->Get (qw( Min1 Max1 Min2 Max2 ));
	    if ($opt_t > 0 and abs (@do - @dn) > $opt_t) {
		# Line counts differ too much: no horizontal diff
		$_ = $clr_old . (s/$/$reset/r) for @do;
		$_ = $clr_new . (s/$/$reset/r) for @dn;
		}
	    else {
		my @D = subdiff (@d, my $heu = {});
		if ($opt_h and $heu->{pct} > $opt_h) {
		    # Too many characters changed: no horizontal diff
		    $_ = $clr_old . (s/$/$reset/r) for @do;
		    $_ = $clr_new . (s/$/$reset/r) for @dn;
		    }
		else {
		    @do = @{$D[0]};
		    @dn = @{$D[1]};
		    }
		}
	    }
	if ($opt_u and @s) {
	    # Leading context: the last $opt_u lines of the preceding equal hunk
	    print "$chr_ctx$_" for grep { defined } map { $s[$#s - $opt_u + $_] } 1..$opt_u;
	    }
	print "$chr_lft$_" for @do;
	print $sep;
	print "$chr_rgt$_" for @dn;
	}

    if ($fh) {
	# Restore the handle the caller had selected, which is not
	# necessarily STDOUT
	select $prev_fh;
	# Buffered write errors only show up on close
	close $fh or die "Cannot close out: $!\n";
	}
    } # ccdiff

# Horizontal (character by character) diff of the two sides of a change.
#
#   my @d = subdiff (\@old, \@new, \%heu);
#
# @old, @new : the lines of the old and the new side of the change
# %heu       : is filled with the number of characters removed (old), added
#              (new) and unchanged (same), and with the resulting ratio
#              pct = (old + new) / (2 * same). All counts start at 1.
#
# Returns two array references, for the old and for the new side, holding
# the lines to print: the text with the changes colored, with a marker line
# after each text line if markers are enabled and at least one marker is
# set, and with a line describing the changed characters if verbose.
sub subdiff {
    my ($old, $new, $heu) = @_;
    # Both sides are compared as one list of single characters
    my $d = $diff_class->new (map { [ map { split m// } @$_ ] } $old, $new);
    # $d1/$d2 : colored text for old/new
    # $x1/$x2 : marker lines for old/new
    # @h1/@h2 : verbose descriptions of the changed characters in old/new
    my ($d1, $d2, $x1, $x2, @h1, @h2) = ("", "", "", "");
    # Left and right change markers are only shown at verbose level 2 and up
    my ($cml, $cmr) = $opt_v < 2 ? ("", "") : ($chr_cml, $chr_cmr);
    my ($cmd, $cma) = ($chr_old, $chr_new);
    @{$heu}{qw( old new same )} = (1, 1, 1); # prevent div/0
    while ($d->Next) {
	my @c  = map {[ $d->Items ($_) ]} 1, 2;
	my @co = @{$c[0]};
	my @cn = @{$c[1]};
	if ($d->Same) {
	    $heu->{same} += scalar @co;
	    my $e = $chr_eli;
	    my $c = join "" => @co;
	    if ($opt_e) {
		# Compress long equal sections, line by line
		my $join = "";
		foreach my $sc (split m/\n/ => $c) {
		    $_ .= $join for $d1, $d2, $x1, $x2;
		    $join = "\n";
		    my $l  = length $sc;      # The length of this "same" chunk
		    my $le = $l - 2 * $opt_e; # The length of the text replaced with ellipsis
		    # At verbose level 2 and up the number of replaced
		    # characters is inserted inside a two-character ellipsis
		    my $ee = $opt_v <= 1 ? $e : $e =~ s/^.\K(?=.$)/$le/r;
		    if ($le > length $ee) {
			# Keep $opt_e characters on either side of the ellipsis
			my $lsc = substr $sc, 0,           $opt_e;
			$d1 .= $lsc;
			$d2 .= $lsc;
			$lsc =~ s/\S/$chr_eql/g;
			$x1 .= $lsc;
			$x2 .= $lsc;
			my $rsc = substr $sc, $l - $opt_e, $opt_e;
			$d1 .= $clr_dbg . $ee . $reset . $rsc;
			$d2 .= $clr_dbg . $ee . $reset . $rsc;
			$rsc =~ s/\S/$chr_eql/g;
			$x1 .= $chr_eql x length ($ee) . $rsc;
			$x2 .= $chr_eql x length ($ee) . $rsc;
			next;
			}
		    else {
			# Too short for the replacement to be a gain
			$d1 .= $sc;
			$d2 .= $sc;
			$sc =~ s/\S/$chr_eql/g;
			$x1 .= $sc;
			$x2 .= $sc;
			}
		    }
		next;
		}
	    $d1 .= $c;
	    $d2 .= $c;
	    # In the marker lines white-space is kept as is
	    $c =~ s/\S/$chr_eql/g;
	    $x1 .= $c;
	    $x2 .= $c;
	    next;
	    }
	if (@co) {
	    # Removed characters
	    $heu->{old} += scalar @co;
	    $d1 .= $cml.$clr_old;
	    # Do not let the color run across a line ending
	    $d1 .= s/\n/$reset\n$clr_old/gr for @co;
	    $d1 .= $reset.$cmr;
	    $x1 .= $_ for map { s/[^\t\r\n]/$cmd/gr } @co;
	    # Character names need decoded (UTF-8) input, otherwise show hex
	    $opt_v and push @h1, map { $opt_U ? charnames::viacode (ord) : unpack "H*"; } @co;
	    }
	if (@cn) {
	    # Added characters
	    $heu->{new} += scalar @cn;
	    $d2 .= $cml.$clr_new;
	    $d2 .= s/\n/$reset\n$clr_new/gr for @cn;
	    $d2 .= $reset.$cmr;
	    $x2 .= $_ for map { s/[^\t\r\n]/$cma/gr } @cn;
	    $opt_v and push @h2, map { $opt_U ? charnames::viacode (ord) : unpack "H*"; } @cn;
	    }
	}
    $heu->{pct} = ($heu->{old} + $heu->{new}) / (2 * $heu->{same});
    # Split the text back into lines, each side ending in a single newline
    my @d = map { [ split m/(?<=\n)/ => s/\n*\z/\n/r ] } $d1, $d2;
    if ($opt_m) {
	$opt_v > 1 and s/(\S+)/ $1 /g for $x1, $x2;
	s/[ \t]*\n*\z/\n/ for $x1, $x2;
	# A side without any marker gets no marker lines at all
	my @x = map { /\S/ ? [ split m/(?<=\n)/ ] : [] } $x1, $x2;
	foreach my $n (0, 1) {
	    # Interleave: each text line is followed by its marker line
	    @{$x[$n]} and $d[$n] = [ map {( $d[$n][$_], $x[$n][$_] // "" )} 0 .. (scalar @{$d[$n]} - 1) ];
	    }
	}
    if ($opt_v) {
	# Verbose level 3 and up adds the code point to each character name
	$opt_U && $opt_v > 2 and $_ .= sprintf " (U+%06X)", charnames::vianame ($_) for @h1, @h2;
	@h1 and push @{$d[0]}, sprintf " -- ${clr_dbg}verbose$reset : %s\n", join ", " => map { $clr_old.$_.$reset } @h1;
	@h2 and push @{$d[1]}, sprintf " -- ${clr_dbg}verbose$reset : %s\n", join ", " => map { $clr_new.$_.$reset } @h2;
	}
    @d;
    } # subdiff

# Overlay the built-in defaults in %rc with the settings found in the
# configuration files of the current user. The files are read in the order
# listed, so a setting in a later file overrules one from an earlier file.
# Files that are empty, that are writable by group or others, or that
# cannot be opened are skipped silently.
sub read_rc {
    my $home = $ENV{HOME} || $ENV{USERPROFILE} || $ENV{HOMEPATH};
    # Without a home directory the names below would refer to files in the
    # root of the file system: do not read any configuration at all
    defined $home && length $home or return;
    foreach my $rcf (
	    "$home/ccdiff.rc",
	    "$home/.ccdiffrc",
	    "$home/.config/ccdiff",
	    ) {
	-s $rcf or next;
	(stat $rcf)[2] & 022 and next;
	open my $fh, "<", $rcf or next;
	while (<$fh>) {
	    # key = value  or  key : value, surrounding white-space ignored
	    my ($k, $v) = (m/^\s*([-\w]+)\s*[:=]\s*(.*\S)/) or next;
	    # Key  : lowercased, a trailing -color/_colour is dropped,
	    #        "background" is "bg", unicode/utf-8 variants are "utf8"
	    # Value: U+XXXX is replaced with that character, no/false is 0
	    #        and yes/true is -1
	    $rc{ lc $k
	        =~ s{[-_]colou?r$}{}ir
	        =~ s{background}{bg}ir
	        =~ s{^(?:unicode|utf-?8?)$}{utf8}ir
	      } = $v
		=~ s{U\+?([0-9A-Fa-f]{2,7})}{chr hex $1}ger
		=~ s{^(?:no|false)$}{0}ir
		=~ s{^(?:yes|true)$}{-1}ir; # -1 is still true
	    }
	}
    } # read_rc

# Return the sorted list of lowercase color and attribute names known to
# the installed version of Term::ANSIColor.
# The list of names is taken straight from Term::ANSIColor itself.
sub tac_colors {
    my @attr = (
      # Basic colors
      qw(
	CLEAR           RESET             BOLD            DARK
	FAINT           ITALIC            UNDERLINE       UNDERSCORE
	BLINK           REVERSE           CONCEALED

	BLACK           RED               GREEN           YELLOW
	BLUE            MAGENTA           CYAN            WHITE
	ON_BLACK        ON_RED            ON_GREEN        ON_YELLOW
	ON_BLUE         ON_MAGENTA        ON_CYAN         ON_WHITE

	BRIGHT_BLACK    BRIGHT_RED        BRIGHT_GREEN    BRIGHT_YELLOW
	BRIGHT_BLUE     BRIGHT_MAGENTA    BRIGHT_CYAN     BRIGHT_WHITE
	ON_BRIGHT_BLACK ON_BRIGHT_RED     ON_BRIGHT_GREEN ON_BRIGHT_YELLOW
	ON_BRIGHT_BLUE  ON_BRIGHT_MAGENTA ON_BRIGHT_CYAN  ON_BRIGHT_WHITE
	),
      # 256 colors
      (map { ("ANSI$_", "ON_ANSI$_") } 0 .. 255),
      (map { ("GREY$_", "ON_GREY$_") } 0 .. 23),
      );
    # RGB colors, each component in the range 0 .. 5
    foreach my $red (0 .. 5) {
	foreach my $green (0 .. 5) {
	    foreach my $blue (0 .. 5) {
		push @attr, "RGB$red$green$blue", "ON_RGB$red$green$blue";
		}
	    }
	}

    my %known;
    @known{map { lc } @attr} = ();

    # Leave out what the installed version does not support yet
    my $tac = $Term::ANSIColor::VERSION;
    my @unsupported;
    $tac < 3.02 and push @unsupported, qr/italic/;
    $tac < 4.00 and push @unsupported, qr/rgb|grey/;
    $tac < 4.06 and push @unsupported, qr/ansi/;

    sort grep { my $name = $_; !grep { $name =~ $_ } @unsupported } keys %known;
    } # tac_colors

1;

__END__

=encoding utf-8

=head1 NAME

ccdiff - Colored Character diff

=head1 SYNOPSIS

 ccdiff [options] file1|- file2|-
 ccdiff [options] dir1    dir2

 ccdiff --help
 ccdiff --man
 ccdiff --info

=head1 DESCRIPTION

Show the diff between two files on a character by character base. In contrast to
the standard diff tools, this tool uses the diff algorithm horizontally for each
line in the vertical diff, highlighting the changes. This is very handy in hard
to spot changes like C<O> to C<0>, C<I> to C<l> or C<1> and whitespace.

If there are two arguments, and both are a folder/directory, a recursive diff is
executed. This is not available when used as a (sub)class.

=head1 OPTIONS

=head2 Command line options

=over 2

=item --help -?

Show a summary of the available command-line options and exit.

=item --version -V

Show the version and exit.

=item --man

Show this manual using pod2man and nroff.

=item --info

Show this manual using pod2text.

=item --utf-8 -U

All I/O (streams to compare and standard out) are in UTF-8.

=item --diff-class=C --dc=C --pp

Select the class used to execute the diff. By default C<ccdiff> will select
the first available out of C<Algorithm::Diff::XS> or C<Algorithm::Diff>.

Sometimes the C<XS> version fails on encoding and the pure-perl version will
work just fine. You can force C<ccdiff> to use either of them.

Select the pure-perl version with any of C<PP>, C<AD>, C<Algorithm::Diff>,
C<Algorithm-Diff>, or C<Algorithm::Diff::PP> (case insensitive).
For convenience, C<--dc=pp> can be abbreviated to C<--pp>.

 --pp
 --dc=pp
 --dc=algorithm-diff
 --diff-class=Algorithm::Diff::PP

Select the XS version with any of C<XS>, C<ADX>, C<Algorithm::Diff::XS>, or
C<Algorithm-Diff-XS> (case insensitive).

 --dc=xs
 --dc=algorithm-diff-xs
 --diff-class=Algorithm::Diff::XS

=item --unified[=3] -u [3]

Generate a unified diff. The number of context lines is optional. When omitted
it defaults to 3. Currently there is no provision of dealing with overlapping
diff chunks. If the common part between two diff chunks is shorter than twice
the number of context lines, some lines may show twice.

The default is to use traditional diff:

 5,5c5,5
 < Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
 ---
 > Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539

a unified diff (-u1) would be

 5,5c5,5
  Tue Sep  6 05:43:59 2005,B.O.Q.S.,,1125978239,1943341
 -Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
 +Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
  Mon Feb 23 10:37:02 2004,R.X.K.S.,van,1077529022,1654127

=item --verbose[=1] -v[1]

Show an additional line for each old or new section in a change chunk (not for
added or deleted lines) that shows the hexadecimal value of each character. If
C<--utf-8> is in effect, it will show the Unicode character name(s).

This is a debugging option, so invisible characters can still be "seen".

C<--verbose> accepts an optional verbosity-level. On level 2 and up, all
horizontal changes get left-and-right markers inserted to enable seeing the
location of the ZERO WIDTH or invisible characters. With level 3 and up and
Unicode enabled, the changed characters will also show the codepoint in hex.

An example of this:

With -Uu0v0:

 1,1c1,1
 - A  BCDE Fg
 + A BcdE​Fg

With -Uu0v1:

 1,1c1,1
 - A  BCDE Fg
 - -- verbose : SPACE, LATIN CAPITAL LETTER C, LATIN CAPITAL LETTER D, SPACE
 + A BcdE​Fg
 + -- verbose : LATIN SMALL LETTER C, LATIN SMALL LETTER D, ZERO WIDTH SPACE

With -Uu0v2:

 1,1c1,1
 - A ↱ ↰B↱CD↰E↱ ↰Fg
 - -- verbose : SPACE, LATIN CAPITAL LETTER C, LATIN CAPITAL LETTER D, SPACE
 + A B↱cd↰E↱​↰Fg
 + -- verbose : LATIN SMALL LETTER C, LATIN SMALL LETTER D, ZERO WIDTH SPACE

With -Uu0v3:

 1,1c1,1
 - A ↱ ↰B↱CD↰E↱ ↰Fg
 - -- verbose : SPACE (U+000020), LATIN CAPITAL LETTER C (U+000043), LATIN CAPITAL LETTER D (U+000044), SPACE (U+000020)
 + A B↱cd↰E↱​↰Fg
 + -- verbose : LATIN SMALL LETTER C (U+000063), LATIN SMALL LETTER D (U+000064), ZERO WIDTH SPACE (U+00200B)

With -Uu0v2 --ascii:

 1,1c1,1
 - A > <B>CD<E> <Fg
 - -- verbose : SPACE, LATIN CAPITAL LETTER C, LATIN CAPITAL LETTER D, SPACE
 + A B>cd<E>​<Fg
 + -- verbose : LATIN SMALL LETTER C, LATIN SMALL LETTER D, ZERO WIDTH SPACE

The word "verbose" and the character markers will be displayed using the
C<verbose> color. The characters used for the markers can be defined in your
configuration file as C<chr_cml> (the character used as marker on the left)
and C<chr_cmr> (the character used as marker on the right).

=item --markers -m

Use markers under each changed character in change-chunks.

C<--markers> is especially useful if the terminal does not support colors, or
if you want to copy/paste the output to (ASCII) mail. See also C<--ascii>. The
markers will have the same color as added or deleted text.

This will look like (with unified diff):

 5,5c5,5
 -Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
 -               ▼       ▼
 +Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
 +               ▲       ▲

The characters used for the markers can be defined in your configuration file
as C<chr_old> (the character used as marker under removed characters) and
C<chr_new> (the character used as marker under added characters).

If C<--ellipsis> is also in effect and either the C<chr_eli> is longer than
one character or C<--verbose> level is over 2, this option is automatically
disabled.

=item --ascii -a

Use (colored) ASCII indicators instead of Unicode. The default indicators are
Unicode characters that stand out better. The markers will have the same color
as added or deleted text.

For the vertical markers (C<-m>) that would look like:

 5,5c5,5
 -Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
 -               ^       ^
 +Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
 +               ^       ^

For the positional indicators, I did consider using U+034e (COMBINING UPWARDS
ARROW BELOW), but as most terminals are probably unable to show it due to line
height changes, I did not pursue the idea.

=item --pink -p

Change the default C<red> for deleted text to the color closest to pink that
is supported by L<Term::ANSIColor>: C<magenta>.

=item --reverse -r

Reverse/invert the foreground and background for the colored indicators.

If the foreground color has C<bold>, it will be stripped from the new background
color.

=item --swap -s

Swap the colors for new and old.

=item --list-colors

List available colors and exit.

=item --no-colors

Disable all colors. Useful for redirecting the diff output to a file that is to
be included in documentation.

This is the default if the environment variable C<$NO_COLOR> has a true value or
if the environment variable C<$CLICOLOR> is set to a false value.  If set,
C<$CLICOLOR_FORCE> will overrule the default of C<$NO_COLOR>.

=item --old=color

Define the foreground color for deleted text.

=item --new=color

Define the foreground color for added text.

=item --bg=color

Define the background color for changed text.

=item --index --idx -I

Prefix position indicators with an index.

 [001] 5,5c5,5
 -Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
 +Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539

If a positive number is passed (C<--index=4> or C<-I 4>), display just the
chunk with that index. The index itself is shown in the C<verbose> color.

This is useful in combination with C<--verbose>.

=item --threshold=2 -t 2

Defines the number of lines a change block may differ before the fall-back of
horizontal diff to vertical diff.

If a chunk describes a change, and the number of lines in the original block
has fewer or more lines than the new block and that difference exceeds this
threshold, C<ccdiff> will fall-back to vertical diff.

=item --heuristics=n -h n

Defines the percentage of character-changes a change block may differ before
the fall-back of horizontal diff to vertical diff.

This percentage is calculated as C<(characters removed + characters added) /
(2 * characters unchanged)>.

=item --ellipsis=n -e n

Defines the number of characters to keep on each side of a horizontal-equal
segment. The default is C<0>, meaning do not compress.

If set to a positive number, and the length of a segment of equal characters
inside a horizontal diff is longer than twice this value, the middle part is
replaced with C<┈ U02508 \N{BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL}>
(instead of … U02026, as HORIZONTAL ELLIPSIS does not stand out enough).

With C<-u0me3> that would be like

 5,5c5,5
 -Sat┈07:08:33┈ 1998,I.┈539
 -        ▼        ▼
 +Sat┈07:00:33┈ 1993,I.┈539
 +        ▲        ▲

With C<-u0e3 -v2> like

 5,5c5,5
 -Sat↤9↦07:0↱0↰:33 199↱3↰,I.↤23↦539
 - -- verbose : DIGIT ZERO, DIGIT THREE
 +Sat↤9↦07:0↱8↰:33 199↱8↰,I.↤23↦539
 + -- verbose : DIGIT EIGHT, DIGIT EIGHT

The text used for the replaced text can be defined in your configuration file
as C<chr_eli> and/or C<chr_eli_v>.

=item --ignore-case -i

Ignore case on comparison.

=item --ignore-all-space -w

Ignore all white-space changes. This will set all options C<-b>, C<-Z>, C<-E>,
and C<-B>.

=item --ignore-trailing-space -Z

Ignore changes in trailing white-space (tabs and spaces).

=item --ignore-ws|ignore-space-change -b

Ignore changes in horizontal white-space (tabs and spaces). This does not
include white-space changes that split non-white-space or remove white-space
between two non-white-space elements.

=item --ignore-tab-expansion -E

NYI

=item --ignore-blank-lines -B

B<Just Partly Implemented> (WIP)

=back

=head2 Configuration files

In order to be able to overrule the defaults set in C<ccdiff>, one can set
options specific for this login. The following option files are looked for
in this order:

 - $HOME/ccdiff.rc
 - $HOME/.ccdiffrc
 - $HOME/.config/ccdiff

and evaluated in that order. Any options specified in a file later in that
chain will overwrite previously set options.

Option files are only read and evaluated if they are not empty and not writable
by others than the owner.

The syntax of the file is one option per line, where leading and trailing
white-space is ignored. If that line then starts with one of the options
listed below, followed by optional white-space followed by either an C<=> or
a C<:>, followed by optional white-space and the values, the value is assigned
to the option. The values C<no> and C<false> (case insensitive) are aliases
for C<0>. The values C<yes> and C<true> are aliases to C<-1> (C<-1> being a
true value).

Between parens is the corresponding command-line option.

=over 2

=item unified (-u)

If you prefer unified-diff over old-style diff by default, set this to the
desired number of context lines:

 unified : 3

The default is undefined

=item markers (-m)

 markers : false

Defines if markers should be used under changed characters. The default is to
use colors only. The C<-m> command line option will toggle the option when set
from a configuration file.

=item ascii (-a)

 ascii   : false

Defines whether to use ASCII markers instead of Unicode markers. The default is
to use Unicode markers.

=item reverse (-r)

 reverse : false

Defines if changes are displayed as foreground-color over background-color
or background-color over foreground-color. The default is C<false>, so it will
color the changes with the appropriate color (C<new> or C<old>) over the
default background color.

=item swap (-s)

 swap    : false

Swap the colors for new and old.

=item new (--new)

 new     : green

Defines the color to be used for added text. The default is C<green>.

The color C<none> is also accepted and disables this color.

Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. This option can include C<bold> either as prefix or
as suffix.

This option may also be specified as

 new-color
 new_color
 new-colour
 new_colour

=item old (--old)

 old     : red

Defines the color to be used for deleted text. The default is C<red>.

The color C<none> is also accepted and disables this color.

Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. This option can include C<bold> either as prefix or
as suffix.

This option may also be specified as

 old-color
 old_color
 old-colour
 old_colour

=item bg (--bg)

 bg      : white

Defines the color to be used as background for changed text. The default is
C<white>.

The color C<none> is also accepted and disables this color.

Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. The C<bold> attribute is not allowed.

This option may also be specified as

 bg-color
 bg_color
 bg-colour
 bg_colour
 background
 background-color
 background_color
 background-colour
 background_colour

=item header (-H --header --HC=color --header-color=color)

 header  : 1
 header  : blue_on_white

Defines if a header is displayed above the diff (default is 1). Any supported
color is also allowed as value.

If the value is a valid supported color, it will show the header in that
color scheme.  To disable the header set it to C<0> in the RC file or use
C<--no-header> as a command line argument.

=item verbose

 verbose : cyan

Defines the color to be used as color for the verbose tag. The default is
C<cyan>. This color will only be used under C<--verbose>.

The color C<none> is also accepted and disables this color.

Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning.

This option may also be specified as

 verbose-color
 verbose_color
 verbose-colour
 verbose_colour

=item utf8 (-U)

 utf8    : yes

Defines whether all I/O is to be interpreted as UTF-8. The default is C<no>.

This option may also be specified as

 unicode
 utf
 utf-8

=item index (-I)

 index   : no

Defines if the position indication for a change chunk is prefixed with an
index number. The default is C<no>. The index is 1-based.

Without this option, the position indication would be like

 5,5c5,5
 19,19d18
 42a42,42

with this option, it would be

 [001] 5,5c5,5
 [002] 19,19d18
 [003] 42a42,42

When this option contains a positive integer, C<ccdiff> will only show the
diff chunk with that index.

=item emacs

 emacs   : no

If this option is yes/true and C<ccdiff> is called with just a single argument
that is an existing file, the call will act as

 $ ccdiff file~ file

if file~ exists.

=item threshold (-t)

 threshold : 2

Defines the number of lines a change block may differ before the fall-back of
horizontal diff to vertical diff.

=item heuristics (-h)

 heuristics : 40

Defines the percentage of character-changes a change block may differ before
the fall-back of horizontal diff to vertical diff. The default is undefined,
meaning no fallback based on heuristics.

=item ellipsis (-e)

 ellipsis : 0

Defines the number of characters to keep on each side of a horizontal-equal
segment. The default is C<0>, meaning to not compress. See also C<chr_eli>.

=item chr_old

 chr_old : U+25BC

Defines the character used to indicate the position of removed text on the
line below the text when option C<-m> is in effect.

=item chr_new

 chr_new : U+25B2

Defines the character used to indicate the position of added text on the
line below the text when option C<-m> is in effect.

=item chr_cml

 chr_cml : U+21B1

Defines the character used to indicate the starting position of changed text
in a line when verbose level is 3 and up.

=item chr_cmr

 chr_cmr : U+21B0

Defines the character used to indicate the ending position of changed text
in a line when verbose level is 3 and up.

=item chr_eli

 chr_eli : U+2508

Defines the character used to indicate omitted text in large unchanged text
when C<--ellipsis>/C<-e> is in effect.

This character is not equally well visible on all terminals or in all fonts,
so you might want to change it to something that stands out better in your
environment. Possible suggestions:

 … U+2026 HORIZONTAL ELLIPSIS
 ‴ U+2034 TRIPLE PRIME
 ‷ U+2037 REVERSED TRIPLE PRIME
 ↔ U+2194 LEFT RIGHT ARROW
 ↭ U+21ad LEFT RIGHT WAVE ARROW
 ↮ U+21ae LEFT RIGHT ARROW WITH STROKE
 ↹ U+21b9 LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
 ⇄ U+21c4 RIGHTWARDS ARROW OVER LEFTWARDS ARROW
 ⇆ U+21c6 LEFTWARDS ARROW OVER RIGHTWARDS ARROW
 ⇎ U+21ce LEFT RIGHT DOUBLE ARROW WITH STROKE
 ⇔ U+21d4 LEFT RIGHT DOUBLE ARROW
 ⇹ U+21f9 LEFT RIGHT ARROW WITH VERTICAL STROKE
 ⇼ U+21fc LEFT RIGHT ARROW WITH DOUBLE VERTICAL STROKE
 ⇿ U+21ff LEFT RIGHT OPEN-HEADED ARROW
 ≋ U+224b TRIPLE TILDE
 ┄ U+2504 BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL
 ┅ U+2505 BOX DRAWINGS HEAVY TRIPLE DASH HORIZONTAL
 ┈ U+2508 BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL
 ┉ U+2509 BOX DRAWINGS HEAVY QUADRUPLE DASH HORIZONTAL
 ⧻ U+29fb TRIPLE PLUS
 ⬌ U+2b0c LEFT RIGHT BLACK ARROW

=item chr_eli_v

 chr_eli_v : U+21A4U+21A6

When using C<--ellipsis> with C<--verbose> level 2 or up, the single character
indicator will be replaced with this indicator. If it consists of 2 characters,
the length of the compressed part is put between those characters.

A suggested alternative might be U+21E4U+21E5

=item iwbZusePP

As L<Algorithm::Diff::XS> is fast but is not 100% drop-in compliant with
L<Algorithm::Diff> and options C<-i>, C<-w>, C<-b>, and C<-Z> are likely to
not work in the C<XS> version, this option allows automatic switching to
the slower version if any of these options is selected.

=back

=head1 Git integration

You can use ccdiff to show diffs in git. It may work like this:

 $ git config --global diff.tool ccdiff
 $ git config --global difftool.prompt false
 $ git config --global difftool.ccdiff.cmd 'ccdiff --utf-8 -u -r $LOCAL $REMOTE'
 $ git difftool SHA~..SHA
 $ wget https://github.com/Tux/App-ccdiff/raw/master/Files/git-ccdiff \
    -O ~/bin/git-ccdiff
 $ perl -pi -e 's{/pro/bin/perl}{/usr/bin/env perl}' ~/bin/git-ccdiff
 $ chmod 755 ~/bin/git-ccdiff
 $ git ccdiff SHA

Of course you can use C<curl> instead of C<wget> and you can choose your own
(fixed) path to C<perl> instead of using C</usr/bin/env>.

From then on you can do

 $ git ccdiff
 $ git ccdiff 5c5a39f2

=head1 CAVEATS

Due to the implementation, where both sides of the comparison are completely
kept in memory, this tool might not be able to deal with (very) large datasets.

=head2 Speed

There are situations where L<Algorithm::Diff> takes considerably more time
compared to e.g. GNU diff. Installing L<Algorithm::Diff::XS> will make
C<ccdiff> a lot faster. C<ccdiff> will choose L<Algorithm::Diff::XS> if
available.

Note however that options like C<-i>, C<-w>, C<-b>, and C<-Z> are likely to
be a no-op in L<Algorithm::Diff::XS>, as that has not been implemented and
it is rather unlikely it will be. Choose C<--dc=pp> then. If you want this
switch always, set C<iwbZusePP = 1> in one of your L</Configuration files>.

=head1 SEE ALSO

L<Algorithm::Diff::XS>, L<Algorithm::Diff>, L<Text::Diff>

=head1 AUTHOR

H.Merijn Brand

=head1 COPYRIGHT AND LICENSE

 Copyright (C) 2018-2025 H.Merijn Brand.  All rights reserved.

This library is free software;  you can redistribute and/or modify it under
the same terms as The Artistic License 2.0.

=for elvis
:ex:se gw=75|color guide #ff0000:

=cut
