1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
#!/usr/bin/perl
use strict;
use lib './lib';
use Test::More tests => 19;
use FindBin;
use lib $FindBin::RealBin;
use testload;
my $file = "$Dat_Dir/basic.html";
my $file2 = "$Dat_Dir/basic2.html";
use HTML::TableExtract;
# Header row retention, part one: no 'headers' constraint is supplied.
# Each case parses the basic fixture and checks that rows() hands back
# exactly as many rows as the table's internal grid holds, whatever the
# keep_headers setting is.
#
# The results are deliberately stored in the file-scoped variables below
# (not in loop-local copies) so that code further down the file keeps
# seeing the most recently parsed table.
my($label, $te, @rows, $table);
foreach my $case (
  # [ test label, constructor options... ]
  [ 'header row (basic, default)'                    ],
  [ 'header row (basic, no keep)', keep_headers => 0 ],
  [ 'header row (basic, keep)',    keep_headers => 1 ],
) {
  my @opts = @$case;
  $label = shift @opts;
  $te    = HTML::TableExtract->new(@opts);
  ok($te->parse_file($file), "$label (parse)");
  $table = $te->first_table_found;
  @rows  = $table->rows;
  my $grid_rows = scalar(@{ $table->{grid} });
  cmp_ok(scalar(@rows), '==', $grid_rows, "$label (row count)");
}
# Header row retention, part two: extraction is constrained by 'headers'.
# For every case we check
#   * the parse succeeds,
#   * the number of rows returned relative to the grid size (one fewer
#     than the grid unless keep_headers is on), and
#   * hrow() returns the expected "Header <name>" labels in order.
my(@hrow, $hindex);
my @headers = qw(Eight Six Four Two Zero);
my @hlabels = map("Header $_", @headers);
foreach my $case (
  # [ test label, rows expected to be absent from rows(), extra options... ]
  [ 'header row (header, default)', 1                    ],
  [ 'header row (header, nokeep)',  1, keep_headers => 0 ],
  [ 'header row (header, keep)',    0, keep_headers => 1 ],
) {
  my($name, $dropped, @extra) = @$case;
  $label = $name;
  $te    = HTML::TableExtract->new( headers => [@headers], @extra );
  ok($te->parse_file($file), "$label (parse)");
  $table = $te->first_table_found;
  @rows  = $table->rows;
  my $want_rows = scalar(@{ $table->{grid} }) - $dropped;
  cmp_ok(scalar(@rows), '==', $want_rows, "$label (row count)");
  # hrow_index is called for every case but its value is not asserted on.
  $hindex = $table->hrow_index;
  @hrow   = $table->hrow;
  cmp_ok(join(' ', @hrow), 'eq', join(' ', @hlabels), "$label (hrow)");
}
###
# Rows that sit above the header row have traditionally been clipped
# from rows().  Using the second fixture, locate the header row in the
# raw grid and check that rows() returns only what follows it -- plus the
# header row itself when keep_headers is on.
my $ghi;
foreach my $keep (0, 1) {
  $label = 'pre-header row clip (header, ' . ($keep ? 'keep' : 'nokeep') . ')';
  $te    = HTML::TableExtract->new( headers      => [@headers],
                                    keep_headers => $keep,
                                  );
  ok($te->parse_file($file2), "$label (parse)");
  # Keep assigning the file-scoped $table here rather than a loop-local one.
  $table = $te->first_table_found;
  $ghi   = get_grid_header_index($table->{grid});
  @rows  = $table->rows;
  # Everything before the header row is clipped; the header row itself
  # only counts when it is being kept.
  my $want_rows = scalar(@{ $table->{grid} }) - $ghi;
  $want_rows -= 1 unless $keep;
  cmp_ok(scalar(@rows), '==', $want_rows, "$label (row count)");
}
# get_grid_header_index($grid)
#
# Find the first row of a grid that is not a "not header" filler row.
#
#   $grid - array reference of rows, each row an array reference of
#           cells.  A cell may be a plain string or a reference to a
#           string; references are dereferenced before matching.
#
# Returns the index of the first row whose first cell does NOT match
# /not\s+header/i.  Returns 0 when the grid is empty or when every row
# matches.
#
# The grid that is passed in is the one examined; the sub does not look
# at any variable outside its own scope.
sub get_grid_header_index {
  my $grid = shift;
  foreach my $i (0 .. $#{$grid}) {
    my $item = $grid->[$i][0];
    $item = $$item if ref $item;
    # An undefined first cell cannot be a "not header" marker, so it ends
    # the search just like any other non-matching cell.
    next if defined $item && $item =~ /not\s+header/i;
    return $i;
  }
  return 0;
}
|