File: 22_header_row.t

package info (click to toggle)
libhtml-tableextract-perl 2.15-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 284 kB
  • sloc: perl: 1,558; makefile: 2
file content (115 lines) | stat: -rwxr-xr-x 3,647 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/perl

use strict;
use lib './lib';
use Test::More tests => 19;

use FindBin;
use lib $FindBin::RealBin;
use testload;

my $file  = "$Dat_Dir/basic.html";
my $file2 = "$Dat_Dir/basic2.html";

use HTML::TableExtract;

# Header row retention, part one: no 'headers' are requested.  Whatever
# keep_headers is set to (or left at), rows() is expected to hand back
# exactly as many rows as the table's underlying grid holds.

my($label, $te, @rows, $table);

foreach my $case (
  # [ test label, constructor arguments... ]
  [ 'header row (basic, default)' ],
  [ 'header row (basic, no keep)', keep_headers => 0 ],
  [ 'header row (basic, keep)',    keep_headers => 1 ],
) {
  my @new_args;
  ($label, @new_args) = @$case;

  $te = HTML::TableExtract->new(@new_args);
  ok($te->parse_file($file), "$label (parse)");

  # $table stays a file-level variable on purpose; later sections and the
  # helper sub at the bottom of the file share it.
  $table = $te->first_table_found;
  @rows  = $table->rows;
  cmp_ok(scalar(@rows), '==', scalar(@{$table->{grid}}), "$label (row count)");
}

my(@hrow, $hindex);
my @headers = qw(Eight Six Four Two Zero);
my @hlabels = map { "Header $_" } @headers;

# Header row retention, part two: tables are matched on 'headers'.  By
# default and with keep_headers => 0 the header row is expected to be
# absent from rows() (one row fewer than the grid); with keep_headers => 1
# it is expected to be included.  In every case hrow() must return the
# header cells in the order the headers were requested.

foreach my $case (
  # [ test label, grid rows missing from rows(), extra constructor args... ]
  [ 'header row (header, default)', 1 ],
  [ 'header row (header, nokeep)',  1, keep_headers => 0 ],
  [ 'header row (header, keep)',    0, keep_headers => 1 ],
) {
  my($omitted, @extra_args);
  ($label, $omitted, @extra_args) = @$case;

  $te = HTML::TableExtract->new( headers => [@headers], @extra_args );
  ok($te->parse_file($file), "$label (parse)");

  $table = $te->first_table_found;
  @rows  = $table->rows;
  my $expected_rows = @{$table->{grid}} - $omitted;
  cmp_ok(scalar(@rows), '==', $expected_rows, "$label (row count)");

  # hrow_index is called but its value is not asserted on; only the
  # contents of the header row are checked.
  $hindex = $table->hrow_index;
  @hrow   = $table->hrow;
  cmp_ok(join(' ', @hrow), 'eq', join(' ', @hlabels), "$label (hrow)");
}

###

# Traditionally, rows sitting above the header row are clipped from the
# extracted rows.  The second data file is scanned with
# get_grid_header_index() to find where its header row sits in the grid;
# rows() is then expected to start at that row (keep_headers => 1) or
# just below it (keep_headers => 0).

my $ghi;

foreach my $case (
  # [ test label, keep_headers value, rows dropped besides the pre-header ones ]
  [ 'pre-header row clip (header, nokeep)', 0, 1 ],
  [ 'pre-header row clip (header, keep)',   1, 0 ],
) {
  my($keep, $omitted);
  ($label, $keep, $omitted) = @$case;

  $te = HTML::TableExtract->new( headers => [@headers],
                                 keep_headers => $keep,
                               );
  ok($te->parse_file($file2), "$label (parse)");

  # $table must be assigned before the helper is called.
  $table = $te->first_table_found;
  $ghi   = get_grid_header_index($table->{grid});
  @rows  = $table->rows;
  cmp_ok(scalar(@rows), '==', @{$table->{grid}} - $ghi - $omitted, "$label (row count)");
}

# get_grid_header_index($grid)
#
# Locate the header row inside a table grid.
#
#   $grid - array ref of rows, each row an array ref of cells.  A cell may
#           be a plain string or a reference to one; references are
#           dereferenced before matching.
#
# Returns the index of the first row whose first cell does NOT match
# /not\s+header/i.  Returns 0 when the grid is empty or when every row
# matches that pattern.
#
# The grid passed by the caller is the one inspected; the sub does not
# reach for any file-level variable.
sub get_grid_header_index {
  my $grid = shift;
  my $ghi = 0;
  foreach my $row_index (0 .. $#{$grid}) {
    my $item = $grid->[$row_index][0];
    $item = $$item if ref $item;
    # Rows explicitly marked "not header" come before the real header row.
    # An undefined cell cannot carry that marker, so it ends the search
    # just like any other non-matching cell.
    next if defined $item && $item =~ /not\s+header/i;
    $ghi = $row_index;
    last;
  }
  return $ghi;
}