File: csv-split

package info (click to toggle)
libtext-csv-xs-perl 1.60-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,368 kB
  • sloc: perl: 8,771; makefile: 9
file content (129 lines) | stat: -rwxr-xr-x 2,364 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/pro/bin/perl

use 5.014000;
use warnings;

# Script version string, printed by the -V / --version option.
our $VERSION = "0.01 - 20201021";
# Program name for messages: $0 with everything up to the last "/" removed.
our $CMD = $0 =~ s{.*/}{}r;

# Print the usage summary and terminate the program.
#   $err   exit status. When true, the summary is written to STDERR
#          instead of the currently selected output handle.
# Does not return: always ends with exit $err.
sub usage {
    my ($err) = @_;
    select STDERR if $err;
    say for
	"usage: $CMD [-c #] [-r #] [-b B] file.csv",
	"    -c N   --columns=N    Split on max N columns per file",
	"    -r N   --records=N    Split on max N records per file",
	"    -b B   --base=B       Use B as base for new file names";
    exit $err;
    } # usage

use CSV;
use Getopt::Long qw(:config bundling);
# Command line options. undef means the option was not given.
my $opt_c;		# -c N  max number of columns per output file
my $opt_r;		# -r N  max number of records per output file
my $opt_b;		# -b B  base for the names of the output files
my $opt_v = 0;		# -v    verbosity; 1 when given without a value

# --help prints the usage and exits 0, --version prints the version and
# exits 0. Any parse error prints the usage to STDERR and exits 1.
GetOptions (
    "help|?"		=> sub { usage (0) },
    "V|version"		=> sub { say "$CMD [$VERSION]"; exit 0 },

    "c|columns=n"	=> \$opt_c,
    "r|rows|records=n"	=> \$opt_r,

    "b|base=s"		=> \$opt_b,

    "v|verbose:1"	=> \$opt_v,
    ) or usage (1);

# Input file: the first positional argument. Test for defined and non-empty
# rather than for truth, so a file literally named "0" is accepted.
my $inf = shift;
defined $inf && length $inf or usage (1);

# Open before checking the size, so a missing or unreadable file is reported
# with the real OS error instead of a misleading "is empty".
open my $ih, "<", $inf or die "$inf: $!\n";
-s $ih or die "$inf is empty";

# Default base for the output names: the input name without a trailing
# ".csv" (case-insensitive).
$opt_b //= $inf =~ s{\.csv$}{}ri;

# Sequence number used in the output names when splitting on records. It is
# a string so that ++ keeps the leading zero ("01" -> "02").
my $x = "01";

my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });

# Read the header line; @h holds the column names.
# NOTE(review): per the Text::CSV_XS documentation, header () lower-cases the
# column names by default. If the split files must keep the original case,
# pass munge_column_names => "none" - confirm the intended behavior.
my @h = $csv->header ($ih, { bom => 1 });

# Turn the flat list of column names in @h into one entry per output file.
# Each entry is [ idx, fh, fields... ]:
#   idx     suffix for the file name ("" when the columns are not split)
#   fh      output handle, undef until next_oh () opens the file
#   fields  the column names written to that file

# With a negative length, splice keeps the last -N elements, so the loop
# below would never empty @h. Reject such a value up front.
$opt_c && $opt_c < 0 and die "$CMD: --columns needs a positive number\n";

if (!$opt_c or @h <= $opt_c) {
    # No column split requested, or all columns fit in a single file
    @h = ([ "", undef, @h ]);
    }
else {
    my @c;
    my $i = "A";	# string increment: A, B, ... Z, AA, ...
    while (@h) {
	# Take the next (up to) $opt_c column names for this file
	push @c => [ $i++, undef, splice @h, 0, $opt_c ];
	}
    @h = @c;
    }

# Start the next set of output files, one file per column group in @h.
# For every group: close the handle of the previous file (if any), open
# "<base>[-<seq>][-<idx>].csv" for writing, write the header line of the
# group to it, and store the new handle in the group. The "-<seq>" part is
# only used when splitting on records, "-<idx>" only when splitting on
# columns. Afterwards the sequence number $x is advanced.
# Dies when a previous file cannot be closed, when the new name equals the
# name of the input file, or when the new file cannot be opened.
sub next_oh {
    for my $grp (@h) {
	my ($idx, $fh, @fld) = @$grp;
	# Buffered write errors (e.g. disk full) only surface on close
	$fh and (close $fh or die "$CMD: close failed: $!\n");
	my $fn = $opt_b;
	$opt_r and $fn .= "-$x";
	$idx   and $fn .= "-$idx";
	$fn .= ".csv";
	$fn eq $inf and die "I WILL NOT OVERWRITE $inf!";
	open $fh, ">", $fn or die "$fn: $!\n";
	$csv->say ($fh, \@fld);
	$grp->[1] = $fh;
	}
    $x++;
    } # next_oh
# Without a record limit there is just one set of output files: open it now.
# With a limit, the first set is opened when the first record is read.
$opt_r or next_oh ();

# Copy the records. $n counts the data records (the header is not counted)
# and is only used when splitting on records: every $opt_r records a new
# set of output files is started.
my $n = 0;
while (my $r = $csv->getline_hr ($ih)) {
    if ($opt_r and $n++ % $opt_r == 0) {
	next_oh ();
	}
    for my $grp (@h) {
	my (undef, $fh, @fld) = @$grp;
	# Write just the columns that belong to this output file
	$csv->say ($fh, [ @{$r}{@fld} ]);
	}
    }
close $ih;

# Close every output file that was opened. A bare "close" would close the
# currently selected handle (STDOUT) instead of the output file, so pass the
# handle explicitly, and check the result to catch buffered write errors.
for my $fh (grep { defined } map { $_->[1] } @h) {
    close $fh or die "$CMD: close failed: $!\n";
    }

__END__

file.csv:
a,b,c,d,e
1,2,3,4,5
2,2,3,4,4
3,2,3,4,3
4,2,3,4,2
5,2,3,4,1

csv-split -r 3 file.csv

file-01.csv
a,b,c,d,e
1,2,3,4,5
2,2,3,4,4
3,2,3,4,3

file-02.csv
a,b,c,d,e
4,2,3,4,2
5,2,3,4,1

csv-split -c 3 file.csv

file-A.csv	file-B.csv
a,b,c		d,e
1,2,3		4,5
2,2,3		4,4
3,2,3		4,3
4,2,3		4,2
5,2,3		4,1

csv-split -r 3 -c 3 file.csv

file-01-A.csv	file-01-B.csv
a,b,c		d,e
1,2,3		4,5
2,2,3		4,4
3,2,3		4,3

file-02-A.csv	file-02-B.csv
a,b,c		d,e
4,2,3		4,2
5,2,3		4,1