File: csv2xlsx

package info (click to toggle)
libtext-csv-xs-perl 1.38-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 844 kB
  • sloc: perl: 6,862; makefile: 9
file content (261 lines) | stat: -rwxr-xr-x 8,064 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/pro/bin/perl

# csv2xlsx: Convert csv to xlsx
#	   (m)'17 [20 Oct 2017] Copyright H.M.Brand 2007-2018

use 5.14.0;
use warnings;

our $VERSION = "1.03";

sub usage {
    my $err = shift and select STDERR;
    print <<"EOU";
usage: csv2xlsx [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
               [-o <xlsx>] [file.csv]
       -s <sep>   use <sep>   as seperator char, auto-detect, default = ','
                  The string "tab" is allowed.
       -e <esc>   use <esc>   as escape    char, auto-detect, default = '"'
                  The string "undef" is allowed.
       -q <quot>  use <quot>  as quotation char,              default = '"'
                  The string "undef" will disable quotation.
       -w <width> use <width> as default minimum column width default = 4
       -o <xlsx>  write output to file named <xlsx>, defaults
                  to input file name with .csv replaced with .xlsx
                  if from standard input, defaults to csv2xlsx.xlsx
       -F         allow formula's. Otherwise fields starting with
                  an equal sign are forced to string
         --Fa=aaa Define formula action: die/croak/diag/empty/undef
         --Fd     Formula's will cause a die
         --Fc     Formula's will cause a croak
         --FD     Formula's will cause a warning (this is the default)
         --Fe     Formula's will be replaced by the empty string
         --Fu     Formula's will be replaced with an undefined cell
       -f         force usage of <xlsx> if already exists (unlink before use)
       -d <dtfmt> use <dtfmt> as date formats.   Default = 'dd-mm-yyyy'
       -C <C:fmt> use <fmt> as currency formats for currency <C>, no default
       -D cols    only convert dates in columns <cols>. Default is everywhere.
       -u         CSV is UTF8
       -m         merge multiple CSV's into a single xlsx (separate sheets)
		    -o is required, all arguments should be existing files
       -v [<lvl>] verbosity (default = 1)
EOU
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling passthrough);
my $quo = '"';
my $esc = '"';
my $wdt = 4;		# Default minimal column width
my $fac = "diag";	# Formula action (default is warn only)
my $dtf = "dd-mm-yyyy";	# Date format
my $crf = "";		# Currency format, e.g.: $:### ### ##0.00
my $opt_v = 1;
my $dtc;

GetOptions (
    "help|?"		=> sub { usage (0); },
    "V|version"		=> sub { say $0 =~ s{.*/}{}r, " [$VERSION]"; exit 0; },

    "c|s|sep=s"		=> \my $sep, # Set after reading first line in attempt to auto-detect
    "q|quo=s"		=> \$quo,
    "e|esc=s"		=> \$esc,
    "w|width=i"		=> \$wdt,
    "o|x|out=s"		=> \my $xls,
    "d|date-fmt=s"	=> \$dtf,
    "D|date-col=s"	=> \$dtc,
    "C|curr-fmt=s"	=> \$crf,
    "f|force!"		=> \my $frc,
    "F|formulas!"	=> \my $frm,
      "Fa=s"		=> \$fac,
      "Fd"		=> sub { $fac = "die";		},
      "Fc"		=> sub { $fac = "croak";	},
      "FD"		=> sub { $fac = "diag";		},
      "Fe"		=> sub { $fac = "empty";	},
      "Fu"		=> sub { $fac = "undef";	},
    "u|utf-8|utf8!"	=> \my $utf,
    "m|merge!"		=> \my $mrg,
    "v|verbose:1"	=> \$opt_v,
    ) or usage (1);

if ($mrg) {
    my @csv;
    for (@ARGV) {
	if (m/\.xlsx?$/i) {
	    $xls and usage (1);
	    $xls = $_;
	    next;
	    }
	if (m/\.csv$/i && -s) {
	    push @csv, $_;
	    next;
	    }
	warn "Argument $_ is not an existing (CSV) file\n";
	usage (1);
	}
    $xls && @csv or usage (1);
    @ARGV = @csv;
    }

my $base = @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xlsx";
$xls ||= $base =~ s/(?:\.csv)?$/.xlsx/ir;

-s $xls && $frc and unlink $xls;
if (-s $xls) {
    print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b";
    scalar <STDIN> =~ m/^[yj](?:es|a)?$/i or exit;
    }

# Don't split ourselves when modules do it _much_ better, and follow the standards
use Text::CSV_XS;
use Date::Calc qw( Delta_Days Days_in_Month );
use Excel::Writer::XLSX;
use Encode qw( from_to );

my $wbk = Excel::Writer::XLSX->new ($xls);
   $dtf =~ s/j/y/g;
my %fmt = (
    date => $wbk->add_format (align => "center", num_format => $dtf),
    rest => $wbk->add_format (align => "left"),
    wrap => $wbk->add_format (text_wrap => 1),
    );
$crf =~ s/^([^:]+):(.*)/$1/ and $fmt{currency} = $wbk->add_format (
    num_format	=> "$1 $2",
    align	=> "right",
    );

my @args = @ARGV ? @ARGV : ("");
foreach my $csvf (@args) {
    my $wks = $wbk->add_worksheet ($csvf =~ s{\.csv$}{}ir =~ s{.*/}{}r || "Sheet 1");
    my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
    my $row;
    my $firstline;
    my $fh;
    if (-f $csvf) {
	$opt_v and say "Reading $csvf";
	open $fh, "<", $csvf or die "$csvf: $!\n";
	}
    else {
	$opt_v and say "Reading STDIN";
	$fh = *ARGV;
	}
    my $Sep = $sep;
    unless ($Sep) { # No sep char passed, try to auto-detect;
	while (<$fh>) {
	    m/\S/ or next;	# Skip empty leading blank lines
	    $Sep = # start auto-detect with quoted strings
		   m/["\d];["\d;]/    ? ";"  :
		   m/["\d],["\d,]/    ? ","  :
		   m/["\d]\t["\d,]/   ? "\t" :
		   # If neither, then for unquoted strings
		   m/\w;[\w;]/        ? ";"  :
		   m/\w,[\w,]/        ? ","  :
		   m/\w\t[\w,]/       ? "\t" :
		   # And pipes (lowest prio)
		   m/["\d]\|["\d,]/   ? "|"  :
		   m/\w\|[\w,]/       ? "|"  :
		   m/,/  && !m/[;\t]/ ? ","  :
		   m/;/  && !m/[,\t]/ ? ";"  :
		   m/\t/ && !m/[,;]/  ? "\t" :
					","  ;
	    $firstline = $_;
	    last;
	    }
	}
    my $csv = Text::CSV_XS-> new ({
	sep_char       => $Sep eq "tab"   ? "\t"  : $Sep,
	quote_char     => $quo eq "undef" ? undef : $quo,
	escape_char    => $esc eq "undef" ? undef : $esc,
	binary         => 1,
	keep_meta_info => 1,
	auto_diag      => 1,
	formula        => $fac,
	});
    if ($firstline) {
	$csv->parse ($firstline) or die $csv->error_diag ();
	$row = [ $csv->fields ];
	}
    if ($opt_v > 3) {
	foreach my $k (qw( sep_char quote_char escape_char )) {
	    my $c = $csv->$k () || "undef";
	    $c =~ s/\t/\\t/g;
	    $c =~ s/\r/\\r/g;
	    $c =~ s/\n/\\n/g;
	    $c =~ s/\0/\\0/g;
	    $c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge;
	    printf STDERR "%-11s = %s\n", $k, $c;
	    }
	}

    if (my $rows = $dtc) {
	$rows =~ s/-$/-999/;			# 3,6-
	$rows =~ s/-/../g;
	eval "\$dtc = { map { \$_ => 1 } $rows }";
	}

    while ($row && @$row or $row = $csv->getline ($fh)) {
	my @row = @$row;
	@row > $w and push @w, ($wdt) x (($w = @row) - @w);
	foreach my $c (0 .. $#row) {
	    my $val = $row[$c] // "";
	    my $l = length $val;
	    $l > ($w[$c] // -1) and $w[$c] = $l;

	    if ($utf and $csv->is_binary ($c)) {
		from_to ($val, "utf-8", "ucs2");
		$wks->write_unicode ($h, $c, $val);
		next;
		}

	    if ($csv->is_quoted ($c)) {
		$val =~ s/\r\n/\n/g;
		if ($utf) {
		    from_to ($val, "utf-8", "ucs2");
		    $val =~ m/\n/
			? $wks->write_unicode ($h, $c, $val, $fmt{wrap})
			: $wks->write_unicode ($h, $c, $val);
		    }
		else {
		    $val =~ m/\n/
			? $wks->write_string  ($h, $c, $val, $fmt{wrap})
			: $wks->write_string  ($h, $c, $val);
		    }
		next;
		}

	    if (!$dtc or $dtc->{$c + 1}) {
		my @d = (0, 0, 0);	# Y, M, D
		$val =~ m/^(\d{4})(\d{2})(\d{2})$/   and @d = ($1, $2, $3);
		$val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1);
		if ( $d[2] >=    1 && $d[2] <=   31 &&
		     $d[1] >=    1 && $d[1] <=   12 &&
		     $d[0] >= 1900 && $d[0] <= 2199) {
		    my $dm = Days_in_Month (@d[0,1]);
		    $d[2] <   1 and $d[2] = 1;
		    $d[2] > $dm and $d[2] = $dm;
		    my $dt = 2 + Delta_Days (1900, 1, 1, @d);
		    $wks->write ($h, $c, $dt, $fmt{date});
		    next;
		    }
		}
	    if ($crf and $val =~ m/^\s*\Q$crf\E\s*([0-9.]+)$/) {
		$wks->write ($h, $c, $1 + 0, $fmt{currency});
		next;
		}

	    if (!$frm && $val =~ m/^=/) {
		$wks->write_string  ($h, $c, $val);
		}
	    else {
		$wks->write ($h, $c, $val);
		}
	    }
	++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h;
	} continue { $row = undef }
    close $fh;
    $opt_v && printf STDERR "%6d x %6d\n", $w, $h;

    $wks->set_column ($_, $_, $w[$_]) for 0 .. $#w;
    }
$opt_v and say "Writing $xls";
$wbk->close ();