File: csv2xls

package info (click to toggle)
libtext-csv-xs-perl 1.60-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,368 kB
  • sloc: perl: 8,771; makefile: 9
file content (234 lines) | stat: -rwxr-xr-x 7,579 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/pro/bin/perl

# csv2xls: Convert csv to xls
#	   (m)'23 [14 Nov 2023] Copyright H.M.Brand 2007-2025

use 5.012000;
use warnings;

our $VERSION = "1.81";

sub usage {
    my $err = shift and select STDERR;
    print <<"EOU";
usage: csv2xls [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
               [-o <xls>] [file.csv]
       -s <sep>   use <sep>   as seperator char, auto-detect, default = ','
                  The string "tab" is allowed.
       -e <esc>   use <esc>   as escape    char, auto-detect, default = '"'
                  The string "undef" is allowed.
       -q <quot>  use <quot>  as quotation char,              default = '"'
                  The string "undef" will disable quotation.
       -w <width> use <width> as default minimum column width default = 4
       -o <xls>   write output to file named <xls>, defaults
                  to input file name with .csv replaced with .xls
                  if from standard input, defaults to csv2xls.xls
       -F         allow formula's. Otherwise fields starting with
                  an equal sign are forced to string
         --Fa=aaa Define formula action: die/croak/diag/empty/undef
         --Fd     Formula's will cause a die
         --Fc     Formula's will cause a croak
         --FD     Formula's will cause a warning (this is the default)
         --Fe     Formula's will be replaced by the empty string
         --Fu     Formula's will be replaced with an undefined cell
       -f         force usage of <xls> if already exists (unlink before use)
       -d <dtfmt> use <dtfmt> as date formats.   Default = 'dd-mm-yyyy'
       -C <C:fmt> use <fmt> as currency formats for currency <C>, no default
       -D cols    only convert dates in columns <cols>. Default is everywhere.
       -u         CSV is UTF8
       -v [<lvl>] verbosity (default = 1)
EOU
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling nopermute noignorecase passthrough);
my $sep;	# Set after reading first line in a flurry attempt to auto-detect
my $quo = '"';
my $esc = '"';
my $wdt = 4;		# Default minimal column width
my $xls;		# Excel out file name
my $frc = 0;		# Force use of file
my $utf = 0;		# Data is encoded in Unicode
my $frm = 0;		# Allow formula's
my $fac = "diag";	# Formula action (default is warn only)
my $dtf = "dd-mm-yyyy";	# Date format
my $crf = "";		# Currency format, e.g.: $:### ### ##0.00
my $opt_v = 1;
my $dtc;

GetOptions (
    "help|?"		=> sub { usage (0); },

    "c|s|sep=s"		=> \$sep,
    "q|quo=s"		=> \$quo,
    "e|esc=s"		=> \$esc,
    "w|width=i"		=> \$wdt,
    "o|x|out=s"		=> \$xls,
    "d|date-fmt=s"	=> \$dtf,
    "D|date-col=s"	=> \$dtc,
    "C|curr-fmt=s"	=> \$crf,
    "f|force!"		=> \$frc,
    "F|formulas!"	=> \$frm,
      "Fa=s"		=> \$fac,
      "Fd"		=> sub { $fac = "die";		},
      "Fc"		=> sub { $fac = "croak";	},
      "FD"		=> sub { $fac = "diag";		},
      "Fe"		=> sub { $fac = "empty";	},
      "Fu"		=> sub { $fac = "undef";	},
    "u|utf-8!"		=> \$utf,
    "v|verbose:1"	=> \$opt_v,
    ) or usage (1);

my $title = @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xls";
($xls ||= $title) =~ s/(?:\.csv)?$/.xls/i;

-s $xls && $frc and unlink $xls;
if (-s $xls) {
    print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b";
    scalar <STDIN> =~ m/^[yj](?:es|a)?$/i or exit;
    }

# Don't split ourselves when modules do it _much_ better, and follow the standards
use Text::CSV_XS;
use Date::Calc qw( Delta_Days Days_in_Month );
use Spreadsheet::WriteExcel;
use Encode qw( from_to encode );

my $wbk = Spreadsheet::WriteExcel->new ($xls);
my $snm = $title;
   $snm =~ s{\.csv$}{}i;
   $snm =~ s{.*/}{};
   $snm ||= "Sheet 1";
($_ = length $snm) > 31 and substr $snm, 31, $_ - 31, "";
my $wks = $wbk->add_worksheet ($snm);
   $dtf =~ s/j/y/g;
my %fmt = (
    date => $wbk->add_format (align => "center", num_format => $dtf),
    rest => $wbk->add_format (align => "left"),
    wrap => $wbk->add_format (text_wrap => 1),
    );
$crf =~ s/^([^:]+):(.*)/$1/ and $fmt{currency} = $wbk->add_format (
    num_format	=> "$1 $2",
    align	=> "right",
    );

my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
my $row;
my $firstline;
unless ($sep) { # No sep char passed, try to auto-detect;
    while (<>) {
	m/\S/ or next;	# Skip empty leading blank lines
	$sep = # start auto-detect with quoted strings
	       m/["\d];["\d;]/  ? ";"  :
	       m/["\d],["\d,]/  ? ","  :
	       m/["\d]\t["\d,]/ ? "\t" :
	       # If neither, then for unquoted strings
	       m/\w;[\w;]/      ? ";"  :
	       m/\w,[\w,]/      ? ","  :
	       m/\w\t[\w,]/     ? "\t" :
				  ";"  ;
	    # Yeah I know it should be a ',' (hence Csv), but the majority
	    # of the csv files to be shown comes from fucking Micky$hit,
	    # that uses semiColon ';' instead.
	$firstline = $_;
	last;
	}
    $firstline or die "The sourcefile does not contain any usable data\n";
    }
my $csv = Text::CSV_XS-> new ({
    sep_char       => $sep eq "tab"   ? "\t"  : $sep,
    quote_char     => $quo eq "undef" ? undef : $quo,
    escape_char    => $esc eq "undef" ? undef : $esc,
    binary         => 1,
    keep_meta_info => 1,
    auto_diag      => 1,
    formula        => $fac,
    });
if ($firstline) {
    $csv->parse ($firstline) or die $csv->error_diag ();
    $row = [ $csv->fields ];
    }
if ($opt_v > 3) {
    foreach my $k (qw( sep_char quote_char escape_char )) {
	my $c = $csv->$k () || "undef";
	$c =~ s/\t/\\t/g;
	$c =~ s/\r/\\r/g;
	$c =~ s/\n/\\n/g;
	$c =~ s/\0/\\0/g;
	$c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge;
	printf STDERR "%-11s = %s\n", $k, $c;
	}
    }

if (my $rows = $dtc) {
    $rows =~ s/-$/-999/;			# 3,6-
    $rows =~ s/-/../g;
    eval "\$dtc = { map { \$_ => 1 } $rows }";
    }

while ($row && @$row or $row = $csv->getline (*ARGV)) {
    my @row = @$row;
    @row > $w and push @w, ($wdt) x (($w = @row) - @w);
    foreach my $c (0 .. $#row) {
	my $val = $row[$c] // "";
	my $l = length $val;
	$l > ($w[$c] // -1) and $w[$c] = $l;

	if ($utf and $csv->is_binary ($c)) {
	    eval { # from_to requires raw
		my $raw = encode ("UTF-8", $val, Encode::FB_CROAK);
		$val = $raw;
		};
	    from_to ($val, "utf-8", "ucs2");
	    $wks->write_unicode ($h, $c, $val);
	    next;
	    }

	if ($csv->is_quoted ($c)) {
	    if ($utf) {
		from_to ($val, "utf-8", "ucs2");
		$val =~ m/\n/
		    ? $wks->write_unicode ($h, $c, $val, $fmt{wrap})
		    : $wks->write_unicode ($h, $c, $val);
		}
	    else {
		$val =~ m/\n/
		    ? $wks->write_string  ($h, $c, $val, $fmt{wrap})
		    : $wks->write_string  ($h, $c, $val);
		}
	    next;
	    }

	if (!$dtc or $dtc->{$c + 1}) {
	    my @d = (0, 0, 0);	# Y, M, D
	    $val =~ m/^(\d{4})(\d{2})(\d{2})$/   and @d = ($1, $2, $3);
	    $val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1);
	    if ( $d[2] >=    1 && $d[2] <=   31 &&
		 $d[1] >=    1 && $d[1] <=   12 &&
		 $d[0] >= 1900 && $d[0] <= 2199) {
		my $dm = Days_in_Month (@d[0,1]);
		$d[2] <   1 and $d[2] = 1;
		$d[2] > $dm and $d[2] = $dm;
		my $dt = 2 + Delta_Days (1900, 1, 1, @d);
		$wks->write ($h, $c, $dt, $fmt{date});
		next;
		}
	    }
	if ($crf and $val =~ m/^\s*\Q$crf\E\s*([0-9.]+)$/) {
	    $wks->write ($h, $c, $1 + 0, $fmt{currency});
	    next;
	    }

	if (!$frm && $val =~ m/^=/) {
	    $wks->write_string  ($h, $c, $val);
	    }
	else {
	    $wks->write ($h, $c, $val);
	    }
	}
    ++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h;
    } continue { $row = undef }
$opt_v && printf STDERR "%6d x %6d\n", $w, $h;

$wks->set_column ($_, $_, $w[$_]) for 0 .. $#w;
$wbk->close ();