File: xlsx2csv

package info (click to toggle)
libspreadsheet-read-perl 0.93-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 1,180 kB
sloc: perl: 7,309; xml: 751; lisp: 293; makefile: 8
file content (273 lines) | stat: -rwxr-xr-x 6,690 bytes
#!/usr/bin/perl

# xls2csv: Convert supported spreadsheet formats to CSV
#	   (m)'24 [2024-12-16] Copyright H.M.Brand 2008-2025

require 5.008001;

use strict;
use warnings;

our $VERSION = "3.11";
(my $cmd = $0) =~ s{.*/}{};

use Text::CSV_XS;
use List::Util        qw( first );
use Spreadsheet::Read qw( ReadData row );

# Print the short usage text, followed by any extra message lines, and exit.
#   usage ($status, @messages)
# A true $status sends all output to STDERR; $status is also the exit code.
sub usage {
    my ($status, @messages) = @_;
    $status and select STDERR;
    print <<"EOU";
usage: $cmd [-A [-N | -J c] | -o file.csv] [-s sep] [-f] [-i] file.xls
       $cmd --help | --man | --info
          --list    List supported spreadsheet formats and exit
    -A    --all     Export all sheets        (filename-sheetname.csv)
    -N    --no-pfx  No filename prefix on -A (sheetname.csv)
    -Z    --zip     Convert sheets to CSV's in ZIP
    -J s  --join=s  Use s to join filename-sheetname (-)
    -o f  --out=f   Set output filename
    -i f  --in=f    Set input  filename
    -f    --force   Force overwrite output if exists
    -s s  --sep=s   Set CSV separator character
Unless -A is used, all other options are passed on to xlscat
EOU
    # Extra lines (e.g. the reason usage was shown), one per line
    @messages and print join "\n", @messages, "";
    exit $status;
    } # usage

use Getopt::Long qw( :config bundling noignorecase passthrough );
# Storage for the command line options. Everything without an explicit
# default stays undefined until the matching option is seen.
my ($csv, $xls, $opt_f, $opt_s, $opt_A, $opt_N, $opt_Z);
my ($opt_J, $opt_v) = ("-", 0);

# Unrecognised options are left in @ARGV (passthrough is configured on the
# "use Getopt::Long" line) so they can be handed on later.
GetOptions (
    # Informational options: each of these handlers exits the program
    "help|?"		=> sub { usage (0) },
    "V|version"		=> sub { print "$cmd [$VERSION]\n"; exit 0 },
    "man"		=> sub { pod_nroff    () },
    "info"		=> sub { pod_text     () },
    "list"		=> sub { list_parsers () },

    # Input / output selection
    "o|c|out=s"		=> \$csv,
    "i|x|in=s"		=> \$xls,
    "f|force!"		=> \$opt_f,

    "s|sep=s"		=> \$opt_s,

    # Multi-sheet export
    "A|all!"		=> \$opt_A,
    "N|no-pfx!"		=> \$opt_N,
    "Z|zip!"		=> \$opt_Z,
    "J|join=s"		=> \$opt_J,

    "v|verbose:1"	=> \$opt_v,
    ) or usage (1);

# Print a table with one line per entry returned by
# Spreadsheet::Read::parsers () and exit with status 0.
# The last column shows "<==" for entries whose {def} field is true.
sub list_parsers {
    my @fields = qw( ext mod min vsn );
    print "Ext   Parser module             Req   Has Def\n",
	  "----- ----------------------- ----- ----- ---\n";
    foreach my $parser (Spreadsheet::Read::parsers ()) {
	my $marker = $parser->{def} ? "<==" : "";
	printf "%-5s %-23s %5s %5s %s\n", @{$parser}{@fields}, $marker;
	}
    exit 0;
    } # list_parsers

# Render this script's own POD as text on the selected output handle and
# exit with status 0. Pod::Text::Color is used unless the NO_COLOR
# environment variable holds a true value, in which case Pod::Text is used.
sub pod_text {
    # NOTE(review): only Pod::Text::Color is required here; presumably
    # loading it also makes Pod::Text available - confirm.
    require Pod::Text::Color;
    my $class = "Pod::Text::Color";
    $ENV{NO_COLOR} and $class = "Pod::Text";
    my $formatter = $class->new ();

    # Format into an in-memory buffer first, then print it in one go
    my $rendered;
    open my $fh, ">", \$rendered;
    $formatter->parse_from_file ($0, $fh);
    close $fh;

    print $rendered;
    exit 0;
    } # pod_text

# Render this script's own POD as a manual page piped through "nroff -man"
# and exit with status 0.
# Falls back to pod_text () (which exits itself) when no executable nroff is
# found in $ENV{PATH} or when the pipe to nroff cannot be opened.
sub pod_nroff {
    # An unset PATH is treated as empty instead of warning inside split
    my $path = defined $ENV{PATH} ? $ENV{PATH} : "";
    first { -x "$_/nroff" } grep { -d } split m/:+/ => $path or pod_text ();

    require Pod::Man;
    my $p = Pod::Man->new ();
    # List form: nroff is started directly, without a shell in between
    open my $fh, "|-", "nroff", "-man" or pod_text ();
    $p->parse_from_file ($0, $fh);
    close $fh;
    exit 0;
    } # pod_nroff

# Without an explicit input file option, use the last command line argument
# that names an existing plain file and remove it from @ARGV.
if (!$xls) {
    my $idx = first { -f $ARGV[$_] } reverse 0 .. $#ARGV;
    defined $idx and $xls = splice @ARGV, $idx, 1;
    }

# The input must be given, readable and non-empty; usage () exits otherwise
if    (!$xls)    { usage (1, "No input file");         }
elsif (!-r $xls) { usage (1, "Input file unreadable"); }
elsif (!-s $xls) { usage (1, "Input file empty");      }

# Build a case-insensitive pattern matching ".ext" for every known extension.
# NOTE(review): Spreadsheet::Read::parses (0) presumably returns the list of
# extensions the installed parsers support - confirm. If it returns nothing,
# a fixed list is used instead.
my @known_ext = Spreadsheet::Read::parses (0);
   @known_ext or @known_ext = qw( csv ods sc sxc xls xlsx );
my $valid_ext = do { local $" = "|"; qr{ \. (?: @known_ext )}xi };

if ($opt_Z) {
    # require dies by itself when the module cannot be loaded, so it has to
    # be wrapped in eval for the friendly message below to be reachable.
    eval { require Archive::Zip; 1 } or
	die "--zip requires Archive::Zip, which could not be loaded\n";
    }

# Allow the separator to be passed as a backslash escape: \n, \t, \e or \r
my %e = (n => "\n", t => "\t", e => "\e", r => "\r");
$opt_s and $opt_s =~ s/\\+([nter])/$e{$1}/g;

# Multi-sheet mode (-A and/or -Z): every sheet that has data is written as
# its own CSV, either to a separate file or as a member of one ZIP archive.
# This branch always ends the program.
if ($opt_A || $opt_Z) {
    $opt_v and warn "Reading $xls ...\n";
    my $ss = ReadData ($xls) or die "Cannot read/parse $xls\n";
    my $az = $opt_Z ? Archive::Zip->new : undef;

    # From here on $xls only serves as the base for output names: the -o
    # value if given, else the input name, with a known extension removed.
    $csv and $xls = $csv;
    $xls =~ s/$valid_ext $//ix;

    # $csv is reused to hold the CSV writer
    $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1, eol => "\r\n" });
    $opt_s and $csv->sep_char ($opt_s);

    foreach my $si (1 .. $ss->[0]{sheets}) {
	# Skip sheets that are missing or have no columns or no rows
	my $s  = $ss->[$si]   or next;
		 $s->{maxcol} or next;
	my $mr = $s->{maxrow} or next;
	my $sn = $s->{label} || "sheet-$si";
	   $sn =~ s/\s+$//;
	   $sn =~ s/^\s+//;
	   $sn =~ s/[^-\w.]+/_/g; # remove any special chars from worksheet name
	if ($opt_Z) {
	    # Build the CSV in memory and add it to the archive as "$sn.csv"
	    open my $fh, ">:encoding(utf-8)", \my $data or die "$sn.csv: $!\n";
	    $csv->print ($fh, [ row ($s, $_) ]) for 1 .. $mr;
	    close $fh or die "$sn.csv: $!\n";
	    $az->addString ($data, "$sn.csv");
	    next;
	    }
	my $fn = $opt_N ? "$sn.csv" : "$xls$opt_J$sn.csv";
	-f $fn && !$opt_f and die "$fn already exists\n";
	warn "Saving sheet to $fn ...\n";
	open my $fh, ">:encoding(utf-8)", $fn or die "$fn: $!\n";
	$csv->print ($fh, [ row ($s, $_) ]) for 1 .. $mr;
	# Buffered write errors (e.g. disk full) only surface at close
	close $fh or die "$fn: $!\n";
	}
    if ($opt_Z) {
	my $fn = "$xls.zip";
	# With --force an existing archive is removed first; a failing
	# unlink is reported as such instead of as "already exists"
	if (-f $fn && $opt_f) {
	    unlink $fn or die "$fn: $!\n";
	    }
	-f $fn and die "$fn already exists\n";
	$az->writeToFileNamed ($fn) == 0 or die "$fn: $!\n";
	}
    exit;
    }

# Single-sheet mode: redirect STDOUT to the CSV file and let xlscat do the
# conversion, passing on all remaining command line arguments.

# Derive the output name from the input name when -o was not given: a known
# spreadsheet extension is replaced by ".csv". If there is no such extension,
# ".csv" is appended so the output can never silently equal the input.
unless ($csv) {
    ($csv = $xls) =~ s/$valid_ext $/.csv/xi or $csv .= ".csv";
    }
# Still identical (e.g. the input itself is a .csv, or -o names the input):
# refuse, as --force would otherwise delete the input before it is read.
$csv eq $xls and die "Output file $csv would overwrite the input file\n";

if (-f $csv) {
    $opt_f or die "$csv already exists\n";
    unlink $csv;
    }

warn "Converting $xls to $csv ...\n";
open STDOUT, ">", $csv or die "$csv: $!\n";
$^O eq "MSWin32" and $xls = qq{"$xls"};
$opt_s and unshift @ARGV => "--sep=$opt_s";
$opt_v and unshift @ARGV => "--verbose=$opt_v";
{   no warnings "exec";	# the failure is reported by the die below
    # exec only returns on failure; without this check the script would
    # exit with status 0 without having converted anything
    exec { "xlscat" } "xlscat", "-c", @ARGV, $xls
	or die "Cannot exec xlscat: $!\n";
    }

__END__

=pod

=head1 NAME

   xlsx2csv - convert spreadsheet to CSV(s)

=head1 SYNOPSIS

   xlsx2csv [ -A [ -N | -J str ] | -o file.csv ] [-f] [-i] file.xls
   xlsx2csv   --help | --man | --info

=head1 DESCRIPTION

Convert a spreadsheet (all formats supported by L<Spreadsheet::Read>) to CSV
(using L<Text::CSV_XS>).

=head1 OPTIONS

=over

=item -?

=item --help

Print short usage and exit.

=item --man

Print this help using nroff and exit.

=item --info

Print this help and exit.

=item -V

=item --version

Print the version and exit.

=item -f

=item --force

Overwrite existing output file.

=item -A

=item --all

Output data from all sheets.

Each sheet will go to a separate file with a name built from the output
CSV-file and the sheet name.

=item -N

=item --no-pfx

If specified, C<--all> will name each file after the sheet name only, without
the output CSV-file prefix.

=item -J str

=item --join=str

If specified, output file names under C<-A> will join on C<str> instead of
the default C<->.

=item -o CSV-file

=item -c CSV-file

=item --out=CSV-file

Output file name. With C<< --all >> it is used, without its extension, as the
prefix for the per-sheet file names, unless C<< --no-pfx >> is given.

Default value is derived from XLS-file.

=item -i XLS-file

=item -x XLS-file

=item --in=XLS-file

Specifies the input spreadsheet file.

Default: Last ARGUMENT file that exists.

=back

=head1 SEE ALSO

L<Spreadsheet::Read>, L<Text::CSV_XS>, L<xlscat>

=head1 AUTHOR

H.Merijn Brand <perl5@tux.freedom.nl>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2008-2025 H.Merijn Brand

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut