File: docx2txt.bat

package info (click to toggle)
docx2txt 1.4-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 188 kB
  • sloc: perl: 391; sh: 49; makefile: 35
file content (206 lines) | stat: -rw-r--r-- 4,551 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
@echo off

:: docx2txt, a command-line utility to convert Docx documents to text format.
:: Copyright (C) 2008-now Sandeep Kumar
::
:: This program is free software; you can redistribute it and/or modify
:: it under the terms of the GNU General Public License as published by
:: the Free Software Foundation; either version 3 of the License, or
:: (at your option) any later version.
::
:: This program is distributed in the hope that it will be useful,
:: but WITHOUT ANY WARRANTY; without even the implied warranty of
:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
:: GNU General Public License for more details.
::
:: You should have received a copy of the GNU General Public License
:: along with this program; if not, write to the Free Software
:: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

::
:: A simple commandline .docx to .txt converter
::
:: This batch file is a wrapper around core docx2txt.pl script.
::
:: Author : Sandeep Kumar (shimple0 -AT- Yahoo .DOT. COM)
::
:: ChangeLog :
::
::    17/09/2009 - Initial version of this file. It has similar functionality
::                 as corresponding unix shell script.
::    21/09/2009 - Updates to deal with paths containing spaces.
::    22/09/2009 - Code reorganization, mainly around delayedexpansion command
::                 extension.
::    24/09/2009 - Required docx2txt.pl is expected in same location as this
::                 batch file.
::


::
:: Set path (without surrounding quotes) to perl binary.
::

set PERL=C:\Program Files\strawberry-perl-5.10.0.6\perl\bin\perl.exe

::
:: If CAKECMD variable is set, batch file will unzip the content of argument
:: .docx file in a directory and pass that directory as the argument to the
:: docx2txt.pl script.
::

:: set CAKECMD=C:\Program Files\cake\CakeCmd.exe


::
:: Ensure that required command extensions are enabled.
::

setlocal enableextensions
setlocal enabledelayedexpansion


::
:: docx2txt.pl is expected to be in same location as this batch file.
::

set DOCX2TXT_PL=%~dp0docx2txt.pl

if not exist "%DOCX2TXT_PL%" (
    echo.
    echo Can not continue without "%DOCX2TXT_PL%".
    echo.
    goto END
)


::
:: Check if this batch file is invoked correctly.
::
if "%~1" == "" goto USAGE
if not "%~2" == "" goto USAGE
goto CHECK_ARG


:USAGE

echo.
echo Usage : "%~0" file.docx
echo.
echo 	"file.docx" can also specify a directory holding the unzipped
echo 	content of a .docx file.
echo.
goto END


::
:: Check if argument specifies a directory or a file.
::

:CHECK_ARG

set INPARG=%~1

if exist %~s1\nul (
    set ARGISDIR=y
    :: Remove any trailing '\'s from input directory name.
:INP_IS_DIR
    set LastChar=%INPARG:~-1%
    if not "!LastChar!" == "\" goto GENERATE_TXTFILE_NAME
    set INPARG=%INPARG:~0,-1%
    goto INP_IS_DIR
) else if not exist "%~1" (
    echo.
    echo Argument file/directory "%~1" does not exist.
    echo.
    goto END
)


::
:: Generate output textfile name from input argument.
::

:GENERATE_TXTFILE_NAME

set FILEEXT=%INPARG:~-5%
if "%FILEEXT%" == ".docx" (
    set TXTFILE=%INPARG:~0,-5%.txt
) else (
    set TXTFILE=%INPARG%.txt
)


::
:: Check whether output text file already exists, and whether user wants to
:: overwrite that.
::

if exist "%TXTFILE%" (
    echo.
    echo Output file "%TXTFILE%" already exists.
    set /P confirm=Overwrite "%TXTFILE%" [Y/N - Default Y] ?

    if /I "!confirm!" == "N" (
        echo.
        echo Please copy "%TXTFILE%" somewhere else and rerun this batch file.
        echo.
        goto END
    )
)


::
:: Since docx2txt.pl script expects an unzipper that can send the extracted
:: file to stdout. If CakeCmd.exe is being used as unzipper, then extract the
:: contents into a directory and pass that directory as the argument to the
:: perl script.
::

if defined ARGISDIR goto CONVERT

if defined CAKECMD (
    rename "%~1" "%~1.zip"
    echo y | "%CAKECMD%" extract "%~1.zip" \ "%~1" > nul
    set RENAMEBACK=yes
)


::
:: Invoke docx2txt.pl perl script to do the actual text extraction
::

:CONVERT

"%PERL%" "%DOCX2TXT_PL%" "%INPARG%" "%TXTFILE%" 

if %ERRORLEVEL% == 2 (
    echo.
    echo Failed to extract text from "%~1"!
    echo.
) else if %ERRORLEVEL% == 0 (
    echo.
    echo Text extracted from "%~1" is available in "%TXTFILE%".
    echo.
)


:END

if defined RENAMEBACK (
    rmdir /S /Q "%~1"
    rename "%~1.zip" "%~1"
)

endlocal
endlocal

set PERL=
set DOCX2TXT_PL=
set CAKECMD=

set FILEEXT=
set INPARG=
set TXTFILE=
set ARGISDIR=
set RENAMEBACK=
set confirm=