File: ParseDict.py

package info (click to toggle)
insighttoolkit 3.20.1%2Bgit20120521-3
links: PTS, VCS
area: main
in suites: wheezy
size: 80,652 kB
sloc: cpp: 458,133; ansic: 196,223; fortran: 28,000; python: 3,839; tcl: 1,811; sh: 1,184; java: 583; makefile: 430; csh: 220; perl: 193; xml: 20
file content (735 lines) | stat: -rwxr-xr-x 22,687 bytes
parent folder | download | duplicates (4)
#! /usr/bin/env python
"""
Let's write our own python parser to clean up the pdf (after 
pdftotext of course). 
Instructions: run pdftotext like this:

$ pdftotext -f 9 -l 81 -raw -nopgbrk 04_06PU.PDF 04_06PU.txt

then run the python parser like this:

$ python ParseDict.py 04_06PU.txt dicomV3.dic
"""
import re,os

"""
PdfTextParser takes as input a text file (produced by pdftotext)
and creates as output a clean file, ready to be processed by
DicomV3Expander.
Warning: PdfTextParser does not expand:
- (xxxx,xxxx to xxxx) xxxxxxxxxxxx
or
- (12xx, 3456) comment...

"""
class PdfTextParser:
  """Rebuild one-entry-per-line text from a pdftotext dump.

  The input is read line by line.  Lines recognised by IsAComment() are
  dropped, a line recognised by IsAStartingLine() opens a new entry, and
  every other line is treated as the continuation of the entry currently
  being buffered.  Finished entries are collected in _OutLines and written
  to the output file by Write().

  Subclasses customise the behaviour by overriding IsAComment(),
  IsAStartingLine(), IsAFullLine() and AddOutputLine(); Open() always
  calls them through self.
  """

  # Lines that are always dropped as comments.
  _KNOWN_COMMENTS = ("Tag Name VR VM", "PS 3.6-2003", "PS 3.6-2004")
  # Patterns are compiled once here instead of on every call.
  _PAGE_NUMBER_PATTERN = re.compile(r'^Page [0-9]+$')
  _ENTRY_START_PATTERN = re.compile(r'^\([0-9a-fA-Fx]+,[0-9a-fA-F]+\) (.*)$')
  _ENTRY_FULL_PATTERN = re.compile(
    r'^\([0-9a-fA-Fx]+,[0-9a-fA-F]+\) (.*) [A-Z][A-Z] [0-9]$')

  # Cstor
  def __init__(self):
    self._InputFilename = ''
    self._OutputFilename = ''
    # 0 until Open() replaces it with the input file object
    self._Infile = 0
    # Finished entries, each terminated by '\n'
    self._OutLines = []
    # Pieces of the entry currently being assembled
    self._PreviousBuffers = []

  def SetInputFileName(self,s):
    """Set the path of the text file to read."""
    self._InputFilename = s

  def SetOutputFileName(self,s):
    """Set the path of the cleaned file to write."""
    self._OutputFilename = s

  def IsAComment(self,s):
    """Return True if s is a comment for sure (table header or page footer)."""
    if s in self._KNOWN_COMMENTS:
      return True
    if self._PAGE_NUMBER_PATTERN.match(s):
      return True
    return False

  def IsAStartingLine(self,s):
    """Return True if s begins with a '(gggg,eeee) ' tag followed by text."""
    if self._ENTRY_START_PATTERN.match(s):
      return True
    return False

  def IsAFullLine(self,s):
    """Return True if s is a starting line that also ends with ' VR d'
    (two capital letters, a space and one digit)."""
    if self._ENTRY_FULL_PATTERN.match(s):
      return True
    return False

  # FIXME this function could be avoided...
  def IsSuspicious(self,s):
    """Return True if s is longer than 80 characters."""
    return len(s) > 80

  def AddOutputLine(self,s):
    """Append s, newline terminated, to the output lines."""
    assert not self.IsAComment(s)
    self._OutLines.append(s + '\n')

  def _FlushBeforeNewEntry(self):
    """Emit the buffered entry when a new starting line has been read.

    The buffer itself is left untouched; the caller replaces it.
    """
    previousbuffer = ' '.join(self._PreviousBuffers)
    if not self.IsAStartingLine(previousbuffer):
      if previousbuffer:
        print("Not a buffer: %s" % previousbuffer)
      return
    if not self.IsSuspicious(previousbuffer):
      self.AddOutputLine(previousbuffer)
      return
    # Suspiciously long buffer: it may be a complete entry followed by a
    # weird line that does not start as usual with (xxxx,xxxx).
    first = self._PreviousBuffers[0]
    if self.IsAFullLine(first):
      self.AddOutputLine(first)
      rest = ' '.join(self._PreviousBuffers[1:])
      # A single over-long full line has no remainder; do not emit an
      # empty output line for it.
      if rest:
        self.AddOutputLine(rest)
    elif self.IsAFullLine(previousbuffer):
      # Long, but a well formed entry nevertheless
      self.AddOutputLine(previousbuffer)
    else:
      # This is the only case where the buffer is not added to _OutLines
      print("Suspicious and Not a full line: %s" % first)

  def _FlushBeforeComment(self):
    """Emit the buffered entry when a comment ends it, then clear the buffer."""
    previousbuffer = ' '.join(self._PreviousBuffers)
    if previousbuffer and self.IsAStartingLine(previousbuffer):
      self.AddOutputLine(previousbuffer)
    else:
      print("Buffer is: %s" % previousbuffer)
    # A comment always terminates the current entry
    self._PreviousBuffers = []

  def Open(self):
    """Read the input file, rebuild the entries and write the output file."""
    self._Infile = open(self._InputFilename, 'r')
    try:
      for line in self._Infile:
        # Only drop a real line terminator: the last line of the file may
        # not have one and must not lose its final character.
        if line.endswith('\n'):
          line = line[:-1]
        if self.IsAComment(line):
          self._FlushBeforeComment()
        elif self.IsAStartingLine(line):
          self._FlushBeforeNewEntry()
          # In all cases save the line for potentially growing this entry
          self._PreviousBuffers = [line]
        else:
          self._PreviousBuffers.append(line)
      # End of input terminates the pending entry just like a comment
      # does; without this the last entry of the file would be lost.
      if self._PreviousBuffers:
        self._FlushBeforeComment()
    finally:
      self._Infile.close()
    self.Write()

  def Write(self):
    """Write the collected lines to the output file.

    Also closes the input file when one is open, because subclasses that
    override Open() rely on it.
    """
    outfile = open(self._OutputFilename, 'w')
    try:
      outfile.writelines( self._OutLines )
    finally:
      outfile.close()
      # _Infile is still the integer 0 when Open() was never called
      if hasattr(self._Infile, 'close'):
        self._Infile.close()

  # Main function to call for parsing
  def Parse(self):
    """Parse the input file and write the output file."""
    self.Open()

"""
UIDParser: PdfTextParser subclass for tables whose entries start with a
UID under the 1.2.840.10008 root
"""
class UIDParser(PdfTextParser):
  """PdfTextParser variant for tables whose entries start with a UID
  under the 1.2.840.10008 root instead of a (gggg,eeee) tag."""

  # The dots of the UID root and of the 'PS 3.x' part number are escaped:
  # unescaped they would match any character.  Compiled once per class.
  _UID_START_PATTERN = re.compile(r'^1\.2\.840\.10008\.[0-9.]+ (.*)$')
  _UID_FULL_PATTERNS = (
    re.compile(r'^1\.2\.840\.10008\.[0-9.]+ (.*) PS ?[0-9]\.1?[0-9]$'),
    re.compile(r'^1\.2\.840\.10008\.[0-9.]+ (.*) Well-known frame of reference$'),
    re.compile(r'^1\.2\.840\.10008\.[0-9.]+ (.*) \(Retired\)$'),
  )
  _UID_COMMENT_PATTERN = re.compile(r'^SPM2 (.*) http(.*)$')

  def IsAStartingLine(self,s):
    """Return True if s starts with a 1.2.840.10008.* UID followed by text."""
    if self._UID_START_PATTERN.match(s):
      return True
    return False

  def IsAFullLine(self,s):
    """Return True if s is a UID line ending with a 'PS 3.x' part number,
    with 'Well-known frame of reference' or with '(Retired)'."""
    for patt in self._UID_FULL_PATTERNS:
      if patt.match(s):
        return True
    return False

  def IsAComment(self,s):
    """Return True for the base class comments and for 'SPM2 ... http...' lines."""
    if PdfTextParser.IsAComment(self,s):
      return True
    # else let's enhance the super class
    if self._UID_COMMENT_PATTERN.match(s):
      return True
    return False

  def AddOutputLine(self,s):
    """Keep s only when it is a full line; report and drop it otherwise."""
    if self.IsAFullLine(s):
      return PdfTextParser.AddOutputLine(self,s)
    print("Discarding: %s" % s)


"""
TransferSyntaxParser
"""
class TransferSyntaxParser(UIDParser):
  """UIDParser variant that only keeps the 'Transfer Syntax' entries."""

  # The dot of the 'PS 3.x' part number is escaped so that it no longer
  # matches any character.  Compiled once per class.
  _TRANSFER_SYNTAX_PATTERN = re.compile(
    r'^(.*) Transfer Syntax PS ?[0-9]\.1?[0-9]$')

  def IsAFullLine(self,s):
    """Return True if s ends with 'Transfer Syntax PS 3.x' and starts with
    a UID; any line without that ending is reported on stdout."""
    if self._TRANSFER_SYNTAX_PATTERN.match(s):
      return UIDParser.IsAStartingLine(self,s)
    print("Not a TS: %s" % s)
    return False
    
"""
Papyrus parser
pdftotext -f 19 -l 41 -raw -nopgbrk /tmp/Papyrus31Specif.pdf /tmp/Papyrus31Specif.txt 

I need to do a second pass for pages:
#29 since I need to find [0-9.]+
#40,41 since they start with numbers in two columns!
""" 
class PapyrusParser(PdfTextParser):
  """PdfTextParser variant for the Papyrus specification tables.

  Entries look like 'Attribute Name (gggg,eeee) type description' or,
  after page 39, like 'gggg eeee ...'.  Bare numbers are treated as page
  numbers or in-page numbering and dropped as comments.
  """

  # Patterns are compiled once here instead of on every call.
  _PAPYRUS_NAMED_ENTRY = re.compile(
    r"^[A-Za-z '\(\)]+ +\([0-9A-F]+,[0-9A-F]+\) +(.*)$")
  _PAPYRUS_NUMERIC_START = re.compile(r'^[0-9x]+ [0-9xA-F]+ .*$')
  _PAPYRUS_NUMERIC_FULL = re.compile(
    r'^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9].*$')
  _PAPYRUS_ANNEXE = re.compile(r'^.*ANNEXE.*$')
  _PAPYRUS_PAGE_NUMBER = re.compile(r'^[1-9][0-9]+$')
  _PAPYRUS_BARE_NUMBER = re.compile(r'^[0-9]+$')
  _PAPYRUS_OUT_TYPED = re.compile(
    r"^([A-Za-z '\(\)]+) (\([0-9A-F]+,[0-9A-F]+\)) ([0-9C]+) (.*)$")
  _PAPYRUS_OUT_UNTYPED = re.compile(
    r"^([A-Za-z '\(\)]+) (\([0-9A-F]+,[0-9A-F]+\)) (.*)$")
  # NOTE(review): the trailing class matches a single character only; the
  # disabled variants of this pattern used '+' -- confirm what is intended.
  _PAPYRUS_DEFINED_TERMS = re.compile(
    r'^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9] [A-Z, ]$')

  def __init__(self):
    # Last page number and last in-page number accepted by IsAComment()
    self._PreviousPage = 0
    self._PreviousNumber = 0
    PdfTextParser.__init__(self)

  def IsAStartingLine(self,s):
    """Return True if s opens an entry in either of the two layouts."""
    if self._PAPYRUS_NAMED_ENTRY.match(s):
      return True
    # After page 39, lines are like:
    if self._PAPYRUS_NUMERIC_START.match(s):
      return True
    return False

  def IsAFullLine(self,s):
    """Return True if s is a complete entry in either of the two layouts."""
    if self._PAPYRUS_NAMED_ENTRY.match(s):
      return True
    # After page 39, lines are like:
    if self._PAPYRUS_NUMERIC_FULL.match(s):
      return True
    return False

  def IsAComment(self,s):
    """Return True for table headers, ANNEXE lines and numbering lines.

    This method is stateful: accepting a number as a comment updates
    _PreviousPage / _PreviousNumber.
    """
    # dummy case:
    if s == 'Attribute Name Tag Type Attribute Description':
      return True
    if self._PAPYRUS_ANNEXE.match(s):
      return True
    # Indicate page #, spaces ending with only one number
    # Sometime there is a line with only one number, we need to
    # make sure that page # is strictly increasing
    if self._PAPYRUS_PAGE_NUMBER.match(s):
      # int() instead of eval(): the text comes from the input file, and
      # eval() rejects or misreads (octal) numbers with a leading zero.
      page = int(s)
      if page > self._PreviousPage:
        self._PreviousNumber = 0
        self._PreviousPage = page
        return True
    # Now within each page there is a comment that starts with a number;
    # that numbering is reset at each page
    if self._PAPYRUS_BARE_NUMBER.match(s):
      number = int(s)
      if number > self._PreviousNumber:
        self._PreviousNumber = number
        return True
    return False

  def AddOutputLine(self,s):
    """Reorder s to 'tag type name' and append it to the output lines.

    Entries without a type get type '0'.  Lines in the numeric layout are
    kept as they are, except those matching the 'defined terms' pattern,
    which are reported and replaced by an empty line.
    """
    assert not self.IsAComment(s)
    s = s.replace('\n','')
    m = self._PAPYRUS_OUT_TYPED.match(s)
    if m:
      ss = m.group(2) + ' ' + m.group(3) + ' ' + m.group(1)
    else:
      m = self._PAPYRUS_OUT_UNTYPED.match(s)
      if m:
        ss = m.group(2) + ' 0 ' + m.group(1)
      else:
        ss = s
        # There are two cases, one of them ends with all capital letters
        # explaining the 'DEFINED TERMS'
        if self._PAPYRUS_DEFINED_TERMS.match(s):
          print("Match %s" % s)
          ss = ''
    self._OutLines.append(ss + '\n')

  def _PapyrusFlushPending(self):
    """Emit the buffered entry if it is one, then clear the buffer."""
    previousbuffer = ' '.join(self._PreviousBuffers)
    if previousbuffer and self.IsAStartingLine(previousbuffer):
      self.AddOutputLine( previousbuffer )
    self._PreviousBuffers = []

  def Open(self):
    """Read the input file, rebuild the entries and write the output file."""
    self._Infile = open(self._InputFilename, 'r')
    try:
      for line in self._Infile:
        # Only drop a real line terminator: the last line of the file may
        # not have one and must not lose its final character.
        if line.endswith('\n'):
          line = line[:-1]
        if self.IsAComment( line ):
          # A comment always terminates the current entry
          self._PapyrusFlushPending()
          continue
        if self.IsAStartingLine(line):
          previousbuffer = ' '.join(self._PreviousBuffers)
          if self.IsAFullLine(previousbuffer):
            self.AddOutputLine(previousbuffer)
          elif previousbuffer:
            print("Not a buffer: %s" % previousbuffer)
          self._PreviousBuffers = []
        # In all cases save the line for potentially growing this entry;
        # just to be safe remove any surrounding white space
        self._PreviousBuffers.append(line.strip())
      # End of input terminates the pending entry just like a comment
      # does; without this the last entry of the file would be lost.
      self._PapyrusFlushPending()
    finally:
      self._Infile.close()
    self.Write()

"""
Parser for:
GE Medical Systems HISPEED ADVANTAGE CT/i CONFORMANCE STATEMENT
pdftotext -f 81 -l 90 -raw -nopgbrk 2162114_100r5.pdf 2162114_100r5.txt
"""
class GEMSParser(PdfTextParser):
  """PdfTextParser variant for the GE conformance statement tables, whose
  entries look like 'Attribute Name (gggg,eeee) VR VM'."""

  # Patterns are compiled once here instead of on every call; their text
  # is unchanged.
  _GEMS_START_PATTERN = re.compile(
    r'^[A-Za-z0-9 .#(),_/-]+ +\([0-9A-F]+, ?[0-9A-F]+\) +(.*)$')
  _GEMS_FULL_PATTERN = re.compile(
    r'^[A-Za-z0-9 .#(),_/-]+ +\([0-9A-F]+, ?[0-9A-F]+\) [A-Z][A-Z] [0-9]+$')
  _GEMS_OUTPUT_PATTERN = re.compile(
    r'^([A-Za-z0-9 .#(),_/-]+) +\(([0-9A-F]+), ?([0-9A-F]+)\) ([A-Z][A-Z]) ([0-9]+)$')
  # NOTE(review): the dots in 'B.[1-9]', 'B.1.?' and '7.11.1' are not
  # escaped and therefore match any character -- confirm whether literal
  # dots were intended before tightening them.
  _GEMS_COMMENT_PATTERNS = (
    re.compile('^.*GE Medical Systems HISPEED ADVANTAGE CT/i CONFORMANCE STATEMENT.*$'),
    re.compile('^GE Medical Systems LightSpeed QX/i CONFORMANCE STATEMENT.*$'),
    re.compile('^Attribute Name Tag VR VM$'),
    re.compile('^B.[1-9].*Private .*$'),
    re.compile('^Table B.1.? .* Private .*$'),
    re.compile('^Note :.*$'),
    re.compile('^7.11.1$'),
  )

  def IsAStartingLine(self,s):
    """Return True if s holds a name followed by a (gggg,eeee) tag."""
    if self._GEMS_START_PATTERN.match(s):
      return True
    return False

  def IsAFullLine(self,s):
    """Return True if s is 'name (gggg,eeee) VR VM'; any other line is
    reported on stdout."""
    if self._GEMS_FULL_PATTERN.match(s):
      return True
    print("Not full: %s" % s)
    return False

  def IsAComment(self,s):
    """Return True for the base class comments and for the document
    headers, table titles and notes listed in _GEMS_COMMENT_PATTERNS."""
    if PdfTextParser.IsAComment(self,s):
      return True
    for patt in self._GEMS_COMMENT_PATTERNS:
      if patt.match(s):
        return True
    return False

  def AddOutputLine(self,s):
    """Append s as 'gggg eeee VR VM name' (tag in lower case) to the
    output lines; a line that does not have that shape is reported and
    dropped."""
    assert not self.IsAComment(s)
    m = self._GEMS_OUTPUT_PATTERN.match(s)
    if m:
      ss = m.group(2).lower() + ' ' + m.group(3).lower() + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
      self._OutLines.append(ss + '\n')
    else:
      print("OOOPs %s" % s)


"""
This class is meant to expand line like:
- (xxxx,xxxx to xxxx) xxxxxxxxxxxx
or
- (12xx, 3456) comment...

"""
class DicomV3Expander:
  """Expand range and wildcard dictionary entries into one line per tag.

  Handled forms:
  - '(gggg,eeee to ffff) rest'  -> one line per element eeee..ffff
  - '(ggxx,eeee) rest'          -> one line per group gg00..ggff
  - 'gggg eexx rest' and 'gggg xxee rest'
                                -> one line per element, xx being 00..ff
  Any other line is passed through LowerCaseTag().  All ranges include
  their last value and generated numbers are zero padded lower case hex.
  """

  # Patterns are compiled once here instead of on every call.
  _TAG_PATTERN = re.compile(r'^(\([0-9a-fA-F]+,[0-9a-fA-F]+\))(.*)$')
  _PLAIN_ENTRY_PATTERN = re.compile(
    r'^[0-9a-fA-F]+ [0-9a-fA-F]+ [A-Z][A-Z] [0-9n-] .*$')
  _TO_RANGE_PATTERN = re.compile(
    r'^\(([0-9a-fA-F]+),([0-9a-fA-F]+) to ([0-9a-fA-F]+)\)(.*)$')
  _GROUP_XX_PATTERN = re.compile(r'^\(([0-9a-fA-F]+)xx,([0-9a-fA-F]+)\)(.*)$')
  _ELEM_SUFFIX_XX_PATTERN = re.compile(r'^([0-9a-fA-F]+) ([0-9a-fA-F]+)xx(.*)$')
  _ELEM_PREFIX_XX_PATTERN = re.compile(r'^([0-9a-fA-F]+) xx([0-9a-fA-F]+)(.*)$')

  def __init__(self):
    self._InputFilename = ''
    self._OutputFilename = ''
    # Output lines, each terminated by '\n'
    self._OutLines = []

  def SetInputFileName(self,s):
    """Set the path of the file to expand."""
    self._InputFilename = s

  def SetOutputFileName(self,s):
    """Set the path of the expanded file to write."""
    self._OutputFilename = s

  # Function to turn into lower case a tag:
  # ex: (ABCD, EF01) -> (abcd, ef01)
  def LowerCaseTag(self,s):
    """Return s with its leading '(gggg,eeee)' tag in lower case.

    A line of the form 'gggg eeee VR VM ...' is returned unchanged.  Any
    other line is reported and terminates the program with exit status 1.
    """
    m = self._TAG_PATTERN.match(s)
    if m:
      return m.group(1).lower() + m.group(2)
    if self._PLAIN_ENTRY_PATTERN.match(s):
      return s
    print("Impossible case: %s" % s)
    os.sys.exit(1)

  def AddOutputLine(self,s):
    """Append s (one string or a list of strings) to the output lines."""
    if isinstance(s, list):
      for i in s:
        self._OutLines.append(i + '\n')
    else:
      self._OutLines.append(s + '\n')

  # Expand the line appropriately and also add it to the
  # _OutLines list
  def ExpandLine(self, s):
    """Expand one input line and append the result to the output lines."""
    # The last line of a file may lack its terminator; only strip a real one
    if s.endswith('\n'):
      s = s[:-1]
    expanded = []
    if (self.NeedToExpansion(s, expanded)
        or self.NeedGroupXXExpansion(s, expanded)
        or self.NeedElemXXExpansion(s, expanded)):
      self.AddOutputLine(expanded)
    else:
      self.AddOutputLine(self.LowerCaseTag(s))

  # If line is like:
  # (0020,3100 to 31FF) Source Image Ids RET
  def NeedToExpansion(self,s, list):
    """Expand an element range into list; return True if s was a range.

    Note: the 'list' parameter shadows the builtin inside this method;
    the name is kept for interface compatibility.
    """
    m = self._TO_RANGE_PATTERN.match(s)
    if not m:
      return False
    gr = m.group(1).lower()
    first = int(m.group(2), 16)
    last = int(m.group(3), 16)
    # 'a to b' includes b, hence last + 1
    for i in range(first, last + 1):
      list.append('(%s,%04x)%s' % (gr, i, m.group(4)))
    return True

  # If line is like:
  # (50xx,1200) Number of Patient Related Studies IS 1
  def NeedGroupXXExpansion(self,s,list):
    """Expand a 'ggxx' group into list; return True if s had that form."""
    m = self._GROUP_XX_PATTERN.match(s)
    if not m:
      return False
    gr_start = m.group(1)
    el = m.group(2).lower()
    first = int(gr_start + '00', 16)
    last = int(gr_start + 'FF', 16)
    # xx stands for 00..ff, both included
    for i in range(first, last + 1):
      list.append('(%04x,%s)%s' % (i, el, m.group(3)))
    return True

  # If line is like:
  # 2001 10xx Number of Patient Related Studies IS 1
  # or
  # 2001 xx10 Number of Patient Related Studies IS 1
  def NeedElemXXExpansion(self,s,list):
    """Expand an 'eexx' or 'xxee' element into list; return True if s had
    one of those forms.  Every generated line is also printed."""
    m = self._ELEM_SUFFIX_XX_PATTERN.match(s)
    if m:
      gr = m.group(1).lower()
      el_fixed = m.group(2).lower()
      # xx stands for 00..ff, both included
      for i in range(0x00, 0xFF + 1):
        l = '(%s,%s%02x)%s' % (gr, el_fixed, i, m.group(3))
        print(l)
        list.append(l)
      return True
    m = self._ELEM_PREFIX_XX_PATTERN.match(s)
    if m:
      gr = m.group(1).lower()
      el_fixed = m.group(2).lower()
      for i in range(0x00, 0xFF + 1):
        l = '(%s,%02x%s)%s' % (gr, i, el_fixed, m.group(3))
        print(l)
        list.append(l)
      return True
    return False

  def Write(self):
    """Write the collected lines to the output file."""
    outfile = open(self._OutputFilename, 'w')
    try:
      outfile.writelines( self._OutLines )
    finally:
      outfile.close()

  def Expand(self):
    """Expand every line of the input file and write the output file."""
    infile = open(self._InputFilename,'r')
    try:
      for line in infile:
        # ExpandLine also LowerCase the line
        self.ExpandLine(line) # yields [1,n] lines
    finally:
      infile.close()
    self.Write()

"""
Parse lines from a Philips document; lines look like this:

Syncra Scan Type 2005,10A1 VR = CS, VM = 1
"""
class InteraParser:
  """Convert 'Name gggg,eeee VR = XX, VM = n' lines into
  'gggg eeee XX n Name' lines."""

  # One pattern serves both the validity check and the field extraction.
  _INTERA_PATTERN = re.compile(
    r"^([A-Za-z0-9 -]+) ([0-9A-Z]+),([0-9A-Z]+) VR = ([A-Z][A-Z]), VM = (.*)$")

  def __init__(self):
    self._InputFilename = ''
    self._OutputFilename = ''

  def Reformat(self,s):
    """Return s rewritten as 'gggg eeee VR VM name'.

    A line that does not have the expected shape fails the assertion; if
    assertions are disabled it is reported and '' is returned.
    """
    assert self.IsGood(s)
    m = self._INTERA_PATTERN.match(s)
    if m:
      return ' '.join((m.group(2), m.group(3), m.group(4), m.group(5),
                       m.group(1)))
    print("oops")
    return ''

  def IsGood(self,s):
    """Return True if s has the expected shape; report it otherwise."""
    if self._INTERA_PATTERN.match(s):
      return True
    print("Not good: %s" % s)
    return False

  def SetInputFileName(self,s):
    """Set the path of the file to read."""
    self._InputFilename = s

  def SetOutputFileName(self,s):
    """Set the path of the file to write."""
    self._OutputFilename = s

  def Parse(self):
    """Reformat every input line, print it and write it to the output file."""
    outLines = []
    infile = open(self._InputFilename, 'r')
    try:
      for line in infile:
        # Reformat once per line and reuse the result
        newline = self.Reformat(line)
        print(newline)
        if newline:
          outLines.append( newline + '\n' )
    finally:
      infile.close()
    outfile = open(self._OutputFilename, 'w')
    try:
      outfile.writelines( outLines )
    finally:
      outfile.close()
 
"""
Parse lines from a dicom3tools document; lines look like this:

(0003,0008) VERS="SSPI" VR="US"   VM="1"        Owner="SIEMENS ISI"             Keyword="ISICommandField"                       Name="ISI Command Field"
"""
class Dicom3ToolsParser:
  """Convert dicom3tools dictionary lines,
  '(gggg,eeee) VERS="..." VR="XX" VM="n" Owner="..." Keyword="..." Name="..."',
  into 'gggg eeee XX n Name' lines."""

  # Raw strings: the patterns contain backslash escapes that are not valid
  # string escapes.  Compiled once per class.
  _D3T_FIELDS_PATTERN = re.compile(
    r'^\(([0-9a-f]+),([0-9a-f]+)\)\s+VERS=".*"\s+VR="([A-Z][A-Z])"\s+VM="(.*)"'
    r'\s+Owner=".*"\s+Keyword=".*"\s+Name="(.*)"$')
  _D3T_GOOD_PATTERN = re.compile(
    r'^\([0-9a-f]+,[0-9a-f]+\)\s+VERS=".*"\s+VR="[A-Z][A-Z]"\s+VM=".*"'
    r'\s+Owner=".*"\s+Keyword=".*"\s+Name=".*".*$')

  def __init__(self):
    self._InputFilename = ''
    self._OutputFilename = ''

  def Reformat(self,s):
    """Return s rewritten as 'gggg eeee VR VM name'.

    Returns '' (after printing 'oops') when the name is '?' or when the
    fields cannot be extracted.  A line rejected by IsGood() fails the
    assertion.
    """
    assert self.IsGood(s)
    m = self._D3T_FIELDS_PATTERN.match(s)
    dicom = ''
    if m:
      # Apparently some have Name == '?', skip those
      name = m.group(5)
      if name != '?' and name != '? ':
        dicom = m.group(1) + ' ' + m.group(2) + ' ' + m.group(3) + ' ' + m.group(4) + ' ' + name
      else:
        print("oops")
    else:
      print("oops")
    return dicom

  def IsGood(self,s):
    """Return True if s has the expected shape; report it otherwise."""
    if self._D3T_GOOD_PATTERN.match(s):
      return True
    print("Not good: %s" % s)
    return False

  def SetInputFileName(self,s):
    """Set the path of the file to read."""
    self._InputFilename = s

  def SetOutputFileName(self,s):
    """Set the path of the file to write."""
    self._OutputFilename = s

  def Parse(self):
    """Reformat every input line, print it and write the non-empty results
    to the output file."""
    outLines = []
    infile = open(self._InputFilename, 'r')
    try:
      for line in infile:
        newline = self.Reformat(line)
        print(newline)
        if newline:
          outLines.append( newline + '\n' )
    finally:
      infile.close()
    outfile = open(self._OutputFilename, 'w')
    try:
      outfile.writelines( outLines )
    finally:
      outfile.close()
 
"""
Parse lines from a GE Advance document; lines look like this:

GE Advance Implementation Version Name (0009,1001) 3 LO 2 n/a
"""
class GEAdvanceParser:
  """Convert 'Name (gggg,eeee) d VR VM ...' lines into
  'gggg eeee VR VM Name' lines, the tag being written in lower case."""

  # One pattern serves both the validity check and the field extraction.
  _GEADVANCE_PATTERN = re.compile(
    r"^([A-Za-z0-9 ._>]+) \(([0-9A-F]+),([0-9A-F]+)\) [0-9] ([A-Z][A-Z]) ([0-9]) .*$")

  def __init__(self):
    self._InputFilename = ''
    self._OutputFilename = ''

  def Reformat(self,s):
    """Return s rewritten as 'gggg eeee VR VM name'.

    Returns '' (after printing 'oops') when the fields cannot be
    extracted.  A line rejected by IsGood() fails the assertion.
    """
    assert self.IsGood(s)
    m = self._GEADVANCE_PATTERN.match(s)
    dicom = ''
    if m:
      # Both halves of the tag are lower cased (the group used to be left
      # in upper case while the element was lower cased).
      dicom = m.group(2).lower() + ' ' + m.group(3).lower() + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
    else:
      print("oops")
    return dicom

  def IsGood(self,s):
    """Return True if s has the expected shape; report it otherwise."""
    if self._GEADVANCE_PATTERN.match(s):
      return True
    print("Not good: %s" % s)
    return False

  def SetInputFileName(self,s):
    """Set the path of the file to read."""
    self._InputFilename = s

  def SetOutputFileName(self,s):
    """Set the path of the file to write."""
    self._OutputFilename = s

  def Parse(self):
    """Reformat every input line and write the non-empty results to the
    output file."""
    outLines = []
    infile = open(self._InputFilename, 'r')
    try:
      for line in infile:
        newline = self.Reformat(line)
        if newline:
          outLines.append( newline + '\n' )
    finally:
      infile.close()
    outfile = open(self._OutputFilename, 'w')
    try:
      outfile.writelines( outLines )
    finally:
      outfile.close()
 
# Script entry point: ParseDict.py <input file> <output file>
# Only the Dicom3ToolsParser run is active; the other runs are kept
# disabled inside the string literals below.
if __name__ == "__main__":
  argc = len(os.sys.argv )
  if ( argc < 3 ):
    # Single-argument print(...) behaves the same as a statement and as a
    # function call
    print("Sorry, wrong list of args")
    os.sys.exit(1) #error

  inputfilename = os.sys.argv[1]
  outputfilename = os.sys.argv[2]
  # Only referenced by the disabled PdfTextParser run below
  tempfile = "/tmp/mytemp"
  """
  dp = PdfTextParser()
  dp.SetInputFileName( inputfilename )
  #dp.SetOutputFileName( outputfilename )
  dp.SetOutputFileName( tempfile )
  dp.Parse()
  exp = DicomV3Expander()
  #exp.SetInputFileName( tempfile )
  exp.SetInputFileName( inputfilename )
  exp.SetOutputFileName( outputfilename )
  exp.Expand()

  dp = TransferSyntaxParser()
  dp.SetInputFileName( inputfilename )
  dp.SetOutputFileName( outputfilename )
  dp.Parse()

  dp = PapyrusParser()
  dp.SetInputFileName( inputfilename )
  dp.SetOutputFileName( outputfilename )
  dp.Parse()

  dp = InteraParser()
  dp.SetInputFileName( inputfilename )
  dp.SetOutputFileName( outputfilename )
  dp.Parse()
  dp = GEMSParser()
  dp.SetInputFileName( inputfilename )
  dp.SetOutputFileName( outputfilename )
  dp.Parse()

  """
  dp = Dicom3ToolsParser()
  dp.SetInputFileName( inputfilename )
  dp.SetOutputFileName( outputfilename )
  dp.Parse()

  """
  dp = GEAdvanceParser()
  dp.SetInputFileName( inputfilename )
  dp.SetOutputFileName( outputfilename )
  dp.Parse()
  """

  #print dp.IsAStartingLine( "(0004,1212) File-set Consistency Flag US 1\n" )