1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
|
#----------------------------------------------------------------------
# Name: Search.py
# Purpose: Searching html/txt file
#
# Author: Riaan Booysen
#
# Created: 2000/01/08
# RCS-ID: $Id: Search.py,v 1.12 2004/08/16 13:15:39 riaan Exp $
# Copyright: (c) 1999 - 2004 Riaan Booysen
# Licence: GPL
#----------------------------------------------------------------------
import os
import string, time
from wxPython.wx import wxProgressDialog, wxPD_CAN_ABORT, wxPD_APP_MODAL, wxPD_AUTO_HIDE, true, false
def count(filename, pattern, caseSensitive):
    """Return how many times pattern occurs in the file named filename.

    filename      - path of the file to scan; it is opened in text mode
    pattern       - substring to look for
    caseSensitive - when false, both the file contents and the pattern are
                    lower-cased before counting

    Occurrences are counted with str.count, i.e. without overlaps.
    Returns 0 when the file cannot be opened (IOError) or when pattern is
    empty.
    """
    # str.count('') reports len(data) + 1, which would make every file look
    # like a hit; an empty search term matches nothing.
    if not pattern:
        return 0
    try:
        f = open(filename, 'r')
    except IOError:
        return 0
    try:
        data = f.read()
    finally:
        # The handle is no longer needed once the contents are in memory.
        f.close()
    if not caseSensitive:
        data = data.lower()
        pattern = pattern.lower()
    return data.count(pattern)
def findInText(sourcelines, pattern, caseSensitive, includeLine = 0):
    """Locate every occurrence of pattern in a sequence of text lines.

    sourcelines   - iterable of strings to search
    pattern       - substring to look for
    caseSensitive - when false, matching ignores case
    includeLine   - when true, each result also carries the matching line
                    exactly as it appears in sourcelines

    Returns a list of tuples (lineIndex, column) or, with includeLine,
    (lineIndex, column, line). Both indices are zero based. Overlapping
    matches are reported because the search resumes one character after
    each hit. An empty pattern yields no results.
    """
    results = []
    # An empty pattern would "match" at every column of every line.
    if not pattern:
        return results
    if not caseSensitive:
        pattern = pattern.lower()
    for sourceIdx, original in enumerate(sourcelines):
        # Search a lower-cased copy when ignoring case, but keep the
        # original line so that callers asking for it get the real text.
        if caseSensitive:
            haystack = original
        else:
            haystack = original.lower()
        idx = haystack.find(pattern)
        while idx != -1:
            if includeLine:
                results.append((sourceIdx, idx, original))
            else:
                results.append((sourceIdx, idx))
            idx = haystack.find(pattern, idx + 1)
    return results
def findInFile(filename, pattern, caseSensitive, includeLine = 0):
    """Search the lines of a file for pattern using findInText.

    filename is opened in text mode and read with readlines(); the
    remaining arguments are handed to findInText unchanged and its result
    is returned. A file that cannot be opened (IOError) gives an empty
    list.
    """
    try:
        handle = open(filename, 'r')
    except IOError:
        return []
    try:
        return findInText(handle.readlines(), pattern, caseSensitive,
                          includeLine)
    finally:
        handle.close()
def defaultProgressCallback(dlg, count, file, msg):
    """Report one progress step on dlg and store the outcome on dlg.cont.

    The position passed to dlg.Update is count, capped at dlg.max - 1; the
    label is msg and file joined by a single space. Whatever dlg.Update
    returns is assigned to dlg.cont (presumably false once the user aborts
    the dialog -- confirm against the dialog class in use).
    """
    position = min(dlg.max-1, count)
    label = msg + ' ' + file
    dlg.cont = dlg.Update(position, label)
def findInFiles(parent, srchPath, pattern, callback = defaultProgressCallback, deeperPath = '', filemask = ('.htm', '.html', '.txt'), progressMsg = 'Search help files...', dlg = None, joiner = '/'):
    """Count occurrences of pattern in the files below srchPath.

    parent      - parent window handed to the progress dialog
    srchPath    - directory to search; sub directories are always entered
    pattern     - text to count; counting is case insensitive
    callback    - called as callback(dlg, step, file, msg) before each file
                  is searched ('Searching') or skipped ('Skipping')
    deeperPath  - prefix prepended to file names in the results; grows by
                  the directory name plus joiner on each level of recursion
    filemask    - extensions (with leading dot) of files to search; if it
                  contains '.*' every file that has an extension is searched
    progressMsg - title of the progress dialog created by the outer call
    dlg         - existing progress dialog to reuse; when false a new one
                  is created here and destroyed before returning
    joiner      - separator used when building deeperPath

    Returns a list of (occurrences, deeperPath + fileName) tuples for the
    files with at least one occurrence. The loop stops as soon as dlg.cont
    is false.
    """
    entries = os.listdir(srchPath)
    total = len(entries)
    hits = []
    step = 0

    # Only the call that creates the dialog is responsible for destroying it.
    owndlg = not dlg
    if owndlg:
        dlg = wxProgressDialog(progressMsg, 'Searching...', total, parent,
                               wxPD_CAN_ABORT | wxPD_APP_MODAL | wxPD_AUTO_HIDE)
        dlg.max = total
        dlg.cont = 1

    try:
        for file in entries:
            fullPath = os.path.join(srchPath, file)
            if os.path.isdir(fullPath):
                # Recurse with the shared dialog and an extended prefix.
                nested = findInFiles(parent, fullPath, pattern, callback,
                                     deeperPath+file+joiner, filemask,
                                     dlg = dlg, joiner = joiner)
                hits.extend(nested)
            else:
                ext = os.path.splitext(file)[1]
                if ext in filemask or ('.*' in filemask and ext):
                    callback(dlg, step, file, 'Searching')
                    ocs = count(fullPath, pattern, 0)
                    if ocs:
                        hits.append((ocs, deeperPath+file))
                else:
                    callback(dlg, step, file, 'Skipping')

            # The step counter never goes past the last valid position.
            if step < total - 1:
                step += 1

            # dlg.cont is maintained by the callback.
            if not dlg.cont:
                break
        return hits
    finally:
        if owndlg:
            dlg.Destroy()
class _file_iter:
    """Collect the files found under a list of folders, filtered by extension.

    An instance is callable: calling it walks the folders again and returns
    the list of full file names that passed the filter.
    """

    def __init__(self, folders, file_filter, bIncludeFilter = 1, bRecursive = 1):
        """
        folders        - list of folders to go through. This list must not be
                         empty, otherwise LookupError is raised
        file_filter    - list of extension patterns of the form '*.ext',
                         compared case-insensitively. If file_filter is
                         empty then all files are included.
        bIncludeFilter - indicates how to treat file_filter. If true, files
                         that match file_filter are included in the result;
                         if false, matching files are left out and all
                         others are included
        bRecursive     - whether to descend into sub folders or not
        """
        if not folders:
            raise LookupError("Root folder was not specified")
        self._folders = folders
        self._filters = [sExt.lower() for sExt in file_filter]
        self._is_include_filter = bIncludeFilter
        self._is_recursive = bRecursive
        self._files = []    # resulting list

    def _is_to_include(self, sFullFileName):
        """Return true if the file must be included and false if not"""
        if not self._filters:
            return 1    # all files must be included
        sFileName = os.path.split(sFullFileName)[-1]
        if '.' in sFileName:
            sExt = '*.' + sFileName.split('.')[-1]
            bMatches = sExt.lower() in self._filters
        else:
            # A name without a dot has no extension; never treat the whole
            # name as one (a file called 'txt' is not a '*.txt' file).
            bMatches = 0
        if bMatches:
            # file extension within filters:
            # with an include filter the file must be included
            return self._is_include_filter
        # file extension not in filters:
        # with an include filter the file must be skipped
        return not self._is_include_filter

    def _GetFolderFileLists(self, sFullFolderName):
        """Return the tuple (folders, files) for one folder, where files is
        the list of files accepted by the filter and folders is the list of
        all direct sub folders. All entries are full names.
        """
        lstFiles, lstFolders = [], []
        for sPath in os.listdir(sFullFolderName):
            # building full file name
            sFullPath = os.path.join(sFullFolderName, sPath)
            if os.path.isfile(sFullPath) and self._is_to_include(sFullPath):
                lstFiles.append(sFullPath)
            elif os.path.isdir(sFullPath):
                lstFolders.append(sFullPath)
        return lstFolders, lstFiles

    def _walk(self):
        """Work through the folders and collect all files into self._files"""
        # Work on a copy so the list given by the caller is not consumed.
        lstFolders = self._folders[:]
        while lstFolders:
            sCurrFolder = lstFolders.pop(0)
            lstToWalkFolders, lstFiles = self._GetFolderFileLists(sCurrFolder)
            if self._is_recursive:
                # Sub folders are queued behind the folders still pending.
                lstFolders.extend(lstToWalkFolders)
            self._files.extend(lstFiles)

    def __call__(self):
        """Walk the folders from scratch and return the collected files"""
        self._files = []
        self._walk()
        return self._files
def listFiles(folders, file_filter, bIncludeFilter=1, bRecursive=1):
    """Return the list of files that a _file_iter built from these
    arguments collects when it is called.
    """
    collector = _file_iter(folders, file_filter, bIncludeFilter, bRecursive)
    return collector()
if __name__ == '__main__':
    # Manual check: count 'riaan' in every file that has an extension
    # below ./ExternalLib and print the resulting list.
    from wxPython.wx import *
    wxPySimpleApp()
    frame = wxFrame(None, -1, 'results', size=(0, 0))
    searchRoot = os.path.abspath('ExternalLib')
    print(findInFiles(frame, searchRoot, 'riaan', filemask = ('.*',)))
|