1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
|
#!/usr/bin/env python
# encoding: utf-8
# Baptiste Lepilleur, 2009
from dircache import listdir
import re
import fnmatch
import os.path
# These fnmatch expressions are used by default to prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '
# These fnmatch expressions are used by default to exclude files and dirs
# while doing the recursive traversal in the glob_impl method of glob function.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
# These ant_glob expressions are used by default to exclude files and dirs and also prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''
DIR = 1
FILE = 2
DIR_LINK = 4
FILE_LINK = 8
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )
def ant_pattern_to_re( ant_pattern ):
"""Generates a regular expression from the ant pattern.
Matching convention:
**/a: match 'a', 'dir/a', 'dir1/dir2/a'
a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
*.py: match 'script.py' but not 'a/script.py'
"""
rex = ['^']
next_pos = 0
sep_rex = r'(?:/|%s)' % re.escape( os.path.sep )
## print 'Converting', ant_pattern
for match in _ANT_RE.finditer( ant_pattern ):
## print 'Matched', match.group()
## print match.start(0), next_pos
if match.start(0) != next_pos:
raise ValueError( "Invalid ant pattern" )
if match.group(1): # /**/
rex.append( sep_rex + '(?:.*%s)?' % sep_rex )
elif match.group(2): # **/
rex.append( '(?:.*%s)?' % sep_rex )
elif match.group(3): # /**
rex.append( sep_rex + '.*' )
elif match.group(4): # *
rex.append( '[^/%s]*' % re.escape(os.path.sep) )
elif match.group(5): # /
rex.append( sep_rex )
else: # somepath
rex.append( re.escape(match.group(6)) )
next_pos = match.end()
rex.append('$')
return re.compile( ''.join( rex ) )
def _as_list( l ):
if isinstance(l, basestring):
return l.split()
return l
def glob(dir_path,
includes = '**/*',
excludes = default_excludes,
entry_type = FILE,
prune_dirs = prune_dirs,
max_depth = 25):
include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
dir_path = dir_path.replace('/',os.path.sep)
entry_type_filter = entry_type
def is_pruned_dir( dir_name ):
for pattern in prune_dirs:
if fnmatch.fnmatch( dir_name, pattern ):
return True
return False
def apply_filter( full_path, filter_rexs ):
"""Return True if at least one of the filter regular expression match full_path."""
for rex in filter_rexs:
if rex.match( full_path ):
return True
return False
def glob_impl( root_dir_path ):
child_dirs = [root_dir_path]
while child_dirs:
dir_path = child_dirs.pop()
for entry in listdir( dir_path ):
full_path = os.path.join( dir_path, entry )
## print 'Testing:', full_path,
is_dir = os.path.isdir( full_path )
if is_dir and not is_pruned_dir( entry ): # explore child directory ?
## print '===> marked for recursion',
child_dirs.append( full_path )
included = apply_filter( full_path, include_filter )
rejected = apply_filter( full_path, exclude_filter )
if not included or rejected: # do not include entry ?
## print '=> not included or rejected'
continue
link = os.path.islink( full_path )
is_file = os.path.isfile( full_path )
if not is_file and not is_dir:
## print '=> unknown entry type'
continue
if link:
entry_type = is_file and FILE_LINK or DIR_LINK
else:
entry_type = is_file and FILE or DIR
## print '=> type: %d' % entry_type,
if (entry_type & entry_type_filter) != 0:
## print ' => KEEP'
yield os.path.join( dir_path, entry )
## else:
## print ' => TYPE REJECTED'
return list( glob_impl( dir_path ) )
if __name__ == "__main__":
import unittest
class AntPatternToRETest(unittest.TestCase):
## def test_conversion( self ):
## self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )
def test_matching( self ):
test_cases = [ ( 'path',
['path'],
['somepath', 'pathsuffix', '/path', '/path'] ),
( '*.py',
['source.py', 'source.ext.py', '.py'],
['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
( '**/path',
['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
( 'path/**',
['path/a', 'path/path/a', 'path//'],
['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
( '/**/path',
['/path', '/a/path', '/a/b/path/path', '/path/path'],
['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
( 'a/b',
['a/b'],
['somea/b', 'a/bsuffix', 'a/b/c'] ),
( '**/*.py',
['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
['script.pyc', 'script.pyo', 'a.py/b'] ),
( 'src/**/*.py',
['src/a.py', 'src/dir/a.py'],
['a/src/a.py', '/src/a.py'] ),
]
for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
def local_path( paths ):
return [ p.replace('/',os.path.sep) for p in paths ]
test_cases.append( (ant_pattern, local_path(accepted_matches), local_path( rejected_matches )) )
for ant_pattern, accepted_matches, rejected_matches in test_cases:
rex = ant_pattern_to_re( ant_pattern )
print 'ant_pattern:', ant_pattern, ' => ', rex.pattern
for accepted_match in accepted_matches:
print 'Accepted?:', accepted_match
self.assert_( rex.match( accepted_match ) is not None )
for rejected_match in rejected_matches:
print 'Rejected?:', rejected_match
self.assert_( rex.match( rejected_match ) is None )
unittest.main()
|