#!/usr/bin/env python3
#******************************************************************************
# urltools.py, provides functions for parsing and modifying URLs.
#
# TreeLine, an information storage program
# Copyright (C) 2018, Douglas W. Bell
#
# This is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License, either Version 2 or any later
# version. This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY. See the included LICENSE file for details.
#******************************************************************************
import re
import sys
import os.path
# Pattern pieces, all optional except the trailing catch-all:
#   group 1 -- a scheme of two or more letters followed by "://"
#   group 2 -- a one-letter drive ("c:"), optionally preceded by one "/"
#   group 3 -- everything that remains on the line (the address)
# Every piece may match nothing, so match() succeeds on any string.
_urlRegExp = re.compile(r'([a-z]{2,}://)?(?:/?([a-z]:))?(.*)', re.IGNORECASE)


def splitUrl(url):
    """Return a tuple of scheme, drive letter and address.

    If any are not present, return empty strings.  The scheme is returned
    without its separator and the drive letter keeps its trailing colon.
    Where os.sep is a backslash, backslashes in the URL are converted to
    forward slashes before it is parsed.

    Arguments:
        url -- a string with the original URL
    """
    if os.sep == '\\':
        url = url.replace('\\', '/')
    # groups('') substitutes an empty string for each piece that is absent
    scheme, drive, address = _urlRegExp.match(url).groups('')
    scheme = scheme[:-3]    # drop the trailing "://" (no-op when empty)
    # mailto URLs use a bare ":" so the pattern above never sees a scheme;
    # compare case-insensitively to agree with the IGNORECASE pattern
    mailtoPrefix = 'mailto:'
    if not scheme and url[:len(mailtoPrefix)].lower() == mailtoPrefix:
        scheme = 'mailto'
        drive = ''
        address = url[len(mailtoPrefix):]
    return (scheme, drive, address)
def extractScheme(url):
    """Give back only the scheme portion of this URL.

    An empty string is returned when the URL carries no scheme.

    Arguments:
        url -- a string with the original URL
    """
    # splitUrl() yields (scheme, drive, address); only the first is wanted
    return splitUrl(url)[0]
def extractAddress(url):
    """Strip the scheme from this URL and return what is left.

    The result is the drive letter (if any) followed by the address.

    Arguments:
        url -- a string with the original URL
    """
    parts = splitUrl(url)
    # parts is (scheme, drive, address); join the last two
    return parts[1] + parts[2]
def replaceScheme(scheme, url):
    """Swap whatever scheme the URL has for the given one and return it.

    A drive letter, when present, is written after the scheme separator
    with a leading slash.  Without a drive letter, a relative path given
    the 'file' scheme comes back as the bare address with no scheme, and
    the 'mailto' scheme is joined with a lone colon rather than "://".

    Arguments:
        scheme -- the new scheme to add
        url -- the address to be modified
    """
    _, drive, address = splitUrl(url)
    if drive:
        # "://" plus the extra slash that precedes a drive letter
        return '{}:///{}{}'.format(scheme, drive, address)
    if scheme == 'file' and not address.startswith('/'):
        # relative file paths are left without any scheme
        return address
    separator = ':' if scheme == 'mailto' else '://'
    return '{}{}{}'.format(scheme, separator, address)
def shortName(url):
    """Build a default short name from the last path component of the URL.

    When the address ends with a separator, the component just before it
    is used.  For mailto URLs, and for any name containing "@", only the
    text ahead of the first "@" is kept.

    Arguments:
        url -- a string with the original URL
    """
    _, _, address = splitUrl(url)
    # fall back to the address minus its final character (a trailing
    # separator) when there is no basename
    baseName = os.path.basename(address) or os.path.basename(address[:-1])
    # cutting at the first "@" leaves a name without one untouched, so
    # this covers both the mailto case and other names that hold an "@"
    return baseName.partition('@')[0]
def isRelative(url):
    """Tell whether this URL is a relative path.

    A URL with a scheme, a drive letter or a leading slash counts as
    absolute and gives False; anything else gives True.

    Arguments:
        url -- a string with the original URL
    """
    scheme, drive, address = splitUrl(url)
    return not (scheme or drive or address.startswith('/'))
def toAbsolute(url, refPath, addScheme=True):
    """Resolve a relative file URL against refPath and return the result.

    Any scheme on the URL is discarded; the drive and address are joined
    onto refPath and the combined path is normalized.

    Arguments:
        url -- a string with the original URL
        refPath -- the path that the URL is relative to
        addScheme -- add the 'file' scheme to result if true
    """
    _, drive, address = splitUrl(url)
    joined = os.path.join(refPath, drive + address)
    absPath = os.path.normpath(joined)
    if not addScheme:
        # plain path requested: only make sure it uses forward slashes
        return absPath.replace('\\', '/') if os.sep == '\\' else absPath
    return replaceScheme('file', absPath)
def toRelative(url, refPath):
    """Convert an absolute file URL to a relative URL and return it.

    Only URLs with a drive letter or a leading slash in the address are
    converted; any other URL is passed through.  Where os.sep is a
    backslash, the returned URL always uses forward slashes, in line with
    toAbsolute() and splitUrl().

    Arguments:
        url -- a string with the original URL
        refPath -- the path that the URL is relative to
    """
    _, drive, address = splitUrl(url)
    if drive or address.startswith('/'):
        try:
            url = os.path.relpath(drive + address, refPath)
        except ValueError:
            # relpath() could not relate the two paths (for instance,
            # different drives on Windows); keep the URL as given
            pass
    # normalize on every return path, not just after a conversion
    if os.sep == '\\':
        url = url.replace('\\', '/')
    return url
def which(fileName):
    """Return the full path if the fileName is found somewhere in the PATH.

    If not found, return an empty string.  Only regular files that are
    executable qualify; a directory with a matching name is ignored.
    On Windows, each extension listed in PATHEXT is also tried.
    Similar to the Linux which command.

    Arguments:
        fileName -- the name to search for
    """
    extList = ['']
    if sys.platform.startswith('win'):
        # skip empty entries so an unset PATHEXT adds no duplicate probes
        extList.extend(ext for ext in
                       os.getenv('PATHEXT', '').split(os.pathsep) if ext)
    for path in os.get_exec_path():
        for ext in extList:
            fullPath = os.path.join(path, fileName + ext)
            # os.access() alone is also true for searchable directories
            if os.path.isfile(fullPath) and os.access(fullPath, os.X_OK):
                return fullPath
    return ''
|