1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
"""Define the File Type.
The primary purpose of the File object is to track the protocol to be used
to transfer the file as well as to give the appropriate filepath depending
on where (client-side, remote-side, intermediary-side) the File.filepath is
being called from.
"""
import logging
import os
from typing import Optional, Union
from urllib.parse import urlparse
import typeguard
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class File:
    """The Parsl File Class.

    This represents the global, and sometimes local, URI/path to a file.

    Staging-in mechanisms may annotate a file with a local path recording
    the path at the far end of a staging action. It is up to the user of
    the File object to track which local scope that local path actually
    refers to.

    Attributes set by the constructor:
       - url: the URL (or bare path) the object was constructed from, as a str
       - scheme: the URL scheme, or 'file' when the URL has none
       - netloc: the network-location component of the parsed URL
       - path: the path component of the parsed URL
       - filename: the final component of ``path``
       - local_path: initially None; may be assigned later to record a
         site-local path for this file
    """

    @typeguard.typechecked
    def __init__(self, url: Union[os.PathLike, str]):
        """Construct a File object from a url string.

        Args:
           - url (string or PathLike) : url of the file e.g.
              - 'input.txt'
              - pathlib.Path('input.txt')
              - 'file:///scratch/proj101/input.txt'
              - 'globus://go#ep1/~/data/input.txt'
              - 'globus://ddb59aef-6d04-11e5-ba46-22000b92c6ec/home/johndoe/data/input.txt'
        """
        if isinstance(url, str):
            self.url = str(url)
        else:
            # Resolve path-like objects through the fspath protocol rather
            # than str(): some PathLike types (os.DirEntry, for example) have
            # a __str__ that is not their path. os.fsdecode also converts a
            # bytes path into str.
            self.url = os.fsdecode(url)

        parsed_url = urlparse(self.url)
        # A URL with no explicit scheme (such as a bare path) is treated as
        # a file: URL.
        self.scheme = parsed_url.scheme or 'file'
        self.netloc = parsed_url.netloc
        self.path = parsed_url.path
        self.filename = os.path.basename(self.path)
        self.local_path: Optional[str] = None

    def cleancopy(self) -> "File":
        """Returns a copy of the file containing only the global immutable state,
        without any mutable site-local local_path information. The returned File
        object will be as the original object was when it was constructed.
        """
        # Lazy %r argument: repr(self) is only built if DEBUG is enabled.
        # %r (not %s) is deliberate, since str(self) may raise ValueError.
        logger.debug("Making clean copy of File object %r", self)
        return File(self.url)

    def __str__(self) -> str:
        """Return ``self.filepath``.

        Raises:
           - ValueError, under the same conditions as ``filepath``.
        """
        return self.filepath

    def __repr__(self) -> str:
        """Return a debugging representation listing the object's identity and
        URL components, plus local_path when one has been set.
        """
        content = [
            f"{type(self).__name__}",
            f"at 0x{id(self):x}",
            f"url={self.url}",
            f"scheme={self.scheme}",
            f"netloc={self.netloc}",
            f"path={self.path}",
            f"filename={self.filename}",
        ]
        if self.local_path is not None:
            content.append(f"local_path={self.local_path}")

        return f"<{' '.join(content)}>"

    def __fspath__(self) -> str:
        """Implement the os.PathLike protocol by returning ``self.filepath``.

        Raises:
           - ValueError, under the same conditions as ``filepath``.
        """
        return self.filepath

    @property
    def filepath(self) -> str:
        """Return the resolved filepath on the side where it is called from.

        The appropriate filepath will be returned when called from within
        an app running remotely as well as regular python on the submit side.

        Only file: scheme URLs make sense to have a submit-side path, as other
        URLs are not accessible through POSIX file access.

        Returns:
             - filepath: ``local_path`` if it has been set, otherwise the URL
               path for file: scheme URLs

        Raises:
             - ValueError if no local_path is set and the scheme is not 'file'
        """
        # An explicitly recorded local path always takes precedence.
        if self.local_path is not None:
            return self.local_path

        if self.scheme == 'file':
            return self.path

        raise ValueError("No local_path set for {}".format(repr(self)))
|