File: offutils.py

package info (click to toggle)
offpunk 3.1-2
links: PTS, VCS
area: main
in suites: forky, sid
size: 2,040 kB
sloc: python: 6,437; sh: 117; makefile: 2
file content (616 lines) | stat: -rw-r--r-- 22,454 bytes
parent folder | download | duplicates (2)
#!/bin/python

# This file contains some utilities common to offpunk, ansicat and netcache.
# Currently, there are the following utilities:
#
# run : run a shell command and get the results with some security
# term_width : get or set the width to display on the terminal

import gettext
import io
import os
import shlex
import shutil
import subprocess
import sys
import urllib.parse
import tempfile

import netcache

# Directory holding the compiled translation catalogs.
# We can later add some logic to decide this based on OS family/version if needed?
# Passing None to gettext would make it default to sys.base_prefix + "/share/locale/"
# (i.e., "/usr/share/locale" on Debian and RedHat based systems at least).
# sys.prefix, however, is either that same prefix or the path to the
# virtualenv we're in, which makes i18n also work if offpunk is installed
# with pipx for example:
_LOCALE_DIR = sys.prefix + "/share/locale/"

# Select the "offpunk" catalog and expose the usual "_" translation shortcut
# used around every user-facing string of this module.
gettext.bindtextdomain('offpunk', _LOCALE_DIR)
gettext.textdomain('offpunk')
_ = gettext.gettext

# Current versions of the on-disk cache layout and of the certificate storage.
# upgrade_cache() and upgrade_cert() compare them with the ".version" file
# they find and run the "upgrade_to_<n>" migrations until both match.
CACHE_VERSION = 1
CERT_VERSION = 1

# Run "cmd" through the shell and return what it printed.
#
# Keyword-only arguments:
#   input         : a string sent to the command's stdin, or an already opened
#                   file object (io.IOBase) used directly as its stdin
#   parameter     : value substituted, shell-quoted, for the "%s" placeholder
#                   of cmd (literal "%" in cmd must then be written "%%")
#   direct_output : if True, the command writes straight to our stdout, its
#                   exit status is ignored and None is returned
#   env           : additional environment variables for the command only
#   no_err        : if True, stderr is discarded; otherwise it is merged
#                   into stdout
#
# Returns the decoded stdout of the command (None when direct_output is set).
# Raises subprocess.CalledProcessError if the command exits with a non-zero
# status and direct_output is not set.
def run(cmd, *, input=None, parameter=None, direct_output=False, env=None, no_err=False):
    if parameter:
        cmd = cmd % shlex.quote(parameter)
    # Work on a copy: updating os.environ itself would leak the extra
    # variables into this process and into every later command.
    e = os.environ.copy()
    if env:
        e.update(env)
    if isinstance(input, io.IOBase):
        # subprocess.run() refuses "input" and "stdin" together, so a file
        # object is handed over as stdin only.
        stdin = input
        input = None
    else:
        if input:
            input = input.encode()
        stdin = None

    stderr = subprocess.DEVNULL if no_err else subprocess.STDOUT
    if not direct_output:
        # subprocess.check_output() wouldn't allow us to pass stdin.
        result = subprocess.run(
            cmd,
            check=True,
            env=e,
            input=input,
            shell=True,
            stdin=stdin,
            stdout=subprocess.PIPE,
            stderr=stderr,
        )
        return result.stdout.decode()
    else:
        subprocess.run(cmd, env=e, input=input, shell=True, stdin=stdin, stderr=stderr)

# CMDS is a dictionary mapping each external command we might use (the key)
# to its default invocation (the value), which may include options and,
# optionally, a full path.
# A value of None or False means that the command cannot be called.
# The table is first populated as if everything were installed; availability
# is checked right below and default options are added later. A full path
# can be patched in here if needed: it is preserved as long as it resolves.
CMDS = {
    "grep": "grep",
    "xdg-open": "xdg-open",
    "less": "less",
    "cat": "cat",
    "chafa": "chafa",
    "timg": "timg",
    "file": "file",
    "tmux": "tmux",
    "xsel": "xsel",
    "xclip": "xclip",
    "wl-copy": "wl-copy",
    "wl-paste": "wl-paste",
    "pbcopy": "pbcopy",
    "pbpaste": "pbpaste",
}

# Disable every command whose executable cannot be found on this system
for cmd in CMDS:
    if not (CMDS[cmd] and shutil.which(CMDS[cmd])):
        CMDS[cmd] = None

# Find out whether the local grep supports --color=auto: run it once on a
# one-character input that matches the pattern "x". check=True turns a
# non-zero exit status (e.g. option rejected) into an exception.
try:
    test = subprocess.run(
        [CMDS["grep"], "--color=auto", "x"],
        input=b"x",
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    # Only reached when the probe succeeded: make the option a default
    CMDS["grep"] += " --color=auto"
except Exception:
    # Best effort: on any failure (including CMDS["grep"] being None because
    # grep was not found) the grep entry is simply left unchanged.
    pass

# Let's change our default less and cat commands.
# Offpunk cannot run without the "less" pager: stop early with an explanation.
# The entry itself is tested first because it is reset to None when the
# executable was not found, and shutil.which(None) raises TypeError instead
# of returning a falsy value (which would hide the message below).
if not CMDS["less"] or not shutil.which(CMDS["less"]):
    print(_('Please install the pager "less" to run Offpunk.'))
    print(_("If you wish to use another pager, send me an email !"))
    print(
        _('(I’m really curious to hear about people not having "less" on their system.)')
    )
    sys.exit()
output = run(CMDS["less"] + " --version")
# We get less version: the numeric word of the first line of the output
# (if several words are numeric, the last one wins).
words = output.split("\n")[0].split()
less_version = 0
for w in words:
    # On macOS the version can be something like 581.2 not just an int:
    # accept dotted numbers and keep the part before the first dot.
    if all(part.isdigit() for part in w.split(".")):
        less_version = int(w.split(".", 1)[0])
# Restoring the position only works for versions of less >= 572
_LESS_RESTORE_POSITION = less_version >= 572
# Options given to less:
# -E : quit when reaching end of file (to behave like "cat")
# -F : quit if content fits the screen (behave like "cat")
# -X : does not clear the screen
# -R : interpret ANSI colors correctly
# -f : suppress warning for some contents
# -M : long prompt (to have info about where you are in the file)
# -W : hilite the new first line after a page skip (space)
# -i : ignore case in search
# -S : do not wrap long lines. Wrapping is done by offpunk, longlines
# are there on purpose (such in asciiart)
# -~ : show blank lines instead of "~" after the end of the file
# --save-marks : keep marks between invocations (needs version 572 or later)
# --incsearch : incremental search (needs version 581 or later)
# Every "%" of the prompt is doubled because the final command line goes
# through "%" formatting when run() inserts the file name.
less_prompt = "page %%d/%%D- lines %%lb/%%L - %%Pb\\%%"
if less_version >= 581:
    less_base = CMDS["less"] + ' --incsearch --save-marks -~ -XRfWiS -P "%s"' % less_prompt
elif less_version >= 572:
    less_base = CMDS["less"] + " --save-marks -XRfMWiS"
else:
    less_base = CMDS["less"] + " -XRfMWiS"
# "less" starts by executing the '' command (presumably to jump back to the
# saved position - see the less manual); "cat" is less made to quit by itself.
CMDS["less"] = less_base + " \"+''\" %s"
CMDS["cat"] = less_base + " -EF %s"



# We upgrade the cache only once at startup, hence the CACHE_UPGRADED variable
# This is only to avoid unnecessary checks each time the cache is accessed
CACHE_UPGRADED = False


# Bring the cache found in cache_folder up to CACHE_VERSION.
# cache_folder must end with "/": the version file is cache_folder + ".version".
# Each missing step N is performed by netcache_migration.upgrade_to_N(cache_folder)
# and recorded in the version file right away, so that an interrupted upgrade
# resumes where it stopped. Sets CACHE_UPGRADED once done.
def upgrade_cache(cache_folder):
    # Without this declaration the final assignment would only create a local
    # variable and the module-level flag would stay False forever.
    global CACHE_UPGRADED
    # Let's read the current version of the cache
    version_path = cache_folder + ".version"
    current_version = 0
    if os.path.exists(version_path):
        with open(version_path) as f:
            current_str = f.read()
        try:
            current_version = int(current_str)
        except ValueError:
            # Unreadable version file: run every migration again
            current_version = 0
    # Now, let's upgrade the cache if needed
    while current_version < CACHE_VERSION:
        current_version += 1
        # Imported here so that the module is only loaded when a migration
        # really has to run.
        import netcache_migration
        upgrade_func = getattr(netcache_migration, "upgrade_to_" + str(current_version))
        upgrade_func(cache_folder)
        with open(version_path, "w") as f:
            f.write(str(current_version))
    CACHE_UPGRADED = True


# Same principle as for the cache: the certificate storage is only checked
# (and upgraded) once, which is remembered in CERT_UPGRADED.
CERT_UPGRADED = False


# Bring the certificate storage up to CERT_VERSION.
# The storage lives in <data_folder>/certs (created if missing) and its
# version is kept in <data_folder>/certs/.version.
# Each missing step N is performed by
# cert_migration.upgrade_to_N(data_folder, config_folder) and recorded in the
# version file right away. Sets CERT_UPGRADED once done.
def upgrade_cert(config_folder: str, data_folder: str) -> None:
    # Without this declaration the final assignment would only create a local
    # variable and the module-level flag would stay False forever.
    global CERT_UPGRADED
    # read the current version
    certdata = os.path.join(data_folder, "certs")
    os.makedirs(certdata, exist_ok=True)
    version_path = os.path.join(certdata, ".version")
    current_version = 0
    if os.path.exists(version_path):
        with open(version_path) as f:
            current_str = f.read()
        try:
            current_version = int(current_str)
        except ValueError:
            # Unreadable version file: run every migration again
            current_version = 0
    # Now, let's upgrade the certificate storage if needed
    while current_version < CERT_VERSION:
        current_version += 1
        # Imported here so that the module is only loaded when a migration
        # really has to run.
        import cert_migration
        upgrade_func = getattr(cert_migration, "upgrade_to_" + str(current_version))
        upgrade_func(data_folder, config_folder)
        with open(version_path, "w") as f:
            f.write(str(current_version))
    CERT_UPGRADED = True


# Get an XDG folder. "folder" should be "cache", "data" or "config";
# for any other value a message is printed and None is returned.
# The cache folder is created if needed, whatever folder is requested.
# Asking for "cache" or "data" also triggers, once, the upgrade of the cache
# or of the certificate storage.
def xdg(folder="cache"):
    # Config directories
    # We implement our own python-xdg to avoid conflict with existing libraries.
    _home = os.path.expanduser("~")
    data_home = os.environ.get("XDG_DATA_HOME") or os.path.join(
        _home, ".local", "share"
    )
    config_home = os.environ.get("XDG_CONFIG_HOME") or os.path.join(_home, ".config")
    cache_home = os.environ.get("XDG_CACHE_HOME") or os.path.join(_home, ".cache")
    _CONFIG_DIR = os.path.join(os.path.expanduser(config_home), "offpunk/")
    _DATA_DIR = os.path.join(os.path.expanduser(data_home), "offpunk/")
    _old_config = os.path.expanduser("~/.offpunk/")
    # A pre-existing ~/.offpunk/ directory, if any, takes over as config folder
    if os.path.exists(_old_config):
        _CONFIG_DIR = _old_config
        # ... and as data folder too when the XDG data home does not exist
        if not os.path.exists(data_home):
            _DATA_DIR = _CONFIG_DIR
    # The cache path comes from the OFFPUNK_CACHE_PATH environment variable.
    # If it is unset or empty, the default is <XDG cache home>/offpunk/
    # ("or" rather than a .get() default: an empty variable used to give the
    # cache path "/").
    _CACHE_PATH = os.environ.get("OFFPUNK_CACHE_PATH") or os.path.join(
        os.path.expanduser(cache_home), "offpunk/"
    )
    # Check that the cache path ends with "/"
    if not _CACHE_PATH.endswith("/"):
        _CACHE_PATH += "/"
    os.makedirs(_CACHE_PATH, exist_ok=True)
    if folder == "cache":
        if not CACHE_UPGRADED:
            upgrade_cache(_CACHE_PATH)
        return _CACHE_PATH
    if folder == "config":
        return _CONFIG_DIR
    if folder == "data":
        if not CERT_UPGRADED:
            upgrade_cert(_CONFIG_DIR, _DATA_DIR)
        return _DATA_DIR
    print(_("No XDG folder for %s. Check your code.") % folder)
    return None


# Return the list of the commands of the rc file that must be run.
# rcfile: path of the file to read (default: "offpunkrc" in the config folder).
#         A missing file gives an empty list.
# if skip_go = True, any command changing the url will be ignored (go, tour)
# if not interactive, only redirects, handlers and settings are considered
# if verbose, the file used and the skipped commands are printed
def init_config(rcfile=None,skip_go=False,interactive=True,verbose=True):
    cmds = []
    if not rcfile:
        rcfile = os.path.join(xdg("config"), "offpunkrc")
    if not os.path.exists(rcfile):
        return cmds
    if verbose:
        print(_("Using config %s") % rcfile)
    with open(rcfile,"r") as fp:
        for line in fp:
            line = line.strip()
            # Is this a command to go to an url ?
            # Only the command word itself is compared: a prefix test on
            # "g" or "t" would also catch unrelated commands such as "theme".
            words = line.split(maxsplit=1)
            command = words[0] if words else ""
            is_go = command in ("go", "g", "tour", "t")
            # Is this a command necessary, even when non-interactive ?
            is_necessary = line.startswith(("redirect", "handler", "set"))
            if is_necessary:
                cmds.append(line)
            elif interactive:
                if skip_go and is_go:
                    if verbose:
                        print(_("Skipping startup command \"%s\" due to provided URL")%line)
                    continue
                cmds.append(line)
    return cmds

# In a URL, an IPv6 address must be written between [].
# A location is taken for a bare IPv6 address when it contains more than
# two ":" and no bracket yet; it is then wrapped in brackets.
# Empty values and mailto links are returned untouched.
def fix_ipv6_url(url):
    if not url or url.startswith("mailto"):
        return url

    def is_bare_ipv6(location):
        return location.count(":") > 2 and "[" not in location and "]" not in location

    scheme, separator, remainder = url.partition("://")
    if not separator:
        # No scheme at all: the whole string is the location (and path)
        scheme, remainder = None, url
    host, slash, tail = remainder.partition("/")
    if slash:
        if is_bare_ipv6(host):
            remainder = "[" + host + "]" + "/" + tail
    elif is_bare_ipv6(remainder):
        remainder = "[" + remainder + "]/"
    return scheme + "://" + remainder if scheme else remainder


# Cheap and cheerful URL detector
# Returns True when "word" looks like:
# - a mailto: link containing an "@"
# - a local link (file:// or list://) containing a "/"
# - a link whose scheme is known by netcache (netcache.standard_ports) and
#   which contains a ".", "localhost" or a bracketed IPv6 address
# Anything else, including what urllib refuses to parse, gives False.
def looks_like_url(word):
    try:
        if not word.strip():
            return False
        url = fix_ipv6_url(word).strip()
        parsed = urllib.parse.urlparse(url)
        # sometimes, urllib crashes (ValueError) only when requesting the
        # port: access it for the sole purpose of triggering that check
        parsed.port
        if word.startswith("mailto:"):
            return "@" in word
        scheme = word.split("://")[0]
        if scheme in ["file", "list"]:
            return "/" in word
        if scheme not in netcache.standard_ports:
            return False
        # IPv4 or hostname
        if "." in word or "localhost" in word:
            return True
        # IPv6
        return "[" in word and ":" in word and "]" in word
    except ValueError:
        return False


# mode_url and unmode_url add/remove the mode to/from the URLs.
# This is a gross hack to remember the mode.
# mode_url appends "##offpunk_mode=<mode>" to the url, unless there is no
# mode, the mode is the default "readable" or the url already carries a mode
# (the marker must never be added twice).
def mode_url(url, mode):
    if mode and mode != "readable" and "##offpunk_mode=" not in url:
        url += "##offpunk_mode=" + mode
    return url


# Split a URL from the mode attached to it with "##offpunk_mode=".
# Returns the list [url, mode]; mode is None when the URL carries no mode
# and both are None when no URL is given.
def unmode_url(url):
    if not url:
        return [None,None]
    pieces = url.split("##offpunk_mode=")
    if len(pieces) == 1:
        return [url, None]
    # Anything after a second marker is ignored
    return [pieces[0], pieces[1]]

# This function gives the root of a URL.
# By default the root is the true root of the server ("/"), except if the
# path starts with /user/<name>/, /users/<name>/ or /~<name>/: in that case,
# the server is considered as a multi-user server and the root is the folder
# of that user.
# if absolute is set, it doesn't care about users
# return_value selects what is returned:
# - "name": a name for that root, which is the netloc by default
#   or the username if applicable
# - "list": a list of all the steps until the root,
#   starting from URL at position 0 to root at position -1
# - anything else (default): the root URL
def find_root(url,absolute=False,return_value=""):
    parsed = urllib.parse.urlparse(url)
    # by default, root is the true root
    name = parsed.netloc
    path = "/"
    subpath = parsed.path.split("/")
    # prefix of the path which is left out of subpath in multi-user cases
    dismissed = ""
    if parsed.scheme == 'gopher' and len(subpath) >= 2:
        # remove the type, add "1" (root is always gonna be "folder") later
        subpath.remove(subpath[1])
        # now "subpath" has the same number of elements as gemini and http
    if not absolute:
        # As the path starts with "/", subpath[0] is always ""
        # handling http://server/users/janedoe/ case
        if len(subpath) > 2 and subpath[1] in ["user","users"]:
            dismissed = "/" + subpath[1] + "/"
            name = subpath[2]
            # path is still "/" here: this joins "", "users", "janedoe"
            path = path.join(subpath[:3]) + "/"
            # subpath now starts at the username, what precedes it being
            # kept in "dismissed"
            subpath = subpath[2:]
        # handling http://server/~janedoe/ case
        elif len(subpath) > 1 and subpath[1].startswith("~"):
            dismissed = "/"
            name = subpath[1].lstrip("~")
            path = path.join(subpath[:2]) + "/"
            subpath = subpath[1:]
    if return_value == "name":
        return name
    elif return_value == "list":
        # we gradually reduce subpath to build the toreturn list
        # we put url in the place 0: "up 0" is keeping same url
        toreturn = [url]
        # we loop while:
        # there's something in the subpath elements
        # we didn't catch the root path
        newpath =  dismissed + "/".join(subpath)
        if parsed.scheme == 'gopher':
            newpath =  "/1" + newpath
        # NOTE(review): for gopher, newpath carries the "/1" prefix while
        # path does not - confirm that this length comparison is intended
        while len(subpath) > 0 and len(newpath) > len(path):
            # drop the last element to go one step up
            subpath.pop(-1) 
            newpath =  dismissed + "/".join(subpath)
            if parsed.scheme == 'gopher':
                newpath =  "/1" + newpath
            if not newpath.endswith("/"): newpath += "/"
            # params, query and fragment are dropped from the parent URLs
            newurl = urllib.parse.urlunparse((parsed.scheme, \
                        parsed.netloc, newpath, "","",""))
            # the same URL can be produced twice: keep it only once
            if newurl not in toreturn:
                toreturn.append(newurl)
        return toreturn
    else:
        if parsed.scheme == 'gopher':
            # root is always going to be directory
            path = '/1'+path
        root = urllib.parse.urlunparse((parsed.scheme, parsed.netloc, path, "","",""))
        return root



# Width (in columns) used to display text. It is changed through term_width().
TERM_WIDTH = 72


# Get, and optionally set, the width to display on the terminal.
# new_width: if given (and not 0), it becomes the new text width
# absolute: if set, returns the real terminal width, not the text width
# The text width returned never exceeds the real terminal width.
def term_width(new_width=None, absolute=False):
    global TERM_WIDTH
    if new_width:
        TERM_WIDTH = new_width
    columns = shutil.get_terminal_size().columns
    return columns if absolute else min(TERM_WIDTH, columns)


# Tell whether a URL is local, i.e. does not need the network.
# An empty URL and a URL without "://" are local; otherwise it depends on
# the scheme (file, mail, list and mailto are local).
def is_local(url):
    if url and "://" in url:
        scheme = url.split("://", maxsplit=1)[0]
        return scheme in ["file", "mail", "list", "mailto"]
    return True

# Open the XDG mail client (through xdg-open) to compose an email to dest.
# subject and body, if given, pre-fill the message.
# If toconfirm=True, the user is asked to confirm that they want to send an email
# If allowemptydest, dest is not checked and may be empty: the mail client
# will then be used to choose the destination
def send_email(dest,subject=None,body=None,toconfirm=True,allowemptydest=True):
    if not allowemptydest and (not dest or "@" not in dest):
        print(_("%s is not a valid email address")%dest)
        return
    if toconfirm:
        #TRANSLATORS please keep the 'Y/N' as is
        resp = input(_("Send an email to %s Y/N? ") % dest)
        confirmed = resp.strip().lower() in ("y", "yes")
    else:
        confirmed = True
    if not confirmed:
        return
    if not CMDS["xdg-open"]:
        print(_("Cannot find a mail client to send mail to %s") % dest)
        print(_("Please install xdg-open (usually from xdg-util package)"))
        return
    # dest may be None or empty when the destination is left to the mail client
    param = dest or ""
    if subject or body:
        param += "?"
    if subject:
        param += "subject=%s"%urllib.parse.quote(subject)
        if body:
            param += "&"
    if body:
        param += "body=%s"%urllib.parse.quote(body)
    # The whole mailto: link is quoted here rather than through the
    # "parameter" of run(), which is ignored when empty and would leave a
    # literal "%s" in the command for an empty destination.
    run(CMDS["xdg-open"] + " " + shlex.quote("mailto:" + param), direct_output=True)

# Take a URL in UTF-8 and percent-encode the characters of its path which
# need it: " " becomes "%20", for example.
# Only the path is touched. Links without scheme (local), mailto and gopher
# links are returned as they are.
def urlify(url):
    parts = urllib.parse.urlparse(url)
    if parts.scheme in ["", "mailto", "gopher"]:
        return url
    # Unquote before quoting, so that an already quoted path is not quoted twice
    requoted_path = urllib.parse.quote(urllib.parse.unquote(parts.path))
    return urllib.parse.urlunparse(parts._replace(path=requoted_path))

# Return the URL of an image, or invent one if it is a base64 inline image.
# The result is the pair (url, image_data):
# - normal image: src resolved against baseurl and urlified; image_data is None
# - "data:image/...;base64,..." image: a URL made of the first 20 characters
#   of the data plus an extension, resolved against baseurl; image_data is
#   the base64 data
# - any other "data:image/" source (such as svg): (None, None), as we
#   can't handle it for now
def looks_like_base64(src, baseurl):
    if not (src and src.startswith("data:image/")):
        return urlify(urllib.parse.urljoin(baseurl, src)), None
    if ";base64," not in src:
        return None, None
    pieces = src.split(";base64,")
    # header is something like data:image/jpg
    header, payload = pieces[0], pieces[1]
    extension = header.split("/")[1] if "/" in header else "data"
    invented_name = payload[:20] + "." + extension
    return urllib.parse.urljoin(baseurl, invented_name), payload

# Look for a redirection of the host of url in redirectlist, a dictionary
# {pattern: redirection}. A leading "www." of the host is ignored.
# A pattern matches when it is equal to the host or, if it starts with "*"
# (to also match subdomains), when the host ends with what follows the "*".
# The first matching pattern wins.
# Returns the redirection (None if nothing matches);
# if returnkey=True, we return [redirection, matching pattern]
def get_url_redirected(url,redirectlist,returnkey=False):
    host = urllib.parse.urlparse(url).netloc
    if host.startswith("www."):
        host = host[4:]
    matching_key = None
    for pattern in redirectlist.keys():
        if pattern.startswith("*"):
            found = host.endswith(pattern[1:])
        else:
            found = pattern == host
        if found:
            matching_key = pattern
            break
    value = redirectlist[matching_key] if matching_key else None
    return [value, matching_key] if returnkey else value

# Return the rule (the matching pattern of redirectlist) which blocks url,
# or None if the url is not blocked. A url is blocked when it is
# redirected to "blocked" (in any letter case).
def get_url_blocking_rule(url,redirectlist):
    target, rule = get_url_redirected(url,redirectlist,returnkey=True)
    blocked = bool(target) and target.lower() == "blocked"
    return rule if blocked else None
# True if a rule of redirectlist blocks url, False otherwise
def is_url_blocked(url,redirectlist):
    return bool(get_url_blocking_rule(url,redirectlist))

# Edit a file, or a temporary file, in the user's editor.
# The editor is, in that order, options["editor"], $VISUAL or $EDITOR; if
# none is set, instructions are printed and nothing is done.
# If path_to_edit is None, a temporary file is created, text_to_append
# (str or bytes; useful for instructions, context, placeholders, examples)
# is written to it and it is opened in the editor.
# Returns the content of the temporary file as a list of raw (bytes) lines,
# or None when an existing file was edited or no editor was found.
def edit_file(path_to_edit, text_to_append="", options=None):
    options = options if options is not None else {}
    user_editor = None
    if "editor" in options and options["editor"]:
        user_editor = options["editor"]
    elif os.environ.get("VISUAL"):
        user_editor = os.environ.get("VISUAL")
    elif os.environ.get("EDITOR"):
        user_editor = os.environ.get("EDITOR")

    if user_editor is None:
        print(_("No valid editor has been found."))
        print(
            _("You can use the following command to set your favourite editor:")
        )
        #TRANSLATORS keep 'set editor', it's a command
        print(_("set editor EDITOR"))
        print(_("or use the $VISUAL or $EDITOR environment variables."))
        return None

    tmp = None
    if path_to_edit is None:
        # only append to temp files
        # we also only return the content in this case
        tmp = tempfile.NamedTemporaryFile(suffix=".tmp")
        if text_to_append:
            # The temporary file is opened in binary mode: writing a str
            # to it would raise TypeError.
            if isinstance(text_to_append, str):
                text_to_append = text_to_append.encode()
            tmp.write(text_to_append)
            tmp.flush()
        path_to_edit = tmp.name

    try:
        # Note that we intentionally don't quote the editor.
        # In the unlikely case `editor` includes a percent
        # sign, we also escape it for the %-formatting.
        cmd = user_editor.replace("%", "%%") + " %s"
        run(cmd, parameter=path_to_edit, direct_output=True)
    except Exception as err:
        print(err)
        print(_('Please set a valid editor with "set editor"'))
    if tmp is None:
        return None
    try:
        tmp.seek(0)
        # lines are returned "raw", as a list of bytes objects
        # process outside as needed (see netcache.external_editor_input)
        return tmp.readlines()
    finally:
        # closing the temporary file also deletes it
        tmp.close()