File: importer.py

package info (click to toggle)
qutebrowser 3.6.1-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 21,424 kB
sloc: python: 72,323; javascript: 31,862; sh: 874; xml: 147; makefile: 52
file content (330 lines) | stat: -rwxr-xr-x 11,467 bytes
parent folder | download | duplicates (2)
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Claude (longneck) <longneck@scratchbook.ch>
# SPDX-FileCopyrightText: Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later


"""Tool to import data from other browsers.

Currently importing bookmarks from Netscape HTML Bookmark files, Chrome
profiles, and Mozilla profiles is supported.
"""

import contextlib
import argparse
import textwrap
import sqlite3
import os
import urllib.parse
import json
import string


def main():
    args = get_args()
    bookmark_types = []
    output_format = None
    input_format = args.input_format
    if args.search_output:
        bookmark_types = ['search']
        if args.oldconfig:
            output_format = 'oldsearch'
        else:
            output_format = 'search'
    else:
        if args.bookmark_output:
            output_format = 'bookmark'
        elif args.quickmark_output:
            output_format = 'quickmark'
        if args.import_bookmarks:
            bookmark_types.append('bookmark')
        if args.import_keywords:
            bookmark_types.append('keyword')
    if not bookmark_types:
        bookmark_types = ['bookmark', 'keyword']
    if not output_format:
        output_format = 'quickmark'

    import_function = {
        'html': import_html_bookmarks,
        'mozilla': import_moz_places,
        'chrome': import_chrome,
    }
    import_function[input_format](args.bookmarks, bookmark_types,
                                  output_format)


def get_args():
    """Get the argparse parser."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent('''
            To import bookmarks, you'll need the path to your profile or an
            exported HTML file from your browser's bookmark manager. Redirect
            the output from this script to the appropriate file in your
            qutebrowser config directory (listed in the output of :version),
            usually done with the '>' operator; for example,
                ./importer.py -i mozilla your_profile_path > ~/.config/qutebrowser/quickmarks

            Common browsers with native input format support:
                chrome: Chrome, Chromium, Edge
                mozilla: Firefox, SeaMonkey, Pale Moon
        '''))
    parser.add_argument(
        '-i',
        '--input-format',
        help="Which input format? Defaults to html",
        choices=['html', 'mozilla', 'chrome'],
        default='html',
        required=False)
    parser.add_argument(
        '-b',
        '--bookmark-output',
        help="Output in bookmark format.",
        action='store_true',
        default=False,
        required=False)
    parser.add_argument(
        '-q',
        '--quickmark-output',
        help="Output in quickmark format (default).",
        action='store_true',
        default=False,
        required=False)
    parser.add_argument(
        '-s',
        '--search-output',
        help="Output config.py search engine format (negates -B and -K)",
        action='store_true',
        default=False,
        required=False)
    parser.add_argument(
        '--oldconfig',
        help="Output search engine format for old qutebrowser.conf format",
        default=False,
        action='store_true',
        required=False)
    parser.add_argument(
        '-B',
        '--import-bookmarks',
        help="Import plain bookmarks (can be combiend with -K)",
        action='store_true',
        default=False,
        required=False)
    parser.add_argument(
        '-K',
        '--import-keywords',
        help="Import keywords (can be combined with -B)",
        action='store_true',
        default=False,
        required=False)
    parser.add_argument(
        'bookmarks',
        help="Bookmarks file (html format) or "
        "profile folder (Mozilla format)")
    args = parser.parse_args()
    return args


def search_escape(url):
    """Escape URLs such that preexisting { and } are handled properly.

    Will obviously trash a properly-formatted qutebrowser URL.
    """
    return url.replace('{', '{{').replace('}', '}}')


def opensearch_convert(url):
    """Convert a basic OpenSearch URL into something qutebrowser can use.

    Exceptions:
        KeyError:
            An unknown and required parameter is present in the URL. This
            usually means there's browser/addon specific functionality needed
            to build the URL (I'm looking at you and your browser, Google) that
            obviously won't be present here.
    """
    subst = {
        'searchTerms': '%s',  # for proper escaping later
        'language': '*',
        'inputEncoding': 'UTF-8',
        'outputEncoding': 'UTF-8'
    }

    # remove optional parameters (even those we don't support)
    for param in string.Formatter().parse(url):
        if param[1]:
            if param[1].endswith('?'):
                url = url.replace('{' + param[1] + '}', '')
            elif param[2] and param[2].endswith('?'):
                url = url.replace('{' + param[1] + ':' + param[2] + '}', '')
    return search_escape(url.format(**subst)).replace('%s', '{}')


def import_html_bookmarks(bookmarks_file, bookmark_types, output_format):
    """Import bookmarks from a NETSCAPE-Bookmark-file v1.

    Generated by Chromium, Firefox, IE and possibly more browsers. Not all
    export all possible bookmark types:
        - Firefox mostly works with everything
        - Chrome doesn't support keywords at all; searches are a separate
          database
    """
    import bs4
    with open(bookmarks_file, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f, 'html.parser')
    bookmark_query = {
        'search': lambda tag: (
            (tag.name == 'a') and
            ('shortcuturl' in tag.attrs) and
            ('%s' in tag['href'])),
        'keyword': lambda tag: (
            (tag.name == 'a') and
            ('shortcuturl' in tag.attrs) and
            ('%s' not in tag['href'])),
        'bookmark': lambda tag: (
            (tag.name == 'a') and
            ('shortcuturl' not in tag.attrs) and
            (tag.string)),
    }
    output_template = {
        'search': {
            'search':
            "c.url.searchengines['{tag[shortcuturl]}'] = "
            "'{tag[href]}' #{tag.string}"
        },
        'oldsearch': {
            'search': '{tag[shortcuturl]} = {tag[href]} #{tag.string}',
        },
        'bookmark': {
            'bookmark': '{tag[href]} {tag.string}',
            'keyword': '{tag[href]} {tag.string}'
        },
        'quickmark': {
            'bookmark': '{tag.string} {tag[href]}',
            'keyword': '{tag[shortcuturl]} {tag[href]}'
        }
    }
    bookmarks = []
    for typ in bookmark_types:
        tags = soup.find_all(bookmark_query[typ])
        for tag in tags:
            if typ == 'search':
                tag['href'] = search_escape(tag['href']).replace('%s', '{}')
            if tag['href'] not in bookmarks:
                bookmarks.append(
                    output_template[output_format][typ].format(tag=tag))
    for bookmark in bookmarks:
        print(bookmark)


def import_moz_places(profile, bookmark_types, output_format):
    """Import bookmarks from a Mozilla profile's places.sqlite database."""
    place_query = {
        'bookmark': (
            "SELECT DISTINCT moz_bookmarks.title,moz_places.url "
            "FROM moz_bookmarks,moz_places "
            "WHERE moz_places.id=moz_bookmarks.fk "
            "AND moz_places.id NOT IN (SELECT place_id FROM moz_keywords) "
            "AND moz_places.url NOT LIKE 'place:%';"
        ),  # Bookmarks with no keywords assigned
        'keyword': (
            "SELECT moz_keywords.keyword,moz_places.url "
            "FROM moz_keywords,moz_places,moz_bookmarks "
            "WHERE moz_places.id=moz_bookmarks.fk "
            "AND moz_places.id=moz_keywords.place_id "
            "AND moz_places.url NOT LIKE '%!%s%' ESCAPE '!';"
        ),  # Bookmarks with keywords assigned but no %s substitution
        'search': (
            "SELECT moz_keywords.keyword, "
            "    moz_bookmarks.title, "
            "    search_conv(moz_places.url) AS url "
            "FROM moz_keywords,moz_places,moz_bookmarks "
            "WHERE moz_places.id=moz_bookmarks.fk "
            "AND moz_places.id=moz_keywords.place_id "
            "AND moz_places.url LIKE '%!%s%' ESCAPE '!';"
        )  # bookmarks with keyword and %s substitution
    }
    out_template = {
        'bookmark': {
            'bookmark': '{url} {title}',
            'keyword': '{url} {keyword}'
        },
        'quickmark': {
            'bookmark': '{title} {url}',
            'keyword': '{keyword} {url}'
        },
        'oldsearch': {
            'search': '{keyword} {url} #{title}'
        },
        'search': {
            'search': "c.url.searchengines['{keyword}'] = '{url}' #{title}"
        }
    }

    def search_conv(url):
        return search_escape(url).replace('%s', '{}')

    places_sqlite = os.path.join(profile, "places.sqlite")
    with contextlib.closing(sqlite3.connect(places_sqlite)) as places:
        places.create_function('search_conv', 1, search_conv)
        places.row_factory = sqlite3.Row
        c = places.cursor()
        for typ in bookmark_types:
            c.execute(place_query[typ])
            for row in c:
                print(out_template[output_format][typ].format(**row))


def import_chrome(profile, bookmark_types, output_format):
    """Import bookmarks and search keywords from Chrome-type profiles.

    On Chrome, keywords and search engines are the same thing and handled in
    their own database table; bookmarks cannot have associated keywords. This
    is why the dictionary lookups here are much simpler.
    """
    out_template = {
        'bookmark': '{url} {name}',
        'quickmark': '{name} {url}',
        'search': "c.url.searchengines['{keyword}'] = '{url}'",
        'oldsearch': '{keyword} {url}'
    }

    if 'search' in bookmark_types:
        webdata_db = os.path.join(profile, 'Web Data')
        with contextlib.closing(sqlite3.connect(webdata_db)) as webdata:
            c = webdata.cursor()
            c.execute('SELECT keyword,url FROM keywords;')
            for keyword, url in c:
                try:
                    url = opensearch_convert(url)
                    print(out_template[output_format].format(
                        keyword=keyword, url=url))
                except KeyError:
                    print('# Unsupported parameter in url for {}; skipping....'.
                        format(keyword))

    else:
        with open(os.path.join(profile, 'Bookmarks'), encoding='utf-8') as f:
            bookmarks = json.load(f)

        def bm_tree_walk(bm, template):
            """Recursive function to walk through bookmarks."""
            if not isinstance(bm, dict):
                return
            assert 'type' in bm, bm
            if bm['type'] == 'url':
                if urllib.parse.urlparse(bm['url']).scheme != 'chrome':
                    print(template.format(**bm))
            elif bm['type'] == 'folder':
                for child in bm['children']:
                    bm_tree_walk(child, template)

        for root in bookmarks['roots'].values():
            bm_tree_walk(root, out_template[output_format])


if __name__ == '__main__':
    main()