File: test_file.py

package info (click to toggle)
linkchecker 10.6.0-2
links: PTS, VCS
area: main
in suites: forky, sid
size: 3,132 kB
sloc: python: 13,154; makefile: 134; sh: 71; xml: 36; sql: 20; javascript: 19; php: 2
file content (194 lines) | stat: -rw-r--r-- 6,265 bytes
parent folder | download | duplicates (2)
# Copyright (C) 2004-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Test file parsing.
"""
import os
from pathlib import Path
import sys
import zipfile

import pytest

from tests import need_network, need_word, need_pdflib
from . import LinkCheckTest, get_file


def unzip(filename, targetdir):
    """Unzip given zipfile into targetdir.

    Archive entries whose name ends with "/" are created as directories
    (requested mode 0o700); every other entry is written out as a regular
    file containing the entry's bytes.  Missing parent directories are
    created on demand and directories that already exist are left alone,
    so archives without explicit directory entries and repeated
    extractions into the same target both work.

    The archive and every output file are closed deterministically, even
    when extraction fails part-way through.
    """
    # There are likely problems with zipfile and non-Unicode filenames
    # https://github.com/python/cpython/issues/83042
    # https://github.com/python/cpython/issues/72267
    # https://github.com/python/cpython/issues/95463
    with zipfile.ZipFile(filename) as zf:
        for name in zf.namelist():
            target = os.path.join(targetdir, name)
            if name.endswith("/"):
                # Explicit directory entry.
                os.makedirs(target, mode=0o700, exist_ok=True)
                continue
            # Not every archive lists a directory before the files in it.
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, mode=0o700, exist_ok=True)
            with open(target, "wb") as outfile:
                outfile.write(zf.read(name))


class TestFile(LinkCheckTest):
    """
    Test file:// link checking (and file content parsing).

    Most tests hand the name of a data file to ``file_test``; the
    remaining ones build a file URL by hand and pass it, together with
    the expected result lines, to ``direct``.
    """

    def test_html(self):
        """Run the file test on file.html."""
        self.file_test("file.html")

    @need_network
    def test_html_url_quote(self):
        """Run the file test on file_url_quote.html (needs network)."""
        self.file_test("file_url_quote.html")

    def test_wml(self):
        """Run the file test on file.wml."""
        self.file_test("file.wml")

    def test_text(self):
        """Run the file test on file.txt."""
        self.file_test("file.txt")

    def test_asc(self):
        """Run the file test on file.asc."""
        self.file_test("file.asc")

    def test_css(self):
        """Run the file test on file.css."""
        self.file_test("file.css")

    def test_php(self):
        """Run the file test on file.php."""
        self.file_test("file.php")

    def test_empty(self):
        """Run the file test on empty.html."""
        self.file_test("empty.html")

    @need_word
    def test_word(self):
        """Run the file test on file.doc with the WordParser plugin."""
        plugin_conf = {"enabledplugins": ["WordParser"]}
        self.file_test("file.doc", confargs=plugin_conf)

    @need_pdflib
    def test_pdf(self):
        """Run the file test on file.pdf with the PdfParser plugin."""
        plugin_conf = {"enabledplugins": ["PdfParser"]}
        self.file_test("file.pdf", confargs=plugin_conf)

    def test_markdown(self):
        """Run the file test on a markdown file with MarkdownCheck."""
        # Interpreters older than 3.12.5 use a separate data file
        # (presumably because they detect a different content type for
        # ".markdown" -- TODO confirm).
        fixture = (
            "file-octet-stream.markdown"
            if sys.version_info < (3, 12, 5)
            else "file.markdown"
        )
        plugin_conf = {"enabledplugins": ["MarkdownCheck"]}
        self.file_test(fixture, confargs=plugin_conf)

    def test_urllist(self):
        """Run the file test on urllist.txt."""
        self.file_test("urllist.txt")

    @pytest.mark.xfail(strict=True)
    def test_directory_listing(self):
        """Run the file test on the "dir" directory (expected to fail)."""
        if not (os.name == "posix" and sys.platform == "linux"):
            pytest.skip("Not running on POSIX or Linux")
        # The directory holds a non-unicode filename which cannot be
        # stored in the SF subversion repository, so it is shipped as a
        # zip archive and unpacked on first use.
        listing_dir = get_file("dir")
        if not os.path.isdir(listing_dir):
            unzip(listing_dir + ".zip", os.path.dirname(listing_dir))
        self.file_test("dir")

    def test_directory_listing_unicode(self):
        """Run the file test on "udir" after creating a file in it."""
        if not (os.name == "posix" and sys.platform == "linux"):
            pytest.skip("Not running on POSIX or Linux")
        listing_dir = Path(get_file("udir"))
        listing_dir.mkdir(exist_ok=True)
        (listing_dir / "í»¯¿.dat").touch()
        self.file_test("udir")

    def test_unicode_filename(self):
        """Run the file test on a data file with a unicode filename."""
        self.file_test("Мошкова.bin")

    def test_good_file(self):
        """A "file://" URL for file.txt is expected to be valid."""
        attrs = self.get_attrs()
        url = "file://%(curdir)s/%(datadir)s/file.txt" % attrs
        normalized = self.norm(url)
        expected = [
            "url %s" % url,
            "cache key %s" % normalized,
            "real url %s" % normalized,
            "valid",
        ]
        self.direct(url, expected)

    def test_bad_file(self):
        """A "file:/" URL for file.txt is expected to yield an error."""
        if os.name == "nt":
            # Known failure on NT platforms that has not been fixed.
            # Cause: the url gets quoted to %7C, which is then
            # lowercased to %7c, and this fails.
            pytest.skip("Running on Windows")
        attrs = self.get_attrs()
        url = "file:/%(curdir)s/%(datadir)s/file.txt" % attrs
        normalized = self.norm(url)
        expected = [
            "url %s" % url,
            "cache key %s" % normalized,
            "real url %s" % normalized,
            "error",
        ]
        self.direct(url, expected)

    def test_good_file_missing_dslash(self):
        """A "file:" URL without the double slash is expected to be valid."""
        attrs = self.get_attrs()
        url = "file:%(curdir)s/%(datadir)s/file.txt" % attrs
        # Cache key and real url are expected in the "file://" form.
        expected = ["url %s" % url]
        expected.append(
            "cache key file://%(curdir)s/%(datadir)s/file.txt" % attrs
        )
        expected.append(
            "real url file://%(curdir)s/%(datadir)s/file.txt" % attrs
        )
        expected.append("valid")
        self.direct(url, expected)

    def test_good_dir(self):
        """A "file://" URL for the data directory is expected to be valid."""
        attrs = self.get_attrs()
        url = "file://%(curdir)s/%(datadir)s/" % attrs
        # The URL is expected unchanged as cache key and real url.
        expected = [
            "url %s" % url,
            "cache key %s" % url,
            "real url %s" % url,
            "valid",
        ]
        self.direct(url, expected)

    def test_good_dir_space(self):
        """Check a directory with a space in its name at recursion level 2."""
        attrs = self.get_attrs()
        dir_url = "file://%(curdir)s/%(datadir)s/a b/" % attrs
        html_url = "file://%(curdir)s/%(datadir)s/a b/el.html" % attrs
        text_url = "file://%(curdir)s/%(datadir)s/a b/t.txt" % attrs
        dir_norm = self.norm(dir_url)
        html_norm = self.norm(html_url)
        text_norm = self.norm(text_url)
        # Expected output: the directory itself, then its two entries.
        expected = [
            "url %s" % dir_url,
            "cache key %s" % dir_norm,
            "real url %s" % dir_norm,
            "valid",
        ]
        expected += [
            "url el.html",
            "cache key %s" % html_norm,
            "real url %s" % html_norm,
            "name el.html",
            "valid",
        ]
        expected += [
            "url t.txt",
            "cache key %s" % text_norm,
            "real url %s" % text_norm,
            "name t.txt",
            "valid",
        ]
        self.direct(dir_url, expected, recursionlevel=2)