File: text-parser.cpp

package info (click to toggle)
ktp-common-internals 0.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,436 kB
  • sloc: cpp: 4,196; makefile: 4; sh: 3
file content (94 lines) | stat: -rw-r--r-- 3,027 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
 * Text Parser common class
 * Copyright (C) 2004 Peter Simonsson <psn@linux.se>
 * Copyright (C) 2006-2008 Eike Hein <hein@kde.org>
 * Copyright (C) 2011 Przemek Czekaj <xcojack@gmail.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "text-parser.h"

#include <QtCore/QLatin1String>

namespace KTp
{

/**
 * The single shared TextParser object; stays NULL until instance() creates it.
 */
TextParser* TextParser::s_instance = NULL;

/**
 * RegExp for URL and e-mail address detection.
 *
 * The pattern has two top-level alternatives:
 *  - a web-style URL that starts with an explicit scheme (a letter, further
 *    word/dot/dash characters, ':' and one to three '/'), or with "www"
 *    plus up to three digits and a dot, or with a dotted host name whose
 *    2-4 letter last label is directly followed by '/'. The remainder may
 *    contain parenthesised groups, nested at most two levels deep;
 *  - an e-mail style address: local part, '@', dotted domain.
 *
 * In both alternatives the last matched character is restricted so that a
 * match does not end in whitespace, brackets, quotes or common sentence
 * punctuation. The %1..%6 placeholders inside those two character classes
 * are replaced by the typographic quotes U+00AB, U+00BB, U+201C, U+201D,
 * U+2018 and U+2019.
 *
 * Capture 1 is the entire match; capture 2 is the explicit scheme prefix
 * and is empty when the match has none.
 *
 * Only lower-case letters are spelled out in the pattern, so it relies on
 * extractUrlData() switching it to case-insensitive matching before use.
 */
static QRegExp s_urlPattern(QString(QLatin1String("\\b((?:(?:([a-z][\\w\\.-]+:/{1,3})|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|\\}\\]|[^\\s`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6])|[a-z0-9.\\-+_]+@[a-z0-9.\\-]+[.][a-z]{1,5}[^\\s/`!()\\[\\]{};:'\".,<>?%1%2%3%4%5%6]))"))
                            .arg(QChar(0x00AB)).arg(QChar(0x00BB)).arg(QChar(0x201C)).arg(QChar(0x201D)).arg(QChar(0x2018)).arg(QChar(0x2019)));

/**
 * Creates a parser object. The @p parent pointer is passed straight through
 * to the QObject base class; no other state is set up here.
 */
TextParser::TextParser(QObject* parent) : QObject(parent) {}

/**
 * Gives access to the shared TextParser object.
 *
 * The object is allocated on the first call, with a null parent, and the
 * same pointer is handed out on every later call.
 *
 * @return pointer to the shared parser; never NULL.
 */
TextParser* TextParser::instance()
{
    // Fast path: the shared object already exists.
    if (s_instance) {
        return s_instance;
    }

    s_instance = new TextParser(0);
    return s_instance;
}

/**
 * Locates every URL and e-mail address in @p text.
 *
 * For each match of s_urlPattern the (start position, length) pair is
 * appended to TextUrlData::urlRanges. When @p doUrlFixup is true, the matched
 * text is additionally appended to TextUrlData::fixedUrls, prefixed with a
 * guessed protocol if the match carries no explicit scheme:
 *  - "mailto:" for an e-mail address ('@' present and no '/'),
 *  - "ftp://"  when the match starts with "ftp.",
 *  - "http://" otherwise.
 * Matches that already start with a scheme are stored unchanged.
 *
 * NOTE: matching goes through the file-level s_urlPattern object, whose
 * capture state is rewritten by every indexIn() call, so concurrent calls
 * from several threads are not safe.
 *
 * @param text        the text to scan; it is not modified.
 * @param doUrlFixup  whether to fill TextUrlData::fixedUrls.
 * @return the collected ranges and, optionally, the fixed-up URLs.
 */
TextUrlData TextParser::extractUrlData(const QString& text, bool doUrlFixup)
{
    TextUrlData data;

    // The pattern only spells out lower-case letters, so it has to be
    // matched case-insensitively to catch e.g. "HTTP://" or "WWW.".
    s_urlPattern.setCaseSensitivity(Qt::CaseInsensitive);

    int pos = 0;
    while ((pos = s_urlPattern.indexIn(text, pos)) >= 0) {
        const int urlLen = s_urlPattern.matchedLength();
        const QString href = text.mid(pos, urlLen);

        data.urlRanges << QPair<int, int>(pos, urlLen);
        // Resume the search right behind the current match.
        pos += urlLen;

        if (!doUrlFixup) {
            continue;
        }

        QString protocol;
        // Capture 2 holds the explicit scheme ("http://", "irc://", ...).
        // Only guess a protocol when the match came without one.
        if (s_urlPattern.cap(2).isEmpty()) {
            // The e-mail branch of the pattern can never contain '/', so an
            // '@' next to a '/' is part of a web URL path or user info
            // (e.g. "example.com/@user") and must not become "mailto:".
            const bool isMailAddress = href.contains(QLatin1Char('@'))
                                       && !href.contains(QLatin1Char('/'));
            if (isMailAddress) {
                protocol = QLatin1String("mailto:");
            } else if (href.startsWith(QLatin1String("ftp."), Qt::CaseInsensitive)) {
                protocol = QLatin1String("ftp://");
            } else {
                protocol = QLatin1String("http://");
            }
        }

        const QString fixedUrl = protocol + href;
        data.fixedUrls.append(fixedUrl);
    }

    return data;
}

/**
 * Destroys the parser. There is nothing to release explicitly here.
 */
TextParser::~TextParser() {}

}