File: kdev-pg-unicode-loader.cpp

package info (click to toggle)
kdevelop-pg-qt 1.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 6,144 kB
  • ctags: 3,624
  • sloc: cpp: 19,239; lex: 945; ansic: 716; yacc: 615; ruby: 68; sh: 14; lisp: 10; fortran: 6; makefile: 3
file content (127 lines) | stat: -rw-r--r-- 4,271 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/* This file is part of kdev-pg-qt
 * Copyright (C) 2011 Jonathan Schmidt-Dominé <devel@the-user.org>
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 * 
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#include "kdev-pg-unicode-loader.h"

#include "kdev-pg.h"
#include "kdev-pg-regexp.h"

#include <QFile>

namespace KDevPG
{

/**
 * SET_CHAR(str, i, x): parse a run of hexadecimal digits.
 *
 * Starting at index i of the indexable character sequence str, consumes
 * characters as long as they are hexadecimal digits (0-9, a-f, A-F) and
 * stores the resulting number in x.  Afterwards i is the index of the first
 * character that is not a hexadecimal digit.  If no digit is found, x is 0
 * and i is unchanged.
 *
 * str must contain a non-hexadecimal character (e.g. a delimiter or the
 * terminating NUL) at or after index i, because that is what ends the scan.
 *
 * The value is accumulated only after a character has been accepted as a
 * digit, so the full width of x is usable (eight digits for a 32-bit x).
 *
 * The macro expands to a plain brace block, so it can be used with or
 * without a trailing semicolon; do not use it as the unbraced body of an
 * if/else.  i and x are evaluated several times and must be side-effect-free
 * lvalues.
 */
#define SET_CHAR(str, i, x) \
{ \
  (x) = 0; \
  for(; ; ++(i)) \
  { \
    const char kdevPgHexChar = (str)[(i)]; \
    int kdevPgHexValue; \
    if(kdevPgHexChar >= 'a' && kdevPgHexChar <= 'f') \
      kdevPgHexValue = kdevPgHexChar - 'a' + 10; \
    else if(kdevPgHexChar >= 'A' && kdevPgHexChar <= 'F') \
      kdevPgHexValue = kdevPgHexChar - 'A' + 10; \
    else if(kdevPgHexChar >= '0' && kdevPgHexChar <= '9') \
      kdevPgHexValue = kdevPgHexChar - '0'; \
    else \
      break; \
    (x) = (x) * 16 + kdevPgHexValue; \
  } \
}

void standardFormat(const QString fileName)
{
  QMap<QByteArray, GNFA> res;
  QFile file(fileName);
  if(file.open(QIODevice::ReadOnly))
  {
    while(!file.atEnd())
    {
      auto line = file.readLine();
      if(line.size() > 0 && line[0] != '#')
      {
        if(line[0] != '#')
        {
          int idxDotDot = line.indexOf("..");
          if(idxDotDot != -1)
          {
            quint32 start;
            int i = 0;
            SET_CHAR(line, i, start)
            assert(i <= idxDotDot);
            i += 2;
            quint32 end = 0;
            int idxSemicolon = line.indexOf(';', idxDotDot + 2);
            SET_CHAR(line, i, end)
            assert(i <= idxSemicolon);
            QByteArray name = line.mid(idxSemicolon+1, (uint)(line.indexOf('#', idxSemicolon + 1)) - idxSemicolon - 1).trimmed().toLower();
            name.replace(' ', '_');
            name.replace('-', '_');
            auto toInsert = GNFA::range(start, end+1);
            if(globalSystem.regexpById[name] == 0)
              globalSystem.regexpById[name] = new GNFA(toInsert);
            else
              *globalSystem.regexpById[name] |= toInsert;
          }
          else
          {
            quint32 single;
            int i = 0;
            SET_CHAR(line, i, single);
            int idxSemicolon = line.indexOf(';', i);
            QByteArray name = line.mid(idxSemicolon+1, (uint)(line.indexOf('#', idxSemicolon + 1)) - idxSemicolon - 1).trimmed().toLower();
            name.replace(' ', '_');
            name.replace('-', '_');
            auto toInsert = GNFA::character(single);
            if(globalSystem.regexpById[name] == 0)
              globalSystem.regexpById[name] = new GNFA(toInsert);
            else
              *globalSystem.regexpById[name] |= toInsert;
          }
        }
      }
    }
  }
  else
    qFatal("** ERROR Failed to open unicode-data-file ``%s''", fileName.toUtf8().data());
}

/**
 * Populates globalSystem.regexpById with the character classes read from the
 * bundled Unicode data files and with a few derived classes.
 *
 * Only the first call does any work; later calls return immediately.
 */
void loadUnicodeData()
{
  static bool loaded = false;
  if(loaded)
    return;
  loaded = true;

  // All of these share the format understood by standardFormat().
  static const char * const dataFiles[] = {
    ":/unidata/Blocks.txt",
    ":/unidata/PropList.txt",
    ":/unidata/DerivedCoreProperties.txt",
    ":/unidata/Scripts.txt",
    ":/unidata/ScriptExtensions.txt",
    ":/unidata/DerivedNumericType.txt"
  };
  for(const char *path : dataFiles)
    standardFormat(path);

  // "num" is the union of the "numeric", "digit" and "decimal" classes.
  auto num = new GNFA(*globalSystem.regexpById["numeric"]);
  *num |= *globalSystem.regexpById["digit"];
  *num |= *globalSystem.regexpById["decimal"];
  globalSystem.regexpById["num"] = num;

  // Fixed code point ranges; the upper bound passed to GNFA::range is 0x80
  // respectively 0x100.
  globalSystem.regexpById["ascii-range"] = new GNFA(GNFA::range(0, 0x80));
  globalSystem.regexpById["latin1-range"] = new GNFA(GNFA::range(0, 0x100));

  // Not loaded:
  // IndicMatraCategory and IndicSyllabicCategory: same format, but should have a prefix, names like “vowel” are confusing when used for Indian vowels only
  // named sequences: other format
}

}