File: url.cpp

package info (click to toggle)
aspell 0.60.6-1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 10,000 kB
  • ctags: 4,862
  • sloc: sh: 48,145; cpp: 22,153; perl: 1,546; ansic: 1,535; makefile: 684; sed: 16
file content (66 lines) | stat: -rw-r--r-- 1,764 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// This file is part of The New Aspell
// Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
// version 2.0 or 2.1.  You should have received a copy of the LGPL
// license along with this library; if you did not, you can find
// it at http://www.gnu.org/.

#include "settings.h"
#include "indiv_filter.hpp"
#include "key_info.hpp"

namespace {
  using namespace acommon;

  // Filter that hides URL-like tokens (URLs, e-mail addresses, host
  // names) by overwriting them with spaces.  The detection rules live
  // in process().
  class UrlFilter : public IndividualFilter {
  public:
    // Sets the filter's name and order number; always reports success.
    PosibErr<bool> setup(Config * config);

    // This class declares no state of its own, so there is nothing to
    // reset.
    void reset() {}

    // Overwrites every URL-like token found in [begin, end) with
    // spaces.
    void process(FilterChar * & begin, FilterChar * & end);
  };

  // Configures the filter by recording its name and its order number.
  // The Config argument is not consulted.  Always returns true.
  PosibErr<bool> UrlFilter::setup(Config * /* unused */)
  {
    order_num_ = 0.95;
    name_ = "url-filter";
    return true;
  }

  // Returns true when `c` may be part of a URL-like token, i.e. when it
  // is none of the delimiters '"', ' ', '\n' or '\t'.
  //
  // The argument is an unsigned int rather than a char: process()
  // passes FilterChar values here through an implicit conversion, and
  // narrowing such a value to char could make a character outside the
  // char range alias one of the ASCII delimiters (for example 0x120
  // truncating to 0x20, a space).  Plain char arguments give the same
  // result as before.
  // NOTE(review): assumes FilterChar converts to an unsigned integer
  // character code -- confirm against its declaration.
  static bool url_char(unsigned int c)
  {
    switch (c) {
    case '"':
    case ' ':
    case '\n':
    case '\t':
      return false;
    default:
      return true;
    }
  }
 
  // Blanks out every URL-like token in [str, end) by overwriting each
  // of its characters with a space.
  //
  // A token is a maximal run of characters accepted by url_char().  It
  // is blanked when one of its interior characters (the first and the
  // last character of a token are never examined) is
  //   * an '@',
  //   * a '/' that is directly followed by another '/' or that comes
  //     after a counted '.', or
  //   * the second '.' that is not directly followed by another '.'
  //     (a run of dots is counted once).
  //
  // Every token is delimited before it is examined, so a one-character
  // token is never merged with the token that follows it, and no
  // pointer is advanced beyond `end`.
  //
  // str, end: bounds of the buffer.  The characters are modified in
  //           place; the two pointers themselves are left unchanged.
  void UrlFilter::process(FilterChar * & str, FilterChar * & end)
  {
    FilterChar * cur = str;
    while (cur < end)
    {
      if (!url_char(*cur)) {
        ++cur;
        continue;
      }

      // [token_begin, token_end) is the maximal run of url characters
      // starting at cur.
      FilterChar * const token_begin = cur;
      FilterChar * token_end = cur + 1;
      while (token_end < end && url_char(*token_end)) ++token_end;

      bool blank_out = false;
      int point_chars = 0;
      // only consider special url deciding characters if they are in
      // the middle of a word; the distance test keeps p[1] inside the
      // token without forming a pointer past token_end
      for (FilterChar * p = token_begin + 1;
           !blank_out && token_end - p > 1;
           ++p)
      {
        if ((p[0] == '/' && (point_chars > 0 || p[1] == '/'))
            || p[0] == '@') {
          blank_out = true;
        } else if (p[0] == '.' && p[1] != '.') {
          // count multiple '.' as one
          if (point_chars < 1) ++point_chars;
          else                 blank_out = true;
        }
      }

      if (blank_out) {
        for (FilterChar * p = token_begin; p != token_end; ++p) *p = ' ';
      }

      // Resume at the delimiter that ended the token (or at end).
      cur = token_end;
    }
  }
}

// Factory entry point for the URL filter: allocates a new UrlFilter
// and returns it through the IndividualFilter interface.  The object
// is created with `new`; this function neither retains nor frees it.
// NOTE(review): C_EXPORT is defined outside this file -- presumably it
// controls the linkage of this symbol; confirm in settings.h.
C_EXPORT
IndividualFilter * new_aspell_url_filter() {
  IndividualFilter * filter = new UrlFilter;
  return filter;
}