File: simple-tokenize.cpp

package info (click to toggle)
llama.cpp 7593+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 71,012 kB
  • sloc: cpp: 329,391; ansic: 48,249; python: 32,103; lisp: 10,053; sh: 6,070; objc: 1,349; javascript: 924; xml: 384; makefile: 233
file content (37 lines) | stat: -rw-r--r-- 863 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include "simple-tokenize.h"

// Cuts `input` into consecutive, non-empty pieces and returns them in order.
//
// A cut is made immediately *before* every boundary character (space, newline,
// tab, and the punctuation `{ } , [ ] " . < > = /`), so a boundary character is
// never discarded: it becomes the first character of the piece that follows.
// Concatenating the returned pieces therefore reproduces `input` exactly.
//
// An empty `input` yields an empty vector.
std::vector<std::string> simple_tokenize(const std::string & input) {
    // Every character that opens a new piece.
    const char * const boundary_chars = " \n\t{},[\"].<>=/";

    std::vector<std::string> pieces;

    size_t begin = 0;
    while (begin < input.size()) {
        // The character at `begin` always belongs to the current piece (even if
        // it is itself a boundary character), so start searching one past it.
        const size_t next = input.find_first_of(boundary_chars, begin + 1);

        if (next == std::string::npos) {
            // No further boundary: the remainder is the final piece.
            pieces.push_back(input.substr(begin));
            break;
        }

        pieces.push_back(input.substr(begin, next - begin));
        begin = next;
    }

    return pieces;
}