File: wordtokenizer.h

package info (click to toggle)
source-highlight 3.1.7-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 10,332 kB
  • ctags: 5,233
  • sloc: sh: 11,270; cpp: 10,206; ansic: 9,515; makefile: 1,865; lex: 1,200; yacc: 1,021; php: 213; perl: 211; awk: 98; erlang: 94; lisp: 90; java: 75; ruby: 69; python: 61; asm: 43; ml: 38; ada: 36; haskell: 27; xml: 23; cs: 11; sql: 8; tcl: 6; sed: 4
file content (38 lines) | stat: -rw-r--r-- 946 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
//
// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004-2008
//
// Copyright: See COPYING file that comes with this distribution
//

#ifndef WORDTOKENIZER_H_
#define WORDTOKENIZER_H_

#include <algorithm>
#include <list>
#include <string>
#include <utility>

namespace srchilite {

/**
 * Tokenizes a paragraph, separating words from spaces.
 *
 * Stateless utility class: only the static tokenize() entry point is
 * declared, so the class is never meant to be instantiated. The
 * implementation is not in this header (presumably wordtokenizer.cpp).
 */
class WordTokenizer {
public:
    /**
     * Results of the tokenizer; each element is a pair where the first
     * string represents a possible space and the second string a possible word.
     * The two elements are mutually exclusive, i.e., for any given pair
     * exactly one of the two strings is meant to be non-empty.
     */
    typedef std::list<std::pair<std::string, std::string> > WordTokenizerResults;

    /**
     * Tokenizes the passed string and stores the results.
     * @param s the string to tokenize
     * @param results where to store the results
     * (NOTE(review): whether pre-existing contents of results are cleared
     * or appended to cannot be determined from this header — check the
     * implementation before relying on either behavior)
     */
    static void tokenize(const std::string &s, WordTokenizerResults &results);
};

}

#endif /*WORDTOKENIZER_H_*/