File: tokenizer.rb

package info (click to toggle)

ruby-github-linguist 7.27.0-1

links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 14,204 kB
sloc: ruby: 1,872; lex: 173; ansic: 35; makefile: 9

file content (20 lines) | stat: -rw-r--r-- 484 bytes

parent folder | download | duplicates (4)

require 'strscan'
require 'linguist/linguist'

module Linguist
  # Generic tokenizer for programming-language source text.
  #
  # The tokens it produces are meant to feed the language bayes
  # classifier: data strings and comments are stripped, while
  # significant language symbols are preserved.
  #
  # NOTE(review): #extract_tokens is not defined in this file; it is
  # presumably supplied by the required 'linguist/linguist' -- confirm.
  class Tokenizer
    class << self
      # Public: Tokenize a chunk of source text using a fresh Tokenizer.
      #
      # data - The String to split into tokens.
      #
      # Returns an Array of token Strings.
      def tokenize(data)
        tokenizer = new
        tokenizer.extract_tokens(data)
      end
    end
  end
end