File: tokenizer.py

package info (click to toggle)
python-jsbeautifier 1.15.3-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 756 kB
sloc: python: 13,140; sh: 14; makefile: 7
file content (138 lines) | stat: -rw-r--r-- 4,389 bytes
parent folder | download | duplicates (3)
# The MIT License (MIT)
#
# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
from ..core.inputscanner import InputScanner
from ..core.token import Token
from ..core.tokenstream import TokenStream
from ..core.pattern import Pattern
from ..core.whitespacepattern import WhitespacePattern

# Names exported when this module is star-imported.
__all__ = ["TOKEN", "Tokenizer", "TokenizerPatterns", "TokenTypes"]


class TokenTypes:
    """String tags identifying the kinds of token used in this module."""

    # Type of the placeholder token that precedes the first token read.
    START = "TK_START"
    # Type given to text read by the default ``Tokenizer._get_next_token``.
    RAW = "TK_RAW"
    # Type of the final token; seeing it ends ``Tokenizer.tokenize``.
    EOF = "TK_EOF"

    def __init__(self):
        """Create an instance; no per-instance state is set."""


# Shared TokenTypes instance; the tokenizer below reads its type tags
# through this name (TOKEN.START, TOKEN.RAW, TOKEN.EOF).
TOKEN = TokenTypes()


class TokenizerPatterns:
    """Holder for the pattern objects a tokenizer reads with.

    Only a ``whitespace`` pattern is created here.
    """

    def __init__(self, input_scanner):
        """Build the whitespace pattern on top of *input_scanner*.

        Args:
            input_scanner: Scanner object handed to ``WhitespacePattern``.
        """
        whitespace_pattern = WhitespacePattern(input_scanner)
        self.whitespace = whitespace_pattern


class Tokenizer:
    """Base tokenizer that turns an input string into a ``TokenStream``.

    The default ``_get_next_token`` reads whitespace and then produces a
    ``TOKEN.RAW`` token for whatever the ``.+`` pattern reads, or a
    ``TOKEN.EOF`` token when nothing is read. ``_is_comment``,
    ``_is_opening`` and ``_is_closing`` all answer ``False`` here, so comment
    collection and open/close pairing only take effect when those methods
    are overridden.
    """

    # One or more characters other than a newline (no DOTALL flag).
    # Compiled once here instead of on every ``_get_next_token`` call.
    __raw_text_pattern = re.compile(r".+")

    def __init__(self, input_string, options):
        """Set up the scanner and patterns for *input_string*.

        Args:
            input_string: Text to tokenize; wrapped in an ``InputScanner``.
            options: Stored on ``self._options``; not read by this class.
        """
        self._input = InputScanner(input_string)
        self._options = options
        # Populated by tokenize(); None until then.
        self.__tokens = None

        self._patterns = TokenizerPatterns(self._input)

    def tokenize(self):
        """Tokenize the input from the beginning and return the tokens.

        The input scanner is restarted and a fresh ``TokenStream`` is
        created on every call. Tokens are read until one of type
        ``TOKEN.EOF`` has been added to the stream. When a token is
        recognised as closing the currently open token, the two are linked
        through ``opened``/``closed``.

        Returns:
            The ``TokenStream`` containing every token read, EOF included.
        """
        self._input.restart()
        self.__tokens = TokenStream()

        # Placeholder so the first real token has a ``previous`` to link to.
        previous = Token(TOKEN.START, "")
        open_token = None
        open_stack = []

        while previous.type != TOKEN.EOF:
            current = self.__get_next_token_with_comments(previous, open_token)

            if self._is_opening(current):
                # Remember the enclosing open token (possibly None) so it
                # can be restored when this one is closed.
                open_stack.append(open_token)
                open_token = current
            elif open_token is not None and self._is_closing(current, open_token):
                current.opened = open_token
                open_token.closed = current
                open_token = open_stack.pop()
                # The closing token's parent becomes the restored enclosing
                # open token rather than the token it closes.
                current.parent = open_token

            self.__tokens.add(current)
            previous = current
        return self.__tokens

    def __get_next_token_with_comments(self, previous, open_token):
        """Read the next non-comment token and link it to its neighbours.

        Consecutive comment tokens (as judged by ``_is_comment``) are
        gathered into a ``TokenStream`` and attached to the following token
        as ``comments_before``.

        Args:
            previous: The token read before this one.
            open_token: The currently open token, or None.

        Returns:
            The next non-comment token, with ``parent``, ``previous`` and
            (on *previous*) ``next`` set.
        """
        current = self._get_next_token(previous, open_token)

        if self._is_comment(current):
            comments = TokenStream()
            while self._is_comment(current):
                comments.add(current)
                current = self._get_next_token(previous, open_token)

            if not comments.isEmpty():
                current.comments_before = comments

        current.parent = open_token
        current.previous = previous
        previous.next = current

        return current

    def _is_first_token(self):
        """Return True when no token has been added to the stream yet.

        Also True before ``tokenize()`` has created the stream, instead of
        failing on the initial ``None``.
        """
        tokens = self.__tokens
        return tokens is None or tokens.isEmpty()

    def _reset(self):
        """Do nothing in this base class.

        NOTE(review): no method of this class calls ``_reset``; confirm
        whether ``tokenize()`` is expected to invoke it.
        """
        pass

    def _get_next_token(self, previous_token, open_token):
        """Read whitespace, then return the next RAW token or an EOF token.

        Args:
            previous_token: Unused by this default implementation.
            open_token: Unused by this default implementation.

        Returns:
            A ``TOKEN.RAW`` token holding the text read with the ``.+``
            pattern, or a ``TOKEN.EOF`` token with empty text when that
            read yields nothing.
        """
        self._readWhitespace()
        resulting_string = self._input.read(self.__raw_text_pattern)
        if resulting_string:
            return self._create_token(TOKEN.RAW, resulting_string)
        return self._create_token(TOKEN.EOF, "")

    def _is_comment(self, current_token):
        """Return whether *current_token* is a comment; always False here."""
        return False

    def _is_opening(self, current_token):
        """Return whether *current_token* opens a scope; always False here."""
        return False

    def _is_closing(self, current_token, open_token):
        """Return whether *current_token* closes *open_token*; always False here."""
        return False

    def _create_token(self, token_type, text):
        """Build a ``Token`` of *token_type* carrying *text*.

        The whitespace pattern's current ``newline_count`` and
        ``whitespace_before_token`` are passed along to the token
        (presumably describing the whitespace read just before it -
        confirm against ``WhitespacePattern``).
        """
        whitespace = self._patterns.whitespace
        return Token(
            token_type,
            text,
            whitespace.newline_count,
            whitespace.whitespace_before_token,
        )

    def _readWhitespace(self):
        """Read with the whitespace pattern and return its result."""
        return self._patterns.whitespace.read()