1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
# SPDX-License-Identifier: GPL-3.0-or-later
# Copyright (C) 2025-2026 Benjamin Abendroth <braph93@gmx.de>
'''Module for parsing shell-like commands with logical operators.'''
from .string_stream import StringStream
# pylint: disable=too-few-public-methods
class Literal:
    '''A piece of literal text, as opposed to an operator token.

    The wrapped text is kept in the ``string`` attribute. Instances use the
    default identity-based equality, so a ``Literal`` never compares equal
    to a plain ``str`` with the same content.
    '''

    def __init__(self, string):
        self.string = string

    def __repr__(self):
        return 'Literal({!r})'.format(self.string)

    def __str__(self):
        return self.string
class Command:
    '''Represents a command as a list of arguments (``args``).'''

    def __init__(self, args=None):
        '''Create a command.

        Args:
            args: Optional iterable of initial arguments. Its items are
                copied into a new list, so the caller's object is never
                shared with the command. Omitted or ``None`` gives an
                empty argument list.
        '''
        # Accepting `args` makes the `Command([...])` text produced by
        # __repr__ a valid way to rebuild an equivalent object.
        self.args = [] if args is None else list(args)

    def __repr__(self):
        return f'Command({self.args!r})'
class Lexer(StringStream):
    '''Tokenizer for shell-like command strings with logical operators.

    Relies on the cursor API inherited from ``StringStream``. As used here,
    ``peek()`` is expected to return the character at the cursor (``None``
    at end of input), ``peek(1)`` the character after it, and ``advance(n)``
    to move the cursor forward by ``n`` characters -- TODO confirm against
    ``StringStream``.
    '''

    def parse(self):
        '''Split the input into operator tokens and literals.

        Returns:
            A list whose items are either one of the operator strings
            ``'!'``, ``'('``, ``')'``, ``'&&'``, ``'||'`` or a ``Literal``.

        Raises:
            ValueError: If a single ``&`` or ``|`` is found.
        '''
        tokens = []
        while True:
            token = self._parse_token()
            if token is None:
                return tokens
            tokens.append(token)

    def _parse_token(self):
        '''Return the next token, or ``None`` once the input is exhausted.'''
        c = self.peek()

        # Skip whitespace with a loop instead of recursing once per
        # character, so long runs of whitespace cannot exceed the
        # interpreter's recursion limit.
        while c is not None and c.isspace():
            self.advance(1)
            c = self.peek()

        if c is None:
            return None

        if c in ('!', '(', ')'):
            self.advance(1)
            return c

        if c in ('&', '|'):
            # Only the doubled forms `&&` and `||` are valid operators.
            if self.peek(1) != c:
                raise ValueError(f'Single `{c}` found')
            self.advance(2)
            return c * 2

        return self._parse_literal()

    def _parse_literal(self):
        '''Consume one word and return it wrapped in a ``Literal``.

        The word ends at end of input, at whitespace, or at one of the
        characters ``& | ! ( )``. Quoted sections are delegated to the
        inherited ``parse_shell_double_quote`` / ``parse_shell_single_quote``
        helpers, whose return value is appended to the word (presumably the
        unquoted text, with the cursor left after the closing quote --
        verify against ``StringStream``).
        '''
        parts = []
        while True:
            c = self.peek()
            if c is None or c.isspace() or c in ('&', '|', '!', '(', ')'):
                # Operators that appeared inside quotes stay part of the
                # Literal, so they are never mistaken for operator tokens.
                return Literal(''.join(parts))
            if c == '"':
                parts.append(self.parse_shell_double_quote(in_quotes=False))
            elif c == "'":
                parts.append(self.parse_shell_single_quote(in_quotes=False))
            else:
                self.advance(1)
                parts.append(c)
def make_commands(tokens):
    '''Group consecutive non-operator tokens into commands.

    The operator tokens `&&`, `||`, `!`, `(` and `)` are passed through
    unchanged. Every run of other tokens between them is converted with
    `str()` and collected as the args of a single Command.

    Example input:
        [Literal("foo"), Literal("bar"), '&&', Literal("baz")]

    Resulting output:
        [Command(["foo", "bar"]), '&&', Command(["baz"])]
    '''
    operators = ('&&', '||', '!', '(', ')')
    result = []
    pending = Command()

    for tok in tokens:
        if tok not in operators:
            pending.args.append(str(tok))
            continue

        # An operator ends the command collected so far (if there is one).
        if pending.args:
            result.append(pending)
            pending = Command()
        result.append(tok)

    # Flush the command that was still being collected at end of input.
    if pending.args:
        result.append(pending)

    return result
def check_syntax(tokens):
    '''Check a token sequence for syntax errors.

    Args:
        tokens: Iterable of tokens. The strings `&&`, `||`, `!`, `(` and `)`
            are treated as operators; every other item is treated as a
            command.

    Returns:
        None if the sequence is well formed.

    Raises:
        ValueError: On an operator, parenthesis or command in a position
            where it is not allowed, on unbalanced parentheses, on an empty
            sequence, or when the sequence ends with `&&`, `||` or `!`.
    '''
    # Values of `last` after which an operand (command, `(` or `!`) may
    # start; None stands for the beginning of the input.
    operand_expected = (None, '(', '&&', '||', '!')

    last = None
    depth = 0

    for token in tokens:
        if token == '(':
            depth += 1
            if last not in operand_expected:
                raise ValueError("Unexpected `(`")
        elif token == ')':
            depth -= 1
            if depth < 0 or last in operand_expected:
                raise ValueError("Unexpected `)`")
        elif token in ('&&', '||'):
            if last in operand_expected:
                raise ValueError(f"Unexpected `{token}`")
        elif token == '!':
            if last not in operand_expected:
                raise ValueError("Unexpected `!`")
        elif last not in operand_expected:
            # A command may not directly follow `)` (or another command),
            # e.g. `(a) b` has no operator joining the two operands.
            raise ValueError("Unexpected command")
        last = token

    if depth > 0:
        raise ValueError("Unclosed `(`")

    if last is None:
        raise ValueError("No command found")

    # A trailing `&&`, `||` or `!` is missing its right-hand operand.
    if last in ('&&', '||', '!'):
        raise ValueError(f"Unexpected end of input after `{last}`")
def parse(string):
    '''Tokenize `string`, group its literals into commands and validate them.

    The string is split by `Lexer`, the resulting tokens are grouped by
    `make_commands`, and the grouped list is validated with `check_syntax`.

    Returns the grouped list: Command objects interleaved with the operator
    strings `&&`, `||`, `!`, `(` and `)`.

    Raises ValueError if the lexer or the syntax check rejects the input.
    '''
    commands = make_commands(Lexer(string).parse())
    check_syntax(commands)
    return commands
|