File: simple-parser-example.rst

package info (click to toggle)
php-doctrine-lexer 3.0.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 240 kB
  • sloc: php: 485; xml: 66; makefile: 18
file content (103 lines) | stat: -rw-r--r-- 2,683 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
Simple Parser Example
=====================

Extend the ``Doctrine\Common\Lexer\AbstractLexer`` class and implement
the ``getCatchablePatterns``, ``getNonCatchablePatterns``, and ``getType``
methods. Here is a very simple example lexer implementation named ``CharacterTypeLexer``.
It tokenizes a string to ``T_UPPER``, ``T_LOWER`` and ``T_NUMBER`` tokens:

.. code-block:: php

    <?php

    use Doctrine\Common\Lexer\AbstractLexer;

    /**
     * Example lexer that splits its input into single ASCII letters and digits
     * and classifies each one as T_UPPER, T_LOWER or T_NUMBER.
     *
     * Every other character (whitespace, punctuation, ...) is skipped and never
     * becomes a token.
     *
     * @extends AbstractLexer<CharacterTypeLexer::T_*, string>
     */
    class CharacterTypeLexer extends AbstractLexer
    {
        const T_UPPER  = 1;
        const T_LOWER  = 2;
        const T_NUMBER = 3;

        /**
         * Patterns whose matches are turned into tokens: one letter or digit
         * at a time.
         *
         * @return list<string>
         */
        protected function getCatchablePatterns(): array
        {
            return [
                '[a-zA-Z0-9]',
            ];
        }

        /**
         * Patterns that are consumed without producing a token.
         *
         * Without this pattern, characters such as " " or "!" would reach
         * getType() and be reported as T_UPPER, because strtoupper() leaves
         * them unchanged.
         *
         * @return list<string>
         */
        protected function getNonCatchablePatterns(): array
        {
            return [
                '[^a-zA-Z0-9]+',
            ];
        }

        /**
         * Maps a token value to one of the T_* constants.
         *
         * @param string $value the matched character
         *
         * @return self::T_*
         *
         * @throws \UnexpectedValueException when the value is neither numeric,
         *                                   all upper case nor all lower case
         */
        protected function getType(&$value): int
        {
            // Digits must be checked first: strtoupper('1') === '1', so they
            // would otherwise be classified as upper case.
            if (is_numeric($value)) {
                return self::T_NUMBER;
            }

            if (strtoupper($value) === $value) {
                return self::T_UPPER;
            }

            if (strtolower($value) === $value) {
                return self::T_LOWER;
            }

            // Fail loudly instead of falling off the end of a function that is
            // declared to return int.
            throw new \UnexpectedValueException(
                sprintf('Unable to determine the token type of "%s".', $value)
            );
        }
    }

Use ``CharacterTypeLexer`` to extract an array of upper case characters:

.. code-block:: php

    <?php

    /**
     * Collects the upper case characters of a string with the help of a
     * CharacterTypeLexer.
     */
    class UpperCaseCharacterExtracter
    {
        public function __construct(private CharacterTypeLexer $lexer)
        {
        }

        /**
         * Tokenizes $string and returns the value of every T_UPPER token, in
         * the order the tokens are produced.
         *
         * @return list<string>
         */
        public function getUpperCaseCharacters(string $string): array
        {
            $lexer = $this->lexer;

            $lexer->setInput($string);
            // Advance once up front so that lookahead is set before the loop
            // inspects it.
            $lexer->moveNext();

            $matches = [];

            // Keep going for as long as there is a token to move onto.
            while ($lexer->lookahead) {
                $lexer->moveNext();

                if (! $lexer->token->isA(CharacterTypeLexer::T_UPPER)) {
                    continue;
                }

                $matches[] = $lexer->token->value;
            }

            return $matches;
        }
    }

    // Build the extractor around a fresh lexer, run it on a sample string
    // and dump the resulting array.
    $upperCaseCharacterExtractor = new UpperCaseCharacterExtracter(
        new CharacterTypeLexer()
    );

    $upperCaseCharacters = $upperCaseCharacterExtractor
        ->getUpperCaseCharacters('1aBcdEfgHiJ12');

    echo print_r($upperCaseCharacters, true);

The variable ``$upperCaseCharacters`` contains all of the upper case
characters:

.. code-block:: text

    Array
    (
        [0] => B
        [1] => E
        [2] => H
        [3] => J
    )

This is a simple example, but it should demonstrate the low-level API
that can be used to build more complex parsers.