Description: Adapt the parser to Python 3.12's tokenization of f-strings (PEP 701)

Origin: upstream 2a50dc02bb709161d62d6f7dc5d6f2733e534c09 + 87c06c0f5745f577c12db39852c6f763a2d41954 + f2c761320a5a73d6027ae3649843e6bf6a24f324
Author: Hocnonsense, Rebecca N. Palmer <rebecca_palmer@zoho.com>

--- a/snakemake/parser.py
+++ b/snakemake/parser.py
@@ -3,9 +3,10 @@ __copyright__ = "Copyright 2022, Johanne
 __email__ = "johannes.koester@uni-due.de"
 __license__ = "MIT"
 
+import sys
 import textwrap
 import tokenize
-from typing import Any, Dict, Generator, List, Optional
+from typing import Any, Callable, Dict, Generator, List, Optional
 
 import snakemake
 from snakemake import common, sourcecache, workflow
@@ -55,6 +56,10 @@ def is_string(token):
     return token.type == tokenize.STRING
 
 
+def is_fstring_start(token):
+    return sys.version_info >= (3, 12) and token.type == tokenize.FSTRING_START
+
+
 def is_eof(token):
     return token.type == tokenize.ENDMARKER
 
@@ -74,7 +79,7 @@ class TokenAutomaton:
     def __init__(self, snakefile: "Snakefile", base_indent=0, dedent=0, root=True):
         self.root = root
         self.snakefile = snakefile
-        self.state = None
+        self.state: Callable[[tokenize.TokenInfo], Generator] = None  # type: ignore
         self.base_indent = base_indent
         self.line = 0
         self.indent = 0
@@ -95,11 +100,37 @@ class TokenAutomaton:
             self.indent = token.end[1] - self.base_indent
             self.was_indented |= self.indent > 0
 
+    def parse_fstring(self, token: tokenize.TokenInfo):
+        # Python >= 3.12 only: the f-string arrives as separate FSTRING_START /
+        # FSTRING_MIDDLE / FSTRING_END tokens; rejoin them into one string, see
+        # [pep-0701](https://peps.python.org/pep-0701)
+        isin_fstring = 1
+        t = token.string
+        for t1 in self.snakefile:
+            if t1.type == tokenize.FSTRING_START:
+                isin_fstring += 1
+                t += t1.string
+            elif t1.type == tokenize.FSTRING_END:
+                isin_fstring -= 1
+                t += t1.string
+            elif t1.type == tokenize.FSTRING_MIDDLE:
+                t += t1.string.replace("{", "{{").replace("}", "}}")
+            else:
+                t += t1.string
+            if isin_fstring == 0:
+                break
+        if hasattr(self, "cmd") and self.cmd[-1][1] == token:
+            self.cmd[-1] = t, token
+        return t
+
     def consume(self):
         for token in self.snakefile:
             self.indentation(token)
             try:
                 for t, orig in self.state(token):
+                    # python >= 3.12 only
+                    if is_fstring_start(token):
+                        t = self.parse_fstring(token)
                     if self.lasttoken == "\n" and not t.isspace():
                         yield INDENT * self.effective_indent, orig
                     yield t, orig
@@ -125,6 +156,7 @@ class TokenAutomaton:
 
 class KeywordState(TokenAutomaton):
     prefix = ""
+    start: Callable[[], Generator[str, None, None]]
 
     def __init__(self, snakefile, base_indent=0, dedent=0, root=True):
         super().__init__(snakefile, base_indent=base_indent, dedent=dedent, root=root)
@@ -569,10 +601,10 @@ class AbstractCmd(Run):
         super().__init__(
             snakefile, rulename, base_indent=base_indent, dedent=dedent, root=root
         )
-        self.cmd = list()
+        self.cmd: list[tuple[str, tokenize.TokenInfo]] = []
         self.token = None
         if self.overwrite_cmd is not None:
-            self.block_content = self.overwrite_block_content
+            self.block_content = self.overwrite_block_content  # type: ignore
 
     def is_block_end(self, token):
         return (self.line and self.indent <= 0) or is_eof(token)
@@ -597,7 +629,7 @@ class AbstractCmd(Run):
         yield INDENT * (self.effective_indent + 1)
         yield self.end_func
         yield "("
-        yield "\n".join(self.cmd)
+        yield from self.cmd
         yield from self.args()
         yield "\n"
         yield ")"
@@ -610,19 +642,18 @@ class AbstractCmd(Run):
             self.error(
                 "Command must be given as string after the shell keyword.", token
             )
-        for t in self.end():
-            yield t, self.token
+        yield from super().decorate_end(self.token)
 
     def block_content(self, token):
         self.token = token
-        self.cmd.append(token.string)
+        self.cmd.append((token.string, token))
         yield token.string, token
 
     def overwrite_block_content(self, token):
         if self.token is None:
             self.token = token
             cmd = repr(self.overwrite_cmd)
-            self.cmd.append(cmd)
+            self.cmd.append((cmd, token))
             yield cmd, token
 
 
@@ -1290,8 +1321,8 @@ def parse(path, workflow, overwrite_shel
             )
             snakefile.lines += t.count("\n")
             compilation.append(t)
-        compilation = "".join(format_tokens(compilation))
-        if linemap:
-            last = max(linemap)
-            linemap[last + 1] = linemap[last]
-        return compilation, linemap, snakefile.rulecount
+    join_compilation = "".join(format_tokens(compilation))
+    if linemap:
+        last = max(linemap)
+        linemap[last + 1] = linemap[last]
+    return join_compilation, linemap, snakefile.rulecount
--- /dev/null
+++ b/tests/test_fstring/expected-results/SID23454678.txt
@@ -0,0 +1 @@
+rrr
--- /dev/null
+++ b/tests/test_fstring/Snakefile
@@ -0,0 +1,51 @@
+shell.executable("bash")
+
+PREFIX = "SID23454678"
+mid = ".t"
+
+rule unit1:
+    output:
+        f"{PREFIX}{mid}xt",
+    shell:
+        "echo '>'{output}'<'; echo 'rrr' > {output}; sleep 1"
+
+
+rule unit2:
+    shell:
+        f"ls"
+
+assert (
+    f"""
+{
+  "hello, snakemake"
+}
+"""
+    == """
+hello, snakemake
+"""
+)
+assert (
+    f"""
+    {
+  "hello, snakemake"
+}
+"""
+    == """
+    hello, snakemake
+"""
+)
+
+if 1:
+    assert (
+        f"""
+{
+  "hello, snakemake"
+}
+"""
+        == """
+hello, snakemake
+"""
+    )
+
+assert f"FORMAT['{PREFIX}']['{{}}']" == "FORMAT['SID23454678']['{}']"
+assert f"FORMAT['{PREFIX}'][}}'{{'{{]" == "FORMAT['SID23454678'][}'{'{]"
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1986,6 +1986,10 @@ def test_ensure_checksum_fail():
     run(dpath("test_ensure"), targets=["d"], shouldfail=True)
 
 
+def test_fstring():
+    run(dpath("test_fstring"), targets=["SID23454678.txt"])
+
+
 @skip_on_windows
 def test_github_issue1261():
     run(dpath("test_github_issue1261"), shouldfail=True, check_results=True)
