From 2d52c17cabed4811e4eac46af2cfe775d46d540b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= <samu.roettgermann@t-online.de>
Date: Sat, 6 Jun 2026 00:41:37 +0200
Subject: [PATCH 1/8] enhanced codeblock regex and made stricter requirements
 for valid codeblocks

---
 bot/exts/info/codeblock/_parsing.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py
index abad09eef1..0408692295 100644
--- a/bot/exts/info/codeblock/_parsing.py
+++ b/bot/exts/info/codeblock/_parsing.py
@@ -35,11 +35,11 @@
     fr"""
     (?P<ticks>
         (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
-        \2{{2}}                       # Match previous group 2 more times to ensure the same char.
+        \2*                           # Match previous group up to N more times to ensure the same char.
     )
-    (?P<lang>[A-Za-z0-9\+\-\.]+\n)?   # Optionally match a language specifier followed by a newline.
+    (?P<lang>[A-Za-z0-9+\-.]+\s)?     # Optionally match a language specifier followed by a whitespace.
     (?P<code>.+?)                     # Match the actual code within the block.
-    \1                                # Match the same 3 ticks used at the start of the block.
+    \1                                # Match the same N ticks used at the start of the block.
     """,
     re.DOTALL | re.VERBOSE
 )
@@ -86,9 +86,9 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
     for match in _RE_CODE_BLOCK.finditer(message):
         # Used to ensure non-matched groups have an empty string as the default value.
         groups = match.groupdict("")
-        language = groups["lang"].strip()  # Strip the newline cause it's included in the group.
+        language = groups["lang"].strip()  # Strip the whitespace cause it's included in the group.
 
-        if groups["tick"] == BACKTICK and language:
+        if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language and ("\n" in groups["lang"]):
             log.trace("Message has a valid code block with a language; returning None.")
             return None
         if has_lines(groups["code"], constants.CodeBlock.minimum_lines):

From e23f230c85899ae40e7db2311c8447112cd136f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= <samu.roettgermann@t-online.de>
Date: Sat, 6 Jun 2026 00:52:40 +0200
Subject: [PATCH 2/8] added empty newline for readability

---
 bot/exts/info/codeblock/_parsing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py
index 0408692295..e19934ba7a 100644
--- a/bot/exts/info/codeblock/_parsing.py
+++ b/bot/exts/info/codeblock/_parsing.py
@@ -91,6 +91,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
         if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language and ("\n" in groups["lang"]):
             log.trace("Message has a valid code block with a language; returning None.")
             return None
+
         if has_lines(groups["code"], constants.CodeBlock.minimum_lines):
             code_block = CodeBlock(groups["code"], language, groups["tick"])
             code_blocks.append(code_block)

From 72a988b976eb3fa3f1a3fb2a8e1ddb25e6f5a7aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= <samu.roettgermann@t-online.de>
Date: Sat, 6 Jun 2026 00:53:41 +0200
Subject: [PATCH 3/8] minor code cleanup

---
 bot/exts/info/codeblock/_parsing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py
index e19934ba7a..0569b1433e 100644
--- a/bot/exts/info/codeblock/_parsing.py
+++ b/bot/exts/info/codeblock/_parsing.py
@@ -182,7 +182,7 @@ def parse_bad_language(content: str) -> BadLanguage | None:
     )
 
 
-def _get_leading_spaces(content: str) -> int:
+def _get_leading_spaces(content: str) -> int | None:
     """Return the number of spaces at the start of the first line in `content`."""
     leading_spaces = 0
     for char in content:
@@ -190,6 +190,7 @@ def _get_leading_spaces(content: str) -> int:
             leading_spaces += 1
         else:
             return leading_spaces
+
     return None
 
 

From 9b9bdb12d14db84f33adeef9c8bdb2fbfa364cf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= <samu.roettgermann@t-online.de>
Date: Sun, 7 Jun 2026 00:32:51 +0200
Subject: [PATCH 4/8] added unit tests for the codeblock detection

---
 tests/bot/exts/info/codeblock/__init__.py     |   0
 tests/bot/exts/info/codeblock/test_parsing.py | 153 ++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 tests/bot/exts/info/codeblock/__init__.py
 create mode 100644 tests/bot/exts/info/codeblock/test_parsing.py

diff --git a/tests/bot/exts/info/codeblock/__init__.py b/tests/bot/exts/info/codeblock/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/bot/exts/info/codeblock/test_parsing.py b/tests/bot/exts/info/codeblock/test_parsing.py
new file mode 100644
index 0000000000..53353fadbd
--- /dev/null
+++ b/tests/bot/exts/info/codeblock/test_parsing.py
@@ -0,0 +1,153 @@
+import unittest
+
+from bot.exts.info.codeblock import _parsing as parsing
+
+
+class FindFaultyCodeblocksTest(unittest.TestCase):
+    def test_should_recognize_missing_language(self):
+        message = """```
+x = 4
+y = 2
+print("abc")
+```"""
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 1)
+
+    def test_should_recognize_contained_codeblock(self):
+        message = """'
+wouldn't it be easier to do:
+```py
+say_hi = lambda:
+    print('hello')
+    print('world')
+say_hi()
+
+'
+```
+
+'"""
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNone(faulty_code_blocks)
+
+    def test_should_recognize_contained_codeblock_even_if_that_breaks_formatting(self):
+        message = """```
+        ```py
+        x = 4
+        y = 3
+        z = 2
+        print("abc")
+        ```
+        ```
+        """
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNone(faulty_code_blocks)
+
+    def test_should_not_recognize_normal_single_quotes(self):
+        """Normal single quotes refer to single quotes that appear normally in text,
+        for example in "I'll", "We're", etc."""
+        message = """I'm writing line 1
+        and we're writing line 2
+        we'll also be checking another of those
+        and some odd 'variations
+        isn't it beautiful?"""
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 0)
+
+    def test_should_not_recognize_quoting_single_quotes(self):
+        message = """ 'I am doing a long quote.
+        Sure, I could just use the > character
+        for correct quoting
+        but whatever...
+        End of quote' """
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 0)
+
+
+    def test_should_not_recognize_normal_double_quotes(self):
+        """Normal double quotes refer to double quotes that appear normally in text to quote something."""
+        message = """ "I am doing a long quote.
+        Sure, I could just use the > character
+        for correct quoting
+        but whatever...
+        End of quote" """
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 0)
+
+    def test_should_not_recognize_normal_double_quotes_python_text(self):
+        message = """ "python is a great language
+        great
+        great
+        great language
+        enough lines?
+        yes" """
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 0)
+
+    def test_should_recognize_single_backtick_no_language(self):
+        message = """`
+        x = 4
+        y = 3
+        z = 2
+        print("abc")
+        `"""
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 1)
+
+    def test_should_recognize_single_backtick_with_language(self):
+        message = """`py
+        x = 4
+        y = 3
+        z = 2
+        print("abc")
+        `"""
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 1)
+
+    def test_should_recognize_single_single_quote_with_py_language(self):
+        message = """'py
+        x = 4
+        y = 3
+        z = 2
+        print("abc")
+        '"""
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 1)
+
+    def test_should_recognize_single_single_quote_with_python_language(self):
+        message = """'python
+        x = 4
+        y = 3
+        z = 2
+        print("abc")
+        '"""
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 1)
+
+    def test_should_recognize_wrong_number_of_backticks(self):
+        message = """``py
+        x = 4
+        y = 3
+        z = 2
+        print("abc")
+        ``"""
+
+        faulty_code_blocks = parsing.find_faulty_code_blocks(message)
+        self.assertIsNotNone(faulty_code_blocks)
+        self.assertEqual(len(faulty_code_blocks), 1)

From f272a86b7f449fe02a7b8a51550b30bc86ac2195 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= <samu.roettgermann@t-online.de>
Date: Sun, 7 Jun 2026 00:35:28 +0200
Subject: [PATCH 5/8] made faulty codeblock detection more restrictive and
 improved bad ticks error message

---
 bot/exts/info/codeblock/_instructions.py |  4 ++--
 bot/exts/info/codeblock/_parsing.py      | 25 ++++++++++++++++++------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/bot/exts/info/codeblock/_instructions.py b/bot/exts/info/codeblock/_instructions.py
index 210217ccaf..e855717aab 100644
--- a/bot/exts/info/codeblock/_instructions.py
+++ b/bot/exts/info/codeblock/_instructions.py
@@ -38,7 +38,7 @@ def _get_bad_ticks_message(code_block: _parsing.CodeBlock) -> str | None:
     valid_ticks = f"\\{_parsing.BACKTICK}" * 3
     instructions = (
         "You are using the wrong character instead of backticks. "
-        f"Use {valid_ticks}, not `{code_block.tick * 3}`."
+        f"Use {valid_ticks}, not `{code_block.ticks}`."
     )
 
     log.trace("Check if the bad ticks code block also has issues with the language specifier.")
@@ -138,7 +138,7 @@ def get_instructions(content: str) -> str | None:
     """
     log.trace("Getting formatting instructions.")
 
-    blocks = _parsing.find_code_blocks(content)
+    blocks = _parsing.find_faulty_code_blocks(content)
     if blocks is None:
         log.trace("At least one valid code block found; no instructions to return.")
         return None
diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py
index 0569b1433e..bac6b862f7 100644
--- a/bot/exts/info/codeblock/_parsing.py
+++ b/bot/exts/info/codeblock/_parsing.py
@@ -6,6 +6,8 @@
 from collections.abc import Sequence
 from typing import NamedTuple
 
+import regex
+
 from bot import constants
 from bot.log import get_logger
 from bot.utils import has_lines
@@ -33,6 +35,8 @@
 
 _RE_CODE_BLOCK = re.compile(
     fr"""
+    (?:^|                             # the ticks need to start at the front of a line to be recognized
+    \s)                               # or need to have a preceeding whitespace (to avoid detection of words like I'll).
     (?P<ticks>
         (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
         \2*                           # Match previous group up to N more times to ensure the same char.
@@ -43,6 +47,7 @@
     """,
     re.DOTALL | re.VERBOSE
 )
+_RE_CODE_BLOCK_REGEX = regex.compile(_RE_CODE_BLOCK.pattern, regex.DOTALL | regex.VERBOSE)
 
 _RE_LANGUAGE = re.compile(
     fr"""
@@ -59,6 +64,7 @@ class CodeBlock(NamedTuple):
 
     content: str
     language: str
+    ticks: str
     tick: str
 
 
@@ -70,9 +76,9 @@ class BadLanguage(NamedTuple):
     has_terminal_newline: bool
 
 
-def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
+def find_faulty_code_blocks(message: str) -> Sequence[CodeBlock] | None:
     """
-    Find and return all Markdown code blocks in the `message`.
+    Find and return all faulty Markdown code blocks in the `message`.
 
     Code blocks with 3 or fewer lines are excluded.
 
@@ -83,7 +89,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
     log.trace("Finding all code blocks in a message.")
 
     code_blocks = []
-    for match in _RE_CODE_BLOCK.finditer(message):
+    for match in _RE_CODE_BLOCK_REGEX.finditer(message, overlapped=True):
         # Used to ensure non-matched groups have an empty string as the default value.
         groups = match.groupdict("")
         language = groups["lang"].strip()  # Strip the whitespace cause it's included in the group.
@@ -92,11 +98,18 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
             log.trace("Message has a valid code block with a language; returning None.")
             return None
 
-        if has_lines(groups["code"], constants.CodeBlock.minimum_lines):
-            code_block = CodeBlock(groups["code"], language, groups["tick"])
+        if not has_lines(groups["code"], constants.CodeBlock.minimum_lines):
+            log.trace("Skipped a code block shorter than 4 lines.")
+            continue
+
+        if (groups["tick"] == BACKTICK
+                or (language in PY_LANG_CODES and is_python_code(groups["code"]))
+                or len(groups["ticks"]) >= 2):
+            log.trace("Message has an invalid code block.")
+            code_block = CodeBlock(groups["code"], language, groups["ticks"], groups["tick"])
             code_blocks.append(code_block)
         else:
-            log.trace("Skipped a code block shorter than 4 lines.")
+            log.trace("Skipped invalid code block due to uncertainty if it is supposed to be a code block.")
 
     return code_blocks
 

From eb4a021b8ef01292fe6269abc95e3bb7076bfbc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= <samu.roettgermann@t-online.de>
Date: Sun, 7 Jun 2026 00:50:18 +0200
Subject: [PATCH 6/8] improved string formatting in the tests

---
 tests/bot/exts/info/codeblock/test_parsing.py | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/bot/exts/info/codeblock/test_parsing.py b/tests/bot/exts/info/codeblock/test_parsing.py
index 53353fadbd..4507fcaa54 100644
--- a/tests/bot/exts/info/codeblock/test_parsing.py
+++ b/tests/bot/exts/info/codeblock/test_parsing.py
@@ -6,27 +6,27 @@
 class FindFaultyCodeblocksTest(unittest.TestCase):
     def test_should_recognize_missing_language(self):
         message = """```
-x = 4
-y = 2
-print("abc")
-```"""
+        x = 4
+        y = 2
+        print("abc")
+        ```"""
         faulty_code_blocks = parsing.find_faulty_code_blocks(message)
         self.assertIsNotNone(faulty_code_blocks)
         self.assertEqual(len(faulty_code_blocks), 1)
 
     def test_should_recognize_contained_codeblock(self):
         message = """'
-wouldn't it be easier to do:
-```py
-say_hi = lambda:
-    print('hello')
-    print('world')
-say_hi()
+        wouldn't it be easier to do:
+        ```py
+        say_hi = lambda:
+            print('hello')
+            print('world')
+        say_hi()
 
-'
-```
+        '
+        ```
 
-'"""
+        '"""
         faulty_code_blocks = parsing.find_faulty_code_blocks(message)
         self.assertIsNone(faulty_code_blocks)
 

From d92d178c7ec8cf1f5bc52a7d852335e66b277437 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20R=C3=B6ttgermann?=
 <38440557+SamuelRoettgermann@users.noreply.github.com>
Date: Sun, 7 Jun 2026 01:08:46 +0200
Subject: [PATCH 7/8] Update bot/exts/info/codeblock/_parsing.py

Update regex comment

Co-authored-by: Joe Banks <joe@jb3.dev>
---
 bot/exts/info/codeblock/_parsing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py
index bac6b862f7..c6f1ef2de2 100644
--- a/bot/exts/info/codeblock/_parsing.py
+++ b/bot/exts/info/codeblock/_parsing.py
@@ -36,7 +36,7 @@
 _RE_CODE_BLOCK = re.compile(
     fr"""
     (?:^|                             # the ticks need to start at the front of a line to be recognized
-    \s)                               # or need to have a preceeding whitespace (to avoid detection of words like I'll).
+    \s)                               # or need to have a preceding whitespace (to avoid detection of words like I'll).
     (?P<ticks>
         (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
         \2*                           # Match previous group up to N more times to ensure the same char.

From 2ade4a3d56c86affa4c186b8a7c1ce7c169806f1 Mon Sep 17 00:00:00 2001
From: Samuel <samu.roettgermann@t-online.de>
Date: Tue, 9 Jun 2026 15:34:33 +0200
Subject: [PATCH 8/8] added is_python field for code blocks; added comment
 explaining regex duplication

---
 bot/exts/info/codeblock/_instructions.py |  8 ++++----
 bot/exts/info/codeblock/_parsing.py      | 11 +++++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/bot/exts/info/codeblock/_instructions.py b/bot/exts/info/codeblock/_instructions.py
index e855717aab..7204dbeae4 100644
--- a/bot/exts/info/codeblock/_instructions.py
+++ b/bot/exts/info/codeblock/_instructions.py
@@ -44,7 +44,7 @@ def _get_bad_ticks_message(code_block: _parsing.CodeBlock) -> str | None:
     log.trace("Check if the bad ticks code block also has issues with the language specifier.")
     addition_msg = _get_bad_lang_message(code_block.content)
     if not addition_msg and not code_block.language:
-        addition_msg = _get_no_lang_message(code_block.content)
+        addition_msg = _get_no_lang_message(code_block)
 
     # Combine the back ticks message with the language specifier message. The latter will
     # already have an example code block.
@@ -112,7 +112,7 @@ def _get_bad_lang_message(content: str) -> str | None:
     return None
 
 
-def _get_no_lang_message(content: str) -> str | None:
+def _get_no_lang_message(code_block: _parsing.CodeBlock) -> str | None:
     """
     Return instructions on specifying a language for a code block.
 
@@ -120,7 +120,7 @@ def _get_no_lang_message(content: str) -> str | None:
     """
     log.trace("Creating instructions for a missing language.")
 
-    if _parsing.is_python_code(content):
+    if code_block.is_python:
         example_blocks = _get_example("py")
 
         # Note that _get_bad_ticks_message expects the first line to have two newlines.
@@ -160,6 +160,6 @@ def get_instructions(content: str) -> str | None:
             # Check for a bad language first to avoid parsing content into an AST.
             instructions = _get_bad_lang_message(block.content)
             if not instructions:
-                instructions = _get_no_lang_message(block.content)
+                instructions = _get_no_lang_message(block)
 
     return instructions
diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py
index c6f1ef2de2..22cbe4344e 100644
--- a/bot/exts/info/codeblock/_parsing.py
+++ b/bot/exts/info/codeblock/_parsing.py
@@ -47,7 +47,8 @@
     """,
     re.DOTALL | re.VERBOSE
 )
-_RE_CODE_BLOCK_REGEX = regex.compile(_RE_CODE_BLOCK.pattern, regex.DOTALL | regex.VERBOSE)
+# Copy of _RE_CODE_BLOCK for the `regex` module; written via re.compile above because regex.compile isn't highlighted.
+_REGEX_CODE_BLOCK = regex.compile(_RE_CODE_BLOCK.pattern, regex.DOTALL | regex.VERBOSE)
 
 _RE_LANGUAGE = re.compile(
     fr"""
@@ -66,6 +67,7 @@ class CodeBlock(NamedTuple):
     language: str
     ticks: str
     tick: str
+    is_python: bool
 
 
 class BadLanguage(NamedTuple):
@@ -89,7 +91,7 @@ def find_faulty_code_blocks(message: str) -> Sequence[CodeBlock] | None:
     log.trace("Finding all code blocks in a message.")
 
     code_blocks = []
-    for match in _RE_CODE_BLOCK_REGEX.finditer(message, overlapped=True):
+    for match in _REGEX_CODE_BLOCK.finditer(message, overlapped=True):
         # Used to ensure non-matched groups have an empty string as the default value.
         groups = match.groupdict("")
         language = groups["lang"].strip()  # Strip the whitespace cause it's included in the group.
@@ -102,11 +104,12 @@ def find_faulty_code_blocks(message: str) -> Sequence[CodeBlock] | None:
             log.trace("Skipped a code block shorter than 4 lines.")
             continue
 
+        is_python = is_python_code(groups["code"])
         if (groups["tick"] == BACKTICK
-                or (language in PY_LANG_CODES and is_python_code(groups["code"]))
+                or (language in PY_LANG_CODES and is_python)
                 or len(groups["ticks"]) >= 2):
             log.trace("Message has an invalid code block.")
-            code_block = CodeBlock(groups["code"], language, groups["ticks"], groups["tick"])
+            code_block = CodeBlock(groups["code"], language, groups["ticks"], groups["tick"], is_python)
             code_blocks.append(code_block)
         else:
             log.trace("Skipped invalid code block due to uncertainty if it is supposed to be a code block.")