Skip to content
12 changes: 6 additions & 6 deletions bot/exts/info/codeblock/_instructions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ def _get_bad_ticks_message(code_block: _parsing.CodeBlock) -> str | None:
valid_ticks = f"\\{_parsing.BACKTICK}" * 3
instructions = (
"You are using the wrong character instead of backticks. "
f"Use {valid_ticks}, not `{code_block.tick * 3}`."
f"Use {valid_ticks}, not `{code_block.ticks}`."
)

log.trace("Check if the bad ticks code block also has issues with the language specifier.")
addition_msg = _get_bad_lang_message(code_block.content)
if not addition_msg and not code_block.language:
addition_msg = _get_no_lang_message(code_block.content)
addition_msg = _get_no_lang_message(code_block)

# Combine the back ticks message with the language specifier message. The latter will
# already have an example code block.
Expand Down Expand Up @@ -112,15 +112,15 @@ def _get_bad_lang_message(content: str) -> str | None:
return None


def _get_no_lang_message(content: str) -> str | None:
def _get_no_lang_message(code_block: _parsing.CodeBlock) -> str | None:
"""
Return instructions on specifying a language for a code block.

If the content of `code_block` is not valid Python or Python REPL code, return None.
"""
log.trace("Creating instructions for a missing language.")

if _parsing.is_python_code(content):
if code_block.is_python:
example_blocks = _get_example("py")

# Note that _get_bad_ticks_message expects the first line to have two newlines.
Expand All @@ -138,7 +138,7 @@ def get_instructions(content: str) -> str | None:
"""
log.trace("Getting formatting instructions.")

blocks = _parsing.find_code_blocks(content)
blocks = _parsing.find_faulty_code_blocks(content)
if blocks is None:
log.trace("At least one valid code block found; no instructions to return.")
return None
Expand All @@ -160,6 +160,6 @@ def get_instructions(content: str) -> str | None:
# Check for a bad language first to avoid parsing content into an AST.
instructions = _get_bad_lang_message(block.content)
if not instructions:
instructions = _get_no_lang_message(block.content)
instructions = _get_no_lang_message(block)

return instructions
28 changes: 22 additions & 6 deletions bot/exts/info/codeblock/_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from collections.abc import Sequence
from typing import NamedTuple

import regex

from bot import constants
from bot.log import get_logger
from bot.utils import has_lines
Expand Down Expand Up @@ -33,6 +35,8 @@

_RE_CODE_BLOCK = re.compile(
fr"""
(?:^| # the ticks need to start at the front of a line to be recognized
\s) # or need to have a preceding whitespace (to avoid detection of words like I'll).
(?P<ticks>
(?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
\2* # Match previous group up to N more times to ensure the same char.
Expand All @@ -43,6 +47,8 @@
""",
re.DOTALL | re.VERBOSE
)
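# Copy of _RE_CODE_BLOCK compiled with the `regex` module, whose finditer is called with overlapped=True.
# The pattern is written with re.compile first for highlighting reasons (regex.compile doesn't properly highlight).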
_REGEX_CODE_BLOCK = regex.compile(_RE_CODE_BLOCK.pattern, regex.DOTALL | regex.VERBOSE)

_RE_LANGUAGE = re.compile(
fr"""
Expand All @@ -59,7 +65,9 @@ class CodeBlock(NamedTuple):

content: str
language: str
ticks: str
tick: str
is_python: bool


class BadLanguage(NamedTuple):
Expand All @@ -70,9 +78,9 @@ class BadLanguage(NamedTuple):
has_terminal_newline: bool


def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
def find_faulty_code_blocks(message: str) -> Sequence[CodeBlock] | None:
Comment thread
SamuelRoettgermann marked this conversation as resolved.
"""
Find and return all Markdown code blocks in the `message`.
Find and return all faulty Markdown code blocks in the `message`.

Code blocks with 3 or fewer lines are excluded.

Expand All @@ -83,7 +91,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
log.trace("Finding all code blocks in a message.")

code_blocks = []
for match in _RE_CODE_BLOCK.finditer(message):
for match in _REGEX_CODE_BLOCK.finditer(message, overlapped=True):
# Used to ensure non-matched groups have an empty string as the default value.
groups = match.groupdict("")
language = groups["lang"].strip() # Strip the whitespace cause it's included in the group.
Expand All @@ -92,11 +100,19 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
log.trace("Message has a valid code block with a language; returning None.")
return None

if has_lines(groups["code"], constants.CodeBlock.minimum_lines):
code_block = CodeBlock(groups["code"], language, groups["tick"])
if not has_lines(groups["code"], constants.CodeBlock.minimum_lines):
log.trace("Skipped a code block shorter than 4 lines.")
continue

is_python = is_python_code(groups["code"])
if (groups["tick"] == BACKTICK
or (language in PY_LANG_CODES and is_python)
or len(groups["ticks"]) >= 2):
log.trace("Message has an invalid code block.")
code_block = CodeBlock(groups["code"], language, groups["ticks"], groups["tick"], is_python)
code_blocks.append(code_block)
else:
log.trace("Skipped a code block shorter than 4 lines.")
log.trace("Skipped invalid code block due to uncertainty if it is supposed to be a code block.")

return code_blocks

Expand Down
Empty file.
153 changes: 153 additions & 0 deletions tests/bot/exts/info/codeblock/test_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import unittest

from bot.exts.info.codeblock import _parsing as parsing


class FindFaultyCodeblocksTest(unittest.TestCase):
    """Tests for `parsing.find_faulty_code_blocks`."""

    def _assert_faulty_count(self, message: str, expected: int) -> None:
        """Assert that a list with `expected` faulty code blocks is returned for `message`."""
        found = parsing.find_faulty_code_blocks(message)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), expected)

    def _assert_returns_none(self, message: str) -> None:
        """Assert that None is returned for `message`."""
        self.assertIsNone(parsing.find_faulty_code_blocks(message))

    def test_should_recognize_missing_language(self):
        """A triple-backtick block without a language should be reported."""
        message = """```
x = 4
y = 2
print("abc")
```"""
        self._assert_faulty_count(message, 1)

    def test_should_recognize_contained_codeblock(self):
        """A block with a language nested between single quotes should yield None."""
        message = """'
wouldn't it be easier to do:
```py
say_hi = lambda:
print('hello')
print('world')
say_hi()

'
```

'"""
        self._assert_returns_none(message)

    def test_should_recognize_contained_codeblock_even_if_that_breaks_formatting(self):
        """A block with a language nested inside another pair of ticks should yield None."""
        message = """```
```py
x = 4
y = 3
z = 2
print("abc")
```
```
"""
        self._assert_returns_none(message)

    def test_should_not_recognize_normal_single_quotes(self):
        """
        Single quotes that appear normally in text should not be reported.

        Examples are the apostrophes in "I'll", "We're", etc.
        """
        message = """I'm writing line 1
and we're writing line 2
we'll also be checking another of those
and some odd 'variations
isn't it beautiful?"""
        self._assert_faulty_count(message, 0)

    def test_should_not_recognize_quoting_single_quotes(self):
        """Text quoted with single quotes should not be reported."""
        message = """ 'I am doing a long quote.
Sure, I could just use the > character
for correct quoting
but whatever...
End of quote' """
        self._assert_faulty_count(message, 0)

    def test_should_not_recognize_normal_double_quotes(self):
        """Double quotes that appear normally in text to quote something should not be reported."""
        message = """ "I am doing a long quote.
Sure, I could just use the > character
for correct quoting
but whatever...
End of quote" """
        self._assert_faulty_count(message, 0)

    def test_should_not_recognize_normal_double_quotes_python_text(self):
        """Double-quoted text that starts with the word "python" should not be reported."""
        message = """ "python is a great language
great
great
great language
enough lines?
yes" """
        self._assert_faulty_count(message, 0)

    def test_should_recognize_single_backtick_no_language(self):
        """A block delimited by single backticks without a language should be reported."""
        message = """`
x = 4
y = 3
z = 2
print("abc")
`"""
        self._assert_faulty_count(message, 1)

    def test_should_recognize_single_backtick_with_language(self):
        """A block delimited by single backticks with a language should be reported."""
        message = """`py
x = 4
y = 3
z = 2
print("abc")
`"""
        self._assert_faulty_count(message, 1)

    def test_should_recognize_single_single_quote_with_py_language(self):
        """A block delimited by single quotes with the `py` language should be reported."""
        message = """'py
x = 4
y = 3
z = 2
print("abc")
'"""
        self._assert_faulty_count(message, 1)

    def test_should_recognize_single_single_quote_with_python_language(self):
        """A block delimited by single quotes with the `python` language should be reported."""
        message = """'python
x = 4
y = 3
z = 2
print("abc")
'"""
        self._assert_faulty_count(message, 1)

    def test_should_recognize_wrong_number_of_backticks(self):
        """A block delimited by two backticks should be reported."""
        message = """``py
x = 4
y = 3
z = 2
print("abc")
``"""
        self._assert_faulty_count(message, 1)