Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

LiteralString and LiteralStringTest #47

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions python/selfie-lib/selfie_lib/Literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from abc import abstractmethod
from .EscapeLeadingWhitespace import EscapeLeadingWhitespace
import io
import re

# Generic type variable; presumably the parameter of LiteralFormat (used below
# as LiteralFormat[str] / LiteralFormat[bool]) -- confirm against its definition.
T = TypeVar("T")

Expand Down Expand Up @@ -71,6 +72,174 @@ def parse(self, string: str, language: Language) -> int:
return int(string.replace("_", ""))


# Delimiter that opens and closes the multi-line string literals produced by
# LiteralString.encodeMultiPython and consumed by LiteralString.parseMultiPython.
TRIPLE_QUOTE = '"""'


class LiteralString(LiteralFormat[str]):
    """Encode ``str`` values as Python string literals and parse them back.

    A value without a newline becomes a double-quoted single-line literal.
    A value containing a newline becomes a triple-quoted block whose content
    starts on the line after the opening quotes.
    """

    # Matches an inline char literal such as {'a'} or {'\n'} (see
    # inline_backslashes).
    _char_literal_pattern = re.compile(r"""\{'(\\?.)'\}""")

    # Raw character -> escaped form written by the single-line encoder.
    _SINGLE_LINE_ESCAPES = {
        "\b": "\\b",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        '"': '\\"',
        "\\": "\\\\",
    }

    # Letter following a backslash inside a {'\x'} char literal -> character.
    _CHAR_LITERAL_ESCAPES = {
        "t": "\t",
        "b": "\b",
        "n": "\n",
        "r": "\r",
        "'": "'",
        "\\": "\\",
    }

    # Character following a backslash -> decoded character (see
    # _unescape_python). ``\s`` decodes to a space.
    _SIMPLE_UNESCAPES = {
        '"': '"',
        "\\": "\\",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "s": " ",
        "t": "\t",
    }

    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")

    def encode(
        self, value: str, language: Language, encoding_policy: EscapeLeadingWhitespace
    ) -> str:
        """Return source code for a literal that evaluates to ``value``.

        Args:
            value: The string to encode.
            language: Target language; only ``Language.PYTHON`` is supported.
            encoding_policy: Leading-whitespace policy, used only for
                multi-line values.

        Raises:
            NotImplementedError: If ``language`` is not ``Language.PYTHON``
                (previously this silently returned ``None``).
        """
        if language != Language.PYTHON:
            raise NotImplementedError(
                f"Encoding for language {language} is not implemented."
            )
        # The check must be for a real newline; the former "/n" test never
        # matched, so multi-line values were always encoded on one line.
        if "\n" not in value:
            return self._encodeSinglePython(value)
        return self.encodeMultiPython(value, encoding_policy)

    def parse(self, string: str, language: Language) -> str:
        """Return the value denoted by the literal source ``string``.

        Args:
            string: Literal source including its surrounding quotes.
            language: Source language; only ``Language.PYTHON`` is supported.

        Raises:
            NotImplementedError: If ``language`` is not ``Language.PYTHON``
                (previously this silently returned ``None``).
        """
        if language != Language.PYTHON:
            raise NotImplementedError(
                f"Parsing for language {language} is not implemented."
            )
        if string.startswith(TRIPLE_QUOTE):
            return self.parseMultiPython(string)
        return self._parseSinglePython(string)

    def _encodeSinglePython(self, value: str) -> str:
        """Encode ``value`` as a double-quoted, single-line literal."""
        source = io.StringIO()
        source.write('"')
        for char in value:
            escaped = self._SINGLE_LINE_ESCAPES.get(char)
            if escaped is not None:
                source.write(escaped)
            elif self._is_control_char(char):
                # \uXXXX takes four *hex* digits; the decimal code point
                # would decode to a different character.
                source.write(f"\\u{ord(char):04x}")
            else:
                source.write(char)
        source.write('"')
        return source.getvalue()

    def _is_control_char(self, c: str) -> bool:
        """Return True if ``c`` is in U+0000..U+001F or is U+007F (DEL)."""
        return "\u0000" <= c <= "\u001f" or c == "\u007f"

    def _parseSinglePython(self, source_with_quotes: str) -> str:
        """Decode a double-quoted, single-line literal.

        ``inline_backslashes`` is deliberately not applied here: the encoder
        never emits ``{'x'}`` char literals, so rewriting them would corrupt
        ordinary content such as ``"{'a'}"``.
        """
        assert source_with_quotes.startswith('"')
        assert source_with_quotes.endswith('"')
        return self._unescape_python(source_with_quotes[1:-1])

    def encodeMultiPython(
        self, arg: str, escape_leading_whitespace: EscapeLeadingWhitespace
    ) -> str:
        """Encode ``arg`` as a triple-quoted block literal.

        Backslashes and embedded triple quotes are escaped. A trailing space
        or tab on a line is written as an escape so that it survives editors
        and the ``rstrip`` performed by ``parseMultiPython``. Leading
        whitespace is handed to ``escape_leading_whitespace.escape_line``.
        """
        escape_backslashes = arg.replace("\\", "\\\\")
        escape_triple_quotes = escape_backslashes.replace(TRIPLE_QUOTE, '\\"\\"\\"')

        def protect_trailing_whitespace(line: str) -> str:
            # "\x20" is just a space, so the old code protected nothing;
            # emit a real escape that _unescape_python decodes.
            if line.endswith(" "):
                return line[:-1] + "\\u0020"
            if line.endswith("\t"):
                return line[:-1] + "\\t"
            return line

        # split("\n") rather than splitlines(): keeps a trailing newline and
        # does not break lines on other separator characters.
        lines = escape_triple_quotes.split("\n")
        protect_whitespace = "\n".join(
            escape_leading_whitespace.escape_line(
                protect_trailing_whitespace(line), "\\u0020", "\\t"
            )
            for line in lines
        )

        return f"{TRIPLE_QUOTE}\n{protect_whitespace}{TRIPLE_QUOTE}"

    def inline_backslashes(self, source: str) -> str:
        """Replace every ``{'c'}`` / ``{'\\c'}`` char literal in ``source``.

        ``{'c'}`` becomes ``c``; ``{'\\t'}``, ``{'\\b'}``, ``{'\\n'}``,
        ``{'\\r'}``, ``{'\\''}`` and ``{'\\\\'}`` become the character they
        denote.

        Raises:
            ValueError: If a backslash is followed by any other character.
        """

        def replace_char(char_literal: str) -> str:
            if len(char_literal) == 1:
                return char_literal
            if len(char_literal) == 2 and char_literal[0] == "\\":
                try:
                    return self._CHAR_LITERAL_ESCAPES[char_literal[1]]
                except KeyError:
                    raise ValueError(
                        f"Unknown character literal {char_literal}"
                    ) from None
            raise ValueError(f"Unknown character literal {char_literal}")

        return self._char_literal_pattern.sub(
            lambda match: replace_char(match.group(1)), source
        )

    def _unescape_python(self, source: str) -> str:
        """Decode the backslash escapes in ``source``.

        Supported escapes are the keys of ``_SIMPLE_UNESCAPES`` plus
        ``\\uXXXX`` with exactly four hex digits.

        Raises:
            ValueError: On an unknown escape, a dangling trailing backslash,
                or a malformed ``\\u`` escape.
        """
        value = io.StringIO()
        i = 0
        while i < len(source):
            c = source[i]
            if c != "\\":
                value.write(c)
                i += 1
                continue

            i += 1
            if i >= len(source):
                # Previously surfaced as an IndexError.
                raise ValueError("Unknown escape sequence: trailing backslash")
            c = source[i]
            if c in self._SIMPLE_UNESCAPES:
                value.write(self._SIMPLE_UNESCAPES[c])
            elif c == "u":
                digits = source[i + 1 : i + 5]
                if len(digits) != 4 or not self._HEX_DIGITS.issuperset(digits):
                    raise ValueError(f"Malformed unicode escape \\u{digits}")
                value.write(chr(int(digits, 16)))
                i += 4
            else:
                raise ValueError(f"Unknown escape sequence {c}")
            i += 1
        return value.getvalue()

    @staticmethod
    def _common_indent(lines) -> str:
        """Return the leading whitespace shared by all non-blank ``lines``."""
        indents = [
            line[: len(line) - len(line.lstrip())] for line in lines if line.strip()
        ]
        if not indents:
            return ""
        # The common prefix of the smallest and largest strings is the
        # common prefix of the whole collection.
        low, high = min(indents), max(indents)
        for index, char in enumerate(low):
            if char != high[index]:
                return low[:index]
        return low

    def parseMultiPython(self, source_with_quotes: str) -> str:
        """Decode a triple-quoted block literal.

        The common indentation of the non-blank lines is removed, trailing
        whitespace on each line is stripped, escapes are decoded, and blank
        lines are kept as empty lines.
        """
        assert source_with_quotes.startswith(TRIPLE_QUOTE + "\n")
        assert source_with_quotes.endswith(TRIPLE_QUOTE)

        source = source_with_quotes[len(TRIPLE_QUOTE) + 1 : -len(TRIPLE_QUOTE)]
        lines = source.split("\n")
        common_prefix = self._common_indent(lines)

        def decode_line(line: str) -> str:
            if not line.strip():
                # Keep blank lines; dropping them changed the parsed value.
                return ""
            return self._unescape_python(line[len(common_prefix) :].rstrip())

        return "\n".join(decode_line(line) for line in lines)


class LiteralBoolean(LiteralFormat[bool]):
def encode(
self, value: bool, language: Language, encoding_policy: EscapeLeadingWhitespace
Expand Down
65 changes: 65 additions & 0 deletions python/selfie-lib/tests/LiteralString_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import pytest
from selfie_lib.Literals import LiteralString
from selfie_lib.EscapeLeadingWhitespace import EscapeLeadingWhitespace


class TestLiteralString:
    """Tests for the Python encoders and parsers of ``LiteralString``."""

    @pytest.mark.parametrize(
        "value, expected",
        [("1", '"1"'), ("\\", '"\\\\"'), ("1\n\tABC", '"1\\n\\tABC"')],
    )
    def test_encode_single(self, value, expected):
        literal_string = LiteralString()
        actual = literal_string._encodeSinglePython(value)
        assert actual == expected

    @pytest.mark.parametrize(
        "value, expected",
        [("1", "`1`"), ("\\", "`\\\\`"), ("1\n\tABC", "`1\\n\\tABC`")],
    )
    def test_encode_single_with_dollars(self, value, expected):
        # Backticks stand in for double quotes to keep the expectations readable.
        literal_string = LiteralString()
        actual = literal_string._encodeSinglePython(value)
        assert actual == expected.replace("`", '"')

    # Only EscapeLeadingWhitespace.NEVER is exercised, so leading whitespace
    # is expected verbatim. Escaping of trailing whitespace is not asserted here.
    @pytest.mark.parametrize(
        "value, expected",
        [
            ("1", "'''\n1'''"),
            ("\\", "'''\n\\\\'''"),
            ("  leading\ntrailing", "'''\n  leading\ntrailing'''"),
        ],
    )
    def test_encode_multi(self, value, expected):
        literal_string = LiteralString()
        actual = literal_string.encodeMultiPython(value, EscapeLeadingWhitespace.NEVER)
        # Single quotes stand in for double quotes in the expectations.
        assert actual == expected.replace("'", '"')

    @pytest.mark.parametrize(
        "value, expected", [("1", "1"), ("\\\\", "\\"), ("1\\n\\tABC", "1\n\tABC")]
    )
    def test_parse_single(self, value, expected):
        literal_string = LiteralString()
        # Built outside an f-string: reusing the quote character and a
        # backslash inside an f-string expression fails before Python 3.12.
        source = '"' + value.replace("'", '"') + '"'
        actual = literal_string._parseSinglePython(source)
        assert actual == expected

    @pytest.mark.parametrize(
        "value, expected",
        [
            ("\n123\nabc", "123\nabc"),
            ("\n  123\n  abc", "123\nabc"),
            ("\n  123  \n  abc\t", "123\nabc"),
            ("\n  123  \n  abc\t", "123\nabc"),
            # Two literal spaces plus the escaped "\s" decode to three spaces.
            ("\n  123  \\s\n  abc\t\\s", "123   \nabc\t "),
        ],
    )
    def test_parse_multi(self, value, expected):
        literal_string = LiteralString()
        triple_quote = '"""'
        source = triple_quote + value.replace("'", '"') + triple_quote
        actual = literal_string.parseMultiPython(source)
        assert actual == expected