Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

LiteralString and LiteralStringTest #47

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions python/selfie-lib/selfie_lib/Literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from abc import abstractmethod
from .EscapeLeadingWhitespace import EscapeLeadingWhitespace
import io
import re

# Generic type variable; presumably the value type that parameterizes
# LiteralFormat (e.g. LiteralFormat[str]) -- confirm against its definition.
T = TypeVar("T")

Expand Down Expand Up @@ -71,6 +72,191 @@ def parse(self, string: str, language: Language) -> int:
return int(string.replace("_", ""))


# Delimiter that opens and closes a multi-line (triple-quoted) string literal.
TRIPLE_QUOTE = '"""'


class LiteralString(LiteralFormat[str]):
    """Convert ``str`` values to and from Python string-literal source text.

    A value without a newline is written as a double-quoted one-line literal;
    a value containing a newline is written as a triple-quoted block whose
    content starts on the line after the opening delimiter. Only
    ``Language.PYTHON`` is handled.
    """

    # Matches a brace-wrapped, single-quoted character literal such as
    # {'x'} or {'\n'}; group 1 is the (optionally backslash-escaped) character.
    _char_literal_pattern = re.compile(r"""\{'(\\?.)'\}""")

    # Characters that get a dedicated backslash escape in a one-line literal.
    _single_line_escapes = {
        "\b": "\\b",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        '"': '\\"',
        "\\": "\\\\",
    }

    # Escape letter -> decoded character, for everything except ``\uXXXX``.
    _simple_unescapes = {
        '"': '"',
        "\\": "\\",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "s": " ",
        "t": "\t",
    }

    _hex_digits = frozenset("0123456789abcdefABCDEF")

    def encode(
        self, value: str, language: Language, encoding_policy: EscapeLeadingWhitespace
    ) -> str:
        """Return source text for ``value``.

        Args:
            value: The string to encode.
            language: Target language; only ``Language.PYTHON`` is supported.
            encoding_policy: Passed through to the multi-line encoder, which
                uses it to escape leading whitespace.

        Raises:
            NotImplementedError: If ``language`` is not ``Language.PYTHON``.
        """
        if language != Language.PYTHON:
            raise NotImplementedError(f"Unsupported language: {language}")
        if "\n" not in value:
            return self._encodeSinglePython(value)
        return self.encodeMultiPython(value, encoding_policy)

    def parse(self, string: str, language: Language) -> str:
        """Decode source text produced by :meth:`encode` back into a string.

        Text starting with the triple-quote delimiter is parsed as a
        multi-line block; anything else is parsed as a one-line literal.

        Raises:
            NotImplementedError: If ``language`` is not ``Language.PYTHON``.
        """
        if language != Language.PYTHON:
            raise NotImplementedError(f"Unsupported language: {language}")
        if string.startswith(TRIPLE_QUOTE):
            return self.parseMultiPython(string)
        return self._parseSinglePython(string)

    def _encodeSinglePython(self, value: str) -> str:
        """Encode ``value`` as a double-quoted literal on a single line."""
        source = io.StringIO()
        source.write('"')
        for char in value:
            escaped = self._single_line_escapes.get(char)
            if escaped is not None:
                source.write(escaped)
            elif self._is_control_char(char):
                # Four *hex* digits: _unescape_python reads \uXXXX as base 16.
                source.write(f"\\u{ord(char):04x}")
            else:
                source.write(char)
        source.write('"')
        return source.getvalue()

    def _is_control_char(self, c: str) -> bool:
        """Return True if ``c`` is in U+0000..U+001F or is U+007F (DEL)."""
        return "\u0000" <= c <= "\u001f" or c == "\u007f"

    # combined logic from parseSingleJava and parseSingleJavaish
    def _parseSinglePython(self, source_with_quotes: str) -> str:
        """Decode a double-quoted one-line literal (quotes included)."""
        assert source_with_quotes.startswith('"')
        assert source_with_quotes.endswith('"')
        source = source_with_quotes[1:-1]
        # NOTE(review): inline_backslashes rewrites {'x'} to x, which
        # _encodeSinglePython never emits, so a value that literally contains
        # {'x'} does not round-trip. Kept for compatibility -- confirm intent.
        to_unescape = self.inline_backslashes(source)  # changed from inline_dollar
        return self._unescape_python(to_unescape)

    @staticmethod
    def _split_lines(text: str) -> list:
        """Split on ``\\r\\n``, ``\\r`` or ``\\n``, keeping a trailing empty line.

        ``str.splitlines`` is avoided on purpose: it drops a final empty line
        and also breaks on separators such as ``\\v``, ``\\f`` and U+2028.
        """
        return text.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    @staticmethod
    def _leading_whitespace(line: str) -> str:
        """Return the run of whitespace at the start of ``line``."""
        return line[: len(line) - len(line.lstrip())]

    def encodeMultiPython(
        self, arg: str, escape_leading_whitespace: EscapeLeadingWhitespace
    ) -> str:
        """Encode ``arg`` as a triple-quoted block.

        Backslashes and embedded triple quotes are escaped, one trailing space
        or tab per line is replaced with ``\\s`` / ``\\t``, and each line is
        passed through ``escape_leading_whitespace.escape_line``. When the
        non-blank lines share leading whitespace, the first whitespace
        character of the last line is escaped as well.
        """
        escaped = arg.replace("\\", "\\\\").replace(TRIPLE_QUOTE, '\\"\\"\\"')

        def protect_trailing_whitespace(line: str) -> str:
            if line.endswith(" "):
                return line[:-1] + "\\s"
            if line.endswith("\t"):
                return line[:-1] + "\\t"
            return line

        lines = [
            escape_leading_whitespace.escape_line(
                protect_trailing_whitespace(line), "\\s", "\\t"
            )
            for line in self._split_lines(escaped)
        ]

        # Smallest leading-whitespace run among the non-blank lines.
        common_prefix = min(
            (self._leading_whitespace(line) for line in lines if line.strip()),
            default="",
        )
        if common_prefix:
            last = lines[-1]
            if last.startswith(" "):
                lines[-1] = "\\s" + last[1:]
            elif last.startswith("\t"):
                lines[-1] = "\\t" + last[1:]

        protect_whitespace = "\n".join(lines)
        return f"{TRIPLE_QUOTE}\n{protect_whitespace}{TRIPLE_QUOTE}"

    def inline_backslashes(self, source: str) -> str:
        """Replace each ``{'c'}`` / ``{'\\c'}`` occurrence with its character.

        Raises:
            ValueError: If the escaped character is not one of
                ``t b n r ' \\``.
        """
        escapes = {"t": "\t", "b": "\b", "n": "\n", "r": "\r", "'": "'", "\\": "\\"}

        def replace_char(char_literal: str) -> str:
            if len(char_literal) == 1:
                return char_literal
            if (
                len(char_literal) == 2
                and char_literal[0] == "\\"
                and char_literal[1] in escapes
            ):
                return escapes[char_literal[1]]
            raise ValueError(f"Unknown character literal {char_literal}")

        return self._char_literal_pattern.sub(
            lambda match: replace_char(match.group(1)), source
        )

    def _unescape_python(self, source: str) -> str:
        """Resolve backslash escapes in ``source``.

        Supported escapes: ``\\" \\\\ \\b \\f \\n \\r \\s`` (space), ``\\t``
        and ``\\uXXXX`` with exactly four hex digits.

        Raises:
            ValueError: On an unknown escape, a backslash at the end of the
                input, or a ``\\u`` escape without four hex digits.
        """
        if "\\" not in source:
            return source

        value = io.StringIO()
        length = len(source)
        i = 0
        while i < length:
            c = source[i]
            if c != "\\":
                value.write(c)
                i += 1
                continue

            i += 1
            if i >= length:
                raise ValueError("Incomplete escape sequence at end of string")
            c = source[i]
            if c == "u":
                digits = source[i + 1 : i + 5]
                if len(digits) != 4 or any(d not in self._hex_digits for d in digits):
                    raise ValueError(f"Invalid unicode escape \\u{digits}")
                value.write(chr(int(digits, 16)))
                i += 4
            elif c in self._simple_unescapes:
                value.write(self._simple_unescapes[c])
            else:
                raise ValueError(f"Unknown escape sequence {c}")
            i += 1
        return value.getvalue()

    def parseMultiPython(self, source_with_quotes: str) -> str:
        """Decode a triple-quoted block (delimiters included).

        The block must start with the delimiter followed by a newline and end
        with the delimiter. The shared indentation of the non-blank lines is
        removed, trailing whitespace is stripped from each line, and escapes
        are resolved. Blank lines are kept as empty lines.
        """
        assert source_with_quotes.startswith(TRIPLE_QUOTE + "\n")
        assert source_with_quotes.endswith(TRIPLE_QUOTE)

        source = source_with_quotes[len(TRIPLE_QUOTE) + 1 : -len(TRIPLE_QUOTE)]
        lines = source.split("\n")

        common_prefix = min(
            (self._leading_whitespace(line) for line in lines if line.strip()),
            default="",
        )

        def decode_line(line: str) -> str:
            if not line.strip():
                return ""
            # Only strip the prefix from lines that actually start with it.
            if common_prefix and line.startswith(common_prefix):
                line = line[len(common_prefix) :]
            return self._unescape_python(line.rstrip())

        return "\n".join(decode_line(line) for line in lines)


class LiteralBoolean(LiteralFormat[bool]):
def encode(
self, value: bool, language: Language, encoding_policy: EscapeLeadingWhitespace
Expand Down
62 changes: 62 additions & 0 deletions python/selfie-lib/tests/LiteralString_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest
from selfie_lib.Literals import LiteralString
from selfie_lib.EscapeLeadingWhitespace import EscapeLeadingWhitespace


class TestLiteralString:
    """Tests for ``LiteralString`` single-line and multi-line encode/parse.

    In the expected values, backticks or single quotes stand in for double
    quotes and are swapped back before comparing.
    """

    @pytest.mark.parametrize(
        "value, expected",
        [("1", '"1"'), ("\\", '"\\\\"'), ("1\n\tABC", '"1\\n\\tABC"')],
    )
    def test_encode_single_java(self, value, expected):
        literal_string = LiteralString()
        actual = literal_string._encodeSinglePython(value)
        assert actual == expected

    @pytest.mark.parametrize(
        "value, expected",
        [("1", "`1`"), ("\\", "`\\\\`"), ("1\n\tABC", "`1\\n\\tABC`")],
    )
    def test_encode_single_java_with_dollars(self, value, expected):
        literal_string = LiteralString()
        actual = literal_string._encodeSinglePython(value)
        assert actual == expected.replace("`", '"')

    # Failing due to EscapeLeadingWhitespace always being NEVER
    # and not an Always option like in the original test case
    @pytest.mark.parametrize(
        "value, expected",
        [
            ("1", "'''\n1'''"),
            ("\\", "'''\n\\\\'''"),
            (" leading\ntrailing ", "'''\n" + "\\s leading\n" + "trailing \\s'''"),
        ],
    )
    def test_encode_multi_java(self, value, expected):
        literal_string = LiteralString()
        actual = literal_string.encodeMultiPython(value, EscapeLeadingWhitespace.NEVER)
        assert actual == expected.replace("'", '"')

    @pytest.mark.parametrize(
        "value, expected", [("1", "1"), ("\\\\", "\\"), ("1\\n\\tABC", "1\n\tABC")]
    )
    def test_parse_single_java(self, value, expected):
        literal_string = LiteralString()
        # Built outside the f-string: reusing the enclosing quote or putting a
        # backslash inside a replacement field is a SyntaxError before 3.12.
        quoted = value.replace("'", '"')
        actual = literal_string._parseSinglePython(f'"{quoted}"')
        assert actual == expected

    @pytest.mark.parametrize(
        "value, expected",
        [
            ("\n123\nabc", "123\nabc"),
            ("\n 123\n abc", "123\nabc"),
            ("\n 123 \n abc\t", "123\nabc"),
            ("\n 123 \n abc\t", "123\nabc"),
            ("\n 123 \\s\n abc\t\\s", "123 \nabc\t "),
        ],
    )
    def test_parse_multi_java(self, value, expected):
        literal_string = LiteralString()
        # Same f-string restriction as above: substitute quotes first.
        quoted = value.replace("'", '"')
        actual = literal_string.parseMultiPython(f'"""{quoted}"""')
        assert actual == expected
Loading