You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
183 lines
5.2 KiB
183 lines
5.2 KiB
2 years ago
|
import codecs
|
||
|
import re
|
||
|
from typing import (IO, Iterator, Match, NamedTuple, Optional, # noqa:F401
|
||
|
Pattern, Sequence, Tuple)
|
||
|
|
||
|
|
||
|
def make_regex(string: str, extra_flags: int = 0) -> Pattern[str]:
    """Compile *string* with ``re.UNICODE`` combined with *extra_flags*."""
    flags = re.UNICODE | extra_flags
    return re.compile(string, flags)
|
||
|
|
||
|
|
||
|
# One line terminator; "\r\n" is tried first so a CRLF pair is a single match.
_newline = make_regex(r"(\r\n|\n|\r)")
# Any run of whitespace, line terminators included.
_multiline_whitespace = make_regex(r"\s*", extra_flags=re.MULTILINE)
# Horizontal whitespace only: whitespace characters other than "\r" and "\n".
_whitespace = make_regex(r"[^\S\r\n]*")
# Optional "export" keyword followed by at least one horizontal whitespace.
_export = make_regex(r"(?:export[^\S\r\n]+)?")
# Key wrapped in single quotes; the group captures the text between them.
_single_quoted_key = make_regex(r"'([^']+)'")
# Bare key: one or more characters that are not "=", "#" or whitespace.
_unquoted_key = make_regex(r"([^=\#\s]+)")
# "=" together with any horizontal whitespace that follows it.
_equal_sign = make_regex(r"(=[^\S\r\n]*)")
# Single-quoted value; a backslash-escaped quote (\') may appear inside.
_single_quoted_value = make_regex(r"'((?:\\'|[^'])*)'")
# Double-quoted value; a backslash-escaped quote (\") may appear inside.
_double_quoted_value = make_regex(r'"((?:\\"|[^"])*)"')
# Everything up to (not including) the next line terminator.
_unquoted_value = make_regex(r"([^\r\n]*)")
# Optional "#" comment, with optional horizontal whitespace in front of it.
_comment = make_regex(r"(?:[^\S\r\n]*#[^\r\n]*)?")
# Optional horizontal whitespace, then a line terminator or "$".
_end_of_line = make_regex(r"[^\S\r\n]*(?:\r\n|\n|\r|$)")
# Remainder of the current line plus its terminator, if any.  "\r\n" must be
# tried before "\r": alternation picks the first alternative that matches, so
# with "\r" first a CRLF terminator would be split, leaving "\n" unconsumed
# and making the pair count as two line breaks.
_rest_of_line = make_regex(r"[^\r\n]*(?:\r\n|\n|\r)?")
# Backslash escapes decoded inside double-quoted values.
_double_quote_escapes = make_regex(r"\\[\\'\"abfnrtv]")
# Backslash escapes decoded inside single-quoted values (only \\ and \').
_single_quote_escapes = make_regex(r"\\[\\']")
|
||
|
|
||
|
|
||
|
# A slice of the source text ("string") paired with a line number ("line").
Original = NamedTuple("Original", [("string", str), ("line", int)])
|
||
|
|
||
|
# Result of parsing one binding:
#   key      -- parsed key, or None when no key was read
#   value    -- parsed value, or None when no value was read
#   original -- the source text the binding was parsed from
#   error    -- True when the text could not be parsed
_binding_fields = None  # placeholder removed below; keeps the definition a single expression
Binding = NamedTuple(
    "Binding",
    [("key", Optional[str]), ("value", Optional[str]), ("original", Original), ("error", bool)],
)
del _binding_fields
|
||
|
|
||
|
|
||
|
class Position:
    """Mutable cursor made of a character offset and a line number."""

    def __init__(self, chars: int, line: int) -> None:
        self.chars, self.line = chars, line

    @classmethod
    def start(cls) -> "Position":
        """Return a new cursor at offset 0 on line 1."""
        return cls(chars=0, line=1)

    def set(self, other: "Position") -> None:
        """Copy the offset and line number of *other* into this cursor."""
        self.chars, self.line = other.chars, other.line

    def advance(self, string: str) -> None:
        """Move the cursor past *string*, counting the line breaks it contains."""
        line_breaks = _newline.findall(string)
        self.chars += len(string)
        self.line += len(line_breaks)
|
||
|
|
||
|
|
||
|
class Error(Exception):
    """Raised when the reader cannot consume the requested input."""
|
||
|
|
||
|
|
||
|
class Reader:
    """Cursor over the full text of a stream, with a movable mark."""

    def __init__(self, stream: IO[str]) -> None:
        # The whole stream is read up front; all later reads slice this string.
        self.string = stream.read()
        self.position = Position.start()
        self.mark = Position.start()

    def has_next(self) -> bool:
        """Return True while the cursor is before the end of the text."""
        return len(self.string) > self.position.chars

    def set_mark(self) -> None:
        """Remember the current cursor position as the mark."""
        self.mark.set(self.position)

    def get_marked(self) -> Original:
        """Return the text between the mark and the cursor, with the mark's line."""
        begin, end = self.mark.chars, self.position.chars
        return Original(string=self.string[begin:end], line=self.mark.line)

    def peek(self, count: int) -> str:
        """Return up to *count* characters at the cursor without moving it."""
        offset = self.position.chars
        return self.string[offset:offset + count]

    def read(self, count: int) -> str:
        """Consume and return exactly *count* characters.

        Raises Error when fewer than *count* characters remain.
        """
        offset = self.position.chars
        chunk = self.string[offset:offset + count]
        if len(chunk) < count:
            raise Error("read: End of string")
        self.position.advance(chunk)
        return chunk

    def read_regex(self, regex: Pattern[str]) -> Sequence[str]:
        """Match *regex* at the cursor, consume the match, return its groups.

        Raises Error when the pattern does not match at the cursor.
        """
        found = regex.match(self.string, self.position.chars)
        if found is None:
            raise Error("read_regex: Pattern not found")
        self.position.advance(found.group(0))
        return found.groups()
|
||
|
|
||
|
|
||
|
def decode_escapes(regex: Pattern[str], string: str) -> str:
    """Replace every match of *regex* in *string* with its unicode-escape decoding.

    Text that *regex* does not match is returned unchanged.
    """
    return regex.sub(
        lambda found: codecs.decode(found.group(0), 'unicode-escape'),  # type: ignore
        string,
    )
|
||
|
|
||
|
|
||
|
def parse_key(reader: Reader) -> Optional[str]:
    """Read a key at the cursor.

    Returns None, consuming nothing, when the next character is "#".
    A leading single quote selects the quoted-key pattern; anything else is
    read as an unquoted key. Errors raised by the reader propagate.
    """
    first = reader.peek(1)
    if first == "#":
        return None
    pattern = _single_quoted_key if first == "'" else _unquoted_key
    (key,) = reader.read_regex(pattern)
    return key
|
||
|
|
||
|
|
||
|
def parse_unquoted_value(reader: Reader) -> str:
    """Read the rest of the line as a value, minus comment and trailing space."""
    (raw,) = reader.read_regex(_unquoted_value)
    # A "#" only starts a comment when whitespace precedes it.
    without_comment = re.sub(r"\s+#.*", "", raw)
    return without_comment.rstrip()
|
||
|
|
||
|
|
||
|
def parse_value(reader: Reader) -> str:
    """Read the value at the cursor, choosing the form by its first character.

    A single or double quote selects the matching quoted pattern and its
    escape decoding. End of input or a line terminator gives an empty string
    without consuming anything. Everything else is read as an unquoted value.
    """
    lead = reader.peek(1)

    # Opening quote -> (pattern for the quoted value, pattern for its escapes).
    quoted = {
        "'": (_single_quoted_value, _single_quote_escapes),
        '"': (_double_quoted_value, _double_quote_escapes),
    }
    if lead in quoted:
        value_pattern, escape_pattern = quoted[lead]
        (raw,) = reader.read_regex(value_pattern)
        return decode_escapes(escape_pattern, raw)

    if lead in ("", "\n", "\r"):
        return ""

    return parse_unquoted_value(reader)
|
||
|
|
||
|
|
||
|
def parse_binding(reader: Reader) -> Binding:
    """Parse one binding starting at the cursor and return it.

    The mark is set first, so the returned ``original`` covers everything
    consumed by this call. If only whitespace remains, the result has no key
    or value and ``error=False``. If any step raises Error, the rest of the
    line is consumed and the result has ``error=True``.
    """
    reader.set_mark()
    try:
        reader.read_regex(_multiline_whitespace)
        if not reader.has_next():
            # Nothing but whitespace was left in the input.
            consumed = reader.get_marked()
            return Binding(key=None, value=None, original=consumed, error=False)

        reader.read_regex(_export)
        key = parse_key(reader)
        reader.read_regex(_whitespace)

        # A value exists only when "=" follows the key.
        value = None  # type: Optional[str]
        if reader.peek(1) == "=":
            reader.read_regex(_equal_sign)
            value = parse_value(reader)

        reader.read_regex(_comment)
        reader.read_regex(_end_of_line)
        consumed = reader.get_marked()
        return Binding(key=key, value=value, original=consumed, error=False)
    except Error:
        # Skip the remainder of the offending line so parsing can continue.
        reader.read_regex(_rest_of_line)
        consumed = reader.get_marked()
        return Binding(key=None, value=None, original=consumed, error=True)
|
||
|
|
||
|
|
||
|
def parse_stream(stream: IO[str]) -> Iterator[Binding]:
    """Lazily yield one Binding per parse step until the text is exhausted."""
    source = Reader(stream)
    while True:
        if not source.has_next():
            return
        yield parse_binding(source)
|