"""Extracting and changing portions of the current line
The current_* functions take cursor offset from the beginning of the line and
the line of Python code, and return None, or a LinePart holding the start
index, stop index, and the word."""
import re
from dataclasses import dataclass
from itertools import chain
from .lazyre import LazyReCompile
@dataclass
class LinePart:
    """A piece of the current line: where it sits and the text it holds."""

    # index in the line at which the piece begins
    start: int
    # index in the line at which the piece ends (callers pass a regex
    # match's end offset, i.e. one past the last character)
    stop: int
    # the text of the piece itself
    word: str
# NOTE(review): the pattern below, CHARACTER_PAIR_MAP and the signature of
# current_word() were reconstructed from a corrupted line (the text between
# the `<` of the lookbehind and the `>` of the return annotation was lost).
# Verify all three against version control.
#
# A (possibly dotted) name, optionally followed by `(`, that is not directly
# preceded by `)`, `]`, `.` or a word character.
_current_word_re = LazyReCompile(r"(?<![)\]\w_.])" r"([\w_][\w0-9._]*[(]?)")

# Opening character -> the closing character that pairs with it.
CHARACTER_PAIR_MAP = {"(": ")", "{": "}", "[": "]", "'": "'", '"': '"'}


def current_word(cursor_offset: int, line: str) -> LinePart | None:
    """Return the object.attribute.attribute just before or under the cursor.

    A word qualifies when the cursor is strictly after its first character
    and no further than its end. Returns None when no word qualifies.
    """
    found = None
    for m in _current_word_re.finditer(line):
        if m.start(1) < cursor_offset <= m.end(1):
            # keep scanning: the last qualifying match wins
            found = m
    if found is None:
        return None
    return LinePart(found.start(1), found.end(1), found.group(1))
# Pieces of regex to match repr() of several hashable built-in types.
# These fragments are spliced into larger patterns that are compiled with
# re.VERBOSE, so the line breaks and `#` comments inside the multi-line
# strings below are pattern text that the regex engine ignores.

# fallback: any run of characters that are not a closing `]`
_match_all_dict_keys = r"""[^\]]*"""
# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
_match_single_quote_str_bytes = r"""
# bytes repr() begins with `b` character; bytes and str begin with `'`
b?'
# match escape sequence; this handles `\'` in the string repr()
(?:\\['"nabfrtvxuU\\]|
# or match any non-`\` and non-single-quote character (most of the string)
[^'\\])*
# matches hanging `\` or ending `'` if one is present
[\\']?
"""
# bytes and str repr() only uses double quotes if the string contains 1 or more
# `'` character and exactly 0 `"` characters
_match_double_quote_str_bytes = r"""
# bytes repr() begins with `b` character
b?"
# string continues until a `"` character is reached
[^"]*
# end matching at closing double-quote if one is present
"?"""
# match valid identifier name followed by `[` character
_match_dict_before_key = r"""[\w_][\w0-9._]*\["""

# group 1 is everything after the `[`: any mix of the key fragments above
_current_dict_key_re = LazyReCompile(
    f"{_match_dict_before_key}((?:"
    f"{_match_single_quote_str_bytes}|"
    f"{_match_double_quote_str_bytes}|"
    f"{_match_all_dict_keys}|)*)",
    re.VERBOSE,
)


def current_dict_key(cursor_offset: int, line: str) -> LinePart | None:
    """Return the dictionary key being typed, for dict-key completion.

    The first `name[key` occurrence whose key text contains the cursor
    (both ends inclusive) is returned; None when there is no such key.
    """
    for found in _current_dict_key_re.finditer(line):
        key_start, key_end = found.span(1)
        if key_start <= cursor_offset <= key_end:
            return LinePart(key_start, key_end, found.group(1))
    return None
# capture valid identifier name if followed by `[` character
_capture_dict_name = r"""([\w_][\w0-9._]*)\["""

# group 1 is the name before the `[`, group 2 the key text after it
_current_dict_re = LazyReCompile(
    f"{_capture_dict_name}((?:"
    f"{_match_single_quote_str_bytes}|"
    f"{_match_double_quote_str_bytes}|"
    f"{_match_all_dict_keys}|)*)",
    re.VERBOSE,
)


def current_dict(cursor_offset: int, line: str) -> LinePart | None:
    """Return the dict whose key is being completed.

    Looks for the first `name[key` occurrence whose key text contains the
    cursor (both ends inclusive) and returns the name part. Returns None
    when the cursor is not inside any such key.
    """
    candidates = (
        m
        for m in _current_dict_re.finditer(line)
        if m.start(2) <= cursor_offset <= m.end(2)
    )
    hit = next(candidates, None)
    if hit is None:
        return None
    return LinePart(hit.start(1), hit.end(1), hit.group(1))
# NOTE(review): the `<name>` parts of the named groups were missing from this
# pattern, which made it fail to compile. `open` is dictated by the
# `(?P=open)` backreference; `closed` and `unclosed` are reconstructed names.
# Verify against version control.
#
# Group numbering: 1 = opening quote(s), 2 = closed body plus closing quote,
# 3 (`closed`) = body of a terminated string, 4 (`unclosed`) = rest of the
# line after an unterminated opening quote.
_current_string_re = LazyReCompile(
    '''(?P<open>(?:""")|"|(?:''\')|')(?:((?P<closed>.+?)(?P=open))|'''
    """(?P<unclosed>.+))"""
)


def current_string(cursor_offset: int, line: str) -> LinePart | None:
    """If inside a string of nonzero length, return the string (excluding
    quotes)

    Weaker than bpython.Repl's current_string, because that checks that a
    string is a string based on previous lines in the buffer.

    Returns None when the cursor is not within (or at either end of) the
    body of any string found on the line.
    """
    for m in _current_string_re.finditer(line):
        # prefer the terminated body; otherwise the unterminated remainder
        body = "closed" if m.group("closed") else "unclosed"
        body_start, body_end = m.span(body)
        if body_start <= cursor_offset <= body_end:
            return LinePart(body_start, body_end, m.group(body))
    return None
# a name that is directly followed by a dot
_current_object_re = LazyReCompile(r"([\w_][\w0-9_]*)[.]")


def current_object(cursor_offset: int, line: str) -> LinePart | None:
    """Return the object on which an attribute is being looked up.

    Takes the dotted word at the cursor and keeps the dot-terminated names
    that end before the cursor, joined with dots. Returns None when there is
    no word at the cursor or no such name.
    """
    word = current_word(cursor_offset, line)
    if word is None:
        return None
    names = [
        m.group(1)
        for m in _current_object_re.finditer(word.word)
        if word.start + m.end(1) < cursor_offset
    ]
    dotted = ".".join(names)
    if dotted:
        return LinePart(word.start, word.start + len(dotted), dotted)
    return None
# a name, optionally followed by a dot
_current_object_attribute_re = LazyReCompile(r"([\w_][\w0-9_]*)[.]?")


def current_object_attribute(cursor_offset: int, line: str) -> LinePart | None:
    """Return the attribute being completed, if in attribute completion.

    Only names after the first one in the dotted word at the cursor are
    considered. Returns None when there is no word at the cursor or the
    cursor is not on one of those names.
    """
    # TODO replace with more general current_expression_attribute
    word = current_word(cursor_offset, line)
    if word is None:
        return None
    offset = word.start
    names = _current_object_attribute_re.finditer(word.word)
    # discard the first name in the dotted word; only later ones qualify
    next(names)
    for m in names:
        begin, finish = offset + m.start(1), offset + m.end(1)
        if begin <= cursor_offset <= finish:
            return LinePart(begin, finish, m.group(1))
    return None
# group 1 is the module after `from`; group 2 is the text captured by the
# last repetition of the name pattern in the import clause
_current_from_import_from_re = LazyReCompile(
    r"from +([\w0-9_.]*)(?:\s+import\s+([\w0-9_]+[,]?\s*)+)*"
)


def current_from_import_from(cursor_offset: int, line: str) -> LinePart | None:
    """Return the module named after `from`, if in from-import completion.

    The module is returned when the cursor is in or just after either of the
    two interesting parts of `from (module) import (name1, name2)`.
    Returns None otherwise.
    """
    # TODO allow for as's
    for m in _current_from_import_from_re.finditer(line):
        # a group that did not take part reports -1 offsets and never matches
        if any(m.start(g) < cursor_offset <= m.end(g) for g in (1, 2)):
            return LinePart(m.start(1), m.end(1), m.group(1))
    return None
# the `from module import` prefix of the statement
_current_from_import_import_re_1 = LazyReCompile(
    r"from\s+([\w0-9_.]*)\s+import"
)
# the first imported name
_current_from_import_import_re_2 = LazyReCompile(r"([\w0-9_]+)")
# every further name, introduced by a comma (the name may still be empty)
_current_from_import_import_re_3 = LazyReCompile(r", *([\w0-9_]*)")


def current_from_import_import(
    cursor_offset: int, line: str
) -> LinePart | None:
    """Return the name after `import` being completed in a from-import.

    Returns None if the line has no `from ... import` prefix, if no name
    follows it, or if the cursor is not in or just after one of the names.
    """
    prefix = _current_from_import_import_re_1.search(line)
    if prefix is None:
        return None
    # names are searched for in the text after the prefix, so every offset
    # found there has to be shifted back into line coordinates
    shift = prefix.end()
    remainder = line[shift:]
    first_name = _current_from_import_import_re_2.search(remainder)
    if first_name is None:
        return None
    later_names = _current_from_import_import_re_3.finditer(remainder)
    for m in [first_name, *later_names]:
        begin, finish = shift + m.start(1), shift + m.end(1)
        if begin < cursor_offset <= finish:
            return LinePart(begin, finish, m.group(1))
    return None
# the `import` keyword
_current_import_re_1 = LazyReCompile(r"import")
# the first (possibly dotted) name after it
_current_import_re_2 = LazyReCompile(r"([\w0-9_.]+)")
# every further name, introduced by a comma (the name may still be empty)
_current_import_re_3 = LazyReCompile(r"[,][ ]*([\w0-9_.]*)")


def current_import(cursor_offset: int, line: str) -> LinePart | None:
    """Return the name after `import` that the cursor is in or just after.

    Returns None if the line does not contain `import`, if no name follows
    it, or if the cursor is not in or just after one of the names.
    """
    # TODO allow for multiple as's
    keyword = _current_import_re_1.search(line)
    if keyword is None:
        return None
    # offsets found in the text after the keyword are relative to that text
    base = keyword.end()
    after_keyword = line[base:]
    leading = _current_import_re_2.search(after_keyword)
    if leading is None:
        return None
    following = _current_import_re_3.finditer(after_keyword)
    for m in chain((leading,), following):
        lo, hi = m.span(1)
        lo += base
        hi += base
        if lo < cursor_offset <= hi:
            return LinePart(lo, hi, m.group(1))
    return None
# `def` followed by whitespace and an identifier (captured)
_current_method_definition_name_re = LazyReCompile(r"def\s+([a-zA-Z_][\w]*)")


def current_method_definition_name(
    cursor_offset: int, line: str
) -> LinePart | None:
    """Return the name of the method being defined, if the cursor is on it.

    The cursor counts as on the name anywhere from its first character to
    just after its last. Returns None otherwise.
    """
    for found in _current_method_definition_name_re.finditer(line):
        name_start, name_end = found.span(1)
        if name_start <= cursor_offset <= name_end:
            return LinePart(name_start, name_end, found.group(1))
    return None
# NOTE(review): this pattern and the signature of current_single_word() were
# reconstructed from a corrupted line (the text between the `<` of the
# lookbehind and the `>` of the return annotation was lost). Verify against
# version control.
#
# An identifier that starts at a word boundary and is not preceded by a dot.
_current_single_word_re = LazyReCompile(r"(?<![.])\b([a-zA-Z_][\w]*)")


def current_single_word(cursor_offset: int, line: str) -> LinePart | None:
    """Return the un-dotted word just before or under the cursor.

    The cursor counts as on a word anywhere from its first character to just
    after its last. Returns None when no word qualifies.
    """
    for m in _current_single_word_re.finditer(line):
        word_start, word_end = m.span(1)
        if word_start <= cursor_offset <= word_end:
            return LinePart(word_start, word_end, m.group(1))
    return None
def current_dotted_attribute(cursor_offset: int, line: str) -> LinePart | None:
    """Return the dotted attribute-object pair before the cursor.

    This is the word at the cursor, provided it contains a dot somewhere
    after its first character. Returns None otherwise.
    """
    word = current_word(cursor_offset, line)
    if word is None:
        return None
    # a dot in the very first position does not count
    return word if "." in word.word[1:] else None
# a dot, optional whitespace, then a name or nothing at all (captured)
_current_expression_attribute_re = LazyReCompile(
    r"[.]\s*((?:[\w_][\w0-9_]*)|(?:))"
)


def current_expression_attribute(
    cursor_offset: int, line: str
) -> LinePart | None:
    """Return the attribute being completed after a dot.

    The attribute text may be empty when nothing has been typed after the
    dot yet. Returns None when the cursor is not on any such attribute.
    """
    hits = (
        m
        for m in _current_expression_attribute_re.finditer(line)
        if m.start(1) <= cursor_offset <= m.end(1)
    )
    hit = next(hits, None)
    if hit is None:
        return None
    return LinePart(hit.start(1), hit.end(1), hit.group(1))
def cursor_on_closing_char_pair(
    cursor_offset: int, line: str, ch: str | None = None
) -> tuple[bool, bool]:
    """Check whether the cursor sits on the closing character of a pair.

    Returns a tuple ``(on_closing_char, pair_close)``:

    * ``on_closing_char`` - the character under the cursor is one of the
      closing characters in CHARACTER_PAIR_MAP (and equals ``ch`` when
      ``ch`` is given).
    * ``pair_close`` - additionally, the character directly before the
      cursor is the opening character that pairs with it (and equals ``ch``
      when ``ch`` is given).

    Both are False when ``line`` is None or the cursor is at or past the end
    of the line.
    """
    if line is None or cursor_offset >= len(line):
        return False, False

    under_cursor = line[cursor_offset]
    on_closing_char = False
    if under_cursor in CHARACTER_PAIR_MAP.values():
        on_closing_char = ch is None or under_cursor == ch

    pair_close = False
    if on_closing_char and cursor_offset > 0:
        before_cursor = line[cursor_offset - 1]
        if (
            before_cursor in CHARACTER_PAIR_MAP
            and CHARACTER_PAIR_MAP[before_cursor] == under_cursor
        ):
            pair_close = ch is None or before_cursor == ch
    return on_closing_char, pair_close