# Source code for cpp_linter.git.git_str
"""This was reintroduced to deal with any bugs in pygit2 (or the libgit2 C library it
binds to). The `parse_diff()` function here is only used when
:py:meth:`pygit2.Diff.parse_diff()` function fails in `cpp_linter.git.parse_diff()`"""
import re
from typing import Optional, List, Tuple, cast
from ..common_fs import FileObj, has_line_changes
from ..common_fs.file_filter import FileFilter
from ..loggers import logger
# All patterns below are compiled with ``re.MULTILINE`` so that ``^`` and ``$``
# anchor at every line of the diff text rather than only at its start/end.

# Matches an entire ``diff --git a/...`` line. `parse_diff()` splits the full
# diff on this pattern to get one piece of text per changed file.
DIFF_FILE_DELIMITER = re.compile(r"^diff --git a/.*$", re.MULTILINE)
# Matches a ``+++`` line and captures (group 1) the rest of the line after the
# optional ``b`` and the ``/`` that follows it.
DIFF_FILE_NAME = re.compile(r"^\+\+\+\sb?/(.*)$", re.MULTILINE)
# Matches a ``rename to <name>`` line and captures (group 1) the new name.
DIFF_RENAMED_FILE = re.compile(r"^rename to (.*)$", re.MULTILINE)
# Matches the beginning of a line that starts with ``Binary files``.
DIFF_BINARY_FILE = re.compile(r"^Binary\sfiles\s", re.MULTILINE)
# Matches a hunk header (``@@ -<old> +<new> @@``) and captures (group 1) the
# ``<new>`` part, i.e. ``start`` or ``start,length``. Because the group is
# captured, ``HUNK_INFO.split()`` interleaves that text with the hunk bodies.
HUNK_INFO = re.compile(r"^@@\s\-\d+,?\d*\s\+(\d+,?\d*)\s@@", re.MULTILINE)
def _get_filename_from_diff(front_matter: str) -> Optional[re.Match]:
    """Locate the file name in the front matter of a single file's diff.

    :param front_matter: The text of one file's diff that precedes its first hunk.

    :returns: A match whose first group is the file name, or `None` if no file
        name could be found.
    """
    # The usual case: the name is on the `+++ b/<name>` line.
    found = DIFF_FILE_NAME.search(front_matter)

    # Otherwise, a rename is recognized by front matter that starts with
    # `similarity` and has a `rename to <name>` line.
    if found is None and front_matter.lstrip().startswith("similarity"):
        found = DIFF_RENAMED_FILE.search(front_matter)

    if found is not None:
        return found

    # We may need to compensate for other instances where the filename is
    # not directly after `+++ b/`. Binary files are another example of this,
    # so they are skipped silently; anything else is logged in the hope that
    # it helps in the future.
    is_binary = DIFF_BINARY_FILE.search(front_matter) is not None
    if not is_binary:
        logger.warning(  # pragma: no cover
            "Unrecognized diff starting with:\n%s",
            "\n".join(front_matter.splitlines()),
        )
    return None
# [docs]
def parse_diff(
    full_diff: str,
    file_filter: FileFilter,
    lines_changed_only: int,
) -> List[FileObj]:
    """Build file objects from the text of a complete diff.

    :param full_diff: The complete diff for an event.
    :param file_filter: A `FileFilter` object.
    :param lines_changed_only: A value that dictates what file changes to focus on.

    :returns: A `list` of `FileObj` instances containing information about the files
        changed.
    """
    logger.error("Using pure python to parse diff because pygit2 failed!")
    parsed_files: List[FileObj] = []

    # One piece of text per file: everything between `diff --git a/...` lines.
    for file_diff in DIFF_FILE_DELIMITER.split(full_diff.lstrip("\n")):
        if not file_diff:
            continue
        if file_diff.lstrip().startswith("deleted file"):
            continue

        hunk = HUNK_INFO.search(file_diff)
        # Front matter is everything before the first hunk header.
        # NOTE: without a hunk the slice ends at -1, so the final character of
        # the text is left out of the front matter.
        front_matter_end = hunk.start() if hunk is not None else -1
        name_match = _get_filename_from_diff(file_diff[:front_matter_end])

        # Nothing to report without both a file name and at least one hunk.
        if name_match is None or hunk is None:
            continue

        filename = cast(str, name_match.group(1))
        if not file_filter.is_source_or_ignored(filename):
            continue

        diff_chunks, additions = _parse_patch(file_diff[hunk.start() :])
        if has_line_changes(lines_changed_only, diff_chunks, additions):
            parsed_files.append(FileObj(filename, additions, diff_chunks))

    return parsed_files
def _parse_patch(full_patch: str) -> Tuple[List[List[int]], List[int]]:
    """Parse a diff's patch accordingly.

    :param full_patch: The entire patch of hunks for 1 file.

    :returns:
        A `tuple` of lists where

        - Index 0 is the ranges of lines in the diff. Each item in this `list` is a
          2 element `list` holding the hunk's starting line number and the
          starting line number plus the hunk's length.
        - Index 1 is a `list` of the line numbers that contain additions.
    """
    ranges: List[List[int]] = []
    # additions is a list of line numbers in the diff containing additions
    additions: List[int] = []
    line_numb_in_diff: int = 0

    # HUNK_INFO has one capturing group, so the split result alternates between
    # text (even indexes) and the captured "start[,length]" of a hunk header
    # (odd indexes).
    chunks = HUNK_INFO.split(full_patch)
    for index, chunk in enumerate(chunks):
        if index % 2 == 1:
            # each odd element holds the starting line number and (optionally)
            # the number of lines; the length defaults to 1 when it is omitted
            start_text, _, length_text = chunk.partition(",")
            start_line = int(start_text)
            hunk_length = int(length_text) if length_text else 1
            ranges.append([start_line, hunk_length + start_line])
            line_numb_in_diff = start_line
            continue

        # each even element holds the actual line changes
        for i, line in enumerate(chunk.splitlines()):
            if line.startswith("\\"):
                # A "\ No newline at end of file" marker annotates the previous
                # line; it is not a line of the file, so it must not advance
                # the line counter (else later additions are off by one).
                continue
            if line.startswith("+"):
                additions.append(line_numb_in_diff)
            # Removed lines do not exist in the new file, and the first line is
            # only the remainder of the hunk header line, so neither advances
            # the counter.
            if not line.startswith("-") and i:
                line_numb_in_diff += 1
    return (ranges, additions)