diff.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #!/usr/bin/env python
  2. ## Copyright (c) 2012 The WebM project authors. All Rights Reserved.
  3. ##
  4. ## Use of this source code is governed by a BSD-style license
  5. ## that can be found in the LICENSE file in the root of the source
  6. ## tree. An additional intellectual property rights grant can be found
  7. ## in the file PATENTS. All contributing project authors may
  8. ## be found in the AUTHORS file in the root of the source tree.
  9. ##
  10. """Classes for representing diff pieces."""
  11. __author__ = "jkoleszar@google.com"
  12. import re
  13. class DiffLines(object):
  14. """A container for one half of a diff."""
  15. def __init__(self, filename, offset, length):
  16. self.filename = filename
  17. self.offset = offset
  18. self.length = length
  19. self.lines = []
  20. self.delta_line_nums = []
  21. def Append(self, line):
  22. l = len(self.lines)
  23. if line[0] != " ":
  24. self.delta_line_nums.append(self.offset + l)
  25. self.lines.append(line[1:])
  26. assert l+1 <= self.length
  27. def Complete(self):
  28. return len(self.lines) == self.length
  29. def __contains__(self, item):
  30. return item >= self.offset and item <= self.offset + self.length - 1
  31. class DiffHunk(object):
  32. """A container for one diff hunk, consisting of two DiffLines."""
  33. def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
  34. self.header = header
  35. self.left = DiffLines(file_a, start_a, len_a)
  36. self.right = DiffLines(file_b, start_b, len_b)
  37. self.lines = []
  38. def Append(self, line):
  39. """Adds a line to the DiffHunk and its DiffLines children."""
  40. if line[0] == "-":
  41. self.left.Append(line)
  42. elif line[0] == "+":
  43. self.right.Append(line)
  44. elif line[0] == " ":
  45. self.left.Append(line)
  46. self.right.Append(line)
  47. elif line[0] == "\\":
  48. # Ignore newline messages from git diff.
  49. pass
  50. else:
  51. assert False, ("Unrecognized character at start of diff line "
  52. "%r" % line[0])
  53. self.lines.append(line)
  54. def Complete(self):
  55. return self.left.Complete() and self.right.Complete()
  56. def __repr__(self):
  57. return "DiffHunk(%s, %s, len %d)" % (
  58. self.left.filename, self.right.filename,
  59. max(self.left.length, self.right.length))
  60. def ParseDiffHunks(stream):
  61. """Walk a file-like object, yielding DiffHunks as they're parsed."""
  62. file_regex = re.compile(r"(\+\+\+|---) (\S+)")
  63. range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
  64. hunk = None
  65. while True:
  66. line = stream.readline()
  67. if not line:
  68. break
  69. if hunk is None:
  70. # Parse file names
  71. diff_file = file_regex.match(line)
  72. if diff_file:
  73. if line.startswith("---"):
  74. a_line = line
  75. a = diff_file.group(2)
  76. continue
  77. if line.startswith("+++"):
  78. b_line = line
  79. b = diff_file.group(2)
  80. continue
  81. # Parse offset/lengths
  82. diffrange = range_regex.match(line)
  83. if diffrange:
  84. if diffrange.group(2):
  85. start_a = int(diffrange.group(1))
  86. len_a = int(diffrange.group(3))
  87. else:
  88. start_a = 1
  89. len_a = int(diffrange.group(1))
  90. if diffrange.group(5):
  91. start_b = int(diffrange.group(4))
  92. len_b = int(diffrange.group(6))
  93. else:
  94. start_b = 1
  95. len_b = int(diffrange.group(4))
  96. header = [a_line, b_line, line]
  97. hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
  98. else:
  99. # Add the current line to the hunk
  100. hunk.Append(line)
  101. # See if the whole hunk has been parsed. If so, yield it and prepare
  102. # for the next hunk.
  103. if hunk.Complete():
  104. yield hunk
  105. hunk = None
  106. # Partial hunks are a parse error
  107. assert hunk is None