|
1 | 1 | import os |
| 2 | +import subprocess |
2 | 3 |
|
3 | 4 |
|
4 | 5 | class GitCommit(object): |
@@ -158,119 +159,75 @@ def _build_line_mappings(self, start, finish): |
158 | 159 | forward = {} |
159 | 160 | backward = {} |
160 | 161 |
|
161 | | - # Get information about blank lines: The git diff porcelain format |
162 | | - # (which we use for everything else) doesn't distinguish between |
163 | | - # additions and removals, so this is a very dirty hack to get around |
164 | | - # the problem. |
165 | | - p = os.popen('git diff %s %s -- %s | grep -E "^[+-]$"' % ( |
166 | | - start, |
167 | | - finish, |
168 | | - self.path, |
169 | | - )) |
170 | | - blank_lines = [l.strip() for l in p.readlines()] |
171 | | - |
172 | | - p = os.popen('git diff --word-diff=porcelain %s %s -- %s' % ( |
173 | | - start, |
174 | | - finish, |
175 | | - self.path, |
176 | | - )) |
177 | | - |
178 | | - # The diff output is in sections: A header line (indicating the |
179 | | - # range of lines this section covers) and then a number of |
180 | | - # content lines. |
181 | | - |
182 | | - sections = [] |
183 | | - |
184 | | - # Skip initial headers: They don't interest us. |
185 | | - line = p.readline() |
186 | | - while line != '' and not line.startswith('@@'): |
187 | | - line = p.readline() |
188 | | - |
189 | | - while line: |
190 | | - header_line = line |
191 | | - content_lines = [] |
192 | | - |
193 | | - line = p.readline() |
194 | | - while line and not line.startswith('@@'): |
195 | | - content_lines.append(line) |
196 | | - line = p.readline() |
197 | | - |
198 | | - sections.append((header_line, content_lines, )) |
199 | | - |
200 | | - |
201 | | - start_ln = finish_ln = 0 |
202 | | - for header_line, content_lines in sections: |
203 | | - # The headers line has the format '@@ +a,b -c,d @@[ e]' where |
204 | | - # a is the first line number shown from start and b is the |
205 | | - # number of lines shown from start, and c is the first line |
206 | | - # number show from finish and d is the number of lines show |
207 | | - # from from finish, and e is Git's guess at the name of the |
208 | | - # context (and is not always present) |
209 | | - |
210 | | - headers = header_line.strip('@ \n').split(' ') |
211 | | - headers = map(lambda x: x.strip('+-').split(','), headers) |
212 | | - |
213 | | - start_range = map(int, headers[0]) |
214 | | - finish_range = map(int, headers[1]) |
215 | | - |
216 | | - while start_ln < start_range[0] - 1 and \ |
217 | | - finish_ln < finish_range[0] - 1: |
218 | | - forward[start_ln] = finish_ln |
219 | | - backward[finish_ln] = start_ln |
220 | | - start_ln += 1 |
221 | | - finish_ln += 1 |
222 | | - |
223 | | - # Now we're into the diff itself. Individual lines of input |
224 | | - # are separated by a line containing only a '~', this helps |
225 | | - # to distinguish between an addition, a removal, and a change. |
226 | | - |
227 | | - line_iter = iter(content_lines) |
228 | | - try: |
229 | | - while True: |
230 | | - group_size = -1 |
231 | | - line_delta = 0 |
232 | | - line = ' ' |
233 | | - while line != '~': |
234 | | - if line.startswith('+'): |
235 | | - line_delta += 1 |
236 | | - elif line.startswith('-'): |
237 | | - line_delta -= 1 |
238 | | - |
239 | | - group_size += 1 |
240 | | - line = line_iter.next().rstrip() |
241 | | - |
242 | | - if group_size == 0: |
243 | | - # Two '~' lines next to each other means a blank |
244 | | - # line has been either added or removed. Git |
245 | | - # doesn't tell us which. This is all crazy. |
246 | | - if blank_lines.pop(0) == '+': |
247 | | - line_delta += 1 |
248 | | - else: |
249 | | - line_delta -= 1 |
250 | | - |
251 | | - if line_delta == 1: |
252 | | - backward[finish_ln] = None |
253 | | - finish_ln += 1 |
254 | | - elif line_delta == -1: |
255 | | - forward[start_ln] = None |
256 | | - start_ln += 1 |
257 | | - else: |
258 | | - forward[start_ln] = finish_ln |
259 | | - backward[finish_ln] = start_ln |
260 | | - start_ln += 1 |
261 | | - finish_ln += 1 |
262 | | - except StopIteration: |
263 | | - pass |
264 | | - |
265 | | - # Make sure the mappings stretch the the beginning and end of |
266 | | - # the files. |
| 162 | + # We use `diff` to track blocks of added, deleted and unchanged lines |
| 163 | + # in order to build the line mapping. |
| 164 | + # Its `--old/new/unchanged-group-format` flags make this very easy; |
| 165 | + # it generates output like this: |
| 166 | + # u 8 |
| 167 | + # o 3 |
| 168 | + # n 4 |
| 169 | + # u 1 |
| 170 | + # for a diff in which the first 8 lines are unchanged, then 3 deleted, |
| 171 | + # then 4 added and then 1 unchanged. |
| 172 | + # Below, we parse this output. |
| 173 | + # |
| 174 | + # In order to get the file contents of the two commits into `diff`, |
| 175 | + # we use the equivalent of bash's /dev/fd/N based process subsititution, |
| 176 | + # which would look like this: |
| 177 | + # diff <(git show commit1:file) <(git show commit2:file) |
| 178 | + # (this works on all platforms where bash process substitution works). |
| 179 | + |
| 180 | + p_start = os.popen('git show %s:%s' % (start, self.path)) |
| 181 | + p_finish = os.popen('git show %s:%s' % (finish, self.path)) |
| 182 | + |
| 183 | + p_diff = subprocess.Popen([ |
| 184 | + 'diff', |
| 185 | + '/dev/fd/' + str(p_start.fileno()), |
| 186 | + '/dev/fd/' + str(p_finish.fileno()), |
| 187 | + '--old-group-format=o %dn\n', # lower case n for old file |
| 188 | + '--new-group-format=n %dN\n', # upper case N for new file |
| 189 | + '--unchanged-group-format=u %dN\n', # for unchanged it doesn't matter if n or N |
| 190 | + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
| 191 | + |
| 192 | + (out, err) = p_diff.communicate() |
| 193 | + assert err == '' |
| 194 | + |
| 195 | + # Unfortunately, splitting the empty string in Python still gives us a singleton |
| 196 | + # empty line (`''.split('\n') == ['']`), so we handle that case here. |
| 197 | + diff_lines = [] if out == '' else out.strip().split('\n') |
| 198 | + |
| 199 | + start_ln = 0 |
| 200 | + finish_ln = 0 |
| 201 | + |
| 202 | + for line in diff_lines: |
| 203 | + assert len(line) >= 3 |
| 204 | + # Parse the output created with `diff` above. |
| 205 | + typ, num_lines_str = line.split(' ') |
| 206 | + num_lines = int(num_lines_str) |
| 207 | + |
| 208 | + if typ == 'u': # unchanged lines, advance both sides |
| 209 | + for i in range(num_lines): |
| 210 | + forward[start_ln] = finish_ln |
| 211 | + backward[finish_ln] = start_ln |
| 212 | + start_ln += 1 |
| 213 | + finish_ln += 1 |
| 214 | + elif typ == 'o': # old/deleted lines, advance left side as they only exist there |
| 215 | + for i in range(num_lines): |
| 216 | + forward[start_ln] = None |
| 217 | + start_ln += 1 |
| 218 | + elif typ == 'n': # new/added lines, advance right side as they only exist there |
| 219 | + for i in range(num_lines): |
| 220 | + backward[finish_ln] = None |
| 221 | + finish_ln += 1 |
267 | 222 |
|
268 | 223 | p = os.popen('git show %s:%s' % (start, self.path)) |
269 | 224 | start_len = len(p.readlines()) |
270 | 225 |
|
271 | 226 | p = os.popen('git show %s:%s' % (finish, self.path)) |
272 | 227 | finish_len = len(p.readlines()) |
273 | 228 |
|
| 229 | + # Make sure the mappings stretch the the beginning and end of |
| 230 | + # the files. |
274 | 231 | while start_ln <= start_len and finish_ln <= finish_len: |
275 | 232 | forward[start_ln] = finish_ln |
276 | 233 | backward[finish_ln] = start_ln |
|
0 commit comments