diff options
author | Philip Withnall <philip.withnall@collabora.co.uk> | 2016-08-08 17:34:57 +0100 |
---|---|---|
committer | Thibault Saunier <tsaunier@gnome.org> | 2016-08-09 14:37:47 -0400 |
commit | ea6b591d6ae4e3e57cb4985e823e8e7d2700dae3 (patch) | |
tree | c668542da9b06e67359f0b23228737c87ee57d9d | |
parent | a42d52a20098860c367df07d83f85d1a555d6ed6 (diff) |
git-phab: Fix content type detection for binary files
The mime_type field of GitPython’s Blob object is unreliable: it detects
the type based entirely on the blob’s filename, which results in weird
types for files like ‘configure.ac’. Instead, since we have the data
available, we can check whether any of the bytes in the file are
non-text control characters (such as NUL; bytes 0x80–0xFF still count as text), which is precisely when we should use a binary diff.
Signed-off-by: Philip Withnall <philip.withnall@collabora.co.uk>
Differential Revision: https://phabricator.freedesktop.org/D1258
-rwxr-xr-x | git-phab | 26 |
1 files changed, 20 insertions, 6 deletions
@@ -761,6 +761,19 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri) return subprocess.check_output(command).decode("utf-8") + def blob_is_binary(self, blob): + if not blob: + return False + + bytes = blob.data_stream[-1].read() + # The mime_type field of a gitpython blob is based only on its filename + # which means that files like 'configure.ac' will return weird MIME + # types, unsuitable for working out whether they are text. Instead, + # check whether any of the bytes in the blob are non-ASCII. + textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | + set(range(0x20, 0x100)) - {0x7f}) + return bool(bytes.translate(None, textchars)) + def get_changes_for_diff(self, diff): def file_len(fname): i = 0 @@ -813,8 +826,8 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri) elif l.startswith("-"): removed_lines += 1 - is_text = diff.b_blob.mime_type.startswith( - "text/") if diff.b_blob else True + is_text = (not self.blob_is_binary(diff.a_blob) and + not self.blob_is_binary(diff.b_blob)) if is_text: if diff.deleted_file: file_length = 0 @@ -936,8 +949,8 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri) diffs = self.get_git_diffs(commit) has_binary = False for d in diffs: - if d.b_blob and not d.b_blob.mime_type.startswith( - "text/"): + if d.b_blob and \ + self.blob_is_binary(d.b_blob): has_binary = True break @@ -989,7 +1002,8 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri) phab_fields.append("Projects: %s" % ','.join(self.project_phids)) - summary = ('\n'.join(body) + '\n' + '\n'.join(git_fields)).strip('\r\n') + summary = ('\n'.join(body) + '\n' + + '\n'.join(git_fields)).strip('\r\n') revision_id = self.get_differential_id(self.repo.head.commit) if revision_id: @@ -1718,7 +1732,7 @@ Paste API Token from that page and press <enter>: """ % self.phabricator_uri) git_fields.append(field) msg = self.format_commit_msg(subject, body, git_fields, - 
phab_fields, True) + phab_fields, True) self.repo.git.commit(amend=True, message=msg) orig_branch.commit = self.repo.head.commit |