summaryrefslogtreecommitdiff
path: root/patch.py
diff options
context:
space:
mode:
Diffstat (limited to 'patch.py')
-rw-r--r--patch.py658
1 files changed, 658 insertions, 0 deletions
diff --git a/patch.py b/patch.py
new file mode 100644
index 0000000..195eec6
--- /dev/null
+++ b/patch.py
@@ -0,0 +1,658 @@
+""" Patch utility to apply unified diffs
+
+ Brute-force line-by-line non-recursive parsing
+
+ Copyright (c) 2008-2010 anatoly techtonik
+ Available under the terms of MIT license
+
+ Project home: http://code.google.com/p/python-patch/
+
+
+ $Id: patch.py 101 2010-08-29 08:57:30Z techtonik $
+ $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
+"""
+
+__author__ = "techtonik.rainforce.org"
+__version__ = "10.08"
+
+import copy
+import logging
+import re
+# cStringIO doesn't support unicode in 2.5
+from StringIO import StringIO
+
+from os.path import exists, isfile, abspath
+from os import unlink
+
+
+#------------------------------------------------
+# Logging is controlled by "python_patch" logger
+
+debugmode = False
+
+logger = logging.getLogger("python_patch")
+loghandler = logging.StreamHandler()
+logger.addHandler(loghandler)
+
+debug = logger.debug
+info = logger.info
+warning = logger.warning
+
+#: disable library logging by default
+logger.setLevel(logging.CRITICAL)
+
+#------------------------------------------------
+
+# constants for patch types
+
+DIFF = PLAIN = "plain"
+HG = MERCURIAL = "mercurial"
+SVN = SUBVERSION = "svn"
+
+
+def fromfile(filename):
+ """ Parse patch file and return Patch() object
+ """
+ debug("reading %s" % filename)
+ fp = open(filename, "rb")
+ patch = Patch(fp)
+ fp.close()
+ return patch
+
+
+def fromstring(s):
+ """ Parse text string and return Patch() object
+ """
+ return Patch( StringIO(s) )
+
+
+
+class HunkInfo(object):
+ """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
+
+ def __init__(self):
+ self.startsrc=None #: line count starts with 1
+ self.linessrc=None
+ self.starttgt=None
+ self.linestgt=None
+ self.invalid=False
+ self.text=[]
+
+ def copy(self):
+ return copy.copy(self)
+
+# def apply(self, estream):
+# """ write hunk data into enumerable stream
+# return strings one by one until hunk is
+# over
+#
+# enumerable stream are tuples (lineno, line)
+# where lineno starts with 0
+# """
+# pass
+
+
+
+class Patch(object):
+
+ def __init__(self, stream=None):
+
+ # define Patch data members
+ # table with a row for every source file
+
+ #: list of source filenames
+ self.source=None
+ self.target=None
+ #: list of lists of hunks
+ self.hunks=None
+ #: file endings statistics for every hunk
+ self.hunkends=None
+ #: headers for each file
+ self.header=None
+
+ #: patch type - one of constants
+ self.type = None
+
+ if stream:
+ self.parse(stream)
+
+ def copy(self):
+ return copy.copy(self)
+
+ def parse(self, stream):
+ """ parse unified diff """
+ self.header = []
+
+ self.source = []
+ self.target = []
+ self.hunks = []
+ self.hunkends = []
+
+ # define states (possible file regions) that will direct the parser flow
+ headscan = False # scanning header before the patch body
+ filenames = False # lines starting with --- and +++
+
+ hunkhead = False # @@ -R +R @@ sequence
+ hunkbody = False #
+ hunkskip = False # skipping invalid hunk mode
+
+ headscan = True
+ lineends = dict(lf=0, crlf=0, cr=0)
+ nextfileno = 0
+ nexthunkno = 0 #: even if index starts with 0 user messages number hunks from 1
+
+ # hunkinfo holds parsed values, hunkactual - calculated
+ hunkinfo = HunkInfo()
+ hunkactual = dict(linessrc=None, linestgt=None)
+
+
+ class wrapumerate(enumerate):
+ """Enumerate wrapper that uses boolean end of stream status instead of
+ StopIteration exception, and properties to access line information.
+ """
+
+ def __init__(self, *args, **kwargs):
+ # we don't call parent, it is magically created by __new__ method
+
+ self._exhausted = False
+ self._lineno = False # after end of stream equal to the num of lines
+ self._line = False # will be reset to False after end of stream
+
+ def next(self):
+ """Try to read the next line and return True if it is available,
+ False if end of stream is reached."""
+ if self._exhausted:
+ return False
+
+ try:
+ self._lineno, self._line = super(wrapumerate, self).next()
+ except StopIteration:
+ self._exhausted = True
+ self._line = False
+ return False
+ return True
+
+ @property
+ def is_empty(self):
+ return self._exhausted
+
+ @property
+ def line(self):
+ return self._line
+
+ @property
+ def lineno(self):
+ return self._lineno
+
+
+ # start of main cycle
+ # each parsing block already has line available in fe.line
+ fe = wrapumerate(stream)
+ while fe.next():
+
+ # read out header
+ if headscan:
+ header = ''
+ while not fe.is_empty and not fe.line.startswith("--- "):
+ header += fe.line
+ fe.next()
+ if fe.is_empty:
+ # this is actually a loop exit
+ warning("stream ended while scanning patch header at line %d" % fe.lineno)
+ continue
+ self.header.append(header)
+
+ headscan = False
+ # switch to filenames state
+ filenames = True
+
+ line = fe.line
+ lineno = fe.lineno
+
+
+ # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
+ if hunkbody:
+ # process line first
+ if re.match(r"^[- \+\\]", line):
+ # gather stats about line endings
+ if line.endswith("\r\n"):
+ self.hunkends[nextfileno-1]["crlf"] += 1
+ elif line.endswith("\n"):
+ self.hunkends[nextfileno-1]["lf"] += 1
+ elif line.endswith("\r"):
+ self.hunkends[nextfileno-1]["cr"] += 1
+
+ if line.startswith("-"):
+ hunkactual["linessrc"] += 1
+ elif line.startswith("+"):
+ hunkactual["linestgt"] += 1
+ elif not line.startswith("\\"):
+ hunkactual["linessrc"] += 1
+ hunkactual["linestgt"] += 1
+ hunkinfo.text.append(line)
+ # todo: handle \ No newline cases
+ else:
+ warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
+ # add hunk status node
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+
+ # check exit conditions
+ if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
+ warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
+ # add hunk status node
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+ elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+
+ # detect mixed window/unix line ends
+ ends = self.hunkends[nextfileno-1]
+ if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
+ warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
+ if debugmode:
+ debuglines = dict(ends)
+ debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
+ debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
+
+ if hunkskip:
+ match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
+ if match:
+ # switch to hunkhead state
+ hunkskip = False
+ hunkhead = True
+ elif line.startswith("--- "):
+ # switch to filenames state
+ hunkskip = False
+ filenames = True
+ if debugmode and len(self.source) > 0:
+ debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
+
+ if filenames:
+ if line.startswith("--- "):
+ if nextfileno in self.source:
+ warning("skipping invalid patch for %s" % self.source[nextfileno])
+ del self.source[nextfileno]
+ # double source filename line is encountered
+ # attempt to restart from this second line
+ re_filename = "^--- ([^\t]+)"
+ match = re.match(re_filename, line)
+ # todo: support spaces in filenames
+ if match:
+ self.source.append(match.group(1).strip())
+ else:
+ warning("skipping invalid filename at line %d" % lineno)
+ # switch back to headscan state
+ filenames = False
+ headscan = True
+ elif not line.startswith("+++ "):
+ if nextfileno in self.source:
+ warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
+ del self.source[nextfileno]
+ else:
+ # this should be unreachable
+ warning("skipping invalid target patch")
+ filenames = False
+ headscan = True
+ else:
+ if nextfileno in self.target:
+ warning("skipping invalid patch - double target at line %d" % lineno)
+ del self.source[nextfileno]
+ del self.target[nextfileno]
+ nextfileno -= 1
+ # double target filename line is encountered
+ # switch back to headscan state
+ filenames = False
+ headscan = True
+ else:
+ re_filename = "^\+\+\+ ([^\t]+)"
+ match = re.match(re_filename, line)
+ if not match:
+ warning("skipping invalid patch - no target filename at line %d" % lineno)
+ # switch back to headscan state
+ filenames = False
+ headscan = True
+ else:
+ self.target.append(match.group(1).strip())
+ nextfileno += 1
+ # switch to hunkhead state
+ filenames = False
+ hunkhead = True
+ nexthunkno = 0
+ self.hunks.append([])
+ self.hunkends.append(lineends.copy())
+ continue
+
+ if hunkhead:
+ match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
+ if not match:
+ if nextfileno-1 not in self.hunks:
+ warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
+ # switch to headscan state
+ hunkhead = False
+ headscan = True
+ continue
+ else:
+ # switch to headscan state
+ hunkhead = False
+ headscan = True
+ else:
+ hunkinfo.startsrc = int(match.group(1))
+ hunkinfo.linessrc = 1
+ if match.group(3): hunkinfo.linessrc = int(match.group(3))
+ hunkinfo.starttgt = int(match.group(4))
+ hunkinfo.linestgt = 1
+ if match.group(6): hunkinfo.linestgt = int(match.group(6))
+ hunkinfo.invalid = False
+ hunkinfo.text = []
+
+ hunkactual["linessrc"] = hunkactual["linestgt"] = 0
+
+ # switch to hunkbody state
+ hunkhead = False
+ hunkbody = True
+ nexthunkno += 1
+ continue
+
+ if not hunkskip:
+ warning("patch stream incomplete")
+ # sys.exit(?)
+ else:
+ # duplicated message when an eof is reached
+ if debugmode and len(self.source) > 0:
+ debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
+
+ debug("total files: %d total hunks: %d" % (len(self.source), sum(len(hset) for hset in self.hunks)))
+
+
+ def apply(self):
+ """ apply parsed patch """
+
+ total = len(self.source)
+ for fileno, filename in enumerate(self.source):
+
+ f2patch = filename
+ if not exists(f2patch):
+ f2patch = self.target[fileno]
+ if not exists(f2patch):
+ warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
+ continue
+ if not isfile(f2patch):
+ warning("not a file - %s" % f2patch)
+ continue
+ filename = f2patch
+
+ debug("processing %d/%d:\t %s" % (fileno+1, total, filename))
+
+ # validate before patching
+ f2fp = open(filename)
+ hunkno = 0
+ hunk = self.hunks[fileno][hunkno]
+ hunkfind = []
+ hunkreplace = []
+ validhunks = 0
+ canpatch = False
+ for lineno, line in enumerate(f2fp):
+ if lineno+1 < hunk.startsrc:
+ continue
+ elif lineno+1 == hunk.startsrc:
+ hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
+ hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
+ #pprint(hunkreplace)
+ hunklineno = 0
+
+ # todo \ No newline at end of file
+
+ # check hunks in source file
+ if lineno+1 < hunk.startsrc+len(hunkfind)-1:
+ if line.rstrip("\r\n") == hunkfind[hunklineno]:
+ hunklineno+=1
+ else:
+ debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
+ # file may be already patched, but we will check other hunks anyway
+ hunkno += 1
+ if hunkno < len(self.hunks[fileno]):
+ hunk = self.hunks[fileno][hunkno]
+ continue
+ else:
+ break
+
+ # check if processed line is the last line
+ if lineno+1 == hunk.startsrc+len(hunkfind)-1:
+ debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
+ hunkno+=1
+ validhunks+=1
+ if hunkno < len(self.hunks[fileno]):
+ hunk = self.hunks[fileno][hunkno]
+ else:
+ if validhunks == len(self.hunks[fileno]):
+ # patch file
+ canpatch = True
+ break
+ else:
+ if hunkno < len(self.hunks[fileno]):
+ warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
+
+ f2fp.close()
+
+ if validhunks < len(self.hunks[fileno]):
+ if self._match_file_hunks(filename, self.hunks[fileno]):
+ warning("already patched %s" % filename)
+ else:
+ warning("source file is different - %s" % filename)
+ if canpatch:
+ backupname = filename+".orig"
+ if exists(backupname):
+ warning("can't backup original file to %s - aborting" % backupname)
+ else:
+ import shutil
+ shutil.move(filename, backupname)
+ if self.write_hunks(backupname, filename, self.hunks[fileno]):
+ info("successfully patched %d/%d:\t %s" % (fileno+1, total, filename))
+ unlink(backupname)
+ else:
+ warning("error patching file %s" % filename)
+ shutil.copy(filename, filename+".invalid")
+ warning("invalid version is saved to %s" % filename+".invalid")
+ # todo: proper rejects
+ shutil.move(backupname, filename)
+
+ # todo: check for premature eof
+
+
+ def can_patch(self, filename):
+ """ Check if specified filename can be patched. Returns None if file can
+ not be found among source filenames. False if patch can not be applied
+ clearly. True otherwise.
+
+ :returns: True, False or None
+ """
+ idx = self._get_file_idx(filename, source=True)
+ if idx == None:
+ return None
+ return self._match_file_hunks(filename, self.hunks[idx])
+
+
+ def _match_file_hunks(self, filepath, hunks):
+ matched = True
+ fp = open(abspath(filepath))
+
+ class NoMatch(Exception):
+ pass
+
+ lineno = 1
+ line = fp.readline()
+ hno = None
+ try:
+ for hno, h in enumerate(hunks):
+ # skip to first line of the hunk
+ while lineno < h.starttgt:
+ if not len(line): # eof
+ debug("check failed - premature eof before hunk: %d" % (hno+1))
+ raise NoMatch
+ line = fp.readline()
+ lineno += 1
+ for hline in h.text:
+ if hline.startswith("-"):
+ continue
+ if not len(line):
+ debug("check failed - premature eof on hunk: %d" % (hno+1))
+ # todo: \ No newline at the end of file
+ raise NoMatch
+ if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
+ debug("file is not patched - failed hunk: %d" % (hno+1))
+ raise NoMatch
+ line = fp.readline()
+ lineno += 1
+
+ except NoMatch:
+ matched = False
+ # todo: display failed hunk, i.e. expected/found
+
+ fp.close()
+ return matched
+
+
+ def patch_stream(self, instream, hunks):
+ """ Generator that yields stream patched with hunks iterable
+
+ Converts lineends in hunk lines to the best suitable format
+ autodetected from input
+ """
+
+ # todo: At the moment substituted lineends may not be the same
+ # at the start and at the end of patching. Also issue a
+ # warning/throw about mixed lineends (is it really needed?)
+
+ hunks = iter(hunks)
+
+ srclineno = 1
+
+ lineends = {'\n':0, '\r\n':0, '\r':0}
+ def get_line():
+ """
+ local utility function - return line from source stream
+ collecting line end statistics on the way
+ """
+ line = instream.readline()
+ # 'U' mode works only with text files
+ if line.endswith("\r\n"):
+ lineends["\r\n"] += 1
+ elif line.endswith("\n"):
+ lineends["\n"] += 1
+ elif line.endswith("\r"):
+ lineends["\r"] += 1
+ return line
+
+ for hno, h in enumerate(hunks):
+ debug("hunk %d" % (hno+1))
+ # skip to line just before hunk starts
+ while srclineno < h.startsrc:
+ yield get_line()
+ srclineno += 1
+
+ for hline in h.text:
+ # todo: check \ No newline at the end of file
+ if hline.startswith("-") or hline.startswith("\\"):
+ get_line()
+ srclineno += 1
+ continue
+ else:
+ if not hline.startswith("+"):
+ get_line()
+ srclineno += 1
+ line2write = hline[1:]
+ # detect if line ends are consistent in source file
+ if sum([bool(lineends[x]) for x in lineends]) == 1:
+ newline = [x for x in lineends if lineends[x] != 0][0]
+ yield line2write.rstrip("\r\n")+newline
+ else: # newlines are mixed
+ yield line2write
+
+ for line in instream:
+ yield line
+
+
+ def write_hunks(self, srcname, tgtname, hunks):
+ src = open(srcname, "rb")
+ tgt = open(tgtname, "wb")
+
+ debug("processing target file %s" % tgtname)
+
+ tgt.writelines(self.patch_stream(src, hunks))
+
+ tgt.close()
+ src.close()
+ return True
+
+
+ def _get_file_idx(self, filename, source=None):
+ """ Detect index of given filename within patch.
+
+ :param filename:
+ :param source: search filename among sources (True),
+ targets (False), or both (None)
+ :returns: int or None
+ """
+ filename = abspath(filename)
+ if source == True or source == None:
+ for i,fnm in enumerate(self.source):
+ if filename == abspath(fnm):
+ return i
+ if source == False or source == None:
+ for i,fnm in enumerate(self.target):
+ if filename == abspath(fnm):
+ return i
+
+
+
+
+if __name__ == "__main__":
+ from optparse import OptionParser
+ from os.path import exists
+ import sys
+
+ opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
+ opt.add_option("-q", "--quiet", action="store_const", dest="verbosity",
+ const=0, help="print only warnings and errors", default=1)
+ opt.add_option("-v", "--verbose", action="store_const", dest="verbosity",
+ const=2, help="be verbose")
+ opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
+ (options, args) = opt.parse_args()
+
+ if not args:
+ opt.print_version()
+ opt.print_help()
+ sys.exit()
+ debugmode = options.debugmode
+ patchfile = args[0]
+ if not exists(patchfile) or not isfile(patchfile):
+ sys.exit("patch file does not exist - %s" % patchfile)
+
+
+ verbosity_levels = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}
+ loglevel = verbosity_levels[options.verbosity]
+ logformat = "%(message)s"
+ if debugmode:
+ loglevel = logging.DEBUG
+ logformat = "%(levelname)8s %(message)s"
+ logger.setLevel(loglevel)
+ loghandler.setFormatter(logging.Formatter(logformat))
+
+
+
+ patch = fromfile(patchfile)
+ #pprint(patch)
+ patch.apply()
+
+ # todo: document and test line ends handling logic - patch.py detects proper line-endings
+ # for inserted hunks and issues a warning if patched file has incosistent line ends