#! /usr/bin/env perl # # Copyright (C) 2008 Funambol, Inc. # Copyright (C) 2008-2009 Patrick Ohly # Copyright (C) 2009 Intel Corporation # # Usage: # # Either normalizes a file or compares two of them in a side-by-side # diff. # # Checks environment variables: # # CLIENT_TEST_SERVER=funambol|scheduleworld|egroupware|synthesis # Enables code which simplifies the text files just like # certain well-known servers do. This is useful for testing # to ignore the data loss introduced by these servers or (for # users) to simulate the effect of these servers on their data. # # CLIENT_TEST_CLIENT=evolution|addressbook (Mac OS X/iPhone) # Same as for servers this replicates the effect of storing # data in the clients. # # CLIENT_TEST_LEFT_NAME="before sync" # CLIENT_TEST_RIGHT_NAME="after sync" # CLIENT_TEST_REMOVED="removed during sync" # CLIENT_TEST_ADDED="added during sync" # Setting these variables changes the default legend # print above the left and right file during a # comparison. # # CLIENT_TEST_COMPARISON_FAILED=1 # Overrides the default error code when changes are found. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) version 3. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301 USA use strict; # Various crashes have been encountered in the Perl interpreter # executable when enabling UTF-8. It is only needed for nicer # side-by-side comparison of changes (correct column width), # so not much functionality is lost by disabling this. # use encoding 'utf8'; # Instead enable writing the result as UTF-8. Input # files are read as UTF-8 via PerlIO parameters in open(). binmode(STDOUT, ":utf8"); use Algorithm::Diff; use MIME::Base64; use Digest::MD5 qw(md5 md5_hex md5_base64); # ignore differences caused by specific servers or local backends? my $server = $ENV{CLIENT_TEST_SERVER}; my $client = $ENV{CLIENT_TEST_CLIENT} || "evolution"; my $scheduleworld = $server =~ /scheduleworld/; my $synthesis = $server =~ /synthesis/; my $zyb = $server =~ /zyb/; my $mobical = $server =~ /mobical/; my $memotoo = $server =~ /memotoo/; my $nokia_7210c = $server =~ /nokia_7210c/; my $ovi = $server =~ /Ovi/; my $unique_uid = $ENV{CLIENT_TEST_UNIQUE_UID}; my $full_timezones = $ENV{CLIENT_TEST_FULL_TIMEZONES}; # do not simplify VTIMEZONE definitions my $no_timezones = $ENV{CLIENT_TEST_NO_TIMEZONES}; # TODO: this hack ensures that any synchronization is limited to # properties supported by Synthesis. Remove this again. # $synthesis = 1; my $exchange = $server =~ /exchange/; # Exchange via ActiveSync my $egroupware = $server =~ /egroupware/; my $funambol = $server =~ /funambol/; my $googlesyncml = $server eq "google"; my $googlecaldav = $server eq "googlecalendar"; my $googlecarddav = $server eq "googlecontacts"; my $googleeas = $server eq "googleeas"; my $google_valarm = $ENV{CLIENT_TEST_GOOGLE_VALARM}; my $yahoo = $server =~ /yahoo/; my $davical = $server =~ /davical/; my $apple = $server =~ /apple/; my $oracle = $server =~ /oracle/; my $radicale = $server =~ /radicale/; my $zimbra = $server =~ /zimbra/; my $evolution = $client =~ /evolution/; my $addressbook = $client =~ /addressbook/; my $akonadi = $server =~ /kde/; sub Usage { print "$0 = 60) { $minutes++; $seconds -= 60; } while ($minutes >= 60) { $hours++; $minutes -= 60; } while ($hours >= 24) { $days++; $hours -= 24; } $value = $sign; $value .= ($days . "D") if $days; $value .= ($hours . "H") if $hours; $value .= ($minutes . "M") if $minutes; $value .= ($seconds . "S") if $seconds; return $value; } # decode base64 string, return size and hash sub describeBase64 { my $data = decode_base64($1); return sprintf("%d b64 characters = %d bytes, %s md5sum", length($1), length($data), md5_hex($data)); } # called for one VCALENDAR (with single VEVENT/VTODO/VJOURNAL) or VCARD, # returns normalized one sub NormalizeItem { my $width = shift; $_ = shift; # Reduce \N to \n (both are allowed in vCard 3.0). # Using a regular expression is a bit too broad # because it also matches \\N, which must not be # changed. s/\\N/\\n/g; # Ignore blank lines. Akonadi inserts them. s/\n{2,}/\n/s; # undo line continuation s/\n\s//gs; # ignore charset specifications, assume UTF-8 s/;CHARSET="?UTF-8"?//g; # UID may differ, but only in vCards and journal entries: # in calendar events the UID needs to be preserved to handle # meeting invitations/replies correctly s/((VCARD|VJOURNAL).*)^UID:[^\n]*\n/$1/msg; # intentional changes to UID are acceptable when running with CLIENT_TEST_UNIQUE_UID if ($unique_uid) { s/UID:UNIQUE-UID-\d+-/UID:/g; } # merge all CATEGORIES properties into one comma-separated one while ( s/^CATEGORIES:([^\n]*)\n(.*)^CATEGORIES:([^\n]*)\n/CATEGORIES:$1,$3\n$2/ms ) {} # exact order of categories is irrelevant s/^CATEGORIES:(\S+)/"CATEGORIES:" . sortlist($1)/mge; # expand shortcuts to TYPE= while (s/^(ADR|EMAIL|TEL)([^:\n]*);(HOME|OTHER|WORK|PARCEL|INTERNET|CAR|VOICE|CELL|PAGER)/$1;TYPE=$3/mg) {} # the distinction between an empty and a missing property # is vague and handled differently, so ignore empty properties s/^[^:\n]*:;*\n//mg; # use separate TYPE= fields while( s/^(\w*[^:\n]*);TYPE=(\w*),(\w*)/$1;TYPE=$2;TYPE=$3/mg ) {} # make TYPE uppercase (in vCard 3.0 at least those parameters are case-insensitive) while( s/^(\w*[^:\n]*);TYPE=(\w*?[a-z]\w*?)([;:])/ $1 . ";TYPE=" . uppercase($2) . $3 /mge ) {} # Replace parameters with a sorted parameter list. Cannot be done with # a regular expression because of quoted strings. While we know exact # parameter values, normalize them to use quoted strings if and only # if the content is more complex than alphanumeric plus underscore and # hyphen. my @lines; my ($propname, $sep, $rest); foreach (split /\n/) { ($propname, $sep, $rest) = /^([^;:]+)([:;])(.*)/; if ($sep eq ";") { my @params; my $c; my $i = 0; my $n = length($rest); my $quoted = 0; my $start = 0; while ($i < $n) { $c = substr($rest, $i, 1); $i++; if ($quoted) { if ($c eq '"') { $quoted = 0; } } else { if ($c eq '"') { $quoted = 1; } elsif ($c eq ';' || $c eq ':') { my $param = substr($rest, $start, $i - $start - 1); my ($name, $value) = $param =~ /^([^=]*)="?([^"]*)"?$/; if ($value =~ /^[a-zA-Z0-9_-]*$/) { $param = $name . '=' . $value; } else { $param = $name . '="' . $value . '"'; } push @params, $param; $start = $i; if ($c eq ':') { last; } } } } $_ = $propname . ';' . join(";", sort(@params)) . ':' . substr($rest, $start); } push @lines, $_; } $_ = join("\n", @lines); # VALUE=DATE is the default, no need to show it s/^(EXDATE|BDAY);VALUE=DATE:/\1:/mg; # default opacity is OPAQUE s/^TRANSP:OPAQUE\r?\n?//gm; # multiple EXDATEs may be joined into one, use separate properties as normal form s/^(EXDATE[^:]*):(.*)(\r?\n)/splitvalue($1, $2, $3)/mge; # sort value lists of specific properties s!^(RRULE.*):(.*)!$1 . ":" . join(';',sort(split(/;/, $2)))!meg; # INTERVAL=1 is the default and thus can be removed s/^RRULE(.*?);INTERVAL=1(;|$)/RRULE$1$2/mg; # Ignore remaining "other" email, address and telephone type - this is # an Evolution specific extension which might not be preserved. s/^(ADR|EMAIL|TEL)([^:\n]*);TYPE=OTHER/$1$2/mg; # TYPE=PREF on the other hand is not used by Evolution, but # might be sent back. s/^(ADR|EMAIL)([^:\n]*);TYPE=PREF/$1$2/mg; # Evolution does not need TYPE=INTERNET for email s/^(EMAIL)([^:\n]*);TYPE=INTERNET/$1$2/mg; # ignore TYPE=PREF in address, does not matter in Evolution s/^((ADR|LABEL)[^:\n]*);TYPE=PREF/$1/mg; # ignore extra separators in multi-value fields s/^((ORG|N|(ADR[^:\n]*?)):.*?);*$/$1/mg; # the type of certain fields is ignore by Evolution s/^X-(AIM|GROUPWISE|ICQ|YAHOO);TYPE=HOME/X-$1/gm; # Evolution ignores an additional pager type s/^TEL;TYPE=PAGER;TYPE=WORK/TEL;TYPE=PAGER/gm; # PAGER property is sent by Evolution, but otherwise ignored s/^LABEL[;:].*\n//mg; # TYPE=VOICE is the default in Evolution and may or may not appear in the vcard; # this simplification is a bit too agressive and hides the problematic # TYPE=PREF,VOICE combination which Evolution does not handle :-/ s/^TEL([^:\n]*);TYPE=VOICE,([^:\n]*):/TEL$1;TYPE=$2:/mg; s/^TEL([^:\n]*);TYPE=([^;:\n]*),VOICE([^:\n]*):/TEL$1;TYPE=$2$3:/mg; s/^TEL([^:\n]*);TYPE=VOICE([^:\n]*):/TEL$1$2:/mg; # don't care about the TYPE property of PHOTOs s/^PHOTO;(.*)TYPE=[A-Z]*/PHOTO;$1/mg; # encoding is not case sensitive, skip white space in the middle of binary data if (s/^PHOTO;.*?ENCODING=(b|B|BASE64).*?:\s*/PHOTO;ENCODING=B: /mgi) { if ($memotoo) { # transcodes image data, can't compare it s/(^PHOTO.*:).*/$1/mg; } else { while (s/^PHOTO(.*?): (\S+)[\t ]+(\S+)/PHOTO$1: $2$3/mg) {} } } # Don't show base64 encoded PHOTO data (makes diff very long). Instead # decode and show size + hash. s/^PHOTO;ENCODING=B: (.*)$/"PHOTO: " . describeBase64($1)/mge; # special case for the inlining of the local test case PHOTO s!^PHOTO;;VALUE=uri:file://testcases/local.png$!PHOTO;;VALUE=uri:!m; s!^PHOTO;ENCODING=B: iVBORw0KGgoAAAANSUh.*UQOVkeH/aKBSLM04QlMqAAFNBTl\+CjN9AAAAAElFTkSuQmCC$!PHOTO;;VALUE=uri:!m; # ignore extra day factor in front of weekday s/^RRULE:(.*)BYDAY=\+?1(\D)/RRULE:$1BYDAY=$2/mg; # remove default VALUE=DATE-TIME s/^(DTSTART|DTEND)([^:\n]*);VALUE=DATE-TIME/$1$2/mg; # remove default LANGUAGE=en-US s/^([^:\n]*);LANGUAGE=en-US/$1/mg; # normalize values which look like a date to YYYYMMDD because the hyphen is optional s/:(\d{4})-(\d{2})-(\d{2})/:$1$2$3/g; # mailto is case insensitive s/^((ATTENDEE|ORGANIZER).*):[Mm][Aa][Ii][Ll][Tt][Oo]:/$1:mailto:/mg; # remove fields which may differ s/^(PRODID|CREATED|DTSTAMP|LAST-MODIFIED|REV)(;X-VOBJ-FLOATINGTIME-ALLOWED=(TRUE|FALSE))?:.*\r?\n?//gm; # remove optional properties and parameters s/^(METHOD|X-WSS-[A-Z]*|X-WR-[A-Z]*|CALSCALE|X-KDE-ICAL-IMPLEMENTATION-VERSION|X-KDE-KCALCORE-ENABLED):.*\r?\n?//gm; s/^(ATTENDEE[^:]*);X-UID=[^;:]*/$1/mg; # trailing line break(s) in a DESCRIPTION may or may not be # removed or added by servers s/^DESCRIPTION:(.*?)(\\n)+$/DESCRIPTION:$1/gm; # use the shorter property name when there are alternatives, # but avoid duplicates foreach my $i ("SPOUSE", "MANAGER", "ASSISTANT", "ANNIVERSARY") { if (/^X-\Q$i\E:(.*?)$/m) { s/^X-EVOLUTION-\Q$i\E:\Q$1\E\n//m; } } s/^X-EVOLUTION-(SPOUSE|MANAGER|ASSISTANT|ANNIVERSARY)/X-$1/gm; # some properties are always lost because we don't transmit them if ($ENV{CLIENT_TEST_SERVER}) { s/^(X-FOOBAR-EXTENSION|X-TEST)(;[^:;\n]*)*:.*\r?\n?//gm; } # if there is no DESCRIPTION in a VJOURNAL, then use the # summary: that's what is done when exchanging such a # VJOURNAL as plain text if (/^BEGIN:VJOURNAL$/m && !/^DESCRIPTION/m) { s/^SUMMARY:(.*)$/SUMMARY:$1\nDESCRIPTION:$1/m; } # strip configurable X- parameters or properties my $strip = $ENV{CLIENT_TEST_STRIP_PROPERTIES}; if ($strip) { s/^$strip(;[^:;\n]*)*:.*\r?\n?//gm; } $strip = $ENV{CLIENT_TEST_STRIP_PARAMETERS}; if ($strip) { while (s/^(\w+)([^:\n]*);$strip=\d+/$1$2/mg) {} } # strip redundant VTIMEZONE definitions (happen to be # added by Google CalDAV server when storing an all-day event # which doesn't need any time zone definition) # http://code.google.com/p/google-caldav-issues/issues/detail?id=63 # # Also strip all definitions if requested. while (m/(BEGIN:VTIMEZONE.*?TZID:([^\n]*)\n.*?END:VTIMEZONE\n)/gs) { my $def = $1; my $tzid = $2; # Strip all, or not used as parameter? if ($no_timezones || ! m/;TZID="?\Q$tzid\E"?/) { # no, remove definition s!\Q$def\E!!s; } } if (!$full_timezones) { # Strip trailing digits from TZID. They are appended by # Evolution and SyncEvolution to distinguish VTIMEZONE # definitions which have the same TZID, but different rules. # This appending of digits may even get repeated, leading to: # TZID=EST/EDT 1 1 s/(^TZID:|;TZID=)([^;:]*?)( \d+)+/$1$2/gm; # Strip trailing -(Standard) from TZID. Evolution 2.24.5 adds # that (not sure exactly where that comes from). s/(^TZID:|;TZID=)([^;:]*?)-\(Standard\)/$1$2/gm; # VTIMEZONE and TZID do not have to be preserved verbatim as long # as the replacement is still representing the same timezone. # Reduce TZIDs which specify a proper location # to their location part and strip the VTIMEZONE - makes the # diff shorter, too. my $location = "[^\n]*((?:Africa|America|Antarctica|Arctic|Asia|Atlantic|Australia|Brazil|Canada|Chile|Egypt|Eire|Europe|Hongkong|Iceland|India|Iran|Israel|Jamaica|Japan|Kwajalein|Libya|Mexico|Mideast|Navajo|Pacific|Poland|Portugal|Singapore|Turkey|Zulu)[-a-zA-Z0-9_/]*)"; s;^BEGIN:VTIMEZONE.*?^TZID:$location.*^END:VTIMEZONE;BEGIN:VTIMEZONE\n TZID:$1 [...]\nEND:VTIMEZONE;gms; s;TZID="?$location"?;TZID=$1;gm; } # normalize iCalendar 2.0 if (/^BEGIN:(VEVENT|VTODO|VJOURNAL)$/m) { # CLASS=PUBLIC is the default, no need to show it s/^CLASS:PUBLIC\r?\n//m; # RELATED=START is the default behavior s/^TRIGGER([^\n:]*);RELATED=START/TRIGGER$1/mg; # VALUE=DURATION is the default behavior s/^TRIGGER([^\n:]*);VALUE=DURATION/TRIGGER$1/mg; s/^(TRIGGER.*):(\S*)/$1 . ":" . NormalizeTrigger($2)/mge; # INDIVIDUAL is default for CUTYPE. s/;CUTYPE=INDIVIDUAL([;:])/$1/mg; # Print without quotation marks (probably not save in general, but okay for our test data). s/;CN="([^;]*)"/;CN=$1/g; } # Added by EDS >= 2.32, presumably to cache some internal computation. # Because it can be recreated, it doesn't have to be preserved during # sync and such changes can be ignored: # # RRULE:BYDAY=SU;COUNT=10;FREQ=WEEKLY | RRULE;X-EVOLUTION-ENDDATE=20080608T # > 070000Z:BYDAY=SU;COUNT=10;FREQ=WEEK # > LY s/^(\w+)([^:\n]*);X-EVOLUTION-ENDDATE=[0-9TZ]*/$1$2/mg; if ($scheduleworld || $egroupware || $synthesis || $addressbook || $funambol ||$googlesyncml || $googleeas || $googlecarddav || $mobical || $memotoo || $zimbra) { # does not preserve X-EVOLUTION-UI-SLOT= s/^(\w+)([^:\n]*);X-EVOLUTION-UI-SLOT=\d+/$1$2/mg; } if ($scheduleworld) { # cannot distinguish EMAIL types s/^EMAIL;TYPE=\w*/EMAIL/mg; # replaces certain TZIDs with more up-to-date ones s;TZID(=|:)/(scheduleworld.com|softwarestudio.org)/Olson_\d+_\d+/;TZID$1/foo.com/Olson_20000101_1/;mg; } if ($synthesis || $mobical) { # only preserves ORG "Company", but loses "Department" and "Office" s/^ORG:([^;:\n]+)(;[^\n]*)/ORG:$1/mg; } if ($funambol) { # only preserves ORG "Company";"Department", but loses "Office" s/^ORG:([^;:\n]+)(;[^;:\n]*)(;[^\n]*)/ORG:$1$2/mg; # drops the second address line s/^ADR(.*?):([^;]*?);[^;]*?;/ADR$1:$2;;/mg; # has no concept of "preferred" phone number s/^(TEL.*);TYPE=PREF/$1/mg; } if($googlesyncml || $googleeas || $googlecarddav) { # ignore the PHOTO encoding data s/^PHOTO(.*?): .*\n/PHOTO$1: [...]\n/mg; } if($googlesyncml || $googlecarddav) { # FN property gets synthesized by Google. s/^FN:.*\n/FN$1: [...]\n/mg; } # Properties and parameters are case-insensitive. ownCloud uses # X-ABLABEL while everyone else uses X-ABLabel. s/X-ABLABEL/X-ABLabel/g; if ($googlesyncml) { # Not support car type in telephone s!^TEL\;TYPE=CAR(.*)\n!TEL$1\n!mg; # some properties are lost s/^(X-EVOLUTION-FILE-AS|NICKNAME|BDAY|CATEGORIES|CALURI|FBURL|GEO|ROLE|URL|X-AIM|X-EVOLUTION-UI-SLOT|X-ANNIVERSARY|X-ASSISTANT|X-EVOLUTION-BLOG-URL|X-EVOLUTION-VIDEO-URL|X-GROUPWISE|X-ICQ|X-GADUGADU|X-JABBER|X-MSN|X-SIP|X-SKYPE|X-MANAGER|X-SPOUSE|X-MOZILLA-HTML|X-YAHOO)(;[^:;\n]*)*:.*\r?\n?//gm; } if ($googlecaldav) { #several properties are not preserved by Google in icalendar2.0 format s/^(SEQUENCE|X-EVOLUTION-ALARM-UID|TRANSP)(;[^:;\n]*)*:.*\r?\n?//gm; # Google adds calendar owner as attendee of meetings, regardless # whether it was on the original attendee list. Ignore this # during testing by removing all attendees with @googlemail.com # email address. s/^ATTENDEE.*googlemail.com\r?\n//gm; } if ($apple) { # remove some parameters added by Apple Calendar server in CalDAV s/^(ORGANIZER[^:]*);SCHEDULE-AGENT=NONE/$1/gm; s/^(ORGANIZER[^:]*);SCHEDULE-STATUS=5.3/$1/gm; # seems to require a fixed number of recurrences; hmm, okay... s/^RRULE:COUNT=400;FREQ=DAILY/RRULE:FREQ=DAILY/gm; } if ($oracle) { # remove extensions added by server s/^(X-S1CS-RECURRENCE-COUNT)(;[^:;\n]*)*:.*\r?\n?//gm; # ignore loss of LANGUAGE=xxx property in ATTENDEE s/^ATTENDEE([^\n:]*);LANGUAGE=([^\n;:]*)/ATTENDEE$1/mg; } if ($radicale) { # remove extensions added by server s/^(X-RADICALE-NAME)(;[^:;\n]*)*:.*\r?\n?//gm; } if ($googlecaldav || $yahoo) { # default status is CONFIRMED s/^STATUS:CONFIRMED\r?\n?//gm; } # Ignore VALARM ACTION:NONE. This has to be added to avoid default alarms in Google CalDAV. s/^BEGIN:VALARM\r?\n.*?^ACTION:NONE\r?\n.*?^END:VALARM\r?\n//ms; if ($yahoo) { s/^(X-MICROSOFT-[-A-Z0-9]*)(;[^:;\n]*)*:.*\r?\n?//gm; # some properties cannot be stored s/^(FN)(;[^:;\n]*)*:.*\r?\n?//gm; } if ($addressbook) { # some properties cannot be stored s/^(X-MOZILLA-HTML|X-EVOLUTION-FILE-AS|X-EVOLUTION-ANNIVERSARY|X-EVOLUTION-BLOG-URL|X-EVOLUTION-VIDEO-URL|X-GROUPWISE|ROLE|CATEGORIES|FBURL|CALURI|FN)(;[^:;\n]*)*:.*\r?\n?//gm; # only some parts of ADR are preserved my $type; s/^ADR(.*?)\:(.*)/$type=($1 || ""); @_ = split(\/(?[1] - $b->[1]) || # Compare indention, more indented last. ($b->[0] - $a->[0]) || # Compare importance, less important last. $a->[3] cmp $b->[3] } # Compare property name, parameters and value without group tag. @body ); # Combine lines with the same group tag. my %tags; my @tagged; my $tag; my $entry; my $index; foreach (@sorted) { $tag = $_->[2]; # Has a line a group tag? if ($tag) { # Same as one found before? $index = $tags{$tag}; if (defined($index)) { # Append to previous instance of the tag, keeping tag indices the same. push @{$tagged[$index]}, $_; } else { # Add at end, remember index for next line with the same tag. push @tagged, $_; $tags{$tag} = $#tagged; } } else { push @tagged, $_; } } # Convert back into individual, indented text lines. my @expanded; foreach (@tagged) { if ($_->[2]) { if ($#{$_} == 4) { # Simplify IMPP + X-ABLabel:Other to just IMPP without group tag. # For the sake of simplicity we only do that if the number of # grouped properties is exactly two. Otherwise we would have # to search in the list of extra properties. if ($_->[3] =~ /^IMPP[;:]/ && $_->[4][3] =~ /^X-ABLabel:Other$/) { splice(@{$_}, 4); } } if ($#{$_} == 3) { # If the last remaining property is X-ABLabel, then ignore it. # We ignore empty properties, which can cause their labels to # be left as redundant information (happens with Google CardDAV # when sending an empty URL). if ($_->[3] =~ "X-ABLabel:") { next; } # Remove redundant group tag from other properties. $_->[2] = ""; } } push @expanded, (" " x $_->[1]) . ($_->[2] ? "- " : "") . $_->[3]; if ($#{$_} > 3) { foreach ($_->[4,-1]) { push @expanded, (" " x $_->[1]) . " " . $_->[3]; } } } # Create one BEGIN/END block. $_ = join("\n", $begin, @expanded, $end); push @{$formatted[$#formatted]}, $_; } } return ${$formatted[0]}[0]; } # parameters: text, width to use for reformatted lines # returns list of lines without line breaks sub Normalize { $_ = shift; my $width = shift; s/\r//g; my @items = (); # split into individual items foreach $_ ( split( /(?:(?<=\nEND:VCARD)|(?<=\nEND:VCALENDAR))\n*/ ) ) { if (/END:VEVENT\s+BEGIN:VEVENT/s) { # remove multiple events from calendar item s/(BEGIN:VEVENT.*END:VEVENT\n)//s; my $events = $1; my $calendar = $_; my $event; # inject every single one back into the calendar and process the result foreach $event ( split ( /(?:(?<=\nEND:VEVENT))\n*/, $events ) ) { $_ = $calendar; s/\nEND:VCALENDAR/\n$event\nEND:VCALENDAR/; push @items, NormalizeItem($width, $_); } } else { # already a single item push @items, NormalizeItem($width, $_); } } return split( /\n/, join( "\n\n", sort @items )); } # number of columns available for output: # try tput without printing the shells error if not found, # default to 80 my $columns = `which tput >/dev/null 2>/dev/null && tput 2>/dev/null && tput cols`; if ($? || !$columns) { $columns = 80; } if($#ARGV > 1) { # error Usage(); exit 1; } elsif($#ARGV == 1) { # comparison my ($file1, $file2) = ($ARGV[0], $ARGV[1]); my $singlewidth = int(($columns - 3) / 2); $columns = $singlewidth * 2 + 3; my @normal1; my @normal2; if (-d $file1 && -d $file2) { # Both "files" are really directories of individual files. # Don't include files in the comparison which are known # to be identical because the refer to the same inode. # - build map from inode to filename(s) (each inode might be used more than once!) my %files1; my %files2; my @content1; my @content2; my $inode; my $fullname; my $entry; opendir(my $dh, $file1) || die "cannot read $file1: $!"; foreach $entry (grep { -f "$file1/$_" } readdir($dh)) { $fullname = "$file1/$entry"; $inode = (stat($fullname))[1]; if (!$files1{$inode}) { $files1{$inode} = []; } push(@{$files1{$inode}}, $entry); } closedir($dh); # - remove common files, read others opendir(my $dh, $file2) || die "cannot read $file2: $!"; foreach $entry (grep { -f "$file2/$_" } readdir($dh)) { $fullname = "$file2/$entry"; $inode = (stat($fullname))[1]; if (@{$files1{$inode}}) { # randomly match against the last file pop @{$files1{$inode}}; } else { open(IN, "<:utf8", "$fullname") || die "$fullname: $!"; push @content2, ; } } # - read remaining entries from first dir foreach my $array (values %files1) { foreach $entry (@{$array}) { $fullname = "$file1/$entry"; open(IN, "<:utf8", "$fullname") || die "$fullname: $!"; push @content1, ; } } my $content1 = join("", @content1); my $content2 = join("", @content2); @normal1 = Normalize($content1, $singlewidth); @normal2 = Normalize($content2, $singlewidth); } else { if (-d $file1) { open(IN1, "-|:utf8", "find $file1 -type f -print0 | xargs -0 cat") || die "$file1: $!"; } else { open(IN1, "<:utf8", $file1) || die "$file1: $!"; } if (-d $file2) { open(IN2, "-|:utf8", "find $file2 -type f -print0 | xargs -0 cat") || die "$file2: $!"; } else { open(IN2, "<:utf8", $file2) || die "$file2: $!"; } my $buf1 = join("", ); my $buf2 = join("", ); @normal1 = Normalize($buf1, $singlewidth); @normal2 = Normalize($buf2, $singlewidth); close(IN1); close(IN2); } # Produce output where each line is marked as old (aka remove) with o, # as new (aka added) with n, and as unchanged with u at the beginning. # This allows simpler processing below. my $res = 0; if (0) { # $_ = `diff "--old-line-format=o %L" "--new-line-format=n %L" "--unchanged-line-format=u %L" "$normal1" "$normal2"`; # $res = $?; } else { # convert into same format as diff above - this allows reusing the # existing output formatting code my $diffs_ref = Algorithm::Diff::sdiff(\@normal1, \@normal2); @_ = (); my $hunk; foreach $hunk ( @{$diffs_ref} ) { my ($type, $left, $right) = @{$hunk}; if ($type eq "-") { push @_, "o $left"; $res = 1; } elsif ($type eq "+") { push @_, "n $right"; $res = 1; } elsif ($type eq "c") { push @_, "o $left"; push @_, "n $right"; $res = 1; } else { push @_, "u $left"; } } $_ = join("\n", @_); } if ($res) { print $ENV{CLIENT_TEST_HEADER}; printf "%*s | %s\n", $singlewidth, ($ENV{CLIENT_TEST_LEFT_NAME} || "before sync"), ($ENV{CLIENT_TEST_RIGHT_NAME} || "after sync"); printf "%*s <\n", $singlewidth, ($ENV{CLIENT_TEST_REMOVED} || "removed during sync"); printf "%*s > %s\n", $singlewidth, "", ($ENV{CLIENT_TEST_ADDED} || "added during sync"); print "-" x $columns, "\n"; # fix confusing output like: # BEGIN:VCARD BEGIN:VCARD # > N:new;entry # > FN:new # > END:VCARD # > # > BEGIN:VCARD # and replace it with: # > BEGIN:VCARD # > N:new;entry # > FN:new # > END:VCARD # # BEGIN:VCARD BEGIN:VCARD # # With the o/n/u markup this presents itself as: # u BEGIN:VCARD # n N:new;entry # n FN:new # n END:VCARD # n # n BEGIN:VCARD # # The alternative case is also possible: # o END:VCARD # o # o BEGIN:VCARD # o N:old;entry # u END:VCARD # case one above while( s/^u BEGIN:(VCARD|VCALENDAR)\n((?:^n .*\n)+?)^n BEGIN:/n BEGIN:$1\n$2u BEGIN:/m) {} # same for the other direction while( s/^u BEGIN:(VCARD|VCALENDAR)\n((?:^o .*\n)+?)^o BEGIN:/o BEGIN:$1\n$2u BEGIN:/m) {} # case two while( s/^o END:(VCARD|VCALENDAR)\n((?:^o .*\n)+?)^u END:/u END:$1\n$2o END:/m) {} while( s/^n END:(VCARD|VCALENDAR)\n((?:^n .*\n)+?)^u END:/u END:$1\n$2n END:/m) {} # split at end of each record my $spaces = " " x $singlewidth; foreach $_ (split /(?:(?<=. END:VCARD\n)|(?<=. END:VCALENDAR\n))(?:^. \n)*/m, $_) { # ignore unchanged records if (!length($_) || /^((u [^\n]*\n)*(u [^\n]*?))$/s) { next; } # make all lines equally long in terms of printable characters s/^(.*)$/$1 . (" " x ($singlewidth + 2 - length($1)))/gme; # convert into side-by-side output my @buffer = (); foreach $_ (split /\n/, $_) { if (/^u (.*)/) { print join(" <\n", @buffer), " <\n" if $#buffer >= 0; @buffer = (); print $1, " ", $1, "\n"; } elsif (/^o (.*)/) { # preserve in buffer for potential merging with "n " push @buffer, $1; } else { /^n (.*)/; # have line to be merged with? if ($#buffer >= 0) { print shift @buffer, " | ", $1, "\n"; } else { print join(" <\n", @buffer), " <\n" if $#buffer >= 0; print $spaces, " > ", $1, "\n"; } } } print join(" <\n", @buffer), " <\n" if $#buffer >= 0; @buffer = (); print "-" x $columns, "\n"; } } # unlink($normal1); # unlink($normal2); exit($res ? ((defined $ENV{CLIENT_TEST_COMPARISON_FAILED}) ? int($ENV{CLIENT_TEST_COMPARISON_FAILED}) : 1) : 0); } else { # normalize my $in; if( $#ARGV >= 0 ) { my $file1 = $ARGV[0]; if (-d $file1) { open(IN, "-|:utf8", "find $file1 -type f -print0 | xargs -0 cat") || die "$file1: $!"; } else { open(IN, "<:utf8", $file1) || die "$file1: $!"; } $in = *IN{IO}; } else { $in = *STDIN{IO}; } my $buf = join("", <$in>); print STDOUT join("\n", Normalize($buf, $columns)), "\n"; }