#!/usr/bin/perl 
#
# Mark Doll <markdoll@gmx.net>, 2006.
#
# This is a perl implementation of TeXdiff, derived from the original bash and
# perl scripts created by Robert Maron <robmar@mimuw.edu.pl>, available at
# http://www.robmar.net/TexDiff/. This version of texdiff is available at
# http://mark.doll.name/texdiff/.
#
# usage: texdiff old.tex new.tex [diff.tex]
#
# requires the following LaTeX code in the preamble of the LaTeX document:
#
# \usepackage{color} \usepackage{ulem} \usepackage{changebar}
# \newcommand\TLSins[1]{\cbstart{}\textcolor{ins}{\uline{#1}}\cbend{}}
# \newcommand\TLSdel[1]{\cbdelete{}\textcolor{del}{\sout{#1}}}
#
# These macros are NOT automatically inserted at \begin{document} like the
# original scripts did it, because this will not work on files of a
# multi-file document (those included by \include{} or \input{}).  Furthermore,
# if you insert the macros manually, you can tune them as you like (i. e. change
# text color or position of changebars). Have a look at the documentation of
# the ulem and changebar packages on how to do this!
# 
use strict;

# Command-line arguments: the old and the new LaTeX file plus an optional
# output file name.
my ($in1,$in2,$out) = @ARGV;

# Both input files are mandatory; stop with a usage message instead of
# carrying on with an undefined file name.
unless (defined($in1) and defined($in2)) {
    die("usage: texdiff old.tex new.tex [diff.tex]\n");
}

# Write comment-free copies of both inputs to per-process temp files in the
# current directory ($$ is the process id).
write_temp($in1,'tmp1.'.$$);
write_temp($in2,'tmp2.'.$$);

# Let wdiff wrap deleted words in \TLSdel{ ... }TLEdel and inserted words in
# \TLSins{ ... }TLEins.  The TLEdel/TLEins suffixes are temporary end markers
# that the substitutions below rely on.  --avoid-wraps keeps a marked region
# from extending across a newline.
my $wdiff_cmd = ("wdiff --avoid-wraps ".
                 "--start-delete=\'\\TLSdel{\' --end-delete=\'}TLEdel\' ".
                 "--start-insert=\'\\TLSins{\' --end-insert=\'}TLEins\' ".
                 "tmp1.$$ tmp2.$$");

my $buf = `$wdiff_cmd`;

# wdiff exits with status 0 (no differences) or 1 (differences found) on
# success.  Anything else means it failed, was killed by a signal, or could not
# be started at all; clean up and stop rather than emit an empty diff.
if ($? == -1 or ($? & 127) or ($? >> 8) > 1) {
    unlink('tmp1.'.$$, 'tmp2.'.$$);
    die("texdiff: wdiff failed, no diff written\n");
}

## original stuff, uncomment if you need it!
# make sure that the author-thanks argument is contiguous.
#my $tmp = $1 if $buf =~ m/(\\thanks\{.*?\})/s;
#$tmp  =~ s/\n\s+/\n/sg;
#$buf =~ s/\\thanks\{.*?\}/$tmp/s; 
#
# ifthenelse fails with TLSins
#$buf =~ s/\\TLS(ins|del)\{(\\ifthenelse[\{\}\\\w]+)\}TLE$1/$2/sg;
#
# ... as do figures and tables
#$buf =~ s/\\TLSdel\{(\\begin\{(figure|table).*)\}TLEdel//g;
#$buf =~ s/\\TLSins\{(\\begin\{(figure|table).*)\}TLEins/$1/g;

# ... and the bibliography, graphics, and most other commands
## Put any of your home brew macros (like my Abb) in here!
#
# A deleted region containing one of these commands is dropped completely
# (together with a directly following newline); an inserted region containing
# one keeps its text but loses the \TLSins{ ... }TLEins wrapper.
#
# The text between the start marker and the command, and between the command
# and the end marker, must not contain an end marker itself.  A plain ".*?"
# could start in one marked region, run across unchanged text and finish in a
# later region on the same line, which would delete the unchanged text (del)
# or leave unbalanced braces behind (ins).
my $del_body = qr/(?:(?!\}TLEdel).)*?/;   # stays inside one deleted region
my $ins_body = qr/(?:(?!\}TLEins).)*?/;   # stays inside one inserted region
foreach my $term ( qw( Abb label ref cite if biblio includegraphics setboolean ) ) {
    $buf =~ s/\\TLSdel\{$del_body\\$term$del_body\}TLEdel\n?//g;
    $buf =~ s/\\TLSins\{($ins_body\\$term$ins_body)\}TLEins/$1/g;
}

# Remove any marked region (del or ins) that is empty or contains only white
# space, e.g. blank lines that wdiff reported as changed.  The backreference
# \1 makes sure the end marker belongs to the same kind of region.
$buf =~ s/\\TLS(ins|del)\{\s*\}TLE\1//g;

# If an opening brace (and a command and alike preceding it) is included in a
# wdiff, move the brace (and command) out of the new string and prepend it to
# the TLSdel command, effectively moving both TLSdel and the following TLSins
# command inside the braces (the parameter) of the command.
#
# The pattern matches a deletion that is followed, after white space, by an
# insertion, where both start with the same prefix (\2): either a run of
# non-space characters ending in an unescaped "{" or a lone "{".  The prefix
# is emitted once in front of \TLSdel{, dropped from the insertion, and the
# white space between the two regions becomes a single space.
$buf =~ s/(\\TLSdel\{)(\S*?[^\\]\{|\{)(.*?\}TLEdel)\s+?(\\TLSins\{)\2(.*?\}TLEins)/$2$1$3 $4$5/g;

# If closing braces are included in a wdiff, remove them from the deleted old
# string, thus only leaving them in the new one which comes last. As a sanity
# check, an opening brace prevents this removal. A correct handling would
# require counting pairs of braces and removing only unmatched closing braces.
#
# Only one closing brace is dropped: the "}" that ended the deleted text is
# reused as the brace of the }TLEdel end marker.  The white space between the
# two regions becomes a single space.
$buf =~ s/(\\TLSdel\{[^\{]*?\})\}TLEdel\s+?(\\TLSins\{[^\{]*?\}\}TLEins)/$1TLEdel $2/g;

# For removed/inserted sections, move wdiff inside; changed sections have been
# handled by the preceding rules; in case of removal, prevent renumbering by
# using asterisk form, in case of insertion, add short form without macros to
# prevent problems with texing
#
# Removal:   \TLSdel{\section{Title}}TLEdel
#         -> \section*{\TLSdel{Title}}TLEdel
# Insertion: \TLSins{\section{Title}}TLEins
#         -> \section[Title]{\TLSins{Title}TLEins}
#
# The backslashes after $term in the patterns and replacements keep Perl from
# reading "$term{" or "$term[" as a hash or array element.
foreach my $term ( qw( chapter section subsection subsubsection paragraph ) ) {
    $buf =~ s/\\TLSdel\{\\$term\{(.*?\}\}TLEdel)/\\$term\*\{\\TLSdel\{$1/g;
    $buf =~ s/\\TLSins\{\\$term\{(.*?)\}\}TLEins/\\$term\[$1\]\{\\TLSins\{$1\}TLEins\}/g;
}

# merge multi-line inserts and deletes
#
# Two regions of the same kind on consecutive lines are joined into one by
# replacing the "}TLExxx<newline>\TLSxxx{" between them with a line break and
# eight spaces.  Regions containing a percent sign are left alone.
#
# While merging, the line break is represented by a placeholder: the merged
# text then still counts as a single line for [^\%\n], so the loops can keep
# appending further lines to the same region.  The placeholder is chosen so
# that it does not occur anywhere in the text, and it is quoted with \Q...\E
# when it is turned back into a newline so none of its characters can act as
# a regex metacharacter.
my $newline;
do {
    $newline = 'TLSnewline' . int(rand(1e9)) . 'x';
} while ( index($buf, $newline) >= 0 );
while ( $buf =~ s/(\n\\TLSins\{)([^\%\n]*?)\}TLEins\n\\TLSins\{([^\%])/$1$2$newline        $3/sg ) {}
while ( $buf =~ s/(\n\\TLSdel\{)([^\%\n]*?)\}TLEdel\n\\TLSdel\{([^\%])/$1$2$newline        $3/sg ) {}
$buf =~ s/\Q$newline\E/\n/g;

# Escaped spaces ("\ ") will result in a backslash before the closing brace of
# \TLS(ins|del){}.  Thus insert a space between the backslash and closing brace
# and remove any white space after the brace.
$buf =~ s/\\(\}TLE(ins|del))\s*/\\ $1/g;
# alternative: move "\ " behind the closing brace, so that
# strike/underline does not cover the space
#$buf =~ s/\\\}TLE(ins|del)\s*/\}TLE$1\\ /g;

# Remove the end markers: every "TLEins"/"TLEdel" is deleted, which leaves the
# plain closing brace of each \TLSins{...} / \TLSdel{...}.  This has to come
# after all rules above that match on the markers.
$buf =~ s/TLE(ins|del)//g;

# insert \protect in front of every \TLSins and \TLSdel
$buf =~ s/\\TLS(ins|del)/\\protect\\TLS$1/g;

# output: write the result to the output file named on the command line; if
# none was given, or it cannot be opened, print to standard output instead
#
# The three-argument form of open takes the file name literally, so a name
# starting with e.g. ">" or "&" is not interpreted as part of the open mode.
my $out_fh;
if (defined($out) and length($out) and open($out_fh, '>', $out)) {
    print $out_fh $buf;
    # buffered write errors (disk full etc.) only show up on close
    close($out_fh) or warn("texdiff: error writing $out: $!\n");
} else {
    # keep the fallback to standard output, but say so if a file was requested
    warn("texdiff: can't open $out: $!; writing to standard output\n")
        if defined($out) and length($out);
    print STDOUT $buf;
}

# remove temp files
unlink('tmp1.'.$$, 'tmp2.'.$$);

# Create temp file from input file by removing all comments.
#
# Parameters:
#   $file - name of the LaTeX file to read
#   $tmp  - name of the temp file to (over)write
#
# Returns 1 on success; dies with a message if $file cannot be read or $tmp
# cannot be written.
sub write_temp {
    my ($file,$tmp) = @_;

    # Read the file directly instead of through `cat`, so that file names
    # containing spaces or shell metacharacters work and are never interpreted
    # by a shell.
    open(my $in, '<', $file) or die("can't open $file: $!\n");
    my $buf = do { local $/; <$in> };
    close($in);
    $buf = '' unless defined($buf);

    # Remove comments including the following newline; this converts multiline
    # macros to single line ones allowing them to be handled correctly by the
    # regular expressions applied to the wdiff output.
    #
    # A percent sign only starts a comment if it is preceded by an even number
    # of backslashes: "\%" is a literal percent sign and must be kept, while
    # in "\\%" the two backslashes form a line break and the comment follows.
    # The backslash pairs are captured and put back.  A comment on the last
    # line without a trailing newline is removed as well.
    $buf =~ s/(?<!\\)((?:\\\\)*)%.*?(?:\n|\z)/$1/g;

    open(my $out_fh, '>', $tmp) or die("can't open $tmp: $!\n");
    print $out_fh $buf;
    close($out_fh) or die("can't write $tmp: $!\n");

    return 1;
}
