Imported contents from xdiff-c-0.9.6.tar.gz (unmodified)
authorMiriam Ruiz <[email protected]>
Fri, 5 Nov 2010 11:53:03 +0000 (5 12:53 +0100)
committerMiriam Ruiz <[email protected]>
Fri, 5 Nov 2010 11:53:03 +0000 (5 12:53 +0100)
Downloaded from http://pages.cs.wisc.edu/~yuanwang/xdiff.html

MD5=924ce757cdc530911a70a9079ce6c99d
SHA1=754ba7cc2e90baed897577e1dce898255d865b29
SHA256=3d58ac0096439341058cd7b0480e2235191c14692d2df156e3a3298da92c5856

12 files changed:
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
XDiff.cpp [new file with mode: 0644]
XDiff.hpp [new file with mode: 0644]
XHash.cpp [new file with mode: 0644]
XHash.hpp [new file with mode: 0644]
XLut.cpp [new file with mode: 0644]
XLut.hpp [new file with mode: 0644]
XParser.cpp [new file with mode: 0644]
XParser.hpp [new file with mode: 0644]
XTree.cpp [new file with mode: 0644]
XTree.hpp [new file with mode: 0644]

diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..97f107b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,75 @@
+#
+# Copyright (c) 2001 - 2005
+#      Yuan Wang. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the 
+# documentation and/or other materials provided with the distribution.
+# 3. Redistributions in any form must be accompanied by information on
+# how to obtain complete source code for the X-Diff software and any
+# accompanying software that uses the X-Diff software.  The source code
+# must either be included in the distribution or be available for no 
+# more than the cost of distribution plus a nominal fee, and must be
+# freely redistributable under reasonable conditions.  For an executable
+# file, complete source code means the source code for all modules it
+# contains.  It does not include source code for modules or files that 
+# typically accompany the major components of the operating system on
+# which the executable file runs.
+#
+# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+# ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+# POSSIBILITY OF SUCH DAMAGE.
+#
+#
+
+##############################################################################
+#
+#  Makefile to build X-Diff
+#
+#  make:       builds all
+#
+##############################################################################
+
+# Replace it with your own $XERCESROOT
+XERCESROOT = /p/niagara/s/xerces-c-1.4.0
+
+CC = g++
+CFLAGS = -DDEBUG -DLINUX -D_REENTRANT -g -fpic -Wall
+LFLAGS = -DLINUX -fpic
+
+INCLUDES = -I. -I$(XERCESROOT)/include
+LIBS = -L$(XERCESROOT)/lib -static
+LINKS = -lxerces-c1_4 -lc
+
+SOURCES =      XTree.cpp       \
+               XHash.cpp       \
+               XLut.cpp        \
+               XParser.cpp     \
+               XDiff.cpp
+
+HEADERS = $(SOURCES:.cpp=.hpp)
+OBJECTS = $(SOURCES:.cpp=.o)
+
+.SUFFIXES: .cpp .o
+
+.cpp.o:
+       $(CC) $(CFLAGS) $(INCLUDES) -c $<
+
+xdiff: $(OBJECTS)
+       $(CC) $(LFLAGS) $(LIBS) -o xdiff $(OBJECTS) $(LINKS) 
+
+clean:
+       rm -rf *.o core
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..4d847d6
--- /dev/null
+++ b/README
@@ -0,0 +1,121 @@
+
+                       X-Diff  Version 0.9.6
+
+What is it?
+-----------
+
+X-Diff is a tool for detecting difference between two XML documents.
+
+
+Documentation
+-------------
+
+The core algorithm of X-Diff is described in the paper, "X-Diff: An
+Efficient Change Detection Algorithm for XML Documents", ICDE 2003.
+(http://www.cs.wisc.edu/~yuanwang/research/xdiff.pdf).
+
+
+Installation
+------------
+
+-- Java version:
+
+Required: Xerces Java v1.4.0+.
+
+On UNIX/LINUX,
+
+       tar zxf xdiff-j-0.9.6.tar.gz
+       cd X-Diff
+       make
+
+On Windows,
+
+       unzip xdiff-j-0.9.6.zip
+       cd X-Diff
+       make.bat
+
+-- C++ version
+
+Required: Xerces C++ v1.4.0, g++ v2.9.*
+
+       tar zxf xdiff-j-0.9.6.tar.gz
+       cd X-Diff
+       make
+
+
+Running X-Diff
+--------------
+
+java XDiff [-o|-g] [-p percent] xml_file1 xml_file2 result_file
+
+or,
+
+xdiff [-o|-g] [-p percent] xml_file1 xml_file2 result_file
+
+Options:
+
+       The default mode is "-o -p 0.3".
+       
+  -o   The optimal mode, to get the diff result with minimum editing
+       distance.
+       
+  -g   The greedy mode, to get a diff result quickly, the result may not
+       be "optimal".
+       
+  -p   The maximum change percentage allowed. X-Diff will not try to match
+       nodes that are much different from each other.
+
+
+Bug Reporting
+-------------
+
+Please report any bugs or send your comments to Yuan Wang at
+
+
+Copyright Information
+---------------------
+
+X-Diff is an open source product, which open source license permits you to
+use X-Diff at no charge under the condition that if you use the software in
+an application you redistribute, the complete source code for your application
+must be available and freely redistributable under reasonable conditions.
+If you do not want to release the source code for your application, you may
+purchase a license from me.
+
+
+
+Copyright (c) 2001 - 2005
+       Yuan Wang. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the 
+documentation and/or other materials provided with the distribution.
+3. Redistributions in any form must be accompanied by information on
+how to obtain complete source code for the X-Diff software and any
+accompanying software that uses the X-Diff software.  The source code
+must either be included in the distribution or be available for no 
+more than the cost of distribution plus a nominal fee, and must be
+freely redistributable under reasonable conditions.  For an executable
+file, complete source code means the source code for all modules it
+contains.  It does not include source code for modules or files that 
+typically accompany the major components of the operating system on
+which the executable file runs.
+
+THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/XDiff.cpp b/XDiff.cpp
new file mode 100644 (file)
index 0000000..c15a415
--- /dev/null
+++ b/XDiff.cpp
@@ -0,0 +1,2319 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#include "XDiff.hpp"
+
+const string   XDiff::USAGE = "xdiff [-o|-g] [-p percent] input_xml1 input_xml2 diff_result\nOptions:\n  The default mode is \"-o -p 0.3\"\n  -o\tThe optimal mode, to get the minimum editing distance.\n  -g\tThe greedy mode, to find a difference quickly.\n  -p\tThe maximum change percentage allowed.\n\tDefault value: 1.0 for -o mode; 0.3 for -g mode.";
+
+const int      XDiff::_CIRCUIT_SIZE = 2048;
+const int      XDiff::_MATRIX_SIZE = 1024;
+const int      XDiff::_ATTRIBUTE_SIZE = 1024;
+const int      XDiff::_TEXT_SIZE = 1024;
+
+/* You can use a different number for sampling here, as sqrt(n) is a safe
+   one, though 3 works pretty well. */
+const int      XDiff::_sampleCount = 3;
+double         XDiff::_NO_MATCH_THRESHOLD = 0.3;
+bool           XDiff::_oFlag = false;
+bool           XDiff::_gFlag = false;
+
+XDiff::XDiff(const char* input1, const char* input2, const char* output)
+{
+       struct timeval  *tv0 = new struct timeval;
+       struct timeval  *tv1 = new struct timeval;
+       struct timeval  *tv2 = new struct timeval;
+       struct timeval  *tv3 = new struct timeval;
+       struct timeval  *tv4 = new struct timeval;
+       struct timezone *tz = new struct timezone;
+
+       // starting time.
+       gettimeofday(tv0, tz);
+
+       // parse the first file.
+       XParser *parser1 = new XParser();
+       _xtree1 = parser1->parse(input1);
+       delete parser1;
+       gettimeofday(tv1, tz);
+
+       // parse the second file.
+       XParser *parser2 = new XParser();
+       _xtree2 = parser2->parse(input2);
+       delete parser2;
+       gettimeofday(tv2, tz);
+
+       // check both root nodes.
+       int     root1 = _xtree1->getRoot();
+       int     root2 = _xtree2->getRoot();
+       if (_xtree1->getHashValue(root1) == _xtree2->getHashValue(root2))
+       {
+               cout << "No difference!" << endl;
+               cout << "Execution time:\t\t" << _diffTime(tv0, tv2) << " s\n";
+               cout << "Parsing " << input1 << ":\t"
+                       << _diffTime(tv0, tv1) << " s\n";
+               cout << "Parsing " << input2 << ":\t"
+                       << _diffTime(tv1, tv2) << " s\n";
+       }
+       else
+       {
+               if (_xtree1->getTag(root1).compare(_xtree2->getValue(root2)) != 0)
+               {
+                       cout << "The root is changed!" << endl;
+                       _xtree1->addMatching(root1, XTree::DELETE, root2);
+                       _xtree2->addMatching(root2, XTree::INSERT, root1);
+               }
+               else
+               {
+                       _attrList1 = new int[_ATTRIBUTE_SIZE];
+                       _attrList2 = new int[_ATTRIBUTE_SIZE];
+                       _textList1 = new int[_TEXT_SIZE];
+                       _textList2 = new int[_TEXT_SIZE];
+                       _attrMatch = new bool[_ATTRIBUTE_SIZE];
+                       _textMatch1 = new bool[_TEXT_SIZE];
+                       _textMatch2 = new bool[_TEXT_SIZE];
+                       _attrHash = new unsigned long long[_ATTRIBUTE_SIZE];
+                       _textHash = new unsigned long long[_TEXT_SIZE];
+                       _attrTag = new string[_ATTRIBUTE_SIZE];
+                       _leastCostMatrix = new int*[_MATRIX_SIZE];
+                       _pathMatrix = new int*[_MATRIX_SIZE];
+                       _circuit = new int[_CIRCUIT_SIZE];
+                       for (int i = 0; i < _MATRIX_SIZE; i++)
+                       {
+                               _leastCostMatrix[i] = new int[_MATRIX_SIZE];
+                               _pathMatrix[i] = new int[_MATRIX_SIZE];
+                       }
+                       _xtree1->addMatching(root1, XTree::CHANGE, root2);
+                       _xtree2->addMatching(root2, XTree::CHANGE, root1);
+                       _seed = (unsigned int)(tv0->tv_usec & 0xffffffff);
+                       _xlut = new XLut((_xtree1->getNodeCount() > 0xffff) ||
+                                        (_xtree2->getNodeCount() > 0xffff));
+                       xdiff(root1, root2, false);
+               }
+
+               // after diffing.
+               gettimeofday(tv3, tz);
+               writeDiff(input1, output);
+               gettimeofday(tv4, tz);
+
+               cout << "Difference detected!" << endl;
+               cout << "Execution time:\t\t" << _diffTime(tv0, tv4) << " s\n";
+               cout << "Parsing " << input1 << ":\t"
+                       << _diffTime(tv0, tv1) << " s\n";
+               cout << "Parsing " << input2 << ":\t"
+                       << _diffTime(tv1, tv2) << " s\n";
+               cout << "Diffing:\t\t" << _diffTime(tv2, tv3) << " s\n";
+               cout << "Writing diff:\t\t" << _diffTime(tv3, tv4) << " s\n";
+
+               delete _xlut;
+               delete[] _attrList1;
+               delete[] _attrList2;
+               delete[] _attrMatch;
+               delete[] _attrHash;
+               delete[] _attrTag;
+               delete[] _circuit;
+               for (int i = 0; i < _MATRIX_SIZE; i++)
+               {
+                       delete[] _leastCostMatrix[i];
+                       delete[] _pathMatrix[i];
+               }
+               delete[] _leastCostMatrix;
+               delete[] _pathMatrix;
+       }
+
+       delete tv0;
+       delete tv1;
+       delete tv2;
+       delete tv3;
+       delete tv4;
+       delete tz;
+}
+
+XDiff::~XDiff()
+{
+       delete _xtree1;
+       delete _xtree2;
+}
+
+void XDiff::xdiff(int pid1, int pid2, bool matchFlag)
+{
+       // diff attributes.
+       int     attrCount1 = 0, attrCount2 = 0;
+       int     attr1 = _xtree1->getFirstAttribute(pid1);
+       while (attr1 != XTree::NULL_NODE)
+       {
+               _attrList1[attrCount1++] = attr1;
+               attr1 = _xtree1->getNextAttribute(attr1);
+       }
+       int     attr2 = _xtree2->getFirstAttribute(pid2);
+       while (attr2 != XTree::NULL_NODE)
+       {
+               _attrList2[attrCount2++] =  attr2;
+               attr2 = _xtree2->getNextAttribute(attr2);
+       }
+       if (attrCount1 > 0)
+       {
+               if (attrCount2 > 0)
+                       diffAttributes(attrCount1, attrCount2);
+               else
+               {
+                       for (int i = 0; i < attrCount1; i++)
+                               _xtree1->addMatching(_attrList1[i],
+                                                    XTree::NO_MATCH);
+               }
+       }
+       else if (attrCount2 > 0) // attrCount1 == 0
+       {
+               for (int i = 0; i < attrCount2; i++)
+                       _xtree2->addMatching(_attrList2[i], XTree::NO_MATCH);
+       }
+
+       // Match element nodes.
+       int     count1 = _xtree1->getChildrenCount(pid1) - attrCount1;
+       int     count2 = _xtree2->getChildrenCount(pid2) - attrCount2;
+
+       if ((count1 == 0) && (count2 == 0))
+       {
+       }
+       else if (count1 == 0)
+       {
+               int     node2 = _xtree2->getFirstChild(pid2);
+               _xtree2->addMatching(node2, XTree::NO_MATCH);
+               for (int i = 1; i < count2; i++)
+               {
+                       node2 = _xtree2->getNextSibling(node2);
+                       _xtree2->addMatching(attr2, XTree::NO_MATCH);
+               }
+       }
+       else if (count2 == 0)
+       {
+               int     node1 = _xtree1->getFirstChild(pid1);
+               _xtree1->addMatching(node1, XTree::NO_MATCH);
+               for (int i = 0; i < count1; i++)
+               {
+                       node1 = _xtree1->getNextSibling(node1);
+                       _xtree1->addMatching(node1, XTree::NO_MATCH);
+               }
+       }
+       else if ((count1 == 1) && (count2 == 1))
+       {
+               int     node1 = _xtree1->getFirstChild(pid1);
+               int     node2 = _xtree2->getFirstChild(pid2);
+
+               if (_xtree1->getHashValue(node1) == _xtree2->getHashValue(node2))
+                       return;
+
+               bool    isE1 = _xtree1->isElement(node1);
+               bool    isE2 = _xtree2->isElement(node2);
+
+               if (isE1 && isE2)
+               {
+                       if (_xtree1->getTag(node1).compare(_xtree2->getTag(node2)) == 0)
+                       {
+                               _xtree1->addMatching(node1, XTree::CHANGE, node2);
+                               _xtree2->addMatching(node2, XTree::CHANGE, node1);
+                               xdiff(node1, node2, matchFlag);
+                       }
+                       else
+                       {
+                               _xtree1->addMatching(node1, XTree::NO_MATCH);
+                               _xtree2->addMatching(node2, XTree::NO_MATCH);
+                       }
+               }
+               else if (!isE1 && !isE2)
+               {
+                       _xtree1->addMatching(node1, XTree::CHANGE, node2);
+                       _xtree2->addMatching(node2, XTree::CHANGE, node1);
+               }
+               else
+               {
+                       _xtree1->addMatching(node1, XTree::NO_MATCH);
+                       _xtree2->addMatching(node2, XTree::NO_MATCH);
+               }
+       }
+       else
+       {
+               int     *elements1 = new int[count1];
+               int     *elements2 = new int[count2];
+               int     elementCount1 = 0, textCount1 = 0;
+               int     elementCount2 = 0, textCount2 = 0;
+
+               int     child1 = _xtree1->getFirstChild(pid1);
+               if (_xtree1->isElement(child1))
+                       elements1[elementCount1++] = child1;
+               else
+                       _textList1[textCount1++] = child1;
+               for (int i = 1; i < count1; i++)
+               {
+                       child1 = _xtree1->getNextSibling(child1);
+                       if (_xtree1->isElement(child1))
+                               elements1[elementCount1++] = child1;
+                       else
+                               _textList1[textCount1++] = child1;
+               }
+
+               int     child2 = _xtree2->getFirstChild(pid2);
+               if (_xtree2->isElement(child2))
+                       elements2[elementCount2++] = child2;
+               else
+                       _textList2[textCount2++] = child2;
+               for (int i = 1; i < count2; i++)
+               {
+                       child2 = _xtree2->getNextSibling(child2);
+                       if (_xtree2->isElement(child2))
+                               elements2[elementCount2++] = child2;
+                       else
+                               _textList2[textCount2++] = child2;
+               }
+
+               // Match text nodes.
+               if (textCount1 > 0)
+               {
+                       if (textCount2 > 0)
+                               diffText(textCount1, textCount2);
+                       else
+                       {
+                               for (int i = 0; i < textCount1; i++)
+                                       _xtree1->addMatching(_textList1[i],
+                                                            XTree::NO_MATCH);
+                       }
+               }
+               else if (textCount2 > 0)        // textCount1 == 0
+               {
+                       for (int i = 0; i < textCount2; i++)
+                               _xtree2->addMatching(_textList2[i],
+                                                    XTree::NO_MATCH);
+               }
+
+               bool    *matched1 = new bool[elementCount1];
+               bool    *matched2 = new bool[elementCount2];
+               int     mcount = _matchFilter(elements1, elements2,
+                                             matched1, matched2,
+                                             elementCount1, elementCount2);
+
+               if ((elementCount1 == mcount) &&
+                   (elementCount2 == mcount))
+               {
+               }
+               else if (elementCount1 == mcount)
+               {
+                       for (int i = 0; i < elementCount2; i++)
+                       {
+                               if (!matched2[i])
+                                       _xtree2->addMatching(elements2[i],
+                                                            XTree::NO_MATCH);
+                       }
+               }
+               else if (elementCount2 == mcount)
+               {
+                       for (int i = 0; i < elementCount1; i++)
+                       {
+                               if (!matched1[i])
+                                       _xtree1->addMatching(elements1[i],
+                                                            XTree::NO_MATCH);
+                       }
+               }
+               else
+               {
+                       // Write the list of unmatched nodes.
+                       int     ucount1 = elementCount1 - mcount;
+                       int     ucount2 = elementCount2 - mcount;
+                       int     *unmatched1 = new int[ucount1];
+                       int     *unmatched2 = new int[ucount2];
+                       int     muc1 = 0, muc2 = 0;
+                       int     start = 0;
+
+                       while ((muc1 < ucount1) && (muc2 < ucount2))
+                       {
+                               for (; (start < elementCount1) && matched1[start]; start++);
+                               string  startTag = _xtree1->getTag(elements1[start]);
+                               int     uele1 = 0, uele2 = 0;
+                               muc1++;
+                               unmatched1[uele1++] = elements1[start];
+                               matched1[start++] = true;
+
+                               for (int i = start; (i < elementCount1) && (muc1 < ucount1); i++)
+                               {
+                                       if (!matched1[i] && (startTag.compare(_xtree1->getTag(elements1[i])) == 0))
+                                       {
+                                               matched1[i] = true;
+                                               muc1++;
+                                               unmatched1[uele1++] = elements1[i];
+                                       }
+                               }
+
+                               for (int i = 0; (i < elementCount2) && (muc2 < ucount2); i++)
+                               {
+                                       if (!matched2[i] && (startTag.compare(_xtree2->getTag(elements2[i])) == 0))
+                                       {
+                                               matched2[i] = true;
+                                               muc2++;
+                                               unmatched2[uele2++] = elements2[i];
+                                       }
+                               }
+
+                               if (uele2 == 0)
+                               {
+                                       for (int i = 0; i < uele1; i++)
+                                               _xtree1->addMatching(unmatched1[i], XTree::NO_MATCH);
+                               }
+                               else
+                               {
+                                       if ((uele1 == 1) && (uele2 == 1))
+                                       {
+                                               _xtree1->addMatching(unmatched1[0], XTree::CHANGE, unmatched2[0]);
+                                               _xtree2->addMatching(unmatched2[0], XTree::CHANGE, unmatched1[0]);
+                                               xdiff(unmatched1[0],
+                                             unmatched2[0], matchFlag);
+                                       }
+                                       // To find minimal-cost matching between those unmatched elements (with the same tag name.
+                                       else if (uele1 >= uele2)
+                                       {
+                                               if ((uele2 <= _sampleCount) ||
+                                                   !_gFlag)
+                                                       matchListO(unmatched1, unmatched2, uele1, uele2, true, matchFlag);
+                                               else
+                                                       matchList(unmatched1, unmatched2, uele1, uele2, true, matchFlag);
+                                       }
+                                       else
+                                       {
+                                               if ((uele1 <= _sampleCount) ||
+                                                   !_gFlag)
+                                                       matchListO(unmatched2, unmatched1, uele2, uele1, false, matchFlag);
+                                               else
+                                                       matchList(unmatched2, unmatched1, uele2, uele1, false, matchFlag);
+                                       }
+                               }
+                       }
+
+                       if (muc1 < ucount1)
+                       {
+                               for (int i = start; i < elementCount1; i++)
+                               {
+                                       if (!matched1[i])
+                                               _xtree1->addMatching(elements1[i], XTree::NO_MATCH);
+                               }
+                       }       
+                       else if (muc2 < ucount2)
+                       {
+                               for (int i = 0; i < elementCount2; i++)
+                               {
+                                       if (!matched2[i])
+                                               _xtree2->addMatching(elements2[i], XTree::NO_MATCH);
+                               }
+                       }
+
+                       delete[] unmatched1;
+                       delete[] unmatched2;
+               }
+
+               delete[] elements1;
+               delete[] elements2;
+               delete[] matched1;
+               delete[] matched2;
+       }
+}
+
+void XDiff::diffAttributes(int attrCount1, int attrCount2)
+{
+       if ((attrCount1 == 1) && (attrCount2 == 1))
+       {
+               int     a1 = _attrList1[0];
+               int     a2 = _attrList2[0];
+               if (_xtree1->getHashValue(a1) == _xtree2->getHashValue(a2))
+                       return;
+
+               if (_xtree1->getTag(a1).compare(_xtree2->getTag(a2)) == 0)
+               {
+                       _xtree1->addMatching(a1, XTree::CHANGE, a2);
+                       _xtree2->addMatching(a2, XTree::CHANGE, a1);
+                       _xtree1->addMatching(_xtree1->getFirstChild(a1),
+                                            XTree::CHANGE,
+                                            _xtree2->getFirstChild(a2));
+                       _xtree2->addMatching(_xtree2->getFirstChild(a2),
+                                            XTree::CHANGE,
+                                            _xtree1->getFirstChild(a1));
+               }
+               else
+               {
+                       _xtree1->addMatching(a1, XTree::NO_MATCH);
+                       _xtree2->addMatching(a2, XTree::NO_MATCH);
+               }
+               return;
+       }
+
+       for (int i = 0; i < attrCount2; i++)
+       {
+               _attrHash[i] = _xtree2->getHashValue(_attrList2[i]);
+               _attrTag[i] = _xtree2->getTag(_attrList2[i]);
+               _attrMatch[i] = false;
+       }
+
+       int     matchCount = 0;
+       for (int i = 0; i < attrCount1; i++)
+       {
+               int     attr1 = _attrList1[i];
+               unsigned long long      ah1 = _xtree1->getHashValue(attr1);
+               string  tag1 = _xtree1->getTag(attr1);
+
+               bool    found = false;
+               for (int j = 0; j < attrCount2; j++)
+               {
+                       int     attr2 = _attrList2[j];
+                       if (_attrMatch[j])
+                               continue;
+                       else if (ah1 == _attrHash[j])
+                       {
+                               _attrMatch[j] = true;
+                               matchCount++;
+                               found = true;
+                               break;
+                       }
+                       else if (tag1.compare(_attrTag[j]) == 0)
+                       {
+                               _attrMatch[j] = true;
+                               matchCount++;
+
+                               _xtree1->addMatching(attr1, XTree::CHANGE, attr2);
+                               _xtree2->addMatching(attr2, XTree::CHANGE, attr1);
+                               _xtree1->addMatching(_xtree1->getFirstChild(attr1), XTree::CHANGE, _xtree2->getFirstChild(attr2));
+                               _xtree2->addMatching(_xtree2->getFirstChild(attr2), XTree::CHANGE, _xtree1->getFirstChild(attr1));
+
+                               found = true;
+                               break;
+                       }
+               }
+
+               if (!found)
+                       _xtree1->addMatching(attr1, XTree::NO_MATCH);
+       }
+
+       if (matchCount != attrCount2)
+       {
+               for (int i = 0; i < attrCount2; i++)
+               {
+                       if (!_attrMatch[i])
+                               _xtree2->addMatching(_attrList2[i], XTree::NO_MATCH);
+               }
+       }
+}
+
+void XDiff::diffText(int textCount1, int textCount2)
+{
+       for (int i = 0; i < textCount1; i++)
+               _textMatch1[i] = false;
+       for (int i = 0; i < textCount2; i++)
+       {
+               _textMatch2[i] = false;
+               _textHash[i] = _xtree2->getHashValue(_textList2[i]);
+       }
+
+       int     mcount = 0;
+       for (int i = 0; i < textCount1; i++)
+       {
+               unsigned long long      hash1 = _xtree1->getHashValue(_textList1[i]);
+               for (int j = 0; j < textCount2; j++)
+               {
+                       if (!_textMatch2[j] && (hash1 == _textHash[j]))
+                       {
+                               _textMatch1[i] = true;
+                               _textMatch2[j] = true;
+                               mcount++;
+                               break;
+                       }
+               }
+
+               if (mcount == textCount2)
+                       break;
+       }
+
+       if ((mcount < textCount1) && (textCount1 <= textCount2))
+       {
+               for (int i = 0, j = 0;
+                    (i < textCount1) && (mcount < textCount1); i++)
+               {
+                       if (_textMatch1[i])
+                               continue;
+                       for (; _textMatch2[j]; j++);
+                       _xtree1->addMatching(_textList1[i], XTree::CHANGE,
+                                            _textList2[j]);
+                       _textMatch1[i] = true;
+                       _xtree2->addMatching(_textList2[j], XTree::CHANGE,
+                                            _textList1[i]);
+                       _textMatch2[j] = true;
+                       mcount++;
+               }
+       }
+       else if ((mcount < textCount2) && (textCount2 < textCount1))
+       {
+               for (int i = 0, j = 0;
+                    (i < textCount2) && (mcount < textCount2); i++)
+               {
+                       if (_textMatch2[i])
+                               continue;
+                       for (; _textMatch1[j]; j++);
+                       _xtree2->addMatching(_textList2[i], XTree::CHANGE,
+                                            _textList1[j]);
+                       _textMatch2[i] = true;
+                       _xtree1->addMatching(_textList1[j], XTree::CHANGE,
+                                            _textList2[i]);
+                       _textMatch1[j] = true;
+                       mcount++;
+               }
+       }
+
+       if (mcount < textCount1)
+       {
+               for (int i = 0; i < textCount1; i++)
+               {
+                       if (!_textMatch1[i])
+                               _xtree1->addMatching(_textList1[i],
+                                                    XTree::NO_MATCH);
+               }
+       }
+       else if (mcount < textCount2)
+       {
+               for (int i = 0; i < textCount2; i++)
+               {
+                       if (!_textMatch2[i])
+                               _xtree2->addMatching(_textList2[i],
+                                                    XTree::NO_MATCH);
+               }
+       }
+}
+
+int XDiff::_matchFilter(int *elements1, int *elements2, bool *matched1,
+                       bool *matched2, int count1, int count2)
+{
+       unsigned long long *value1 = new unsigned long long[count1];
+       unsigned long long *value2 = new unsigned long long[count2];
+
+       for (int i = 0; i < count1; i++)
+       {
+               value1[i] = _xtree1->getHashValue(elements1[i]);
+               matched1[i] = false;
+       }
+       for (int i = 0; i < count2; i++)
+       {
+               value2[i] = _xtree2->getHashValue(elements2[i]);
+               matched2[i] = false;
+       }
+
+       int     mcount = 0;
+       for (int i = 0; i < count2; i++)
+               for (int j = 0; j < count1; j++)
+               {
+                       if (!matched1[j] && !matched2[i] &&
+                           (value1[j] == value2[i]))
+                       {
+                               matched1[j] = true;
+                               matched2[i] = true;
+                               mcount++;
+                               break;
+                       }
+               }
+
+       delete[]        value1;
+       delete[]        value2;
+
+       return mcount;
+}
+
+void XDiff::matchListO(int *nodes1, int *nodes2, int count1, int count2,
+                      bool treeOrder, bool matchFlag)
+{
+       int     **distanceMatrix = new int*[count1+1];
+       int     *matching1 = new int[count1];
+       int     *matching2 = new int[count2];
+
+       // insert cost.
+       distanceMatrix[count1] = new int[count2+1];
+       for (int i = 0; i < count2; i++)
+               distanceMatrix[count1][i] = (treeOrder ? _xtree2->getDecendentsCount(nodes2[i]) : _xtree1->getDecendentsCount(nodes2[i])) + 1;
+
+       for (int i = 0; i < count1; i++)
+       {
+               distanceMatrix[i] = new int[count2+1];
+               int     deleteCost = (treeOrder ? _xtree1->getDecendentsCount(nodes1[i]) : _xtree2->getDecendentsCount(nodes1[i])) + 1;
+               for (int j = 0; j < count2; j++)
+               {
+                       int     dist;
+                       if (matchFlag)
+                               dist = treeOrder ? _xlut->get(nodes1[i], nodes2[j]) : _xlut->get(nodes2[j], nodes1[i]);
+                       else
+                       {
+                               dist = treeOrder ? distance(nodes1[i], nodes2[j], true, XTree::NO_CONNECTION) : distance(nodes2[j], nodes1[i], true, XTree::NO_CONNECTION);
+
+                               // the default mode.
+                               if (!_oFlag && (dist > 1) && (dist >= _NO_MATCH_THRESHOLD * (deleteCost + distanceMatrix[count1][j])))
+                                       dist = XTree::NO_CONNECTION;
+                               if (dist < XTree::NO_CONNECTION)
+                               {
+                                       if (treeOrder)
+                                               _xlut->add(nodes1[i],
+                                                          nodes2[j],
+                                                          dist);
+                                       else
+                                               _xlut->add(nodes2[j],
+                                                          nodes1[i],
+                                                          dist);
+                               }
+                       }
+                       distanceMatrix[i][j] = dist;
+               }
+               // delete cost.
+               distanceMatrix[i][count2] = deleteCost;
+       }
+
+       // compute the minimal cost matching.
+       findMatching(count1, count2, distanceMatrix, matching1, matching2);
+
+       for (int i = 0; i < count1; i++)
+       {
+               if (matching1[i] == XTree::NO_MATCH)
+               {
+                       if (treeOrder)
+                               _xtree1->addMatching(nodes1[i], XTree::NO_MATCH);
+                       else
+                               _xtree2->addMatching(nodes1[i], XTree::NO_MATCH);
+               }
+               else
+               {
+                       if (treeOrder)
+                               _xtree1->addMatching(nodes1[i], XTree::CHANGE,
+                                                    nodes2[matching1[i]]);
+                       else
+                               _xtree2->addMatching(nodes1[i], XTree::CHANGE,
+                                                    nodes2[matching1[i]]);
+               }
+       }
+
+       for (int i = 0; i < count2; i++)
+       {
+               if (matching2[i] == XTree::NO_MATCH)
+               {
+                       if (treeOrder)
+                               _xtree2->addMatching(nodes2[i], XTree::NO_MATCH);
+                       else
+                               _xtree1->addMatching(nodes2[i], XTree::NO_MATCH);
+               }
+               else
+               {
+                       if (treeOrder)
+                               _xtree2->addMatching(nodes2[i], XTree::CHANGE,
+                                                    nodes1[matching2[i]]);
+                       else
+                               _xtree1->addMatching(nodes2[i], XTree::CHANGE,
+                                                    nodes1[matching2[i]]);
+               }
+       }
+
+       for (int i = 0; i < count1; i++)
+       {
+               if (matching1[i] != XTree::NO_MATCH)
+               {
+                       int     todo1 = nodes1[i];
+                       int     todo2 = nodes2[matching1[i]];
+                       if (treeOrder)
+                       {
+                               if (_xtree1->isElement(todo1) &&
+                                   _xtree2->isElement(todo2))
+                                       xdiff(todo1, todo2, true);
+                       }
+                       else
+                       {
+                               if (_xtree1->isElement(todo2) &&
+                                   _xtree2->isElement(todo1))
+                                       xdiff(todo2, todo1, true);
+                       }
+               }
+       }
+
+       delete[]        matching1;
+       delete[]        matching2;
+       for (int i = 0; i <= count1; i++)
+               delete[]        distanceMatrix[i];
+       delete[]        distanceMatrix;
+}
+
+void XDiff::matchList(int *nodes1, int *nodes2, int count1, int count2,
+                     bool treeOrder, bool matchFlag)
+{
+       int     *matching1 = new int[count1];
+       int     *matching2 = new int[count2];
+       for (int i = 0; i < count1; i++)
+               matching1[i] = XTree::NO_MATCH;
+       for (int i = 0; i < count2; i++)
+               matching2[i] = XTree::NO_MATCH;
+
+       if (matchFlag)
+       {
+               for (int i = 0; i < count1; i++)
+               {
+                       for (int j = 0; j < count2; j++)
+                       {
+                               int     d = treeOrder ? _xlut->get(nodes1[i], nodes2[j]) : _xlut->get(nodes2[j], nodes1[i]);
+                               if (d != XTree::NO_CONNECTION)
+                               {
+                                       matching1[i] = j;
+                                       matching2[j] = i;
+                                       break;
+                               }
+                       }
+               }
+       }
+       else
+       {
+               int     scount1 = 0;
+               int     scount2 = 0;
+               int     matchingThreshold = 0;
+               for (int i = 0; ((i < _sampleCount) && (scount2 < count2)); scount2++)
+               {
+                       srand(_seed);
+                       _seed = rand();
+                       int     snode = (int)((long long)_seed * (count2 - scount2) / (RAND_MAX + 1)) + scount2;
+                       int     dist = XTree::NO_CONNECTION;
+                       int     bestmatch = XTree::NULL_NODE;
+                       for (int j = scount1; j < count1; j++)
+                       {
+                               int     d = treeOrder ? distance(nodes1[j], nodes2[snode], false, dist) : distance(nodes2[snode], nodes1[j], false, dist);
+                               if (d < dist)
+                               {
+                                       dist = d;
+                                       bestmatch = j;
+                                       if (d == 1)
+                                               break;
+                               }
+                       }
+
+                       int     deleteCost = (treeOrder ? _xtree2->getDecendentsCount(nodes2[snode]) : _xtree1->getDecendentsCount(nodes2[snode])) + 1;
+                       if ((dist > 1) &&
+                           (dist > _NO_MATCH_THRESHOLD * deleteCost))
+                       {
+                               int     tmp = nodes2[snode];
+                               nodes2[snode] = nodes2[scount2];
+                               nodes2[scount2] = tmp;
+                       }
+                       else
+                       {
+                               int     tmp = nodes1[bestmatch];
+                               nodes1[bestmatch] = nodes1[scount1];
+                               nodes1[scount1] = tmp;
+                               tmp = nodes2[snode];
+                               nodes2[snode] = nodes2[scount2];
+                               nodes2[scount2] = tmp;
+
+                               if (treeOrder)
+                                       _xlut->add(nodes1[scount1], nodes2[scount2], dist);
+                               else
+                                       _xlut->add(nodes2[scount2], nodes1[scount1], dist);
+                               matching1[scount1] = scount2;
+                               matching2[scount2] = scount1;
+
+                               i++;
+                               scount1++;
+                               if (matchingThreshold < dist)
+                                       matchingThreshold = dist;
+                       }
+               }
+
+               for (;scount2 < count2; scount2++)
+               {
+                       int     dist = XTree::NO_CONNECTION;
+                       int     bestmatch = XTree::NO_MATCH;
+                       for (int i = scount1; i < count1; i++)
+                       {
+                               int     d = treeOrder ? distance(nodes1[i], nodes2[scount2], false, dist) : distance(nodes2[scount2], nodes1[i], false, dist);
+                               if (d <= matchingThreshold)
+                               {
+                                       dist = d;
+                                       bestmatch = i;
+                                       break;
+                               }
+                               else if (d < dist)
+                               {
+                                       dist = d;
+                                       bestmatch = i;
+                               }
+                       }
+
+                       if (bestmatch != XTree::NO_MATCH)
+                       {
+                               int     tmp = nodes1[bestmatch];
+                               nodes1[bestmatch] = nodes1[scount1];
+                               nodes1[scount1] = tmp;
+
+                               if (treeOrder)
+                                       _xlut->add(nodes1[scount1], nodes2[scount2], dist);
+                               else
+                                       _xlut->add(nodes2[scount2], nodes1[scount1], dist);
+                               matching1[scount1] = scount2;
+                               matching2[scount2] = scount1;
+                               scount1++;
+                       }
+               }
+       }
+
+       // Record matching
+       for (int i = 0; i < count1; i++)
+       {
+               if (matching1[i] == XTree::NO_MATCH)
+               {
+                       if (treeOrder)
+                               _xtree1->addMatching(nodes1[i], XTree::NO_MATCH);
+                       else
+                               _xtree2->addMatching(nodes1[i], XTree::NO_MATCH);
+               }
+               else
+               {
+                       if (treeOrder)
+                               _xtree1->addMatching(nodes1[i], XTree::CHANGE,
+                                                    nodes2[matching1[i]]);
+                       else
+                               _xtree2->addMatching(nodes1[i], XTree::CHANGE,
+                                                    nodes2[matching1[i]]);
+               }
+       }
+
+       for (int i = 0; i < count2; i++)
+       {
+               if (matching2[i] == XTree::NO_MATCH)
+               {
+                       if (treeOrder)
+                               _xtree2->addMatching(nodes2[i], XTree::NO_MATCH);
+                       else
+                               _xtree1->addMatching(nodes2[i], XTree::NO_MATCH);
+               }
+               else
+               {
+                       if (treeOrder)
+                               _xtree2->addMatching(nodes2[i], XTree::CHANGE,
+                                                    nodes1[matching2[i]]);
+                       else
+                               _xtree1->addMatching(nodes2[i], XTree::CHANGE,
+                                                    nodes1[matching2[i]]);
+               }
+       }
+
+       for (int i = 0; i < count1; i++)
+       {
+               if (matching1[i] != XTree::NO_MATCH)
+               {
+                       int     todo1 = nodes1[i];
+                       int     todo2 = nodes2[matching1[i]];
+                       if (treeOrder)
+                       {
+                               if (_xtree1->isElement(todo1) &&
+                                   _xtree2->isElement(todo2))
+                                       xdiff(todo1, todo2, true);
+                       }
+                       else
+                       {
+                               if (_xtree1->isElement(todo2) &&
+                                   _xtree2->isElement(todo1))
+                                       xdiff(todo2, todo1, true);
+                       }
+               }
+       }
+
+       delete[]        matching1;
+       delete[]        matching2;
+}
+
+int XDiff::distance(int eid1, int eid2, bool toRecord, int threshold)
+{
+       bool    isE1 = _xtree1->isElement(eid1);
+       bool    isE2 = _xtree2->isElement(eid2);
+       if (isE1 && isE2)
+       {
+               if (_xtree1->getTag(eid1).compare(_xtree2->getTag(eid2)) != 0)
+                       return XTree::NO_CONNECTION;
+               else 
+               {
+                       int     dist = _xdiff(eid1, eid2, threshold);
+                       if (toRecord && (dist < XTree::NO_CONNECTION))
+                               _xlut->add(eid1, eid2, dist);
+                       return dist;
+               }
+       }
+       else if (!isE1 && !isE2)
+               return 1;
+       else
+               return XTree::NO_CONNECTION;
+}
+
+int XDiff::_xdiff(int pid1, int pid2, int threshold = XTree::NO_CONNECTION)
+{
+       int     dist = 0;
+
+       // diff attributes.
+       int     attrCount1 = 0, attrCount2 = 0;
+       int     attr1 = _xtree1->getFirstAttribute(pid1);
+       while (attr1 != XTree::NULL_NODE)
+       {
+               _attrList1[attrCount1++] = attr1;
+               attr1 = _xtree1->getNextAttribute(attr1);
+       }
+       int     attr2 = _xtree2->getFirstAttribute(pid2);
+       while (attr2 != XTree::NULL_NODE)
+       {
+               _attrList2[attrCount2++] =  attr2;
+               attr2 = _xtree2->getNextAttribute(attr2);
+       }
+
+       if (attrCount1 == 0)
+               dist = attrCount2 * 2;
+       else if (attrCount2 == 0)
+               dist = attrCount1 * 2;
+       else
+               dist = _diffAttributes(attrCount1, attrCount2);
+       if (!_gFlag && (dist >= threshold))
+               return XTree::NO_CONNECTION;
+
+       // Match element nodes.
+       int     count1 = _xtree1->getChildrenCount(pid1) - attrCount1;
+       int     count2 = _xtree2->getChildrenCount(pid2) - attrCount2;
+
+       if (count1 == 0)
+       {
+               int     node2 = _xtree2->getFirstChild(pid2);
+               while (node2 != XTree::NULL_NODE)
+               {
+                       dist += _xtree2->getDecendentsCount(node2) + 1;
+                       if (!_gFlag && (dist >= threshold))
+                               return XTree::NO_CONNECTION;
+                       node2 = _xtree2->getNextSibling(node2);
+               }
+       }
+       else if (count2 == 0)
+       {
+               int     node1 = _xtree1->getFirstChild(pid1);
+               while (node1 != XTree::NULL_NODE)
+               {
+                       dist += _xtree1->getDecendentsCount(node1) + 1;
+                       if (!_gFlag && (dist >= threshold))
+                               return XTree::NO_CONNECTION;
+                       node1 = _xtree1->getNextSibling(node1);
+               }
+       }
+       else if ((count1 == 1) && (count2 == 1))
+       {
+               int     node1 = _xtree1->getFirstChild(pid1);
+               int     node2 = _xtree2->getFirstChild(pid2);
+
+               if (_xtree1->getHashValue(node1) == _xtree2->getHashValue(node2))
+                       return dist;
+
+               bool    isE1 = _xtree1->isElement(node1);
+               bool    isE2 = _xtree2->isElement(node2);
+
+               if (isE1 && isE2)
+               {
+                       if (_xtree1->getTag(node1).compare(_xtree2->getTag(node2)) == 0)
+                               dist += _xdiff(node1, node2, threshold - dist);
+                       else
+                               dist += _xtree1->getDecendentsCount(node1) + 
+                                       _xtree2->getDecendentsCount(node2) + 2;
+               }
+               else if (!isE1 && !isE2)
+                       dist++;
+               else
+                       dist += _xtree1->getDecendentsCount(node1) +
+                               _xtree2->getDecendentsCount(node2) + 2;
+       }
+       else
+       {
+               int     *elements1 = new int[count1];
+               int     *elements2 = new int[count2];
+               int     elementCount1 = 0, textCount1 = 0;
+               int     elementCount2 = 0, textCount2 = 0;
+
+               int     child1 = _xtree1->getFirstChild(pid1);
+               if (_xtree1->isElement(child1))
+                       elements1[elementCount1++] = child1;
+               else
+                       _textList1[textCount1++] = child1;
+               for (int i = 1; i < count1; i++)
+               {
+                       child1 = _xtree1->getNextSibling(child1);
+                       if (_xtree1->isElement(child1))
+                               elements1[elementCount1++] = child1;
+                       else
+                               _textList1[textCount1++] = child1;
+               }
+
+               int     child2 = _xtree2->getFirstChild(pid2);
+               if (_xtree2->isElement(child2))
+                       elements2[elementCount2++] = child2;
+               else
+                       _textList2[textCount2++] = child2;
+               for (int i = 1; i < count2; i++)
+               {
+                       child2 = _xtree2->getNextSibling(child2);
+                       if (_xtree2->isElement(child2))
+                               elements2[elementCount2++] = child2;
+                       else
+                               _textList2[textCount2++] = child2;
+               }
+
+               // Match text nodes.
+               if (textCount1 == 0)
+               {
+                       dist += textCount2;
+               }
+               else if (textCount2 == 0)
+               {
+                       dist += textCount1;
+               }
+               else
+                       dist += _diffText(textCount1, textCount2);
+
+               if (_gFlag && (dist >= threshold))
+                       return XTree::NO_CONNECTION;
+
+               bool    *matched1 = new bool[elementCount1];
+               bool    *matched2 = new bool[elementCount2];
+               int     mcount = _matchFilter(elements1, elements2,
+                                             matched1, matched2,
+                                             elementCount1, elementCount2);
+
+               if ((elementCount1 == mcount) &&
+                   (elementCount2 == mcount))
+               {
+               }
+               else if (elementCount1 == mcount)
+               {
+                       for (int i = 0; i < elementCount2; i++)
+                       {
+                               if (!matched2[i])
+                               {
+                                       dist += _xtree2->getDecendentsCount(elements2[i]) + 1;
+                                       if (_gFlag && (dist >= threshold))
+                                       {
+                                               dist = XTree::NO_CONNECTION;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+               else if (elementCount2 == mcount)
+               {
+                       for (int i = 0; i < elementCount1; i++)
+                       {
+                               if (!matched1[i])
+                               {
+                                       dist += _xtree1->getDecendentsCount(elements1[i]) + 1;
+                                       if (_gFlag && (dist >= threshold))
+                                       {
+                                               dist = XTree::NO_CONNECTION;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+               else //if ((count1 > mcount) && (count2 > mcount))
+               {
+                       // Write the list of unmatched nodes.
+                       int     ucount1 = elementCount1 - mcount;
+                       int     ucount2 = elementCount2 - mcount;
+                       int     *unmatched1 = new int[ucount1];
+                       int     *unmatched2 = new int[ucount2];
+                       int     muc1 = 0, muc2 = 0, start = 0;
+
+                       while ((muc1 < ucount1) && (muc2 < ucount2))
+                       {
+                               for (; (start < elementCount1) && matched1[start]; start++);
+                               string  startTag = _xtree1->getTag(elements1[start]);
+                               int     uele1 = 0, uele2 = 0;
+                               muc1++;
+                               unmatched1[uele1++] = elements1[start];
+                               matched1[start++] = true;
+
+                               for (int i = start; (i < elementCount1) && (muc1 < ucount1); i++)
+                               {
+                                       if (!matched1[i] && (startTag.compare(_xtree1->getTag(elements1[i])) == 0))
+                                       {
+                                               matched1[i] = true;
+                                               muc1++;
+                                               unmatched1[uele1++] = elements1[i];
+                                       }
+                               }
+
+                               for (int i = 0; (i < elementCount2) && (muc2 < ucount2); i++)
+                               {
+                                       if (!matched2[i] && (startTag.compare(_xtree2->getTag(elements2[i])) == 0))
+                                       {
+                                               matched2[i] = true;
+                                               muc2++;
+                                               unmatched2[uele2++] = elements2[i];
+                                       }
+                               }
+
+                               if (uele2 == 0)
+                               {
+                                       for (int i = 0; i < uele1; i++)
+                                               dist += _xtree1->getDecendentsCount(unmatched1[i]);
+                               }
+                               else
+                               {
+/*
+                                       if ((uele1 == 1) && (uele2 == 1))
+                                       {
+                                               dist += _xdiff(unmatched1[0],
+                                                              unmatched2[0],
+                                                              threshold-dist);
+                                       }
+                                       // To find minimal-cost matching between those unmatched elements (with the same tag name.
+                                       else if (uele1 >= uele2)
+                                       */
+                                       if (uele1 >= uele2)
+                                       {
+                                               if ((uele2 <= _sampleCount) ||
+                                                   !_gFlag)
+                                                       dist += _matchListO(unmatched1, unmatched2, uele1, uele2, true);
+                                               else
+                                                       dist += _matchList(unmatched1, unmatched2, uele1, uele2, true, threshold - dist);
+                                       }
+                                       else
+                                       {
+                                               if ((uele1 <= _sampleCount) ||
+                                                   !_gFlag)
+                                                       dist += _matchListO(unmatched2, unmatched1, uele2, uele1, false);
+                                               else
+                                                       dist += _matchList(unmatched2, unmatched1, uele2, uele1, false, threshold - dist);
+                                       }
+                               }
+
+                               if (_gFlag && (dist >= threshold))
+                               {
+                                       dist = XTree::NO_CONNECTION;
+                                       break;
+                               }
+                       }
+
+                       if (dist < XTree::NO_CONNECTION)
+                       {
+                               if (muc1 < ucount1)
+                               {
+                                       for (int i = start; i < elementCount1; i++)
+                                       {
+                                               if (!matched1[i])
+                                               {
+                                                       dist =+ _xtree1->getDecendentsCount(elements1[i]);
+                                                       if (_gFlag && (dist >= threshold))
+                                                       {
+                                                               dist = XTree::NO_CONNECTION;
+                                                               break;
+                                                       }
+                                               }
+                                       }
+                               }       
+                               else if (muc2 < ucount2)
+                               {
+                                       for (int i = 0; i < elementCount2; i++)
+                                       {
+                                               if (!matched2[i])
+                                               {
+                                                       dist += _xtree2->getDecendentsCount(elements2[i]);
+                                                       if (_gFlag && (dist >= threshold))
+                                                       {
+                                                               dist = XTree::NO_CONNECTION;
+                                                               break;
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+
+                       delete[] unmatched1;
+                       delete[] unmatched2;
+               }
+
+               delete[] elements1;
+               delete[] elements2;
+               delete[] matched1;
+               delete[] matched2;
+       }
+
+       if (!_gFlag || (dist < threshold))
+               return dist;
+       else
+               return XTree::NO_CONNECTION;
+}
+
+int XDiff::_diffAttributes(int attrCount1, int attrCount2)
+{
+       if ((attrCount1 == 1) && (attrCount2 == 1))
+       {
+               int     a1 = _attrList1[0];
+               int     a2 = _attrList2[0];
+               if (_xtree1->getHashValue(a1) == _xtree2->getHashValue(a2))
+                       return 0;
+
+               if (_xtree1->getTag(a1).compare(_xtree2->getTag(a2)) == 0)
+                       return 1;
+               else
+                       return 2;
+       }
+
+       int     dist = 0;
+       for (int i = 0; i < attrCount2; i++)
+       {
+               _attrHash[i] = _xtree2->getHashValue(_attrList2[i]);
+               _attrTag[i] = _xtree2->getTag(_attrList2[i]);
+               _attrMatch[i] = false;
+       }
+
+       int     matchCount = 0;
+       for (int i = 0; i < attrCount1; i++)
+       {
+               int     attr1 = _attrList1[i];
+               unsigned long long      ah1 = _xtree1->getHashValue(attr1);
+               string  tag1 = _xtree1->getTag(attr1);
+
+               bool    found = false;
+               for (int j = 0; j < attrCount2; j++)
+               {
+                       if (_attrMatch[j])
+                               continue;
+                       else if (ah1 == _attrHash[j])
+                       {
+                               _attrMatch[j] = true;
+                               matchCount++;
+                               found = true;
+                               break;
+                       }
+                       else if (tag1.compare(_attrTag[j]) == 0)
+                       {
+                               _attrMatch[j] = true;
+                               matchCount++;
+                               dist++;
+                               found = true;
+                               break;
+                       }
+               }
+
+               if (!found)
+                       dist += 2;
+       }
+
+       dist += (attrCount2 - matchCount) * 2;
+       return dist;
+}
+
+int XDiff::_diffText(int textCount1, int textCount2)
+{
+       for (int i = 0; i < textCount2; i++)
+       {
+               _textMatch2[i] = false;
+               _textHash[i] = _xtree2->getHashValue(_textList2[i]);
+       }
+
+       int     mcount = 0;
+       for (int i = 0; i < textCount1; i++)
+       {
+               unsigned long long      hash1 = _xtree1->getHashValue(_textList1[i]);
+               for (int j = 0; j < textCount2; j++)
+               {
+                       if (!_textMatch2[j] && (hash1 == _textHash[j]))
+                       {
+                               _textMatch2[j] = true;
+                               mcount++;
+                               break;
+                       }
+               }
+
+               if (mcount == textCount2)
+                       break;
+       }
+
+       if (textCount1 >= textCount2)
+               return textCount1 - mcount;
+       else
+               return textCount2 - mcount;
+}
+
+int XDiff::_matchListO(int *nodes1, int *nodes2, int count1, int count2,
+                      bool treeOrder)
+{
+       int     **distanceMatrix = new int*[count1+1];
+       int     *matching1 = new int[count1];
+       int     *matching2 = new int[count2];
+
+       // insert cost.
+       distanceMatrix[count1] = new int[count2+1];
+       for (int i = 0; i < count2; i++)
+               distanceMatrix[count1][i] = (treeOrder ? _xtree2->getDecendentsCount(nodes2[i]) : _xtree1->getDecendentsCount(nodes2[i])) + 1;
+
+       for (int i = 0; i < count1; i++)
+       {
+               distanceMatrix[i] = new int[count2+1];
+               int     deleteCost = (treeOrder ? _xtree1->getDecendentsCount(nodes1[i]) : _xtree2->getDecendentsCount(nodes1[i])) + 1;
+               for (int j = 0; j < count2; j++)
+               {
+                       int     dist = treeOrder ? distance(nodes1[i], nodes2[j], true, XTree::NO_CONNECTION) : distance(nodes2[j], nodes1[i], true, XTree::NO_CONNECTION);
+
+                       // the default mode.
+                       if (!_oFlag && (dist > 1) && 
+                           (dist < XTree::NO_CONNECTION) &&
+                           (dist >= _NO_MATCH_THRESHOLD *
+                            (deleteCost + distanceMatrix[count1][j])))
+                               dist = XTree::NO_CONNECTION;
+
+                       if (dist < XTree::NO_CONNECTION)
+                       {
+                               if (treeOrder)
+                                       _xlut->add(nodes1[i], nodes2[j], dist);
+                               else
+                                       _xlut->add(nodes2[j], nodes1[i], dist);
+                       }
+                       distanceMatrix[i][j] = dist;
+               }
+               // delete cost.
+               distanceMatrix[i][count2] = deleteCost;
+       }
+
+       // compute the minimal cost matching.
+       int     dist_value = findMatching(count1, count2, distanceMatrix,
+                                         matching1, matching2);
+
+       delete[]        matching1;
+       delete[]        matching2;
+       for (int i = 0; i <= count1; i++)
+               delete[]        distanceMatrix[i];
+       delete[]        distanceMatrix;
+
+       return dist_value;
+}
+
+int XDiff::_matchList(int *nodes1, int *nodes2, int count1, int count2,
+                     bool treeOrder, int threshold)
+{
+       int     *matching1 = new int[count1];
+       int     *matching2 = new int[count2];
+       for (int i = 0; i < count1; i++)
+               matching1[i] = XTree::NULL_NODE;
+       for (int i = 0; i < count2; i++)
+               matching2[i] = XTree::NULL_NODE;
+
+       int     Distance = 0;
+       int     scount1 = 0;
+       int     scount2 = 0;
+       int     matchingThreshold = 0;
+
+       for (int i = 0; ((i < _sampleCount) && (scount2 < count2)); scount2++)
+       {
+               int     snode = rand() % (count2 - scount2) + scount2;
+               int     dist = XTree::NO_CONNECTION;
+               int     bestmatch = XTree::NULL_NODE;
+               for (int j = scount1; j < count1; j++)
+               {
+                       int     d = treeOrder ? distance(nodes1[j], nodes2[snode], false, threshold - Distance) : distance(nodes2[snode], nodes1[j], false, threshold - Distance);
+                       if (d < dist)
+                       {
+                               dist = d;
+                               bestmatch = j;
+                               if (d == 1)
+                                       break;
+                       }
+               }
+
+               int     deleteCost = (treeOrder ? _xtree2->getDecendentsCount(nodes2[snode]) : _xtree1->getDecendentsCount(nodes2[snode])) + 1;
+
+               if ((dist > 1) && (dist > _NO_MATCH_THRESHOLD * deleteCost))
+               {
+                       int     tmp = nodes2[snode];
+                       nodes2[snode] = nodes2[scount2];
+                       nodes2[scount2] = tmp;
+                       Distance += deleteCost;
+               }
+               else
+               {
+                       int     tmp = nodes1[bestmatch];
+                       nodes1[bestmatch] = nodes1[scount1];
+                       nodes1[scount1] = tmp;
+                       tmp = nodes2[snode];
+                       nodes2[snode] = nodes2[scount2];
+                       nodes2[scount2] = tmp;
+
+                       if (treeOrder)
+                               _xlut->add(nodes1[scount1], nodes2[scount2], dist);
+                       else
+                               _xlut->add(nodes2[scount2], nodes1[scount1], dist);
+                       matching1[scount1] = scount2;
+                       matching2[scount2] = scount1;
+
+                       i++;
+                       scount1++;
+                       if (matchingThreshold < dist)
+                               matchingThreshold = dist;
+                       Distance += dist;
+               }
+
+               if (Distance >= threshold)
+               {
+                       delete[]        matching1;
+                       delete[]        matching2;
+                       return XTree::NO_CONNECTION;
+               }
+       }
+
+       for (;scount2 < count2; scount2++)
+       {
+               int     deleteCost = (treeOrder ? _xtree2->getDecendentsCount(nodes2[scount2]) : _xtree1->getDecendentsCount(nodes2[scount2])) + 1;
+               int     dist = XTree::NO_CONNECTION;
+               int     bestmatch = XTree::NULL_NODE;
+               for (int i = scount1; i < count1; i++)
+               {
+                       int     d = treeOrder ? distance(nodes1[i], nodes2[scount2], false, threshold - Distance) : distance(nodes2[scount2], nodes1[i], false, threshold - Distance);
+                       if (d <= matchingThreshold)
+                       {
+                               dist = d;
+                               bestmatch = i;
+                               break;
+                       }
+                       else if ((d == 1) || (d < _NO_MATCH_THRESHOLD * dist))
+                       {
+                               dist = d;
+                               bestmatch = i;
+                       }
+               }
+
+               if (bestmatch == XTree::NO_MATCH)
+               {
+                       Distance += deleteCost;
+               }
+               else
+               {
+                       int     tmp = nodes1[bestmatch];
+                       nodes1[bestmatch] = nodes1[scount1];
+                       nodes1[scount1] = tmp;
+
+                       if (treeOrder)
+                               _xlut->add(nodes1[scount1], nodes2[scount2], dist);
+                       else
+                               _xlut->add(nodes2[scount2], nodes1[scount1], dist);
+
+                       matching1[scount1] = scount2;
+                       matching2[scount2] = scount1;
+                       scount1++;
+                       Distance += dist;
+               }
+
+               if (Distance >= threshold)
+               {
+                       delete[]        matching1;
+                       delete[]        matching2;
+                       return XTree::NO_CONNECTION;
+               }
+       }
+
+       for (int i = 0; i < count1; i++)
+       {
+               if (matching1[i] == XTree::NO_MATCH)
+               {
+                       Distance += (treeOrder ? _xtree1->getDecendentsCount(nodes1[i]) : _xtree2->getDecendentsCount(nodes1[i])) + 1;
+                       if (Distance >= threshold)
+                       {
+                               delete[]        matching1;
+                               delete[]        matching2;
+                               return XTree::NO_CONNECTION;
+                       }
+               }
+       }
+
+       delete[]        matching1;
+       delete[]        matching2;
+       return Distance;
+}
+
+int XDiff::findMatching(int count1, int count2, int** dist,
+                       int* matching1, int* matching2)
+{
+       if (count1 == 1)
+       {
+               // count2 == 1
+               if (dist[0][0] < XTree::NO_CONNECTION)
+               {
+                       matching1[0] = 0;
+                       matching2[0] = 0;
+               }
+               else
+               {
+                       matching1[0] = XTree::DELETE;
+                       matching2[0] = XTree::DELETE;
+               }
+
+               return dist[0][0];
+       }
+       else if (count2 == 1)
+       {
+               int     distance = 0;
+               int     mate = 0;
+               int     mindist = XTree::NO_CONNECTION;
+               matching2[0] = XTree::DELETE;
+
+               for (int i = 0; i < count1; i++)
+               {
+                       matching1[i] = XTree::DELETE;
+                       if (mindist > dist[i][0])
+                       {
+                               mindist = dist[i][0];
+                               mate = i;
+                       }
+
+                       // Suppose we delete every node on list1.
+                       distance += dist[i][1];
+               }
+
+               if (mindist < XTree::NO_CONNECTION)
+               {
+                       matching1[mate] = 0;
+                       matching2[0] = mate;
+                       distance += mindist - dist[mate][1];
+               }
+               else
+               {
+                       // Add the delete cost of the single node on list2.
+                       distance += dist[count1][0];
+               }
+
+               return distance;
+       }
+       else if ((count1 == 2) && (count2 == 2))
+       {
+               int     distance1 = dist[0][0] + dist[1][1];
+               int     distance2 = dist[0][1] + dist[1][0];
+               if (distance1 < distance2)
+               {
+                       if (dist[0][0] < XTree::NO_CONNECTION)
+                       {
+                               matching1[0] = 0;
+                               matching2[0] = 0;
+                               distance1 = dist[0][0];
+                       }
+                       else
+                       {
+                               matching1[0] = XTree::DELETE;
+                               matching2[0] = XTree::DELETE;
+                               distance1 = dist[0][2] + dist[2][0];
+                       }
+
+                       if (dist[1][1] < XTree::NO_CONNECTION)
+                       {
+                               matching1[1] = 1;
+                               matching2[1] = 1;
+                               distance1 += dist[1][1];
+                       }
+                       else
+                       {
+                               matching1[1] = XTree::DELETE;
+                               matching2[1] = XTree::DELETE;
+                               distance1 += dist[1][2] + dist[2][1];
+                       }
+
+                       return distance1;
+               }
+               else
+               {
+                       if (dist[0][1] < XTree::NO_CONNECTION)
+                       {
+                               matching1[0] = 1;
+                               matching2[1] = 0;
+                               distance2 = dist[0][1];
+                       }
+                       else
+                       {
+                               matching1[0] = XTree::DELETE;
+                               matching2[1] = XTree::DELETE;
+                               distance2 = dist[0][2] + dist[2][1];
+                       }
+
+                       if (dist[1][0] < XTree::NO_CONNECTION)
+                       {
+                               matching1[1] = 0;
+                               matching2[0] = 1;
+                               distance2 += dist[1][0];
+                       }
+                       else
+                       {
+                               matching1[1] = XTree::DELETE;
+                               matching2[0] = XTree::DELETE;
+                               distance2 += dist[1][2] + dist[2][0];
+                       }
+
+                       return distance2;
+               }
+       }
+       else
+       {
+               return optimalMatching(count1, count2, dist,
+                                      matching1, matching2);
+       }
+}
+
+int XDiff::optimalMatching(int count1, int count2, int** dist,
+                          int* matching1, int* matching2)
+{
+       // Initialize matching. 
+       // Initial guess will be pair-matching between two lists.
+       // Others will be insertion or deletion
+       for (int i = 0; i < count2; i++)
+               matching1[i] = i;
+       for (int i = count2; i < count1; i++)
+               matching1[i] = XTree::DELETE;
+
+       // Three artificial nodes: "start", "end" and "delete".
+       int count = count1 + count2 + 3;
+
+       // Initialize least cost matrix and path matrix.
+       // Both have been initialized at the very beginning.
+
+       // Start algorithm.
+       while (true)
+       {
+               // Construct least cost matrix.
+               constructLCM(dist, matching1, count1, count2);
+
+               // Initialize path matrix.
+               for (int i = 0; i < count; i++)
+                       for (int j = 0; j < count; j++)
+                               _pathMatrix[i][j] = i;
+
+               // Search negative cost circuit.
+               int     clen = searchNCC(count);
+               if (clen > 0)
+               {
+                       // Modify matching.
+                       for (int i = 0, next = 0; i < clen - 1; i++)
+                       {
+                               int     n1 = _circuit[next];
+                               next = _circuit[next+1];
+                               // Node in node list 1.
+                               if ((n1 > 0) && (n1 <= count1))
+                               {
+                                       int     nid1 = n1 - 1;
+                                       int     nid2 = _circuit[next] - count1 - 1;
+                                       if (nid2 == count2)
+                                               nid2 = XTree::DELETE;
+
+                                       matching1[nid1] = nid2;
+                               }
+                       }
+               }
+               else // Stop.
+                       break;
+       }
+
+       int     distance = 0;
+       // Suppose all insertion on list2
+       for (int i = 0; i < count2; i++)
+       {
+               matching2[i] = XTree::INSERT;
+               distance += dist[count1][i];
+       }
+
+       // update distance by looking at matching pairs.
+       for (int i = 0; i < count1; i++)
+       {
+               int     mmm = matching1[i];
+               if (mmm == XTree::DELETE)
+                       distance += dist[i][count2];
+               else
+               {
+                       matching2[mmm] = i;
+                       distance += dist[i][mmm] -
+                                   dist[count1][mmm];
+               }
+       }
+
+       return distance;
+}
+
+void XDiff::constructLCM(int** costMatrix, int* matching,
+                        int nodeCount1, int nodeCount2)
+{
+       // Three artificial nodes: "start", "end" and "delete".
+       int nodeCount = nodeCount1 + nodeCount2 + 3;
+
+       // Initialize.
+       for (int i = 0; i < nodeCount; i++)
+       {
+               for (int j = 0; j < nodeCount; j++)
+               _leastCostMatrix[i][j] = XTree::NO_CONNECTION;
+
+               // self.
+               _leastCostMatrix[i][i] = 0;
+       }
+
+       // Between start node and nodes in list 1.
+       // Start -> node1 = Infinity; node1 -> Start = -0.
+       for (int i = 0; i < nodeCount1; i++)
+               _leastCostMatrix[i+1][0] = 0;
+
+       // Between nodes in list2 and the end node.
+       // Unless matched (later), node2 -> end = 0;
+       // end -> node2 = Infinity.
+       for (int i = 0; i < nodeCount2; i++)
+               _leastCostMatrix[i+nodeCount1+1][nodeCount-1] = 0;
+
+       int deleteCount = 0;
+
+       // Between nodes in list1 and nodes in list2.
+       // For matched, node1 -> node2 = Infinity;
+       // node2 -> node1 = -1 * distance
+       // For unmatched, node1 -> node2 = distance;
+       // node2 -> node1 = Infinity
+       for (int i = 0; i < nodeCount1; i++)
+       {
+               int node1 = i + 1;
+               int node2;
+
+               // According to cost matrix.
+               for (int j = 0; j < nodeCount2; j++)
+               {
+                       node2 = j + nodeCount1 + 1;
+                       _leastCostMatrix[node1][node2] = costMatrix[i][j];
+               }
+
+               // According to matching.
+               if (matching[i] == XTree::DELETE)
+               {
+                       deleteCount++;
+
+                       // node1 -> Delete = Infinity;
+                       // Delete -> node1 = -1 * DELETE_COST
+                       _leastCostMatrix[nodeCount-2][node1] = -1 * costMatrix[i][nodeCount2];
+               }
+               else
+               {
+                       node2 = matching[i] + nodeCount1 + 1;
+
+                       // Between node1 and node2.
+                       _leastCostMatrix[node1][node2] = XTree::NO_CONNECTION;
+                       _leastCostMatrix[node2][node1] = costMatrix[i][matching[i]] * -1;
+
+                       // Between node1 and delete.
+                       _leastCostMatrix[node1][nodeCount-2] = costMatrix[i][nodeCount2];
+
+                       // Between node2 and end.
+                       _leastCostMatrix[node2][nodeCount-1] = XTree::NO_CONNECTION;
+                       _leastCostMatrix[nodeCount-1][node2] = costMatrix[nodeCount1][matching[i]];
+               }
+       }
+
+       // Between the "Delete" and the "End".
+       // If delete all, delete -> end = Infinity; end -> delete = 0.
+       if (deleteCount == nodeCount1)
+               _leastCostMatrix[nodeCount-1][nodeCount-2] = 0;
+       // if no delete, delete -> end = 0; end -> delete = Infinity.
+       else if (deleteCount == 0)
+               _leastCostMatrix[nodeCount-2][nodeCount-1] = 0;
+       // else, both 0;
+       else
+       {
+               _leastCostMatrix[nodeCount-2][nodeCount-1] = 0;
+               _leastCostMatrix[nodeCount-1][nodeCount-2] = 0;
+       }
+}
+
+int XDiff::searchNCC(int nodeCount)
+{
+       for (int k = 0; k < nodeCount; k++)
+       {
+           for (int i = 0; i < nodeCount; i++)
+           {
+               if ((i != k) &&
+                   (_leastCostMatrix[i][k] != XTree::NO_CONNECTION))
+               {
+                   for (int j = 0; j < nodeCount; j++)
+                   {
+                       if ((j != k) &&
+                           (_leastCostMatrix[k][j] != XTree::NO_CONNECTION))
+                       {
+                           int less = _leastCostMatrix[i][k] +
+                                      _leastCostMatrix[k][j];
+                           if (less < _leastCostMatrix[i][j])
+                           {
+                               _leastCostMatrix[i][j] = less;
+                               _pathMatrix[i][j] = k;
+
+                               // Found!
+                               if ((i == j) && (less < 0))
+                               {
+                                   int clen = 0; // the length of the circuit.
+
+                                   // Locate the circuit.
+                                   //circuit.addElement(new Integer(i));
+                                   _circuit[0] = i;
+                                   _circuit[1] = 2;
+
+                                   //circuit.addElement(new Integer(pathMatrix[i][i]));
+                                   _circuit[2] = _pathMatrix[i][i];
+                                   _circuit[3] = 4;
+
+                                   //circuit.addElement(new Integer(i));
+                                   _circuit[4] = i;
+                                   _circuit[5] = -1;
+
+                                   clen = 3;
+
+                                   bool        finish;
+
+                                   do
+                                   {
+                                       finish = true;
+                                       for (int cit = 0, n = 0; cit < clen - 1; cit++)
+                                       {
+                                           int left = _circuit[n];
+                                           int next = _circuit[n+1];
+                                           int right = (next == -1)? -1 : _circuit[next];
+
+                                           //int middle = pathMatrix[circuit[n-1]][circuit[n]];
+                                           int middle = _pathMatrix[left][right];
+
+                                           if (middle != left)
+                                           {
+                                               //circuit.insert( cit, middle );
+                                               _circuit[clen*2] = middle;
+                                               _circuit[clen*2+1] = next;
+                                               _circuit[n+1] = clen*2;
+                                               clen++;
+
+                                               finish = false;
+                                               break;
+                                           }
+                                           n = next;
+                                       }
+                                   } while (!finish);
+
+                                   return clen;
+                               }
+                           }
+                       }
+                   }
+               }
+           }
+       }
+       return 0;
+}
+
+void XDiff::writeDiff(const char* input, const char* output)
+{
+       ifstream        in(input);
+       ofstream        out(output, ios::out|ios::trunc);
+
+       int     root1 = _xtree1->getRoot();
+       int     root2 = _xtree2->getRoot();
+       // the header
+       // XXX <root > is valid and should be treated as <root>;
+       // < root> is NOT valid, so use <root as the comparison key.
+       string  rootTag = "<" + _xtree1->getTag(root1);
+       string  line;
+       while (getline(in, line))
+       {
+               if (line.find(rootTag) != string::npos)
+                       break;
+               out << line << endl;
+       }
+       in.close();
+
+       int     matchType, matchNode;
+       _xtree1->getMatching(root1, matchType, matchNode);
+       if (matchType == XTree::DELETE)
+       {
+               writeDeleteNode(out, root1);
+               writeInsertNode(out, root2);
+       }
+       else
+               writeDiffNode(out, root1, root2);
+
+       out.close();
+}
+
+void XDiff::writeDeleteNode(ofstream &out, int node)
+{
+       if (_xtree1->isElement(node))
+       {
+               string  tag = _xtree1->getTag(node);
+               out << "<" << tag;
+
+               // Attributes.
+               int     attr = _xtree1->getFirstAttribute(node);
+               while (attr > 0)
+               {
+                       string  atag = _xtree1->getTag(attr);
+                       string  value = _xtree1->getAttributeValue(attr);
+                       out << " " << atag << "=\"" << value << "\"";
+                       attr = _xtree1->getNextAttribute(attr);
+               }
+
+               // Child nodes.
+               int     child = _xtree1->getFirstChild(node);
+
+               if (child < 0)
+               {
+                       out << "/><?DELETE " << tag << "?>" << endl;
+                       _needNewLine = false;
+                       return;
+               }
+
+               out << "><?DELETE " << tag << "?>" << endl;
+               _needNewLine = false;
+
+               while (child > 0)
+               {
+                       writeMatchNode(out, _xtree1, child);
+                       child = _xtree1->getNextSibling(child);
+               }
+
+               if (_needNewLine)
+               {
+                       out << endl;
+                       _needNewLine = false;
+               }
+
+               out << "</" << tag << ">" << endl;
+       }
+       else
+       {
+               out << "<?DELETE \"" << constructText(_xtree1, node)
+                       << "\"?>" << endl;
+               _needNewLine = false;
+       }
+}
+
+void XDiff::writeInsertNode(ofstream &out, int node)
+{
+       if (_xtree2->isElement(node))
+       {
+               string  tag = _xtree2->getTag(node);
+               out << "<" << tag;
+
+               // Attributes.
+               int     attr = _xtree2->getFirstAttribute(node);
+               while (attr > 0)
+               {
+                       string  atag = _xtree2->getTag(attr);
+                       string  value = _xtree2->getAttributeValue(attr);
+                       out << " " << atag << "=\"" << value << "\"";
+                       attr = _xtree2->getNextAttribute(attr);
+               }
+
+               // Child nodes.
+               int     child = _xtree2->getFirstChild(node);
+               if (child < 0)
+               {
+                       out << "/><?INSERT " << tag << "?>" << endl;
+                       _needNewLine = false;
+                       return;
+               }
+
+               out << "><?INSERT " << tag << "?>" << endl;
+               _needNewLine = false;
+
+               while (child > 0)
+               {
+                       writeMatchNode(out, _xtree2, child);
+                       child = _xtree2->getNextSibling(child);
+               }
+
+               if (_needNewLine)
+               {
+                       out << endl;
+                       _needNewLine = false;
+               }
+
+               out << "</" << tag << ">" << endl;
+       }
+       else
+       {
+               out << constructText(_xtree2, node) << "<?INSERT?>" << endl;
+               _needNewLine = false;
+       }
+}
+
+void XDiff::writeMatchNode(ofstream &out, XTree *xtree, int node)
+{
+       if (xtree->isElement(node))
+       {
+               string  tag = xtree->getTag(node);
+               if (_needNewLine)
+                       out << endl;
+
+               out << "<" << tag;
+
+               // Attributes.
+               int     attr = xtree->getFirstAttribute(node);
+               while (attr > 0)
+               {
+                       string  atag = xtree->getTag(attr);
+                       string  value = xtree->getAttributeValue(attr);
+                       out << " " << atag << "=\"" << value << "\"";
+                       attr = xtree->getNextAttribute(attr);
+               }
+
+               // Child nodes.
+               int     child = xtree->getFirstChild(node);
+               if (child < 0)
+               {
+                       out << "/>" << endl;
+                       _needNewLine = false;
+                       return;
+               }
+
+               out << ">";
+               _needNewLine = true;
+
+               while (child > 0)
+               {
+                       writeMatchNode(out, xtree, child);
+                       child = xtree->getNextSibling(child);
+               }
+
+               if (_needNewLine)
+               {
+                       out << endl;
+                       _needNewLine = false;
+               }
+
+               out << "</" << tag << ">" << endl;
+       }
+       else
+       {
+               out << constructText(xtree, node);
+               _needNewLine = false;
+       }
+}
+
+void XDiff::writeDiffNode(ofstream &out, int node1, int node2)
+{
+       if (_xtree1->isElement(node1))
+       {
+               string  tag = _xtree1->getTag(node1);
+               if (_needNewLine)
+                       out << endl;
+               out << "<" << tag;
+
+               // Attributes.
+               int     attr1 = _xtree1->getFirstAttribute(node1);
+               string  diffff = "";
+               while (attr1 > 0)
+               {
+                       string  atag = _xtree1->getTag(attr1);
+                       string  value = _xtree1->getAttributeValue(attr1);
+                       int     matchType, matchNode;
+                       _xtree1->getMatching(attr1, matchType, matchNode);
+                       if (matchType == XTree::MATCH)
+                               out << " " << atag << "=\"" << value << "\"";
+                       else if (matchType == XTree::DELETE)
+                       {
+                               out << " " << atag << "=\"" << value << "\"";
+                               diffff += "<?DELETE " + atag + "?>";
+                       }
+                       else
+                       {
+                               string  value2 = _xtree2->getAttributeValue(matchNode);
+                               out << " " << atag << "=\"" << value2 << "\"";
+                               diffff += "<?UPDATE " + atag +
+                                         " FROM \"" + value + "\"?>";
+                       }
+
+                       attr1 = _xtree1->getNextAttribute(attr1);
+               }
+
+               int     attr2 = _xtree2->getFirstAttribute(node2);
+               while (attr2 > 0)
+               {
+                       int     matchType, matchNode;
+                       _xtree2->getMatching(attr2, matchType, matchNode);
+                       if (matchType == XTree::INSERT)
+                       {
+                               string  atag = _xtree2->getTag(attr2);
+                               string  value = _xtree2->getAttributeValue(attr2);
+                               out << " " << atag << "=\"" << value << "\"";
+                               diffff += "<?INSERT " + atag + "?>";
+                       }
+
+                       attr2 = _xtree2->getNextAttribute(attr2);
+               }
+
+               // Child nodes.
+               int     child1 = _xtree1->getFirstChild(node1);
+               if (child1 < 0)
+               {
+                       out << "/>" << diffff << endl;
+                       _needNewLine = false;
+                       return;
+               }
+
+               out << ">" << diffff;
+               _needNewLine = true;
+
+               while (child1 > 0)
+               {
+                       int     matchType, matchNode;
+                       _xtree1->getMatching(child1, matchType, matchNode);
+                       if (matchType == XTree::MATCH)
+                               writeMatchNode(out, _xtree1, child1);
+                       else if (matchType == XTree::DELETE)
+                               writeDeleteNode(out, child1);
+                       else
+                               writeDiffNode(out, child1, matchNode);
+
+                       child1 = _xtree1->getNextSibling(child1);
+               }
+
+               int     child2 = _xtree2->getFirstChild(node2);
+               while (child2 > 0)
+               {
+                       int     matchType, matchNode;
+                       _xtree2->getMatching(child2, matchType, matchNode);
+                       if (matchType == XTree::INSERT)
+                               writeInsertNode(out, child2);
+
+                       child2 = _xtree2->getNextSibling(child2);
+               }
+
+               if (_needNewLine)
+               {
+                       out << endl;
+                       _needNewLine = false;
+               }
+
+               out << "</" << tag << ">" << endl;
+       }
+       else
+       {
+               out << constructText(_xtree2, node2) << "<?UPDATE FROM \"" 
+                       << constructText(_xtree1, node1) << "\"?>";
+               _needNewLine = false;
+       }
+}
+
+string XDiff::constructText(XTree *xtree, int eid)
+{
+       string          text = xtree->getText(eid);
+       vector<size_t>  cdatalist = xtree->getCDATA(eid);
+       if (cdatalist.empty())
+               return text;
+
+       string  buf = "";
+       int     count = cdatalist.size();
+       size_t  lastEnd = 0;
+       for (int i = 0; i < count; i += 2)
+       {
+               size_t  cdataStart = cdatalist[i];
+               size_t  cdataEnd = cdatalist[i+1];
+
+               if (cdataStart > lastEnd)
+                       buf += text.substr(lastEnd, cdataStart - lastEnd);
+               buf += "<![CDATA[" +
+                      text.substr(cdataStart, cdataEnd - cdataStart) +
+                      "]]>";
+               lastEnd = cdataEnd;
+       }
+
+       if (lastEnd < text.length())
+               buf += text.substr(lastEnd);
+
+       return buf;
+}
+
+double XDiff::_diffTime(const struct timeval *time1,
+                       const struct timeval *time2)
+{
+       long    sec = time2->tv_sec - time1->tv_sec;
+       long    usec = time2->tv_usec - time1->tv_usec;
+       if (usec < 0)
+       {
+               sec--;
+               usec += 1000000;
+       }
+
+       return 0.000001 * usec + sec;
+}
+
+int main(int argc, char* args[])
+{
+       try
+       {
+               XMLPlatformUtils::Initialize();
+       }
+       catch (const XMLException& e)
+       {
+               cerr << "Error during initialization! :\n"
+                       << e.getMessage() << endl;
+               exit(1);
+       }
+
+       int     opid = 1;
+       if (argc < 4)
+       {
+               cout << XDiff::USAGE << endl;
+               exit(1);
+       }
+       else if (strcmp(args[1], "-o") == 0)
+       {
+               XDiff::_oFlag = true;
+               opid++;
+       }
+       else if (strcmp(args[1], "-g") == 0)
+       {
+               XDiff::_gFlag = true;
+               opid++;
+       }
+
+       if (strcmp(args[opid], "-p") == 0)
+       {
+               opid++;
+               double  p = strtod(args[opid], NULL);
+               if ((p <= 0) || (p > 1))
+               {
+                       cout << XDiff::USAGE << endl;
+                       exit(1);
+               }
+               XDiff::_NO_MATCH_THRESHOLD = p;
+               opid++;
+       }
+
+       if ((argc - opid) != 3)
+       {
+               cout << XDiff::USAGE << endl;
+               exit(1);
+       }
+
+       XDiff   xdiff(args[opid], args[opid+1], args[opid+2]);
+}
diff --git a/XDiff.hpp b/XDiff.hpp
new file mode 100644 (file)
index 0000000..b87865e
--- /dev/null
+++ b/XDiff.hpp
@@ -0,0 +1,118 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#ifndef        __XDIFF__
+#define        __XDIFF__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <fstream>
+#include <math.h>
+#include <algorithm>
+
+#include "XTree.hpp"
+#include "XParser.hpp"
+#include "XLut.hpp"
+
+/**
+  * XDiff computes the difference between two input XML documents.
+  */
+class XDiff
+{
+public:
+       static const string     USAGE;
+       static bool             _oFlag, _gFlag;
+        static double          _NO_MATCH_THRESHOLD;
+       /**
+         * @param      input1  input file #1
+         * @param      input2  input file #2
+         * @param      output  output file
+         */
+       XDiff(const char* input1, const char* input2, const char* output);
+       ~XDiff();
+
+private:
+       static const int        _CIRCUIT_SIZE, _MATRIX_SIZE, _ATTRIBUTE_SIZE;
+       static const int        _TEXT_SIZE, _sampleCount;
+
+       int                     *_attrList1, *_attrList2, *_textList1;
+       int                     *_textList2, *_circuit, _seed;
+       int                     **_leastCostMatrix, **_pathMatrix;
+       bool                    *_attrMatch, *_textMatch1, *_textMatch2;
+       bool                    _needNewLine;
+       unsigned long long      *_attrHash, *_textHash;
+       string                  *_attrTag;
+       XTree                   *_xtree1, *_xtree2;
+       XLut                    *_xlut;
+
+       void xdiff(int pid1, int pid2, bool matchFlag);
+       void diffAttributes(int attrCount1, int attrCount2);
+       void diffText(int textCount1, int textCount2);
+       int _matchFilter(int *elements1, int *elements2, bool *matched1,
+                        bool *matched2, int count1, int count2);
+       void matchListO(int *nodes1, int *nodes2, int count1, int count2,
+                       bool treeOrder, bool matchFlag);
+       void matchList(int *nodes1, int *nodes2, int count1, int count2,
+                      bool treeOrder, bool matchFlag);
+       int distance(int eid1, int eid2, bool toRecord, int threshold);
+       int _xdiff(int pid1, int pid2, int threshold = XTree::NO_CONNECTION);
+       int _diffAttributes(int attrCount1, int attrCount2);
+       int _diffText(int textCount1, int textCount2);
+       int _matchListO(int *nodes1, int *nodes2, int count1, int count2,
+                       bool treeOrder);
+       int _matchList(int *nodes1, int *nodes2, int count1, int count2,
+                      bool treeOrder, int threshold);
+       int findMatching(int count1, int count2, int** dist,
+                        int* matching1, int* matching2);
+       int optimalMatching(int count1, int count2, int** dist,
+                           int* matching1, int* matching2);
+       void constructLCM(int** costMatrix, int* matching,
+                         int nodeCount1, int nodeCount2);
+       int searchNCC(int nodeCount);
+       double _diffTime(const struct timeval *time1,
+                        const struct timeval *time2);
+
+       void writeDiff(const char* input, const char* output);
+       void writeDeleteNode(ofstream &out, int node);
+       void writeInsertNode(ofstream &out, int node);
+       void writeMatchNode(ofstream &out, XTree *xtree, int node);
+       void writeDiffNode(ofstream &out, int node1, int node2);
+       string constructText(XTree *xtree, int eid);
+};
+#endif
diff --git a/XHash.cpp b/XHash.cpp
new file mode 100644 (file)
index 0000000..d00984c
--- /dev/null
+++ b/XHash.cpp
@@ -0,0 +1,323 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#include "XHash.hpp"
+
+const unsigned int     XHash::_PC_1[56] =
+{
+       60, 52, 44, 36, 59, 51, 43, 35, 27, 19, 11,  3, 58, 50,
+       42, 34, 26, 18, 10,  2, 57, 49, 41, 33, 25, 17,  9,  1,
+       28, 20, 12,  4, 61, 53, 45, 37, 29, 21, 13,  5, 62, 54,
+       46, 38, 30, 22, 14,  6, 63, 55, 47, 39, 31, 23, 15,  7
+};
+
+const unsigned int     XHash::_PC_2[48] =
+{
+       24, 27, 20,  6, 14, 10,  3, 22,  0, 17,  7, 12,
+        8, 23, 11,  5, 16, 26,  1,  9, 19, 25,  4, 15,
+       54, 43 ,36, 29, 49, 40, 48, 30, 52, 44, 37, 33,
+       46, 35, 50, 41, 28, 53, 51, 55, 32, 45, 39, 42
+};
+
+const unsigned int     XHash::_subKeyRotation[16] =
+{
+       1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
+};
+
+const unsigned int     XHash::_IP[64] =
+{
+       57, 49, 41, 33, 25, 17,  9, 1, 59, 51, 43, 35, 27, 19, 11, 3,
+       61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7,
+       56, 48, 40, 32 ,24, 16,  8, 0, 58, 50, 42, 34, 26, 18, 10, 2,
+       60, 52, 44, 36, 28, 20, 12, 4, 62, 54, 46, 38, 30, 22, 14, 6
+};
+
+const unsigned int     XHash::_FP[64] =
+{
+       39, 7, 47, 15, 55, 23, 63, 31, 38, 6, 46, 14, 54, 22, 62, 30,
+       37, 5, 45, 13, 53, 21, 61, 29, 36, 4, 44, 12, 52, 20, 60, 28,
+       35, 3, 43, 11, 51, 19, 59, 27, 34, 2, 42, 10, 50, 18, 58, 26,
+       33, 1, 41,  9, 49, 17, 57, 25, 32, 0, 40,  8, 48, 16, 56, 24
+};
+
+const unsigned int     XHash::_sBox[8][64] =
+{
+       {
+               0x00808200, 0x00000000, 0x00008000, 0x00808202,
+               0x00808002, 0x00008202, 0x00000002, 0x00008000,
+               0x00000200, 0x00808200, 0x00808202, 0x00000200,
+               0x00800202, 0x00808002, 0x00800000, 0x00000002,
+               0x00000202, 0x00800200, 0x00800200, 0x00008200,
+               0x00008200, 0x00808000, 0x00808000, 0x00800202,
+               0x00008002, 0x00800002, 0x00800002, 0x00008002,
+               0x00000000, 0x00000202, 0x00008202, 0x00800000,
+               0x00008000, 0x00808202, 0x00000002, 0x00808000,
+               0x00808200, 0x00800000, 0x00800000, 0x00000200,
+               0x00808002, 0x00008000, 0x00008200, 0x00800002,
+               0x00000200, 0x00000002, 0x00800202, 0x00008202,
+               0x00808202, 0x00008002, 0x00808000, 0x00800202,
+               0x00800002, 0x00000202, 0x00008202, 0x00808200,
+               0x00000202, 0x00800200, 0x00800200, 0x00000000,
+               0x00008002, 0x00008200, 0x00000000, 0x00808002
+       },
+       {
+               0x40084010, 0x40004000, 0x00004000, 0x00084010,
+               0x00080000, 0x00000010, 0x40080010, 0x40004010,
+               0x40000010, 0x40084010, 0x40084000, 0x40000000,
+               0x40004000, 0x00080000, 0x00000010, 0x40080010,
+               0x00084000, 0x00080010, 0x40004010, 0x00000000,
+               0x40000000, 0x00004000, 0x00084010, 0x40080000,
+               0x00080010, 0x40000010, 0x00000000, 0x00084000,
+               0x00004010, 0x40084000, 0x40080000, 0x00004010,
+               0x00000000, 0x00084010, 0x40080010, 0x00080000,
+               0x40004010, 0x40080000, 0x40084000, 0x00004000,
+               0x40080000, 0x40004000, 0x00000010, 0x40084010,
+               0x00084010, 0x00000010, 0x00004000, 0x40000000,
+               0x00004010, 0x40084000, 0x00080000, 0x40000010,
+               0x00080010, 0x40004010, 0x40000010, 0x00080010,
+               0x00084000, 0x00000000, 0x40004000, 0x00004010,
+               0x40000000, 0x40080010, 0x40084010, 0x00084000
+       },
+       {
+               0x00000104, 0x04010100, 0x00000000, 0x04010004,
+               0x04000100, 0x00000000, 0x00010104, 0x04000100,
+               0x00010004, 0x04000004, 0x04000004, 0x00010000,
+               0x04010104, 0x00010004, 0x04010000, 0x00000104,
+               0x04000000, 0x00000004, 0x04010100, 0x00000100,
+               0x00010100, 0x04010000, 0x04010004, 0x00010104,
+               0x04000104, 0x00010100, 0x00010000, 0x04000104,
+               0x00000004, 0x04010104, 0x00000100, 0x04000000,
+               0x04010100, 0x04000000, 0x00010004, 0x00000104,
+               0x00010000, 0x04010100, 0x04000100, 0x00000000,
+               0x00000100, 0x00010004, 0x04010104, 0x04000100,
+               0x04000004, 0x00000100, 0x00000000, 0x04010004,
+               0x04000104, 0x00010000, 0x04000000, 0x04010104,
+               0x00000004, 0x00010104, 0x00010100, 0x04000004,
+               0x04010000, 0x04000104, 0x00000104, 0x04010000,
+               0x00010104, 0x00000004, 0x04010004, 0x00010100
+       },
+       {
+               0x80401000, 0x80001040, 0x80001040, 0x00000040,
+               0x00401040, 0x80400040, 0x80400000, 0x80001000,
+               0x00000000, 0x00401000, 0x00401000, 0x80401040,
+               0x80000040, 0x00000000, 0x00400040, 0x80400000,
+               0x80000000, 0x00001000, 0x00400000, 0x80401000,
+               0x00000040, 0x00400000, 0x80001000, 0x00001040,
+               0x80400040, 0x80000000, 0x00001040, 0x00400040,
+               0x00001000, 0x00401040, 0x80401040, 0x80000040,
+               0x00400040, 0x80400000, 0x00401000, 0x80401040,
+               0x80000040, 0x00000000, 0x00000000, 0x00401000,
+               0x00001040, 0x00400040, 0x80400040, 0x80000000,
+               0x80401000, 0x80001040, 0x80001040, 0x00000040,
+               0x80401040, 0x80000040, 0x80000000, 0x00001000,
+               0x80400000, 0x80001000, 0x00401040, 0x80400040,
+               0x80001000, 0x00001040, 0x00400000, 0x80401000,
+               0x00000040, 0x00400000, 0x00001000, 0x00401040
+       },
+       {
+               0x00000080, 0x01040080, 0x01040000, 0x21000080,
+               0x00040000, 0x00000080, 0x20000000, 0x01040000,
+               0x20040080, 0x00040000, 0x01000080, 0x20040080,
+               0x21000080, 0x21040000, 0x00040080, 0x20000000,
+               0x01000000, 0x20040000, 0x20040000, 0x00000000,
+               0x20000080, 0x21040080, 0x21040080, 0x01000080,
+               0x21040000, 0x20000080, 0x00000000, 0x21000000,
+               0x01040080, 0x01000000, 0x21000000, 0x00040080,
+               0x00040000, 0x21000080, 0x00000080, 0x01000000,
+               0x20000000, 0x01040000, 0x21000080, 0x20040080,
+               0x01000080, 0x20000000, 0x21040000, 0x01040080,
+               0x20040080, 0x00000080, 0x01000000, 0x21040000,
+               0x21040080, 0x00040080, 0x21000000, 0x21040080,
+               0x01040000, 0x00000000, 0x20040000, 0x21000000,
+               0x00040080, 0x01000080, 0x20000080, 0x00040000,
+               0x00000000, 0x20040000, 0x01040080, 0x20000080
+       },
+       {
+               0x10000008, 0x10200000, 0x00002000, 0x10202008,
+               0x10200000, 0x00000008, 0x10202008, 0x00200000,
+               0x10002000, 0x00202008, 0x00200000, 0x10000008,
+               0x00200008, 0x10002000, 0x10000000, 0x00002008,
+               0x00000000, 0x00200008, 0x10002008, 0x00002000,
+               0x00202000, 0x10002008, 0x00000008, 0x10200008,
+               0x10200008, 0x00000000, 0x00202008, 0x10202000,
+               0x00002008, 0x00202000, 0x10202000, 0x10000000,
+               0x10002000, 0x00000008, 0x10200008, 0x00202000,
+               0x10202008, 0x00200000, 0x00002008, 0x10000008,
+               0x00200000, 0x10002000, 0x10000000, 0x00002008,
+               0x10000008, 0x10202008, 0x00202000, 0x10200000,
+               0x00202008, 0x10202000, 0x00000000, 0x10200008,
+               0x00000008, 0x00002000, 0x10200000, 0x00202008,
+               0x00002000, 0x00200008, 0x10002008, 0x00000000,
+               0x10202000, 0x10000000, 0x00200008, 0x10002008
+       },
+       {
+               0x00100000, 0x02100001, 0x02000401, 0x00000000,
+               0x00000400, 0x02000401, 0x00100401, 0x02100400,
+               0x02100401, 0x00100000, 0x00000000, 0x02000001,
+               0x00000001, 0x02000000, 0x02100001, 0x00000401,
+               0x02000400, 0x00100401, 0x00100001, 0x02000400,
+               0x02000001, 0x02100000, 0x02100400, 0x00100001,
+               0x02100000, 0x00000400, 0x00000401, 0x02100401,
+               0x00100400, 0x00000001, 0x02000000, 0x00100400,
+               0x02000000, 0x00100400, 0x00100000, 0x02000401,
+               0x02000401, 0x02100001, 0x02100001, 0x00000001,
+               0x00100001, 0x02000000, 0x02000400, 0x00100000,
+               0x02100400, 0x00000401, 0x00100401, 0x02100400,
+               0x00000401, 0x02000001, 0x02100401, 0x02100000,
+               0x00100400, 0x00000000, 0x00000001, 0x02100401,
+               0x00000000, 0x00100401, 0x02100000, 0x00000400,
+               0x02000001, 0x02000400, 0x00000400, 0x00100001
+       },
+       {
+               0x08000820, 0x00000800, 0x00020000, 0x08020820,
+               0x08000000, 0x08000820, 0x00000020, 0x08000000,
+               0x00020020, 0x08020000, 0x08020820, 0x00020800,
+               0x08020800, 0x00020820, 0x00000800, 0x00000020,
+               0x08020000, 0x08000020, 0x08000800, 0x00000820,
+               0x00020800, 0x00020020, 0x08020020, 0x08020800,
+               0x00000820, 0x00000000, 0x00000000, 0x08020020,
+               0x08000020, 0x08000800, 0x00020820, 0x00020000,
+               0x00020820, 0x00020000, 0x08020800, 0x00000800,
+               0x00000020, 0x08020020, 0x00000800, 0x00020820,
+               0x08000800, 0x00000020, 0x08000020, 0x08020000,
+               0x08020020, 0x08000000, 0x00020000, 0x08000820,
+               0x00000000, 0x08020820, 0x00020020, 0x08000020,
+               0x08020000, 0x08000800, 0x08000820, 0x00000000,
+               0x08020820, 0x00020800, 0x00020800, 0x00000820,
+               0x00000820, 0x00020020, 0x08000000, 0x08020800
+       }
+};
+
+// From 0x00b5a69788796a5bL
+const unsigned long long       XHash::_initialKey = 0xb5d3a4f186cba8b6ULL;
+unsigned long long     XHash::_keys[16];
+
+void XHash::initialize()
+{
+       makeKeys(_initialKey);
+}
+
+void XHash::initialize(unsigned long long key)
+{
+       makeKeys(key);
+}
+
+unsigned long long XHash::hash(string text)
+{
+       int     len = text.length();
+       unsigned long long      value = 0;
+       for (int pos = 0; pos < len; pos += 8)
+       {
+               int     piecelen = (len - pos >= 8) ? 8 : (len - pos);
+               unsigned long long      piece = (unsigned long long)text.at(pos);
+               for (int i = 1; i < piecelen; i++)
+                       piece = (piece << 8) | text.at(pos + i);
+               piece <<= ((8 - piecelen) * 8);
+               value += (des(piece) & 0xffffffff);
+       }
+       return value;
+}
+
+void XHash::makeKeys(unsigned long long key)
+{
+       unsigned long long      reduced = permutate(key, _PC_1, 56);
+       unsigned int    l = (unsigned int)(reduced >> 28);
+       unsigned int    r = (unsigned int)(reduced & 0xfffffff);
+       for (unsigned int i = 0; i < 16; i++)
+               _keys[i] = permutate(rotate(l, r, _subKeyRotation[i]),
+                                    _PC_2, 48);
+}
+
+unsigned long long XHash::permutate(unsigned long long k,
+                                   const unsigned int p[],
+                                   unsigned int plen)
+{
+       unsigned long long      s = 0;
+       for (unsigned int i = 0; i < plen; i++)
+       {
+               if ((k & (1LL << p[i])) != 0)
+                       s |= 1LL << i;
+       }
+
+       return s;
+}
+
+unsigned long long XHash::rotate(unsigned int l, unsigned int r, unsigned int s)
+{
+       return ((unsigned long long)(((l << s) & 0xfffffff) | (l >> (28 - s))) << 28) | ((r << s) & 0xfffffff) | (r >> (28 - s));
+}
+
+unsigned long long XHash::des(unsigned long long data)
+{
+       unsigned long long      x = permutate(data, _IP, 64);
+       unsigned int    l = (unsigned int)(x >> 32);
+       unsigned int    r = (unsigned int)(x & 0xffffffff);
+       for (int i = 0; i < 16; i++)
+       {
+               unsigned int    tmp = desCore(r, _keys[i]) ^ l;
+               l = r;
+               r = tmp;
+       }
+       unsigned long long      y = ((unsigned long long)r << 32) |
+                                   ((unsigned long long)l & 0xffffffffL);
+       return permutate(y, _FP, 64);
+}
+
+// E-Bit Selection Table is hardcoded in this function.
+int XHash::desCore(unsigned int x, unsigned long long k)
+{
+       unsigned int p = x >> 27;
+       unsigned int q = (p & 3) << 4;
+       unsigned int r = x << 5;
+       p |=  r;
+       r = _sBox[0][(unsigned int)((k >> 42) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[7][(unsigned int)((k >> 0) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[6][(unsigned int)((k >> 6) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[5][(unsigned int)((k >> 12) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[4][(unsigned int)((k >> 18) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[3][(unsigned int)((k >> 24) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[2][(unsigned int)((k >> 30) ^ p) & 0x3f];
+       p >>= 4;
+       r |= _sBox[1][(unsigned int)((k >> 36) ^ (p | q)) & 0x3f];
+       return r;
+}
diff --git a/XHash.hpp b/XHash.hpp
new file mode 100644 (file)
index 0000000..e2b5f6e
--- /dev/null
+++ b/XHash.hpp
@@ -0,0 +1,81 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#ifndef        __XHASH__
+#define        __XHASH__
+
+#include <string>
+#include <iostream>
+
+/**
+  * XHash is an implementation of DES
+  */
+class XHash
+{
+public:
+       static void initialize();
+       static void initialize(unsigned long long key);
+       static void finish();
+       
+       static unsigned long long hash(string text);
+
+private:
+       static const unsigned long long _initialKey;
+
+       // Permuted Choices #1 and #2
+       static const unsigned int       _PC_1[56], _PC_2[48];
+       static const unsigned int       _subKeyRotation[16];
+
+       // Initial Permutation and Final Permutation (Inverse IP, or IP^(-1))
+       static const unsigned int       _IP[64],_FP[64];
+
+       static const unsigned int       _sBox[8][64];
+
+       static unsigned long long       _key;
+       static unsigned long long       _keys[];
+
+       static void makeKeys(unsigned long long key);
+       static unsigned long long permutate(unsigned long long k,
+                                           const unsigned int p[],
+                                           unsigned int plen);
+       static unsigned long long rotate(unsigned int l, unsigned int r,
+                                        unsigned int s);
+
+       static unsigned long long des(unsigned long long data);
+       static int desCore(unsigned int x, unsigned long long k);
+};
+#endif
diff --git a/XLut.cpp b/XLut.cpp
new file mode 100644 (file)
index 0000000..c083534
--- /dev/null
+++ b/XLut.cpp
@@ -0,0 +1,101 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#include "XLut.hpp"
+
+XLut::XLut(bool con)
+: _xTable(4096)
+{
+       _possibleConflict = con;
+       _conflict = false;
+}
+
+void XLut::add(int eid1, int eid2, int dist)
+{
+       unsigned int    key = ((unsigned int)eid1 << 16) | (eid2 & 0xffff);
+       unsigned long long      value = ((unsigned long long)eid1 << 40) | ((unsigned long long)(eid2 & 0xffffff) << 16) | dist;
+       if (_possibleConflict)
+       {
+               hash_map<unsigned int, unsigned long long>::const_iterator hit = _xTable.find(key);
+               if (hit == _xTable.end())
+                       _xTable[key] = value;
+               else
+               {
+                       do
+                       {
+                               key++;
+                               hit = _xTable.find(key);
+                       }
+                       while (hit != _xTable.end());
+                       _xTable[key] = value;
+                       _conflict = true;
+               }
+       }
+       else
+               _xTable[key] = value;
+}
+
+int XLut::get(int eid1, int eid2)
+{
+       unsigned int    key = ((unsigned int)eid1 << 16) | (eid2 & 0xffff);
+       hash_map<unsigned int, unsigned long long>::const_iterator hit = _xTable.find(key);
+       if (hit == _xTable.end())
+               return XTree::NO_CONNECTION;
+
+       if (!_conflict)
+               return (int)(hit->second & 0xffff);
+       else
+       {
+               unsigned long long      partialValue = ((unsigned long long)eid1 << 40) | ((unsigned long long)(eid2 & 0xffffff) << 16); 
+               unsigned long long      bucket = hit->second;
+               if (((partialValue ^ bucket) >> 16) == 0)
+                       return (int)(bucket & 0xffff);
+               else
+               {
+                       do
+                       {
+                               key++;
+                               hit = _xTable.find(key);
+                               if (hit == _xTable.end())
+                                       return XTree::NO_CONNECTION;
+                               bucket = hit->second;
+                       }
+                       while (((partialValue ^ bucket) >> 16) != 0);
+                       return (int)(bucket & 0xffff);
+               }
+       }
+}
diff --git a/XLut.hpp b/XLut.hpp
new file mode 100644 (file)
index 0000000..f7d2309
--- /dev/null
+++ b/XLut.hpp
@@ -0,0 +1,75 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#ifndef        __XLUT__
+#define        __XLUT__
+
+#include <stdio.h>
+#include "XTree.hpp"
+
+/**
+  * XLut is the hash lookup table for distance between nodes. It can be
+  * extended to contain more buckets.
+  */
+class XLut
+{
+public:
+
+       XLut(bool con);
+       ~XLut(){};
+
+       /**
+         * Add a node pair and their distance to this table.
+         * @param      eid1    element id #1
+         * @param      eid2    element id #2
+         * @param      dist    distance
+         */
+       void add(int eid1, int eid2, int dist);
+
+       /**
+         * Get the distance of a node pair.
+         * @param      eid1    element id #1
+         * @param      eid2    element id #2
+         * @return     distance or -1 if not found
+         */
+       int get(int eid1, int eid2);
+
+private:
+       bool    _possibleConflict, _conflict;
+       hash_map<unsigned int, unsigned long long>      _xTable;
+};
+#endif
diff --git a/XParser.cpp b/XParser.cpp
new file mode 100644 (file)
index 0000000..0b3de3b
--- /dev/null
@@ -0,0 +1,301 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#include "XParser.hpp"
+
+const char*    XParser::_feature_Validation = "http://xml.org/sax/features/validation";
+const char*    XParser::_feature_NameSpaces = "http://xml.org/sax/features/namespaces";
+//const char*  XParser::_feature_SchemaSupport = "http://apache.org/xml/features/validation/schema";
+//const char*  XParser::_feature_SchemaFullSupport = "http://apache.org/xml/features/validation/schema-full-checking";
+const char*    XParser::_feature_NameSpacePrefixes = "http://xml.org/sax/features/namespace-prefixes";
+bool           XParser::_setValidation = false;
+bool           XParser::_setNameSpaces = true;
+bool           XParser::_setSchemaSupport = true;
+bool           XParser::_setSchemaFullSupport = false;
+bool           XParser::_setNameSpacePrefixes = true;
+
+const char*    XParser::_trimReject = " \t\n";
+
+XParser::XParser()
+{
+       _parser = XMLReaderFactory::createXMLReader();
+       XMLCh*  validation = XMLString::transcode(_feature_Validation);
+       _parser->setFeature((const XMLCh*)validation, _setValidation);
+       delete validation;
+       XMLCh*  namespaces = XMLString::transcode(_feature_NameSpaces);
+       _parser->setFeature((const XMLCh*)namespaces, _setNameSpaces);
+       delete namespaces;
+/*
+       XMLCh*  schemasupport = XMLString::transcode(_feature_SchemaSupport);
+       _parser->setFeature((const XMLCh*)schemasupport, _setSchemaSupport);
+       delete schemasupport;
+       XMLCh*  schemafullsupport = XMLString::transcode(_feature_SchemaFullSupport);
+       _parser->setFeature((const XMLCh*)schemafullsupport, _setSchemaFullSupport);
+       delete schemafullsupport;
+*/
+       XMLCh*  namespaceprefixes = XMLString::transcode(_feature_NameSpacePrefixes);
+       _parser->setFeature((const XMLCh*)namespaceprefixes, _setNameSpacePrefixes);
+       delete namespaceprefixes;
+
+       _parser->setContentHandler(this);
+       _parser->setErrorHandler(this);
+       _parser->setLexicalHandler(this);
+
+       for(int i = 0; i < _STACK_SIZE; i++)
+       {
+               _idStack[i] = 0;
+               _lsidStack[i] = 0;
+               _valueStack[i] = 0;
+       }
+       _stackTop = 0;
+       _currentNodeID = XTree::NULL_NODE;
+       _xtree = new XTree();
+       _elementBuffer = string("");
+}
+
+XParser::~XParser()
+{
+       delete _parser;
+}
+
+XTree* XParser::parse(const char* uri)
+{
+       _idStack[_stackTop] = XTree::NULL_NODE;
+       _readElement = false;
+
+       try
+       {
+               _parser->parse(uri);
+       }
+       catch (const XMLException& e)
+       {
+               cerr << "File not found:\t" << uri << "\nException message:\n"
+                       << e.getMessage() << endl;
+               return NULL;
+       }
+
+       return _xtree;
+}
+
+void XParser::startElement(const XMLCh* const uri, const XMLCh* const local,
+                          const XMLCh* const raw, const Attributes& attrs)
+{
+       // if text is mixed with elements.
+       if (_elementBuffer.length() > 0)
+       {
+               string  text = _trim(_elementBuffer);
+               if (text.length() > 0)
+               {
+                       unsigned long long      value = XHash::hash(text);
+                       int     tid = _xtree->addText(_idStack[_stackTop],
+                                                     _lsidStack[_stackTop],
+                                                     text, value);
+                       _lsidStack[_stackTop] = tid;
+                       _currentNodeID = tid;
+                       _valueStack[_stackTop] += value;
+               }
+       }
+
+       string local_s(XMLString::transcode(local));
+
+//cout << "Add element " << _idStack[_stackTop] << "\t" << _lsidStack[_stackTop] << "\t" << local_s << endl;
+       int     eid = _xtree->addElement(_idStack[_stackTop],
+                                        _lsidStack[_stackTop], local_s);
+       // Update last sibling info.
+       _lsidStack[_stackTop] = eid;
+
+       // Push
+       _stackTop++;
+       _idStack[_stackTop] = eid;
+       _currentNodeID = eid;
+       _lsidStack[_stackTop] = XTree::NULL_NODE;
+       _valueStack[_stackTop] = XHash::hash(local_s);
+
+       // Take care of attributes
+       if (attrs.getLength() > 0)
+       {
+               for (unsigned int i = 0; i < attrs.getLength(); i++)
+               {
+                       string  name(XMLString::transcode(attrs.getQName(i)));
+                       unsigned long long      namehash = XHash::hash(name);
+                       
+                       unsigned long long      attrhash = namehash * namehash;
+                       string  value = "";
+                       char    *valueP = XMLString::transcode(attrs.getValue(i));
+                       if (valueP != NULL)
+                       {
+                               value = string(valueP);
+                               unsigned long long valuehash = XHash::hash(value);
+                               attrhash += valuehash * valuehash;
+                       }
+                       int     aid = _xtree->addAttribute(eid, _lsidStack[_stackTop], name, value, namehash, attrhash);
+
+                       _lsidStack[_stackTop] = aid;
+                       _currentNodeID = aid + 1;
+                       _valueStack[_stackTop] += attrhash * attrhash;
+               }
+       }
+       
+       _readElement = true;
+       _elementBuffer = string("");
+
+}
+
+void XParser::characters(const XMLCh* const ch, const unsigned int length)
+{
+       const char*     str = XMLString::transcode(ch);
+       _elementBuffer.append(str);
+       delete[] str;
+}
+
+void XParser::endElement(const XMLCh* const uri, const XMLCh* const local,
+                        const XMLCh* const raw)
+{
+       if (_readElement)
+       {
+               if (_elementBuffer.length() > 0)
+               {
+                       unsigned long long      value = XHash::hash(_elementBuffer);
+
+                       _currentNodeID = _xtree->addText(_idStack[_stackTop],
+                                                        _lsidStack[_stackTop],
+                                                        _elementBuffer, value);
+
+                       _valueStack[_stackTop] += value;
+               }
+               else    // an empty element
+               {
+                       _currentNodeID = _xtree->addText(_idStack[_stackTop],
+                                                        _lsidStack[_stackTop],
+                                                        "", 0);
+               }
+
+               _readElement = false;
+       }
+       else    // mixed contents
+       {
+               if (_elementBuffer.length() > 0)
+               {
+                       string  text = _trim(_elementBuffer);
+                       if (text.length() > 0)
+                       {
+                               unsigned long long      value = XHash::hash(text);
+                               _currentNodeID =
+                                       _xtree->addText(_idStack[_stackTop],
+                                                       _lsidStack[_stackTop],
+                                                       text, value);
+                               _valueStack[_stackTop] += value;
+                       }
+               }
+       }
+
+       _elementBuffer = string("");
+       _xtree->addHashValue(_idStack[_stackTop], _valueStack[_stackTop]);
+       _valueStack[_stackTop-1] += _valueStack[_stackTop] *
+                                   _valueStack[_stackTop];
+       _lsidStack[_stackTop-1] = _idStack[_stackTop];
+
+       // Pop
+       _stackTop--;
+}
+
+// The lexical handler methods.
+void XParser::startCDATA()
+{
+       int     textid = _currentNodeID + 1;
+       string  text = _elementBuffer;
+       _xtree->addCDATA(textid, text.length());
+}
+
+void XParser::endCDATA()
+{
+       int     textid = _currentNodeID + 1;
+       string  text = _elementBuffer;
+       _xtree->addCDATA(textid, text.length());
+}
+
+string XParser::_trim(const char* input)
+{
+       string  base(input);
+       if (base.length() == 0)
+               return string("");
+
+       int     start = base.find_first_not_of(_trimReject);
+       if (start < 0)
+               return string("");
+
+       int     end = base.find_last_not_of(_trimReject);
+       return base.substr(start, end - start + 1);
+}
+
+string XParser::_trim(string input)
+{
+       if (input.length() == 0)
+               return string("");
+
+       int     start = input.find_first_not_of(_trimReject);
+       if (start < 0)
+               return string("");
+
+       int     end = input.find_last_not_of(_trimReject);
+       return input.substr(start, end - start + 1);
+}
+
+// For testing purpose.
+/*
+int main(int argc, char* args[])
+{
+       try
+       {
+               XMLPlatformUtils::Initialize();
+       }
+       catch (const XMLException& e)
+       {
+               cerr << "Error during initialization! :\n"
+                       << e.getMessage() << endl;
+               exit(1);
+       }
+
+       XParser *parser = new XParser();
+       XTree   *xtree = parser->parse(args[1]);
+
+       xtree->dump();
+
+       delete parser;
+       delete xtree;
+}
+*/
diff --git a/XParser.hpp b/XParser.hpp
new file mode 100644 (file)
index 0000000..1ea7d1a
--- /dev/null
@@ -0,0 +1,119 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#ifndef        __XPARSER__
+#define __XPARSER__
+
+#include <string>
+
+#include <sax2/Attributes.hpp>
+#include <sax2/DefaultHandler.hpp>
+#include <sax2/SAX2XMLReader.hpp>
+#include <sax2/XMLReaderFactory.hpp>
+#include <util/PlatformUtils.hpp>
+#include <util/XMLString.hpp>
+
+#include "XTree.hpp"
+#include "XHash.hpp"
+
+#define _STACK_SIZE 64
+
+
+/**
+  * XParser parses an input XML document and constructs an XTree.
+  * Note: this parser may generate incorrect result on characters whose
+  * ASCII value is beyond 127.
+  */
+class XParser : public DefaultHandler
+{
+public:
+
+       XParser();
+       ~XParser();
+
+       /**
+         * Parse an XML document
+         * @param      uri     input XML document
+         * @return     the created XTree
+         */
+       XTree* parse(const char* uri);
+
+       // Document handler methods
+
+       void startElement(const XMLCh* const uri, const XMLCh* const local,
+                         const XMLCh* const raw, const Attributes& attrs);
+
+       void characters(const XMLCh* const ch, const unsigned int length);
+
+       void endElement(const XMLCh* const uri, const XMLCh* const local,
+                       const XMLCh* const raw);
+
+       void startCDATA();
+
+       void endCDATA();
+
+private:
+
+       static const char*      _feature_Validation;
+       static const char*      _feature_NameSpaces;
+       static const char*      _feature_SchemaSupport;
+       static const char*      _feature_SchemaFullSupport;
+       static const char*      _feature_NameSpacePrefixes;
+       static bool             _setValidation, _setNameSpaces;
+       static bool             _setSchemaSupport, _setSchemaFullSupport;
+       static bool             _setNameSpacePrefixes;
+       static const char*      _trimReject;
+
+       SAX2XMLReader   *_parser;
+       XTree           *_xtree;
+       int             _idStack[_STACK_SIZE];
+        int            _lsidStack[_STACK_SIZE]; // id and left sibling
+       unsigned long long      _valueStack[_STACK_SIZE];
+       int             _stackTop, _currentNodeID;
+       bool            _readElement;
+       string          _elementBuffer;
+
+       /**
+         * Trim a string.
+         * @param      input   input string
+         * @return     a trimmed string
+         */
+       string _trim(const char* input);
+       string _trim(string input);
+};
+
+#endif
diff --git a/XTree.cpp b/XTree.cpp
new file mode 100644 (file)
index 0000000..a7aaa18
--- /dev/null
+++ b/XTree.cpp
@@ -0,0 +1,453 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#include "XTree.hpp"
+
+const int      XTree::MATCH = 0;
+const int      XTree::NO_MATCH = -1;
+const int      XTree::INSERT = -1;
+const int      XTree::DELETE = -1;
+const int      XTree::CHANGE = 1;
+const int      XTree::NULL_NODE = -1;
+const int      XTree::NO_CONNECTION = 1048576;
+
+const int      XTree::_TOP_LEVEL_CAPACITY = 16384;
+const int      XTree::_BOT_LEVEL_CAPACITY = 4096;
+const int      XTree::_ROOT = 0;
+
+
+XTree::XTree()
+: _tagNames(256),
+  _cdataTable(256)
+{
+       _topCap = _TOP_LEVEL_CAPACITY;
+       _botCap = _BOT_LEVEL_CAPACITY;
+       _initialize();
+}
+
+XTree::XTree(int topcap, int botcap)
+: _tagNames(256),
+  _cdataTable(256)
+{
+       _topCap = topcap;
+       _botCap = botcap;
+       _initialize();
+}
+
+XTree::~XTree()
+{
+       int     size = _elementIndex / _botCap;
+       if (size >= 0)
+       {
+               for (int i = 0; i <= size; i++)
+               {
+                       delete[] _valueIndex[i];
+                       delete[] _firstChild[i];
+                       delete[] _nextSibling[i];
+                       delete[] _childrenCount[i];
+                       delete[] _matching[i];
+                       delete[] _isAttribute[i];
+                       delete[] _hashValue[i];
+               }
+       }
+       delete[] _firstChild;
+       delete[] _nextSibling;
+       delete[] _childrenCount;
+       delete[] _valueIndex;
+       delete[] _matching;
+       delete[] _isAttribute;
+       delete[] _hashValue;
+
+       size = _valueCount / _botCap;
+       for (int i = 0; i <= size; i++)
+               delete[] _value[i];
+       delete[] _value;
+}
+
+void XTree::_initialize()
+{
+       _firstChild     = new int*[_topCap];
+       _nextSibling    = new int*[_topCap];
+       _childrenCount  = new int*[_topCap];
+       _valueIndex     = new int*[_topCap];
+       _matching       = new int*[_topCap];
+       _isAttribute    = new bool*[_topCap];
+       _hashValue      = new unsigned long long*[_topCap];
+       _value          = new string*[_topCap];
+
+       _value[0]       = new string[_botCap];
+       _elementIndex   = -1;
+       _tagIndex       = -1;
+       _valueCount     = _botCap - 1;
+}
+
+void XTree::_expand(int topid)
+{
+       _firstChild[topid]      = new int[_botCap];
+       _nextSibling[topid]     = new int[_botCap];
+       _childrenCount[topid]   = new int[_botCap];
+       _valueIndex[topid]      = new int[_botCap];
+       _matching[topid]        = new int[_botCap];
+       _isAttribute[topid]     = new bool[_botCap];
+       _hashValue[topid]       = new unsigned long long[_botCap];
+
+       for (int i = 0; i < _botCap; i++)
+       {
+               _firstChild[topid][i]   = NULL_NODE;
+               _nextSibling[topid][i]  = NULL_NODE;
+               _childrenCount[topid][i]= 0;
+               _matching[topid][i]     = MATCH;
+               _valueIndex[topid][i]   = -1;
+               _isAttribute[topid][i]  = false;
+       }
+}
+
+int XTree::addElement(int pid, int lsid, string tagName)
+{
+       _elementIndex++;
+
+       int     topid = _elementIndex / _botCap;
+       int     botid = _elementIndex % _botCap;
+       if (botid == 0)
+               _expand(topid);
+
+       // Check if we've got the element name
+       hash_map <string, int, HashString>::const_iterator
+               hit = _tagNames.find(tagName);
+       if (hit != _tagNames.end())
+       {
+               int     id = hit->second;
+               _valueIndex[topid][botid] = id;
+       }
+       else
+       {
+               _tagIndex++;
+               _value[0][_tagIndex] = tagName;
+               _tagNames[tagName] = _tagIndex;
+               _valueIndex[topid][botid] = _tagIndex;
+       }
+
+       if (pid == NULL_NODE)
+               return _elementIndex;
+
+       int     ptopid = pid / _botCap;
+       int     pbotid = pid % _botCap;
+       // parent-child relation or sibling-sibling ralation
+       if (lsid == NULL_NODE)
+               _firstChild[ptopid][pbotid] = _elementIndex;
+       else
+               _nextSibling[lsid/_botCap][lsid%_botCap] = _elementIndex;
+
+       // update children count
+       _childrenCount[ptopid][pbotid]++;
+
+       return _elementIndex;
+}
+
+int XTree::addText(int eid, int lsid, string text, unsigned long long value)
+{
+       _elementIndex++;
+
+       int     topid = _elementIndex / _botCap;
+       int     botid = _elementIndex % _botCap;
+       if (botid == 0)
+               _expand(topid);
+
+       int     etopid = eid / _botCap;
+       int     ebotid = eid % _botCap;
+       if (lsid == NULL_NODE)
+               _firstChild[etopid][ebotid] = _elementIndex;
+       else
+               _nextSibling[lsid/_botCap][lsid%_botCap] = _elementIndex;
+
+       _childrenCount[etopid][ebotid]++;
+       _hashValue[topid][botid] = value;
+
+       _valueCount++;
+       int     vtopid = _valueCount / _botCap;
+       int     vbotid = _valueCount % _botCap;
+       if (vbotid == 0)
+               _value[vtopid] = new string[_botCap];
+
+       _value[vtopid][vbotid] = text;
+       _valueIndex[topid][botid] = _valueCount;
+
+       return _elementIndex;
+}
+
+int XTree::addAttribute(int eid, int lsid, string name, string value,
+                       unsigned long long valuehash,
+                       unsigned long long attrhash)
+{
+       // attribute name first.
+       int     aid = addElement(eid, lsid, name);
+
+       // attribute value second.
+       addText(aid, NULL_NODE, value, valuehash);
+
+       // hash value third
+       int     atopid = aid / _botCap;
+       int     abotid = aid % _botCap;
+       _isAttribute[atopid][abotid] = true;
+       _hashValue[atopid][abotid] = attrhash;
+
+       return aid;
+}
+
+void XTree::addHashValue(int eid, unsigned long long value)
+{
+       _hashValue[eid/_botCap][eid%_botCap] = value;
+}
+
+void XTree::addCDATA(int eid, size_t position)
+{
+       hash_map<int, vector<size_t> >::const_iterator
+               hit = _cdataTable.find(eid);
+       if (hit != _cdataTable.end())
+       {
+               vector<size_t>  poslist = hit->second;
+               poslist.push_back(position);
+               _cdataTable[eid] = poslist;
+       }
+       else
+       {
+               vector<size_t>  poslist;
+               poslist.push_back(position);
+               _cdataTable[eid] = poslist;
+       }
+}
+
+void XTree::addMatching(int eid, int matchType, int matchNode = -1)
+{
+       if (matchType == NO_MATCH)
+               _matching[eid/_botCap][eid%_botCap] = NO_MATCH;
+       else if (matchType == MATCH)
+               _matching[eid/_botCap][eid%_botCap] = MATCH;
+       else
+               _matching[eid/_botCap][eid%_botCap] = matchNode + 1;
+}
+
+void XTree::getMatching(int eid, int &matchType, int &matchNode)
+{
+       int     mid = _matching[eid/_botCap][eid%_botCap];
+       if (mid == NO_MATCH)
+               matchType = NO_MATCH;
+       else if (mid == MATCH)
+               matchType = MATCH;
+       else
+       {
+               matchType = CHANGE;
+               matchNode = mid - 1;
+       }
+}
+
+int XTree::getRoot()
+{
+       return _ROOT;
+}
+
+int XTree::getFirstChild(int eid)
+{
+       int     cid = _firstChild[eid/_botCap][eid%_botCap];
+       while (cid > _ROOT)
+       {
+               int     ctopid = cid / _botCap;
+               int     cbotid = cid % _botCap;
+               if (_isAttribute[ctopid][cbotid])
+                       cid = _nextSibling[ctopid][cbotid];
+               else
+                       return cid;
+       }
+
+       return NULL_NODE;
+}
+
+int XTree::getNextSibling(int eid)
+{
+       return _nextSibling[eid/_botCap][eid%_botCap];
+}
+
+int XTree::getFirstAttribute(int eid)
+{
+       int     aid = _firstChild[eid/_botCap][eid%_botCap];
+       if ((aid > _ROOT) && (_isAttribute[aid/_botCap][aid%_botCap]))
+               return aid;
+       else
+               return NULL_NODE;
+}
+
+int XTree::getNextAttribute(int aid)
+{
+       int     aid1 = _nextSibling[aid/_botCap][aid%_botCap];
+       if ((aid1 > _ROOT) && (_isAttribute[aid1/_botCap][aid1%_botCap]))
+               return aid1;
+       else
+               return NULL_NODE;
+}
+
+string XTree::getAttributeValue(int aid)
+{
+       int     cid = _firstChild[aid/_botCap][aid%_botCap];
+       int     index = _valueIndex[cid/_botCap][cid%_botCap];
+       if (index > _ROOT)
+               return _value[index/_botCap][index%_botCap];
+       else
+               return NULL;
+}
+
+unsigned long long XTree::getHashValue(int eid)
+{
+       return _hashValue[eid/_botCap][eid%_botCap];
+}
+
+vector<size_t> XTree::getCDATA(int eid)
+{
+       hash_map<int, vector<size_t> >::const_iterator
+               hit = _cdataTable.find(eid);
+       if (hit != _cdataTable.end())
+               return hit->second;
+       else
+       {
+               vector<size_t>  a;
+               return a;
+       }
+}
+
+int XTree::getChildrenCount(int eid)
+{
+       return _childrenCount[eid/_botCap][eid%_botCap];
+}
+
+int XTree::getDecendentsCount(int eid)
+{
+       int     topid = eid / _botCap;
+       int     botid = eid % _botCap;
+       int     count = _childrenCount[topid][botid];
+       if (count == 0)
+               return 0;
+
+       int     cid = _firstChild[topid][botid];
+       while (cid > _ROOT)
+       {
+               count += getDecendentsCount(cid);
+               cid = _nextSibling[cid/_botCap][cid%_botCap];
+       }
+
+       return count;
+}
+
+int XTree::getValueIndex(int eid)
+{
+       return _valueIndex[eid/_botCap][eid%_botCap];
+}
+
+string XTree::getValue(int index) 
+{
+       return _value[index/_botCap][index%_botCap];
+}
+
+string XTree::getTag(int eid)
+{
+       int     index = _valueIndex[eid/_botCap][eid%_botCap];
+       return  _value[0][index];
+}
+
+string XTree::getText(int eid)
+{
+       int     index = _valueIndex[eid/_botCap][eid%_botCap];
+       return _value[index/_botCap][index%_botCap];
+}
+
+bool XTree::isElement(int eid)
+{
+       int     index = _valueIndex[eid/_botCap][eid%_botCap];
+       if (index < _botCap)
+               return true;
+       else
+               return false;
+}
+
+bool XTree::isLeaf(int eid)
+{
+       int     index = _valueIndex[eid/_botCap][eid%_botCap];
+       if (index < _botCap)
+               return false;
+       else
+               return true;
+}
+
+bool XTree::isAttribute(int eid)
+{
+       return _isAttribute[eid/_botCap][eid%_botCap];
+}
+
+int XTree::getNodeCount()
+{
+       return _elementIndex;
+}
+
+void XTree::dump()
+{
+       cout << "eid\tfirstC\tnextS\tattr?\tcCount\thash\tmatch\tvalue\n";
+       for (int i = _ROOT; i <= _elementIndex; i++)
+       {
+               int     topid = i / _botCap;
+               int     botid = i % _botCap;
+               int     vid = _valueIndex[topid][botid];
+               int     vtopid = vid / _botCap;
+               int     vbotid = vid % _botCap;
+               cout << i << "\t" << _firstChild[topid][botid] << "\t"
+                       << _nextSibling[topid][botid] << "\t"
+                       << _isAttribute[topid][botid] << "\t"
+                       << _childrenCount[topid][botid] << "\t"
+                       << _hashValue[topid][botid] << "\t"
+                       << _matching[topid][botid] << "\t"
+                       << _value[vtopid][vbotid] << endl;
+       }
+}
+
+void XTree::dumpHash()
+{
+       cout << "hash table:" << _tagNames.size() << endl;
+       hash_map<string, int, HashString>::const_iterator
+               hit;// = _tagNames.begin();
+       for(hit=_tagNames.begin(); hit != _tagNames.end(); hit++)
+       {
+               cout << hit->first << "\t" << hit->second << endl;
+               //hit++;
+       }
+}
diff --git a/XTree.hpp b/XTree.hpp
new file mode 100644 (file)
index 0000000..f9f5f1d
--- /dev/null
+++ b/XTree.hpp
@@ -0,0 +1,193 @@
+/**
+  * Copyright (c) 2001 - 2005
+  *    Yuan Wang. All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright 
+  * notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions and the following disclaimer in the 
+  * documentation and/or other materials provided with the distribution.
+  * 3. Redistributions in any form must be accompanied by information on
+  * how to obtain complete source code for the X-Diff software and any
+  * accompanying software that uses the X-Diff software.  The source code
+  * must either be included in the distribution or be available for no
+  * more than the cost of distribution plus a nominal fee, and must be
+  * freely redistributable under reasonable conditions.  For an executable
+  * file, complete source code means the source code for all modules it
+  * contains.  It does not include source code for modules or files that
+  * typically accompany the major components of the operating system on
+  * which the executable file runs.
+  *
+  * THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+  * ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+#ifndef        __XTREE__
+#define        __XTREE__
+
+#include <iostream>
+#include <string>
+#include <hash_map>
+#include <vector>
+
+class HashString
+{
+public:
+       bool operator()(string const &str) const
+       {
+               return hash<char const *>()(str.c_str());
+       }
+};
+
+
+/**
+  * XTree provides the storage for input XML documents. Basically, an XML
+  * document is parsed by a SAX parser and stored in an XTree.
+  */
+class XTree
+{
+public:
+
+       static const int        MATCH, NO_MATCH, INSERT, DELETE, CHANGE;
+       static const int        NULL_NODE, NO_CONNECTION;
+
+       XTree();
+       XTree(int topcap, int botcap);
+       ~XTree();
+
+       // Start  -- methods for constructing a tree.
+
+       /**
+         * Add a new element to the tree.
+         * @param      pid             parent id
+         * @param      lsid            left-side sibling id
+         * @param      tagName         element name
+         * @return     the element id in the tree.
+         */
+       int addElement(int pid, int lsid, string tagName);
+
+       // Add a text node.
+       int addText(int eid, int lsid, string text, unsigned long long value);
+
+       /**
+         * Add an attribute.
+         * @param      eid     element id
+         * @param      lsid    the sibling id on the left
+         * @param      name    attribute name
+         * @param      value   attribute value
+         * @param      valuehash       hash value of the value
+         * @param      attrhash        hash value of the entire attribute
+         * @return     the element id of the attribute
+         */
+       int addAttribute(int eid, int lsid, string name, string value,
+                        unsigned long long valuehash,
+                        unsigned long long attrhash);
+
+       // Add more information (hash value) to the tree.
+       void addHashValue(int eid, unsigned long long value);
+
+       /**
+         * Add a CDATA section (either a start or an end) to the CDATA
+         * hashtable, in which each entry should have an even number of
+         * position slots. This value is interpreted as a string.
+         * @param      eid             The text node id
+         * @param      position        the section tag position
+         */
+       void addCDATA(int eid, size_t position);
+
+       /**
+         * Add matching information.
+         * @param      eid             element id
+         * @param      matchType       match type
+         * @param      matchNode       matched element id
+         */
+       void addMatching(int eid, int matchType, int matchNode = -1);
+       // End  -- methods for constructing a tree.
+
+       // Start -- methods for accessing a tree.
+       // Get matching information.
+       void getMatching(int eid, int &matchType, int &matchNode);
+
+       int getRoot();
+
+       int getFirstChild(int eid);
+
+       int getNextSibling(int eid);
+
+       int getFirstAttribute(int eid);
+
+       int getNextAttribute(int aid);
+
+       string getAttributeValue(int aid);
+
+       unsigned long long getHashValue(int eid);
+
+       /**
+         * Get the CDATA position list for a text node.
+         * @param      eid             The text node id
+         * @return     the position vector.
+         */
+       vector<unsigned int> getCDATA(int eid);
+
+       int getChildrenCount(int eid);
+
+       int getDecendentsCount(int eid);
+
+       int getValueIndex(int eid);
+
+       string getTag(int eid);
+
+       // Get the value of a leaf node using the value index
+       string getValue(int index);
+
+       // Get the value of a leaf node using the node id
+       string getText(int eid);
+
+       // Check if a node is an element node.
+       bool isElement(int eid);
+
+       // Check if a node is a leaf text node.
+       bool isLeaf(int eid);
+
+       // Check if a ndoe is an attribute node
+       bool isAttribute(int eid);
+
+       // Return the number of nodes in the tree.
+       int getNodeCount();
+
+       // End  -- methods for accessing a tree.
+       // For testing purpose.
+       void dump();
+       void dumpHash();
+
+private:
+
+       static const int        _TOP_LEVEL_CAPACITY, _BOT_LEVEL_CAPACITY;
+       static const int        _ROOT;
+
+       int     _topCap, _botCap, _elementIndex, _tagIndex, _valueCount;
+       int     **_firstChild, **_nextSibling, **_childrenCount;
+       int     **_valueIndex, **_matching;
+       bool    **_isAttribute;
+       unsigned long long      **_hashValue;
+       string  **_value;
+       hash_map<string, int, HashString>       _tagNames;
+       hash_map<int, vector<size_t> >  _cdataTable;
+
+       void _initialize();
+       void _expand(int topid);
+};
+#endif