Index: src/org/carrot2/clustering/stc/PhraseNode.java =================================================================== --- src/org/carrot2/clustering/stc/PhraseNode.java (revision 3549) +++ src/org/carrot2/clustering/stc/PhraseNode.java (working copy) @@ -54,7 +54,6 @@ super(t); docs = new ExtendedBitSet(t.getCurrentDocumentIndex()); - docs.set(t.getCurrentDocumentIndex()); } Index: src/org/carrot2/text/suffixtrees/SuffixTree.java =================================================================== --- src/org/carrot2/text/suffixtrees/SuffixTree.java (revision 3549) +++ src/org/carrot2/text/suffixtrees/SuffixTree.java (working copy) @@ -115,7 +115,10 @@ * If it does, do nothing (path compression - an implicit node is * created). */ - if (edge != null) break; + if (edge != null) + { + break; + } } else { @@ -183,7 +186,6 @@ /* Now the endpoint is the next active point */ active.lastElementIndex++; - active.canonize(); return active; Index: src/org/carrot2/text/suffixtrees/GeneralizedSuffixTree.java =================================================================== --- src/org/carrot2/text/suffixtrees/GeneralizedSuffixTree.java (revision 3549) +++ src/org/carrot2/text/suffixtrees/GeneralizedSuffixTree.java (working copy) @@ -110,23 +110,7 @@ } } - if (startIndex == getCurrentElement().size()) - { - // POSSIBILITY OF PERFORMANCE TUNING: - // iterate through elements of the boundary path only. - for (int i = 0; i < getCurrentElement().size(); i++) - { - activePoint = new Suffix(this, rootNode, i, getCurrentElement() - .size() - 1); - - activePoint.canonize(); - ((GSTNode) activePoint.originNode) - .addIndexedElement(getCurrentElementNumber()); - } - - return rootNode; - } - else + if (startIndex != getCurrentElement().size()) { // insert remaining suffixes. activePoint = new Suffix(this, lastPrefixNode, startIndex, endIndex); @@ -135,6 +119,18 @@ for (int i = endIndex + 1; i < getCurrentElement().size(); i++) insertPrefix(activePoint, i); } + + // POSSIBILITY OF PERFORMANCE TUNING: + // iterate through elements of the boundary path only. + for (int i = 0; i < getCurrentElement().size(); i++) + { + activePoint = new Suffix(this, rootNode, i, getCurrentElement().size() - 1); + activePoint.canonize(); + ((GSTNode) activePoint.originNode) + .addIndexedElement(getCurrentElementNumber()); + } + + return rootNode; } return rootNode;