/*
 * Decompiled with CFR 0.152.
 */
package com.openkm.kea.filter;

import com.openkm.kea.filter.KEAPhraseFilter;
import com.openkm.kea.filter.NumbersFilter;
import com.openkm.kea.stemmers.SremovalStemmer;
import com.openkm.kea.stemmers.Stemmer;
import com.openkm.kea.stopwords.Stopwords;
import com.openkm.kea.stopwords.StopwordsEnglish;
import com.openkm.kea.util.Counter;
import com.openkm.kea.vocab.Vocabulary;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayesSimple;
import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.meta.RegressionByDiscretization;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.supervised.attribute.Discretize;

public class KEAFilter
extends Filter
implements OptionHandler {
    // Class-wide SLF4J logger.
    private static Logger log = LoggerFactory.getLogger(KEAFilter.class);
    private static final long serialVersionUID = 1L;
    // Index of the string attribute holding the document text.
    private int m_DocumentAtt = 0;
    // Index of the string attribute holding the given keyphrases.
    private int m_KeyphrasesAtt = 1;
    // Size of the sliding word window used when collecting candidate phrases.
    private int m_MaxPhraseLength = 5;
    // Candidates with fewer words than this are skipped when collecting phrases.
    private int m_MinPhraseLength = 1;
    // Exposed through getNumPhrases/setNumPhrases; not read elsewhere in the visible part of the class.
    private int m_numPhrases = 10;
    // Divisor applied to the class value in featVals; buildClassifier uses m_Indexers + 1 bins.
    private int m_Indexers = 1;
    // When true, loadThesaurus calls Vocabulary.buildUSE().
    private boolean m_DESCRreplace = true;
    // Enables the "Relations_number" feature; loadThesaurus calls Vocabulary.buildREL() when set.
    public boolean m_NODEfeature = true;
    // Enables the "Phrase_length" feature.
    private boolean m_LENGTHfeature = true;
    // Enables the "Standard_deviation" feature.
    private boolean m_STDEVfeature = false;
    // Enables the "Keyphrase_frequency" feature (option -K).
    private boolean m_KFused = false;
    // Enables progress logging and, together with m_KFused, an extra output column (option -D).
    private boolean m_Debug = false;
    // Forwarded to KEAPhraseFilter.setDisallowInternalPeriods (option -P).
    private boolean m_DisallowInternalPeriods = false;
    // Option -O; not read elsewhere in the visible part of the class.
    private int m_MinNumOccur = 2;
    // Number of features excluding the class value; starts at 2 (TFxIDF, first occurrence)
    // and is adjusted by setKFused and setNumFeature. Also used as the class index.
    private int m_NumFeatures = 2;
    // Positions of the individual features inside the array built by featVals.
    // NOTE(review): m_KeyFreqIndex and m_NodeIndex are both 3, so enabling the keyphrase-frequency
    // feature together with the node feature makes them write to the same slot -- confirm intent.
    private int m_TfidfIndex = 0;
    private int m_FirstOccurIndex = 1;
    private int m_LengthIndex = 2;
    private int m_NodeIndex = 3;
    private int m_STDEVIndex = 4;
    private int m_KeyFreqIndex = 3;
    // Pre-processing filter applied to the document attribute; created in setInputFormat.
    private KEAPhraseFilter m_PunctFilter = null;
    // Additional pre-processing filter, only created when the vocabulary name is "none".
    private NumbersFilter m_NumbersFilter = null;
    // Model built by buildClassifier and queried in convertInstance.
    private Classifier m_Classifier = null;
    // Phrase identifier -> counter over the training documents; null until buildGlobalDictionaries runs.
    public HashMap<String, Counter> m_Dictionary = null;
    // Keyphrase identifier -> counter; only populated when m_KFused is set.
    private HashMap<String, Counter> m_KeyphraseDictionary = null;
    // Number of instances seen by buildGlobalDictionaries.
    private int m_NumDocs = 0;
    // Header (and, during training, data) of the instances handed to the classifier.
    private Instances m_ClassifierData = null;
    // Stemmer exposed through getStemmer/setStemmer; defaults to SremovalStemmer.
    private Stemmer m_Stemmer = new SremovalStemmer();
    // Stopword list supplied through the constructor or setStopwords.
    private Stopwords m_Stopwords;
    // Language code passed to the Vocabulary constructor.
    private String m_documentLanguage = "en";
    // Vocabulary instance created by loadThesaurus; cleared by clearVocabulary.
    public Vocabulary m_Vocabulary;
    // Vocabulary name passed to the Vocabulary constructor; the value "none" disables vocabulary lookups.
    private String m_vocabulary = "agrovoc";
    // Vocabulary format passed to the Vocabulary constructor.
    private String m_vocabularyFormat = "skos";
    // Exposed through getCheckForProperNouns/setCheckForProperNouns; not read elsewhere in the visible part of the class.
    private boolean m_CheckForProperNouns = true;

    /**
     * Creates a filter that uses the given stopword list.
     *
     * @param m_Stopwords the stopword list stored in the field of the same name
     */
    public KEAFilter(Stopwords m_Stopwords) {
        // The parameter shadows the field, so the explicit "this." qualifier is required.
        this.m_Stopwords = m_Stopwords;
    }

    /**
     * Drops the reference to the currently loaded vocabulary.
     */
    public void clearVocabulary() {
        m_Vocabulary = null;
    }

    /**
     * Returns the vocabulary name that is handed to the {@code Vocabulary} constructor.
     *
     * @return the configured vocabulary name
     */
    public String getVocabulary() {
        return m_vocabulary;
    }

    /**
     * Sets the vocabulary name.
     *
     * @param newM_Vocabulary the new vocabulary name
     */
    public void setVocabulary(String newM_Vocabulary) {
        m_vocabulary = newM_Vocabulary;
    }

    /**
     * Returns the vocabulary format that is handed to the {@code Vocabulary} constructor.
     *
     * @return the configured vocabulary format
     */
    public String getVocabularyFormat() {
        return m_vocabularyFormat;
    }

    /**
     * Sets the vocabulary format.
     *
     * @param newM_VocabularyFormat the new vocabulary format
     */
    public void setVocabularyFormat(String newM_VocabularyFormat) {
        m_vocabularyFormat = newM_VocabularyFormat;
    }

    /**
     * Returns the document language that is handed to the {@code Vocabulary} constructor.
     *
     * @return the configured document language
     */
    public String getDocumentLanguage() {
        return m_documentLanguage;
    }

    /**
     * Sets the document language.
     *
     * @param newM_documentLanguage the new document language
     */
    public void setDocumentLanguage(String newM_documentLanguage) {
        m_documentLanguage = newM_documentLanguage;
    }

    /**
     * Returns the proper-noun check flag.
     *
     * @return the current value of the flag
     */
    public boolean getCheckForProperNouns() {
        return m_CheckForProperNouns;
    }

    /**
     * Sets the proper-noun check flag.
     *
     * @param newM_CheckProperNouns the new value of the flag
     */
    public void setCheckForProperNouns(boolean newM_CheckProperNouns) {
        m_CheckForProperNouns = newM_CheckProperNouns;
    }

    /**
     * Returns the stopword list used while collecting phrases.
     *
     * @return the current stopword list
     */
    public Stopwords getStopwords() {
        return m_Stopwords;
    }

    /**
     * Replaces the stopword list.
     *
     * @param newM_Stopwords the new stopword list
     */
    public void setStopwords(Stopwords newM_Stopwords) {
        m_Stopwords = newM_Stopwords;
    }

    /**
     * Returns the configured stemmer.
     *
     * @return the current stemmer
     */
    public Stemmer getStemmer() {
        return m_Stemmer;
    }

    /**
     * Replaces the stemmer.
     *
     * @param newStemmer the new stemmer
     */
    public void setStemmer(Stemmer newStemmer) {
        m_Stemmer = newStemmer;
    }

    /**
     * Returns the minimum number of occurrences (option {@code -O}).
     *
     * @return the configured minimum number of occurrences
     */
    public int getMinNumOccur() {
        return m_MinNumOccur;
    }

    /**
     * Sets the minimum number of occurrences (option {@code -O}).
     *
     * @param newMinNumOccur the new minimum number of occurrences
     */
    public void setMinNumOccur(int newMinNumOccur) {
        m_MinNumOccur = newMinNumOccur;
    }

    /**
     * Returns the maximum phrase length in words (option {@code -M}).
     *
     * @return the configured maximum phrase length
     */
    public int getMaxPhraseLength() {
        return m_MaxPhraseLength;
    }

    /**
     * Sets the maximum phrase length in words (option {@code -M}).
     *
     * @param newMaxPhraseLength the new maximum phrase length
     */
    public void setMaxPhraseLength(int newMaxPhraseLength) {
        m_MaxPhraseLength = newMaxPhraseLength;
    }

    /**
     * Returns the minimum phrase length in words (option {@code -L}).
     *
     * @return the configured minimum phrase length
     */
    public int getMinPhraseLength() {
        return m_MinPhraseLength;
    }

    /**
     * Sets the minimum phrase length in words (option {@code -L}).
     *
     * @param newMinPhraseLength the new minimum phrase length
     */
    public void setMinPhraseLength(int newMinPhraseLength) {
        m_MinPhraseLength = newMinPhraseLength;
    }

    /**
     * Returns the configured number of phrases.
     *
     * @return the current value of the phrase-count setting
     */
    public int getNumPhrases() {
        return m_numPhrases;
    }

    /**
     * Sets the configured number of phrases.
     *
     * @param newnumPhrases the new value of the phrase-count setting
     */
    public void setNumPhrases(int newnumPhrases) {
        m_numPhrases = newnumPhrases;
    }

    /**
     * Returns the output index of the stemmed phrase column, which takes the
     * position of the document attribute.
     *
     * @return the index of the stemmed phrase attribute
     */
    public int getStemmedPhraseIndex() {
        return m_DocumentAtt;
    }

    /**
     * Returns the output index of the unstemmed phrase column, which directly
     * follows the stemmed phrase column.
     *
     * @return the index of the unstemmed phrase attribute
     */
    public int getUnstemmedPhraseIndex() {
        return 1 + m_DocumentAtt;
    }

    /**
     * Computes the output index of the probability column.
     *
     * <p>Four columns always precede it, counted from the document attribute
     * position; each enabled optional feature adds one more column.
     *
     * @return the index of the probability attribute
     */
    public int getProbabilityIndex() {
        int offset = 4;
        // The keyphrase-frequency column is only counted in debug mode.
        offset += (m_Debug && m_KFused) ? 1 : 0;
        offset += m_STDEVfeature ? 1 : 0;
        offset += m_NODEfeature ? 1 : 0;
        offset += m_LENGTHfeature ? 1 : 0;
        return m_DocumentAtt + offset;
    }

    /**
     * Returns the output index of the rank column, which directly follows the
     * probability column.
     *
     * @return the index of the rank attribute
     */
    public int getRankIndex() {
        int probabilityIndex = getProbabilityIndex();
        return probabilityIndex + 1;
    }

    /**
     * Returns the zero-based index of the document attribute.
     *
     * @return the document attribute index
     */
    public int getDocumentAtt() {
        return m_DocumentAtt;
    }

    /**
     * Sets the zero-based index of the document attribute.
     *
     * @param newDocumentAtt the new document attribute index
     */
    public void setDocumentAtt(int newDocumentAtt) {
        m_DocumentAtt = newDocumentAtt;
    }

    /**
     * Returns the zero-based index of the keyphrase attribute.
     *
     * @return the keyphrase attribute index
     */
    public int getKeyphrasesAtt() {
        return m_KeyphrasesAtt;
    }

    /**
     * Sets the zero-based index of the keyphrase attribute.
     *
     * @param newKeyphrasesAtt the new keyphrase attribute index
     */
    public void setKeyphrasesAtt(int newKeyphrasesAtt) {
        m_KeyphrasesAtt = newKeyphrasesAtt;
    }

    /**
     * Tells whether debugging mode is on.
     *
     * @return {@code true} if debugging output is enabled
     */
    public boolean getDebug() {
        return m_Debug;
    }

    /**
     * Switches debugging mode on or off.
     *
     * @param newDebug {@code true} to enable debugging output
     */
    public void setDebug(boolean newDebug) {
        m_Debug = newDebug;
    }

    /**
     * Switches the keyphrase-frequency feature on or off and keeps the feature
     * count in step with it.
     *
     * <p>The feature count is only adjusted when the flag actually changes, so
     * calling this method repeatedly with the same value (for example through
     * repeated {@code setOptions} calls) no longer inflates the count, and
     * switching the feature off again removes the feature that was added.
     *
     * @param flag {@code true} to use the keyphrase-frequency feature
     */
    public void setKFused(boolean flag) {
        if (flag != this.m_KFused) {
            this.m_NumFeatures += flag ? 1 : -1;
        }
        this.m_KFused = flag;
    }

    /**
     * Adds one to the feature count for each enabled optional feature
     * (standard deviation, node relations, phrase length).
     *
     * <p>Every call adds to the current count again.
     */
    public void setNumFeature() {
        int enabledOptionalFeatures = 0;
        if (m_STDEVfeature) {
            enabledOptionalFeatures++;
        }
        if (m_NODEfeature) {
            enabledOptionalFeatures++;
        }
        if (m_LENGTHfeature) {
            enabledOptionalFeatures++;
        }
        m_NumFeatures += enabledOptionalFeatures;
    }

    /**
     * Tells whether the keyphrase-frequency feature is used.
     *
     * @return {@code true} if the keyphrase-frequency feature is enabled
     */
    public boolean getKFused() {
        return m_KFused;
    }

    /**
     * Tells whether internal periods are disallowed.
     *
     * @return the current value of the flag
     */
    public boolean getDisallowInternalPeriods() {
        return m_DisallowInternalPeriods;
    }

    /**
     * Sets whether internal periods are disallowed.
     *
     * @param disallow the new value of the flag
     */
    public void setDisallowInternalPeriods(boolean disallow) {
        m_DisallowInternalPeriods = disallow;
    }

    /**
     * Creates and initializes the vocabulary from the configured vocabulary
     * name, format and document language, then builds the optional indexes.
     *
     * <p>{@code buildUSE()} is called when descriptor replacement is enabled
     * and {@code buildREL()} when the node feature is enabled. A failure while
     * building these indexes is logged and does not propagate to the caller.
     *
     * @param st the stemmer handed to the vocabulary
     * @param sw the stopword list handed to the vocabulary
     */
    public void loadThesaurus(Stemmer st, Stopwords sw) {
        this.m_Vocabulary = new Vocabulary(this.m_vocabulary, this.m_vocabularyFormat, this.m_documentLanguage);
        this.m_Vocabulary.setStemmer(st);
        this.m_Vocabulary.setStopwords(sw);
        this.m_Vocabulary.initialize();
        try {
            if (this.m_DESCRreplace) {
                this.m_Vocabulary.buildUSE();
            }
            if (this.m_NODEfeature) {
                this.m_Vocabulary.buildREL();
            }
        }
        catch (Exception e) {
            // Best effort as before, but report through the class logger instead of stderr.
            log.error("Error while building vocabulary indexes for '" + this.m_vocabulary + "'", e);
        }
    }

    /**
     * Parses the given option list and configures the filter accordingly.
     *
     * <p>Recognized options:
     * <ul>
     * <li>{@code -K} use the keyphrase-frequency feature</li>
     * <li>{@code -D} debugging mode</li>
     * <li>{@code -I <index>} one-based index of the document attribute (default: first attribute)</li>
     * <li>{@code -J <index>} one-based index of the keyphrase attribute (default: second attribute)</li>
     * <li>{@code -M <length>} maximum phrase length (default: 3)</li>
     * <li>{@code -L <length>} minimum phrase length (default: 1)</li>
     * <li>{@code -O <number>} minimum number of occurrences (default: 2)</li>
     * <li>{@code -P} disallow internal periods</li>
     * </ul>
     *
     * @param options the option list to parse
     * @throws Exception if an option cannot be read or its value is not an integer
     */
    public void setOptions(String[] options) throws Exception {
        this.setKFused(Utils.getFlag('K', options));
        this.setDebug(Utils.getFlag('D', options));
        String docAttIndexString = Utils.getOption('I', options);
        if (docAttIndexString.length() > 0) {
            // Indices are one-based on the command line and zero-based internally.
            this.setDocumentAtt(Integer.parseInt(docAttIndexString) - 1);
        } else {
            this.setDocumentAtt(0);
        }
        String keyphraseAttIndexString = Utils.getOption('J', options);
        if (keyphraseAttIndexString.length() > 0) {
            this.setKeyphrasesAtt(Integer.parseInt(keyphraseAttIndexString) - 1);
        } else {
            this.setKeyphrasesAtt(1);
        }
        String maxPhraseLengthString = Utils.getOption('M', options);
        if (maxPhraseLengthString.length() > 0) {
            this.setMaxPhraseLength(Integer.parseInt(maxPhraseLengthString));
        } else {
            this.setMaxPhraseLength(3);
        }
        // The minimum phrase length is the -L option (as emitted by getOptions and
        // documented by listOptions). Reading -M here again would always yield an
        // empty string, because the first lookup already consumed that option.
        String minPhraseLengthString = Utils.getOption('L', options);
        if (minPhraseLengthString.length() > 0) {
            this.setMinPhraseLength(Integer.parseInt(minPhraseLengthString));
        } else {
            this.setMinPhraseLength(1);
        }
        String minNumOccurString = Utils.getOption('O', options);
        if (minNumOccurString.length() > 0) {
            this.setMinNumOccur(Integer.parseInt(minNumOccurString));
        } else {
            this.setMinNumOccur(2);
        }
        this.setDisallowInternalPeriods(Utils.getFlag('P', options));
    }

    /**
     * Returns the current settings as an option list.
     *
     * <p>The returned array always has 13 entries; unused trailing entries
     * are empty strings. Attribute indices are reported one-based.
     *
     * @return the option list describing the current configuration
     */
    public String[] getOptions() {
        final int slots = 13;
        ArrayList<String> settings = new ArrayList<String>(slots);
        if (getKFused()) {
            settings.add("-K");
        }
        if (getDebug()) {
            settings.add("-D");
        }
        settings.add("-I");
        settings.add(String.valueOf(getDocumentAtt() + 1));
        settings.add("-J");
        settings.add(String.valueOf(getKeyphrasesAtt() + 1));
        settings.add("-M");
        settings.add(String.valueOf(getMaxPhraseLength()));
        settings.add("-L");
        settings.add(String.valueOf(getMinPhraseLength()));
        settings.add("-O");
        settings.add(String.valueOf(getMinNumOccur()));
        if (getDisallowInternalPeriods()) {
            settings.add("-P");
        }
        // Pad with empty strings up to the fixed array length.
        while (settings.size() < slots) {
            settings.add("");
        }
        return settings.toArray(new String[slots]);
    }

    /**
     * Describes the options understood by this filter.
     *
     * @return an enumeration of the eight available options
     */
    public Enumeration<Option> listOptions() {
        Vector<Option> descriptions = new Vector<Option>(8);
        descriptions.add(new Option("\tSpecifies whether keyphrase frequency statistic is used.", "K", 0, "-K"));
        descriptions.add(new Option("\tSets the maximum phrase length (default: 3).", "M", 1, "-M <length>"));
        descriptions.add(new Option("\tSets the minimum phrase length (default: 1).", "L", 1, "-L <length>"));
        descriptions.add(new Option("\tTurns debugging mode on.", "D", 0, "-D"));
        descriptions.add(new Option("\tSets the index of the document attribute (default: 0).", "I", 1, "-I"));
        descriptions.add(new Option("\tSets the index of the keyphrase attribute (default: 1).", "J", 1, "-J"));
        descriptions.add(new Option("\tDisallow internal periods.", "P", 0, "-P"));
        descriptions.add(new Option("\tSet the minimum number of occurences (default: 2).", "O", 1, "-O"));
        return descriptions.elements();
    }

    /**
     * Returns a one-sentence description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        final String description = "Converts incoming data into data appropriate for keyphrase classification.";
        return description;
    }

    /**
     * Sets the format of the input instances and prepares the pre-processing
     * filters.
     *
     * <p>The phrase filter is always created for the document attribute; the
     * numbers filter is chained behind it only when the vocabulary name is
     * {@code "none"}. The output of the last pre-processing filter becomes the
     * input format of this filter.
     *
     * @param instanceInfo header describing the incoming instances
     * @return always {@code false}
     * @throws Exception if a class index is set or the document/keyphrase
     *         attributes are not string attributes
     */
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        if (instanceInfo.classIndex() >= 0) {
            throw new Exception("Don't know what do to if class index set!");
        }
        boolean bothAreStrings = instanceInfo.attribute(m_KeyphrasesAtt).isString()
                && instanceInfo.attribute(m_DocumentAtt).isString();
        if (!bothAreStrings) {
            throw new Exception("Keyphrase attribute and document attribute need to be string attributes.");
        }

        m_PunctFilter = new KEAPhraseFilter();
        m_PunctFilter.setAttributeIndicesArray(new int[]{m_DocumentAtt});
        m_PunctFilter.setInputFormat(instanceInfo);
        m_PunctFilter.setDisallowInternalPeriods(getDisallowInternalPeriods());

        if (!m_vocabulary.equals("none")) {
            super.setInputFormat(m_PunctFilter.getOutputFormat());
            return false;
        }
        // Without a vocabulary the numbers filter is chained after the phrase filter.
        m_NumbersFilter = new NumbersFilter();
        m_NumbersFilter.setInputFormat(m_PunctFilter.getOutputFormat());
        super.setInputFormat(m_NumbersFilter.getOutputFormat());
        return false;
    }

    /**
     * Returns the capabilities of this filter: the inherited capabilities plus
     * all attribute types, missing values, nominal class, no class and all
     * class types.
     *
     * @return the capabilities of this filter
     */
    public Capabilities getCapabilities() {
        Capabilities capabilities = super.getCapabilities();
        // Attribute-related capabilities.
        capabilities.enableAllAttributes();
        capabilities.enable(Capabilities.Capability.MISSING_VALUES);
        // Class-related capabilities.
        capabilities.enable(Capabilities.Capability.NOMINAL_CLASS);
        capabilities.enable(Capabilities.Capability.NO_CLASS);
        capabilities.enableAllClasses();
        return capabilities;
    }

    /**
     * Feeds one instance into the filter.
     *
     * <p>The instance is first passed through the pre-processing filter(s).
     * While no global dictionary exists yet, it is only buffered; afterwards
     * it is converted immediately and the results are queued for output.
     *
     * @param instance the incoming instance
     * @return {@code true} if converted instances were queued, {@code false}
     *         if the instance was buffered
     * @throws Exception if no input format has been defined or conversion fails
     */
    public boolean input(Instance instance) throws Exception {
        if (getInputFormat() == null) {
            throw new Exception("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }
        if (m_Debug) {
            log.info("-- Reading instance");
        }

        // Run the instance through the phrase filter as a one-element batch.
        m_PunctFilter.input(instance);
        m_PunctFilter.batchFinished();
        Instance cleaned = m_PunctFilter.output();

        if (m_vocabulary.equals("none")) {
            m_NumbersFilter.input(cleaned);
            m_NumbersFilter.batchFinished();
            cleaned = m_NumbersFilter.output();
        }

        if (m_Dictionary == null) {
            // Model not built yet: keep the instance until batchFinished().
            bufferInput(cleaned);
            return false;
        }

        for (Enumeration converted = convertInstance(cleaned, false).elements(); converted.hasMoreElements(); ) {
            push((Instance)converted.nextElement());
        }
        return true;
    }

    /**
     * Signals the end of the current batch.
     *
     * <p>If no global dictionary exists yet, the dictionaries and the
     * classifier are built from the buffered instances, which are then
     * converted and queued for output.
     *
     * @return {@code true} if there are instances pending output
     * @throws Exception if no input format has been defined or building fails
     */
    public boolean batchFinished() throws Exception {
        if (getInputFormat() == null) {
            throw new Exception("No input instance format defined");
        }
        boolean modelMissing = (m_Dictionary == null);
        if (modelMissing) {
            buildGlobalDictionaries();
            buildClassifier();
            convertPendingInstances();
        }
        flushInput();
        m_NewBatch = true;
        return numPendingOutput() != 0;
    }

    /**
     * Builds the global phrase dictionary and, if the keyphrase-frequency
     * feature is enabled, the global keyphrase dictionary from the buffered
     * input instances.
     *
     * <p>For every document, each distinct phrase identifier contributes once:
     * a new {@code Counter} is stored on first sight, and the existing one is
     * incremented when the identifier shows up in a later document. Finally
     * the number of documents is recorded.
     *
     * @throws Exception if phrase or keyphrase extraction fails
     */
    public void buildGlobalDictionaries() throws Exception {
        if (m_Debug) {
            log.info("--- Building global dictionaries");
        }

        m_Dictionary = new HashMap<String, Counter>();
        for (int doc = 0; doc < getInputFormat().numInstances(); doc++) {
            String text = getInputFormat().instance(doc).stringValue(m_DocumentAtt);
            HashMap<String, Counter> docPhrases = getPhrasesForDictionary(text);
            Iterator<String> ids = docPhrases.keySet().iterator();
            while (ids.hasNext()) {
                String phraseId = ids.next();
                Counter docCount = m_Dictionary.get(phraseId);
                if (docCount != null) {
                    docCount.increment();
                } else {
                    m_Dictionary.put(phraseId, new Counter());
                }
            }
        }

        if (!m_KFused) {
            m_KeyphraseDictionary = null;
        } else {
            if (m_Debug) {
                log.info("KF_used feature");
            }
            m_KeyphraseDictionary = new HashMap<String, Counter>();
            for (int doc = 0; doc < getInputFormat().numInstances(); doc++) {
                String given = getInputFormat().instance(doc).stringValue(m_KeyphrasesAtt);
                HashMap<String, Counter> docKeyphrases = getGivenKeyphrases(given, false);
                if (docKeyphrases == null) {
                    // No keyphrases for this document.
                    continue;
                }
                Iterator<String> ids = docKeyphrases.keySet().iterator();
                while (ids.hasNext()) {
                    String keyphraseId = ids.next();
                    Counter docCount = m_KeyphraseDictionary.get(keyphraseId);
                    if (docCount != null) {
                        docCount.increment();
                    } else {
                        m_KeyphraseDictionary.put(keyphraseId, new Counter());
                    }
                }
            }
        }

        m_NumDocs = getInputFormat().numInstances();
    }

    /**
     * Builds the classifier from the buffered training instances.
     *
     * <p>A numeric training set is assembled with one attribute per enabled
     * feature plus the numeric {@code Keyphrase?} attribute; one row is added
     * for every candidate phrase of every document. The model is a
     * {@code RegressionByDiscretization} wrapping a {@code FilteredClassifier}
     * (supervised {@code Discretize} + {@code NaiveBayesSimple}). After
     * training only the header of the training data is kept.
     *
     * @throws Exception if feature extraction or training fails
     */
    private void buildClassifier() throws Exception {
        // Describe the training data: feature columns replace the document attribute.
        FastVector header = new FastVector();
        for (int att = 0; att < getInputFormat().numAttributes(); att++) {
            if (att == m_DocumentAtt) {
                header.addElement(new Attribute("TFxIDF"));
                header.addElement(new Attribute("First_occurrence"));
                if (m_KFused) {
                    header.addElement(new Attribute("Keyphrase_frequency"));
                }
                if (m_STDEVfeature) {
                    header.addElement(new Attribute("Standard_deviation"));
                }
                if (m_NODEfeature) {
                    header.addElement(new Attribute("Relations_number"));
                }
                if (m_LENGTHfeature) {
                    header.addElement(new Attribute("Phrase_length"));
                }
            } else if (att == m_KeyphrasesAtt) {
                // The class attribute is numeric.
                header.addElement(new Attribute("Keyphrase?"));
            }
            // All other input attributes are ignored for training.
        }
        m_ClassifierData = new Instances("ClassifierData", header, 0);
        m_ClassifierData.setClassIndex(m_NumFeatures);

        if (m_Debug) {
            log.info("--- Converting instances for classifier");
        }
        for (int doc = 0; doc < getInputFormat().numInstances(); doc++) {
            Instance document = getInputFormat().instance(doc);
            String given = document.stringValue(m_KeyphrasesAtt);
            HashMap<String, Counter> givenKeyphrases = getGivenKeyphrases(given, false);
            HashMap<String, Counter> givenForEval = getGivenKeyphrases(given, true);
            HashMap<String, FastVector> candidates = new HashMap<String, FastVector>();
            int docLength = getPhrases(candidates, document.stringValue(m_DocumentAtt));
            Iterator<String> ids = candidates.keySet().iterator();
            while (ids.hasNext()) {
                String candidateId = ids.next();
                double[] features = featVals(candidateId, candidates.get(candidateId), true, givenForEval, givenKeyphrases, docLength, candidates);
                m_ClassifierData.add(new Instance(document.weight(), features));
            }
        }

        if (m_Debug) {
            log.info("--- Building classifier");
        }
        FilteredClassifier discretizedBayes = new FilteredClassifier();
        discretizedBayes.setClassifier(new NaiveBayesSimple());
        discretizedBayes.setFilter(new Discretize());
        RegressionByDiscretization regression = new RegressionByDiscretization();
        regression.setClassifier(discretizedBayes);
        regression.setNumBins(m_Indexers + 1);
        m_Classifier = regression;
        m_Classifier.buildClassifier(m_ClassifierData);
        if (m_Debug) {
            log.info("" + m_Classifier);
        }

        // Keep only the header; the training rows are no longer needed.
        m_ClassifierData = new Instances(m_ClassifierData, 0);
    }

    /**
     * Computes the feature vector (features followed by the class value) for
     * one candidate phrase.
     *
     * @param id identifier of the candidate phrase
     * @param phraseInfo per-phrase data: element 0 is read as the
     *        first-occurrence counter, element 1 as the local frequency
     *        counter, element 3 as the vector of occurrence counters (only
     *        read for the standard-deviation feature)
     * @param training if {@code true}, the current document's own
     *        contribution is subtracted from the global counts
     * @param hashKeysEval given keyphrases used for the class value, or
     *        {@code null} if none are available
     * @param hashKeyphrases given keyphrases used for the keyphrase-frequency
     *        feature, may be {@code null}
     * @param length document length used for normalization
     * @param hash all candidate phrases of the current document
     * @return array of {@code m_NumFeatures + 1} values; the last one is the
     *         class value (missing if {@code hashKeysEval} is {@code null})
     */
    private double[] featVals(String id, FastVector phraseInfo, boolean training, HashMap<String, Counter> hashKeysEval, HashMap<String, Counter> hashKeyphrases, int length, HashMap<String, FastVector> hash) {
        Counter localCounter = (Counter)phraseInfo.elementAt(1);
        double[] features = new double[m_NumFeatures + 1];
        Counter globalCounter = m_Dictionary.get(id);

        // TFxIDF: local frequency relative to document length, weighted by the
        // negative log of the (smoothed) document frequency.
        double localFreq = localCounter.value();
        double docFreq = 0.0;
        if (globalCounter != null) {
            docFreq = globalCounter.value();
            if (training) {
                // Do not count the document the phrase is taken from.
                docFreq -= 1.0;
            }
        }
        double termFrequency = localFreq / (double)length;
        double inverseDocFrequency = -Math.log((docFreq + 1.0) / ((double)m_NumDocs + 1.0));
        features[m_TfidfIndex] = termFrequency * inverseDocFrequency;

        // Relative position of the first occurrence.
        Counter firstCounter = (Counter)phraseInfo.elementAt(0);
        features[m_FirstOccurIndex] = (double)firstCounter.value() / (double)length;

        if (m_KFused) {
            Counter keyphraseCounter = m_KeyphraseDictionary.get(id);
            double keyphraseFreq;
            if (training && hashKeyphrases != null && hashKeyphrases.containsKey(id)) {
                keyphraseFreq = (double)(keyphraseCounter.value() - 1);
            } else if (keyphraseCounter != null) {
                keyphraseFreq = (double)keyphraseCounter.value();
            } else {
                keyphraseFreq = 0.0;
            }
            features[m_KeyFreqIndex] = keyphraseFreq;
        }

        if (m_STDEVfeature) {
            FastVector occurrences = (FastVector)phraseInfo.elementAt(3);
            double[] positions = new double[occurrences.size()];
            for (int k = 0; k < positions.length; k++) {
                positions[k] = (double)((Counter)occurrences.elementAt(k)).value() / (double)length;
            }
            double mean = Utils.mean(positions);
            double squaredDeviations = 0.0;
            for (double position : positions) {
                squaredDeviations += (position - mean) * (position - mean);
            }
            features[m_STDEVIndex] = Math.sqrt(squaredDeviations / (double)occurrences.size());
        }

        if (m_NODEfeature) {
            // Count how many related vocabulary terms are candidates of this document too.
            Vector<String> relatedIds = m_Vocabulary.getRelated(id);
            int relatedInDocument = 0;
            if (relatedIds != null) {
                for (int r = 0; r < relatedIds.size(); r++) {
                    if (hash.get(relatedIds.elementAt(r)) != null) {
                        relatedInDocument++;
                    }
                }
            }
            features[m_NodeIndex] = relatedInDocument;
        }

        if (m_LENGTHfeature) {
            String original;
            if (m_vocabulary.equals("none")) {
                original = id;
            } else {
                original = m_Vocabulary.getOrig(id);
            }
            if (original != null) {
                features[m_LengthIndex] = KEAFilter.split(original, " ").length;
            } else {
                log.info("problem with id " + id);
                features[m_LengthIndex] = 1.0;
            }
        }

        // Class value.
        double classValue;
        if (hashKeysEval == null) {
            classValue = Instance.missingValue();
        } else if (hashKeysEval.containsKey(id)) {
            classValue = (double)hashKeysEval.get(id).value() / (double)m_Indexers;
        } else {
            classValue = 0.0;
        }
        features[m_NumFeatures] = classValue;
        return features;
    }

    /**
     * Defines the output format and converts all buffered input instances.
     *
     * <p>In the output format the document attribute is replaced by the
     * stemmed and original phrase (string attributes), the feature columns,
     * the probability and the rank; the keyphrase attribute becomes the
     * numeric {@code Keyphrase?} attribute; all other attributes are passed
     * through unchanged.
     *
     * @throws Exception if converting an instance fails
     */
    private void convertPendingInstances() throws Exception {
        if (m_Debug) {
            log.info("--- Converting pending instances");
        }

        FastVector outAttributes = new FastVector();
        for (int att = 0; att < getInputFormat().numAttributes(); att++) {
            if (att == m_DocumentAtt) {
                // A null value list makes these two string attributes.
                outAttributes.addElement(new Attribute("N-gram", (FastVector)null));
                outAttributes.addElement(new Attribute("N-gram-original", (FastVector)null));
                outAttributes.addElement(new Attribute("TFxIDF"));
                outAttributes.addElement(new Attribute("First_occurrence"));
                if (m_Debug && m_KFused) {
                    outAttributes.addElement(new Attribute("Keyphrase_frequency"));
                }
                if (m_STDEVfeature) {
                    outAttributes.addElement(new Attribute("Standard_deviation"));
                }
                if (m_NODEfeature) {
                    outAttributes.addElement(new Attribute("Relations_number"));
                }
                if (m_LENGTHfeature) {
                    outAttributes.addElement(new Attribute("Phrase_length"));
                }
                outAttributes.addElement(new Attribute("Probability"));
                outAttributes.addElement(new Attribute("Rank"));
            } else if (att == m_KeyphrasesAtt) {
                outAttributes.addElement(new Attribute("Keyphrase?"));
            } else {
                outAttributes.addElement(getInputFormat().attribute(att));
            }
        }
        setOutputFormat(new Instances("KEAdata", outAttributes, 0));

        for (int doc = 0; doc < getInputFormat().numInstances(); doc++) {
            FastVector converted = convertInstance(getInputFormat().instance(doc), true);
            for (Enumeration rows = converted.elements(); rows.hasMoreElements(); ) {
                push((Instance)rows.nextElement());
            }
        }
    }

    /**
     * Converts one pre-processed input instance into a vector of output
     * instances: one row per candidate phrase returned by {@code getPhrases},
     * plus extra rows for the given keyphrases. The rows are then ordered and
     * ranked.
     *
     * @param instance the input instance (document and, optionally, keyphrases)
     * @param training passed through to {@code featVals}
     * @return the converted, ordered instances
     * @throws Exception if feature extraction or classification fails
     */
    private FastVector convertInstance(Instance instance, boolean training) throws Exception {
        int i;
        FastVector vector = new FastVector();
        if (this.m_Debug) {
            log.info("-- Converting instance");
        }
        // Given keyphrases are only looked up when the keyphrase attribute is present.
        HashMap<String, Counter> hashKeyphrases = null;
        HashMap<String, Counter> hashKeysEval = null;
        if (!instance.isMissing(this.m_KeyphrasesAtt)) {
            String keyphrases = instance.stringValue(this.m_KeyphrasesAtt);
            hashKeyphrases = this.getGivenKeyphrases(keyphrases, false);
            hashKeysEval = this.getGivenKeyphrases(keyphrases, true);
        }
        HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
        int length = this.getPhrases(hash, instance.stringValue(this.m_DocumentAtt));
        // Net number of extra output columns: the document attribute is replaced by
        // stemmed phrase, original phrase, TFxIDF, first occurrence, probability and
        // rank (6 columns, i.e. +5), plus one per enabled optional column.
        int numFeatures = 5;
        if (this.m_Debug && this.m_KFused) {
            ++numFeatures;
        }
        if (this.m_STDEVfeature) {
            ++numFeatures;
        }
        if (this.m_NODEfeature) {
            ++numFeatures;
        }
        if (this.m_LENGTHfeature) {
            ++numFeatures;
        }
        // Output column positions used for sorting and ranking below.
        int tfidfAttIndex = this.m_DocumentAtt + 2;
        int distAttIndex = this.m_DocumentAtt + 3;
        int probsAttIndex = this.m_DocumentAtt + numFeatures - 1;
        // One output row per candidate phrase.
        for (String id : hash.keySet()) {
            FastVector phraseInfo = hash.get(id);
            double[] vals = this.featVals(id, phraseInfo, training, hashKeysEval, hashKeyphrases, length, hash);
            Instance inst = new Instance(instance.weight(), vals);
            inst.setDataset(this.m_ClassifierData);
            double[] probs = this.m_Classifier.distributionForInstance(inst);
            // Only the first element of the returned distribution is used.
            double prob = probs[0];
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i2 = 0; i2 < instance.numAttributes(); ++i2) {
                if (i2 == this.m_DocumentAtt) {
                    // String attributes store the index of the value in the attribute's value list.
                    int index = this.outputFormatPeek().attribute(pos).addStringValue(id);
                    newInst[pos++] = index;
                    // Element 2 of phraseInfo is read as the original phrase; fall back to the id.
                    String orig = (String)phraseInfo.elementAt(2);
                    index = orig != null ? this.outputFormatPeek().attribute(pos).addStringValue(orig) : this.outputFormatPeek().attribute(pos).addStringValue(id);
                    newInst[pos++] = index;
                    newInst[pos++] = inst.value(this.m_TfidfIndex);
                    newInst[pos++] = inst.value(this.m_FirstOccurIndex);
                    if (this.m_Debug && this.m_KFused) {
                        newInst[pos++] = inst.value(this.m_KeyFreqIndex);
                    }
                    if (this.m_STDEVfeature) {
                        newInst[pos++] = inst.value(this.m_STDEVIndex);
                    }
                    if (this.m_NODEfeature) {
                        newInst[pos++] = inst.value(this.m_NodeIndex);
                    }
                    if (this.m_LENGTHfeature) {
                        newInst[pos++] = inst.value(this.m_LengthIndex);
                    }
                    probsAttIndex = pos;
                    newInst[pos++] = prob;
                    // Rank is filled in after sorting.
                    newInst[pos++] = Instance.missingValue();
                    continue;
                }
                newInst[pos++] = i2 == this.m_KeyphrasesAtt ? inst.classValue() : instance.value(i2);
            }
            Instance ins = new Instance(instance.weight(), newInst);
            ins.setDataset(this.outputFormatPeek());
            vector.addElement((Object)ins);
        }
        // Extra rows for the given keyphrases: feature values are missing and the
        // probability slot is set to -Double.MAX_VALUE.
        if (hashKeysEval != null) {
            for (String phrase : hashKeysEval.keySet()) {
                double[] newInst = new double[instance.numAttributes() + numFeatures];
                int pos = 0;
                for (int i3 = 0; i3 < instance.numAttributes(); ++i3) {
                    if (i3 == this.m_DocumentAtt) {
                        int index = this.outputFormatPeek().attribute(pos).addStringValue(phrase);
                        newInst[pos++] = index;
                        index = this.outputFormatPeek().attribute(pos).addStringValue(phrase);
                        newInst[pos++] = index;
                        newInst[pos++] = Instance.missingValue();
                        newInst[pos++] = Instance.missingValue();
                        if (this.m_Debug && this.m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                        if (this.m_STDEVfeature) {
                            newInst[pos++] = Instance.missingValue();
                        }
                        if (this.m_NODEfeature) {
                            newInst[pos++] = Instance.missingValue();
                        }
                        if (this.m_LENGTHfeature) {
                            newInst[pos++] = Instance.missingValue();
                        }
                        // NOTE(review): unlike the candidate rows above, no value is written
                        // for the rank column here, so the attributes that follow the document
                        // attribute end up one position earlier than in the output format --
                        // confirm whether this is intended.
                        newInst[pos++] = -1.7976931348623157E308;
                    } else {
                        newInst[pos++] = i3 == this.m_KeyphrasesAtt ? 1.0 : instance.value(i3);
                    }
                    // NOTE(review): this runs once per input attribute, so every given keyphrase
                    // adds instance.numAttributes() rows that are all built from the same newInst
                    // array. It looks like this was meant to run once after the attribute loop --
                    // confirm before changing, downstream code may depend on the row count.
                    Instance inst = new Instance(instance.weight(), newInst);
                    inst.setDataset(this.outputFormatPeek());
                    vector.addElement((Object)inst);
                }
            }
        }
        // Three consecutive stable sorts: first by first occurrence, then by negated
        // TFxIDF, finally by (1 - probability). Assuming Utils.stableSort orders
        // ascending, the final order is by descending probability, with ties broken by
        // descending TFxIDF and then by ascending first occurrence.
        double[] vals = new double[vector.size()];
        for (int i4 = 0; i4 < vals.length; ++i4) {
            vals[i4] = ((Instance)vector.elementAt(i4)).value(distAttIndex);
        }
        FastVector newVector = new FastVector(vector.size());
        int[] sortedIndices = Utils.stableSort((double[])vals);
        for (i = 0; i < vals.length; ++i) {
            newVector.addElement(vector.elementAt(sortedIndices[i]));
        }
        vector = newVector;
        for (i = 0; i < vals.length; ++i) {
            vals[i] = -((Instance)vector.elementAt(i)).value(tfidfAttIndex);
        }
        newVector = new FastVector(vector.size());
        sortedIndices = Utils.stableSort((double[])vals);
        for (i = 0; i < vals.length; ++i) {
            newVector.addElement(vector.elementAt(sortedIndices[i]));
        }
        vector = newVector;
        for (i = 0; i < vals.length; ++i) {
            vals[i] = 1.0 - ((Instance)vector.elementAt(i)).value(probsAttIndex);
        }
        newVector = new FastVector(vector.size());
        sortedIndices = Utils.stableSort((double[])vals);
        for (i = 0; i < vals.length; ++i) {
            newVector.addElement(vector.elementAt(sortedIndices[i]));
        }
        vector = newVector;
        // Assign ranks in the final order, starting at 1.
        int rank = 1;
        for (int i5 = 0; i5 < vals.length; ++i5) {
            Instance inst;
            Instance currentInstance = (Instance)vector.elementAt(i5);
            // vals still holds (1 - probability) from the last sort, which was done in place
            // on the previous ordering. NOTE(review): vals[i5] is therefore indexed by the
            // pre-sort position unless stableSort leaves the array sorted -- confirm.
            // Rows whose value is >= 1 get rank 2147483647 (Integer.MAX_VALUE).
            if (Utils.grOrEq((double)vals[i5], (double)1.0)) {
                currentInstance.setValue(probsAttIndex + 1, 2.147483647E9);
                continue;
            }
            // NOTE(review): this loop has an empty body and its counter is never used
            // afterwards, so it has no effect on the result.
            for (int startInd = i5; startInd < vals.length && (inst = (Instance)vector.elementAt(startInd)).value(tfidfAttIndex) == currentInstance.value(tfidfAttIndex) && inst.value(probsAttIndex) == currentInstance.value(probsAttIndex) && inst.value(distAttIndex) == currentInstance.value(distAttIndex); ++startInd) {
            }
            currentInstance.setValue(probsAttIndex + 1, (double)rank++);
        }
        return vector;
    }

    /**
     * Collects the candidate phrases of one document for the global dictionary.
     *
     * <p>The text is split into lines and words. A window of the last
     * {@code m_MaxPhraseLength} words of the current line is maintained; for
     * every word that is not a stopword, all word sequences ending at that
     * word are considered, skipping those that start with a stopword or are
     * shorter than {@code m_MinPhraseLength}. Each sequence is mapped to an
     * identifier (its pseudo phrase when the vocabulary name is
     * {@code "none"}, otherwise the vocabulary id); sequences without an
     * identifier are dropped.
     *
     * @param str the document text
     * @return identifier to counter; a counter is created when an identifier
     *         is first seen and incremented on each further occurrence
     */
    public HashMap<String, Counter> getPhrasesForDictionary(String str) {
        final int windowSize = m_MaxPhraseLength;
        String[] window = new String[windowSize];
        HashMap<String, Counter> phraseCounts = new HashMap<String, Counter>();

        StringTokenizer lines = new StringTokenizer(str, "\n");
        while (lines.hasMoreTokens()) {
            StringTokenizer words = new StringTokenizer(lines.nextToken(), " ");
            // Number of valid words in the window for the current line.
            int wordsInWindow = 0;
            while (words.hasMoreTokens()) {
                String word = words.nextToken();

                // Shift the window left by one and append the new word.
                for (int k = 1; k < windowSize; k++) {
                    window[k - 1] = window[k];
                }
                window[windowSize - 1] = word;
                wordsInWindow = Math.min(wordsInWindow + 1, windowSize);

                // Phrases must not end with a stopword.
                if (m_Stopwords.isStopword(window[windowSize - 1])) {
                    continue;
                }

                // Grow the phrase backwards, one word at a time.
                StringBuilder candidate = new StringBuilder();
                for (int len = 1; len <= wordsInWindow; len++) {
                    String firstWord = window[windowSize - len];
                    if (len > 1) {
                        candidate.insert(0, ' ');
                    }
                    candidate.insert(0, firstWord);

                    boolean startsWithStopword = len > 1 && m_Stopwords.isStopword(firstWord);
                    if (startsWithStopword || len < m_MinPhraseLength) {
                        continue;
                    }

                    String orig = candidate.toString();
                    String pseudo = pseudoPhrase(orig);
                    String id;
                    if (m_vocabulary.equals("none")) {
                        id = pseudo;
                    } else {
                        id = m_Vocabulary.getID(orig);
                    }
                    if (id == null) {
                        continue;
                    }

                    Counter seen = phraseCounts.get(id);
                    if (seen != null) {
                        seen.increment();
                    } else {
                        phraseCounts.put(id, new Counter());
                    }
                }
            }
        }
        return phraseCounts;
    }

    /**
     * Collects the candidate phrases of a document into {@code hash} and
     * removes those that occur fewer than {@code m_MinNumOccur} times.
     *
     * <p>The text is read as newline-separated lines of space-separated
     * words. For every word that is not a stopword, all phrases ending at
     * that word (up to {@code m_MaxPhraseLength} words, never crossing a line
     * boundary) are considered; candidates shorter than
     * {@code m_MinPhraseLength}, or multi-word candidates starting with a
     * stopword, are rejected.
     *
     * <p>Each map value is a {@code FastVector} laid out as:
     * <ol start="0">
     *   <li>{@code Counter} holding the word position of the first sighting</li>
     *   <li>{@code Counter} created on the first sighting and incremented on
     *       every later one</li>
     *   <li>the phrase text: the surface form, or
     *       {@code m_Vocabulary.getOrig(id)} when a vocabulary is in use</li>
     *   <li>only when {@code m_STDEVfeature} is set: a {@code FastVector}
     *       with one position {@code Counter} per sighting</li>
     * </ol>
     *
     * @param hash map that receives the phrase information, keyed by phrase id
     * @param str the document text
     * @return the running word position after the last word, i.e. one more
     *         than the number of words read
     */
    private int getPhrases(HashMap<String, FastVector> hash, String str) {
        // Sliding window over the most recent words; the newest word is last.
        String[] buffer = new String[this.m_MaxPhraseLength];
        StringTokenizer tok = new StringTokenizer(str, "\n");
        // 1-based position of the current word, counted across all lines.
        int pos = 1;
        while (tok.hasMoreTokens()) {
            String phrase = tok.nextToken();
            int numSeen = 0;
            StringTokenizer wordTok = new StringTokenizer(phrase, " ");
            while (wordTok.hasMoreTokens()) {
                String word = wordTok.nextToken();
                for (int i = 0; i < this.m_MaxPhraseLength - 1; ++i) {
                    buffer[i] = buffer[i + 1];
                }
                buffer[this.m_MaxPhraseLength - 1] = word;
                if (++numSeen > this.m_MaxPhraseLength) {
                    numSeen = this.m_MaxPhraseLength;
                }
                // No candidate may end on a stopword, but the word still counts
                // towards the position.
                if (this.m_Stopwords.isStopword(buffer[this.m_MaxPhraseLength - 1])) {
                    ++pos;
                    continue;
                }
                StringBuffer phraseBuffer = new StringBuffer();
                // Grow the candidate backwards, one word at a time.
                for (int i = 1; i <= numSeen; ++i) {
                    if (i > 1) {
                        phraseBuffer.insert(0, ' ');
                    }
                    phraseBuffer.insert(0, buffer[this.m_MaxPhraseLength - i]);
                    boolean startsWithStopword =
                            i > 1 && this.m_Stopwords.isStopword(buffer[this.m_MaxPhraseLength - i]);
                    if (startsWithStopword || i < this.m_MinPhraseLength) {
                        continue;
                    }
                    String orig = phraseBuffer.toString();
                    boolean freeIndexing = this.m_vocabulary.equals("none");
                    String id = freeIndexing ? this.pseudoPhrase(orig) : this.m_Vocabulary.getID(orig);
                    if (id == null) {
                        continue;
                    }
                    if (!freeIndexing) {
                        orig = this.m_Vocabulary.getOrig(id);
                    }
                    // Position of the first word of this i-word candidate.
                    int firstWordPos = pos + 1 - i;
                    FastVector vec = hash.get(id);
                    if (vec == null) {
                        vec = this.m_STDEVfeature ? new FastVector(4) : new FastVector(3);
                        vec.addElement(new Counter(firstWordPos));
                        vec.addElement(new Counter());
                        vec.addElement(orig);
                        if (this.m_STDEVfeature) {
                            FastVector app = new FastVector();
                            app.addElement(new Counter(firstWordPos));
                            vec.addElement(app);
                        }
                        hash.put(id, vec);
                        continue;
                    }
                    ((Counter) vec.elementAt(1)).increment();
                    if (this.m_STDEVfeature) {
                        // The positions list already lives at index 3; it is
                        // updated in place and must not be appended to vec again,
                        // otherwise vec grows by one duplicate reference per
                        // occurrence.
                        FastVector app = (FastVector) vec.elementAt(3);
                        app.addElement(new Counter(firstWordPos));
                    }
                }
                ++pos;
            }
        }
        // Drop phrases that do not occur often enough.
        Iterator<String> phrases = hash.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = phrases.next();
            FastVector info = hash.get(phrase);
            if (((Counter) info.elementAt(1)).value() < this.m_MinNumOccur) {
                phrases.remove();
            }
        }
        return pos;
    }

    /**
     * Splits a string at every character that equals {@code separator},
     * ignoring case.
     *
     * <p>Each character of {@code str} is compared, as a one-character
     * string, with {@code separator}; a separator longer than one character
     * therefore never matches. Empty pieces between or before separators are
     * kept, but a trailing empty piece is not.
     *
     * @param str the string to split
     * @param separator the one-character separator
     * @return the pieces in order of appearance; empty array for an empty input
     */
    private static String[] split(String str, String separator) {
        ArrayList<String> lst = new ArrayList<String>();
        StringBuilder word = new StringBuilder();
        for (int i = 0; i < str.length(); ++i) {
            String letter = str.substring(i, i + 1);
            if (letter.equalsIgnoreCase(separator)) {
                lst.add(word.toString());
                word.setLength(0);
            } else {
                word.append(str.charAt(i));
            }
        }
        // Compare by content, not by reference, to detect a pending piece.
        if (word.length() > 0) {
            lst.add(word.toString());
        }
        return lst.toArray(new String[lst.size()]);
    }

    /**
     * Builds the set of given keyphrases from a newline-separated list.
     *
     * <p>Each line is trimmed; if it contains a {@code /} with text on both
     * sides, only the part before the first {@code /} is kept. The result is
     * converted with {@link #pseudoPhrase(String)} and, when a vocabulary is
     * in use, mapped to its vocabulary identifier. Lines that become empty or
     * have no identifier are ignored.
     *
     * @param str the keyphrases, one per line
     * @param forEval when {@code true} and debugging is enabled, a message is
     *        logged for every keyphrase that occurs more than once
     * @return map from keyphrase key to its {@link Counter}, or {@code null}
     *         when no keyphrase was accepted
     */
    private HashMap<String, Counter> getGivenKeyphrases(String str, boolean forEval) {
        HashMap<String, Counter> keyphrases = new HashMap<String, Counter>();
        StringTokenizer lines = new StringTokenizer(str, "\n");
        while (lines.hasMoreTokens()) {
            String keyphrase = lines.nextToken().trim();
            if (keyphrase.matches(".+?/.+?")) {
                keyphrase = keyphrase.split("/")[0];
            }
            keyphrase = this.pseudoPhrase(keyphrase);
            if (keyphrase.length() <= 0) {
                continue;
            }

            String key;
            if (this.m_vocabulary.equals("none")) {
                key = keyphrase;
            } else {
                key = this.m_Vocabulary.getID(keyphrase);
            }
            if (key == null) {
                continue;
            }

            if (keyphrases.containsKey(key)) {
                // Repeated keyphrase: it is still counted, and optionally reported.
                Counter seen = keyphrases.get(key);
                seen.increment();
                keyphrases.put(key, seen);
                if (forEval && this.m_Debug) {
                    log.info("Skipping the phrase " + keyphrase + ", which appears twice in the author-assigned keyphrase set.");
                }
            } else {
                keyphrases.put(key, new Counter());
            }
        }
        return keyphrases.size() == 0 ? null : keyphrases;
    }

    /**
     * Converts a phrase into its pseudo phrase: a lower-cased, cleaned,
     * stopword-free, stemmed form whose words are put in sorted order.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>lower-case the input;</li>
     *   <li>if it contains a slash with text on both sides, keep only the
     *       text before the first slash;</li>
     *   <li>likewise keep only the text before the first opening
     *       parenthesis;</li>
     *   <li>if it contains an apostrophe with text on both sides, keep the
     *       piece that follows the first apostrophe;</li>
     *   <li>replace hyphens and ampersands by spaces, remove asterisks and
     *       colons, replace comma-space and period-space by a single space,
     *       and trim;</li>
     *   <li>drop the words reported as stopwords by {@code m_Stopwords};</li>
     *   <li>stem the remainder with {@code m_Stemmer}, sort the resulting
     *       words and join them with single spaces.</li>
     * </ol>
     *
     * @param str the phrase to convert
     * @return the pseudo phrase
     */
    public String pseudoPhrase(String str) {
        String text = str.toLowerCase();

        // Keep only the leading alternative / the part outside parentheses.
        if (text.matches(".+?/.+?")) {
            text = text.split("/")[0];
        }
        if (text.matches(".+?\\(.+?")) {
            text = text.split("\\(")[0];
        }
        // For an apostrophe, keep what follows it.
        if (text.matches(".+?\\'.+?")) {
            text = text.split("\\'")[1];
        }

        // Strip or neutralise punctuation.
        text = text.replace('-', ' ').replace('&', ' ');
        text = text.replaceAll("\\*", "");
        text = text.replaceAll("\\, ", " ");
        text = text.replaceAll("\\. ", " ");
        text = text.replaceAll("\\:", "");
        text = text.trim();

        // Rebuild the phrase without stopwords.
        StringBuilder withoutStopwords = new StringBuilder();
        for (String candidate : text.split(" ")) {
            if (this.m_Stopwords.isStopword(candidate)) {
                continue;
            }
            if (withoutStopwords.length() > 0) {
                withoutStopwords.append(' ');
            }
            withoutStopwords.append(candidate);
        }

        String stemmed = this.m_Stemmer.stemString(withoutStopwords.toString());
        String[] ordered = KEAFilter.sort(stemmed.split(" "));
        return KEAFilter.join(ordered);
    }

    /**
     * Joins the given strings with single spaces.
     *
     * <p>A separator is only inserted once the result built so far is
     * non-empty, so empty elements at the start of the array do not produce
     * leading spaces. The emptiness check is made on the content of the
     * result rather than on its object identity, which would give different
     * output depending on whether an empty element happened to be the
     * interned {@code ""} literal.
     *
     * @param str the strings to join
     * @return the joined string; empty for an empty array
     */
    private static String join(String[] str) {
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < str.length; ++i) {
            if (result.length() > 0) {
                result.append(' ');
            }
            result.append(str[i]);
        }
        return result.toString();
    }

    /**
     * Exchanges two elements of an array in place.
     *
     * @param loc1 index of the first element
     * @param loc2 index of the second element
     * @param a the array to modify
     */
    public static void swap(int loc1, int loc2, String[] a) {
        final String first = a[loc1];
        final String second = a[loc2];
        a[loc2] = first;
        a[loc1] = second;
    }

    /**
     * Sorts the array in place and returns it.
     *
     * <p>Strings are ordered by their upper-cased form; strings whose
     * upper-cased forms are equal are ordered by their natural
     * (case-sensitive) order, so for example {@code "A"} precedes
     * {@code "a"}. Upper-casing uses the default locale.
     *
     * @param a the array to sort; modified in place
     * @return the same array instance, now sorted
     */
    public static String[] sort(String[] a) {
        // Fully qualified names: the file does not import Arrays or Comparator.
        java.util.Arrays.sort(a, new java.util.Comparator<String>() {
            @Override
            public int compare(String left, String right) {
                int byUpperCase = left.toUpperCase().compareTo(right.toUpperCase());
                if (byUpperCase != 0) {
                    return byUpperCase;
                }
                // Tie-break on the original strings.
                return left.compareTo(right);
            }
        });
        return a;
    }

    /**
     * Command-line entry point: runs a {@code KEAFilter} with English
     * stopwords over the files named in the arguments.
     *
     * <p>When the {@code -b} flag is present the filter is run through
     * {@code Filter.batchFilterFile}, otherwise through
     * {@code Filter.filterFile}. Any exception is logged and not rethrown.
     *
     * @param argv the command-line arguments
     */
    public static void main(String[] argv) {
        try {
            if (Utils.getFlag('b', argv)) {
                Filter.batchFilterFile(new KEAFilter(new StopwordsEnglish()), argv);
            } else {
                Filter.filterFile(new KEAFilter(new StopwordsEnglish()), argv);
            }
        } catch (Exception ex) {
            // Log the throwable itself so the stack trace and cause are kept.
            log.error(ex.getMessage(), ex);
        }
    }
}

