/*
 * Decompiled with CFR 0.152.
 */
package weka.attributeSelection;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.ClassifierSubsetEval;
import weka.attributeSelection.HoldOutSubsetEvaluator;
import weka.attributeSelection.LFSMethods;
import weka.attributeSelection.SubsetEvaluator;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.Utils;

/**
 * Attribute-selection search that extends Linear Forward Selection with an
 * interior cross-validation used to choose the size of the final subset.
 *
 * <p>For every interior fold a forward search is run on the training part of
 * the fold; the merit of the best subset of each size is then measured using
 * the subset-size evaluator and averaged over the folds. The subset size with
 * the highest average merit is used as the target size for a final forward
 * search on the complete data.
 *
 * <p>Command line options (see {@link #listOptions()}):
 * <ul>
 *   <li>{@code -I} perform an initial ranking</li>
 *   <li>{@code -K <num>} number of top-ranked attributes to use</li>
 *   <li>{@code -T <0|1>} fixed-set / fixed-width</li>
 *   <li>{@code -S <num>} lookup cache size</li>
 *   <li>{@code -E <evaluator>} subset-size evaluator</li>
 *   <li>{@code -F <num>} number of interior CV folds</li>
 *   <li>{@code -R <num>} seed for the interior CV</li>
 *   <li>{@code -Z} verbose output</li>
 * </ul>
 */
public class SubsetSizeForwardSelection
extends ASSearch
implements OptionHandler {
    /** Search type id: fixed-set. */
    protected static final int TYPE_FIXED_SET = 0;
    /** Search type id: fixed-width. */
    protected static final int TYPE_FIXED_WIDTH = 1;
    /** Tags describing the available search types. */
    public static final Tag[] TAGS_TYPE = new Tag[]{new Tag(TYPE_FIXED_SET, "Fixed-set"), new Tag(TYPE_FIXED_WIDTH, "Fixed-width")};
    /** Whether an initial ranking of the attributes is performed. */
    protected boolean m_performRanking;
    /** Number of top-ranked attributes that are taken into account. */
    protected int m_numUsedAttributes;
    /** One of {@link #TYPE_FIXED_SET} or {@link #TYPE_FIXED_WIDTH}. */
    protected int m_linearSelectionType;
    /** Evaluator used to determine the subset size; {@code null} means "use the evaluator of the final search". */
    private ASEvaluation m_setSizeEval;
    /** Number of interior cross-validation folds. */
    protected int m_numFolds;
    /** Seed for the interior cross-validation. */
    protected int m_seed;
    /** Number of attributes in the data passed to the last search. */
    protected int m_numAttribs;
    /** Number of subsets evaluated by the final search. */
    protected int m_totalEvals;
    /** Whether progress is printed to standard out. */
    protected boolean m_verbose;
    /** Merit of the best subset found by the final search. */
    protected double m_bestMerit;
    /** Lookup cache size, as a multiple of the number of attributes. */
    protected int m_cacheSize;

    /**
     * Creates a search initialised with the default options.
     */
    public SubsetSizeForwardSelection() {
        this.resetOptions();
    }

    /**
     * Returns a description of this search, including its literature reference.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "SubsetSizeForwardSelection:\n\nExtension of LinearForwardSelection. The search performs an interior cross-validation (seed and number of folds can be specified). A LinearForwardSelection is performed on each foldto determine the optimal subset-size (using the given SubsetSizeEvaluator). Finally, a LinearForwardSelection up to the optimal subset-size is performed on the whole data.\n\nFor more information see:\n\n" + this.getTechnicalInformation().toString();
    }

    /**
     * Builds the bibliographic information for this search: the conference
     * paper plus the master's thesis as an additional entry.
     *
     * @return the technical information
     */
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Martin Guetlein and Eibe Frank and Mark Hall");
        result.setValue(TechnicalInformation.Field.YEAR, "2009");
        result.setValue(TechnicalInformation.Field.TITLE, "Large Scale Attribute Selection Using Wrappers");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "Proc IEEE Symposium on Computational Intelligence and Data Mining");
        result.setValue(TechnicalInformation.Field.PAGES, "332-339");
        result.setValue(TechnicalInformation.Field.PUBLISHER, "IEEE");

        TechnicalInformation additional = result.add(TechnicalInformation.Type.MASTERSTHESIS);
        additional.setValue(TechnicalInformation.Field.AUTHOR, "Martin Guetlein");
        additional.setValue(TechnicalInformation.Field.YEAR, "2006");
        additional.setValue(TechnicalInformation.Field.TITLE, "Large Scale Attribute Selection Using Wrappers");
        additional.setValue(TechnicalInformation.Field.SCHOOL, "Albert-Ludwigs-Universitaet");
        additional.setValue(TechnicalInformation.Field.ADDRESS, "Freiburg, Germany");
        return result;
    }

    /**
     * Lists the available options; if the subset-size evaluator is an
     * {@link OptionHandler}, its options are appended.
     *
     * @return an enumeration of {@link Option} objects
     */
    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(9);
        newVector.addElement(new Option("\tPerform initial ranking to select the\n\ttop-ranked attributes.", "I", 0, "-I"));
        newVector.addElement(new Option("\tNumber of top-ranked attributes that are \n\ttaken into account by the search.", "K", 1, "-K <num>"));
        newVector.addElement(new Option("\tType of Linear Forward Selection (default = 0).", "T", 1, "-T <0 = fixed-set | 1 = fixed-width>"));
        newVector.addElement(new Option("\tSize of lookup cache for evaluated subsets.\n\tExpressed as a multiple of the number of\n\tattributes in the data set. (default = 1)", "S", 1, "-S <num>"));
        newVector.addElement(new Option("\tSubset-evaluator used for subset-size determination.-- -M", "E", 1, "-E <subset evaluator>"));
        newVector.addElement(new Option("\tNumber of cross validation folds\n\tfor subset size determination (default = 5).", "F", 1, "-F <num>"));
        newVector.addElement(new Option("\tSeed for cross validation\n\tsubset size determination. (default = 1)", "R", 1, "-R <num>"));
        newVector.addElement(new Option("\tverbose on/off", "Z", 0, "-Z"));
        if (this.m_setSizeEval instanceof OptionHandler) {
            newVector.addElement(new Option("", "", 0, "\nOptions specific to evaluator " + this.m_setSizeEval.getClass().getName() + ":"));
            Enumeration enu = ((OptionHandler) this.m_setSizeEval).listOptions();
            while (enu.hasMoreElements()) {
                newVector.addElement((Option) enu.nextElement());
            }
        }
        return newVector.elements();
    }

    /**
     * Parses the given options. All options are first reset to their
     * defaults. If no {@code -E} option is given the subset-size evaluator is
     * cleared, so that the evaluator of the final search is used instead.
     *
     * @param options the option array
     * @throws Exception if an option value cannot be parsed or is rejected by
     *                   the corresponding setter
     */
    public void setOptions(String[] options) throws Exception {
        this.resetOptions();
        this.setPerformRanking(Utils.getFlag('I', options));

        String optionString = Utils.getOption('K', options);
        if (optionString.length() != 0) {
            this.setNumUsedAttributes(Integer.parseInt(optionString));
        }

        optionString = Utils.getOption('T', options);
        if (optionString.length() != 0) {
            this.setType(new SelectedTag(Integer.parseInt(optionString), TAGS_TYPE));
        } else {
            this.setType(new SelectedTag(TYPE_FIXED_SET, TAGS_TYPE));
        }

        optionString = Utils.getOption('S', options);
        if (optionString.length() != 0) {
            this.setLookupCacheSize(Integer.parseInt(optionString));
        }

        optionString = Utils.getOption('E', options);
        if (optionString.length() == 0) {
            System.out.println("No subset size evaluator given, using evaluator that is used for final search.");
            this.m_setSizeEval = null;
        } else {
            this.setSubsetSizeEvaluator(ASEvaluation.forName(optionString, Utils.partitionOptions(options)));
        }

        optionString = Utils.getOption('F', options);
        if (optionString.length() != 0) {
            this.setNumSubsetSizeCVFolds(Integer.parseInt(optionString));
        }

        optionString = Utils.getOption('R', options);
        if (optionString.length() != 0) {
            this.setSeed(Integer.parseInt(optionString));
        }

        this.m_verbose = Utils.getFlag('Z', options);
    }

    /**
     * Sets the lookup cache size. Negative values are ignored and leave the
     * current setting untouched.
     *
     * @param size the cache size, as a multiple of the number of attributes
     */
    public void setLookupCacheSize(int size) {
        if (size >= 0) {
            this.m_cacheSize = size;
        }
    }

    /**
     * @return the lookup cache size
     */
    public int getLookupCacheSize() {
        return this.m_cacheSize;
    }

    /**
     * @return tip text for the lookup cache size property
     */
    public String lookupCacheSizeTipText() {
        return "Set the maximum size of the lookup cache of evaluated subsets. This is expressed as a multiplier of the number of attributes in the data set. (default = 1).";
    }

    /**
     * @return tip text for the perform-ranking property
     */
    public String performRankingTipText() {
        return "Perform initial ranking to select top-ranked attributes.";
    }

    /**
     * @param b {@code true} to perform an initial ranking of the attributes
     */
    public void setPerformRanking(boolean b) {
        this.m_performRanking = b;
    }

    /**
     * @return whether an initial ranking is performed
     */
    public boolean getPerformRanking() {
        return this.m_performRanking;
    }

    /**
     * @return tip text for the number-of-used-attributes property
     */
    public String numUsedAttributesTipText() {
        return "Set the amount of top-ranked attributes that are taken into account by the search process.";
    }

    /**
     * Sets the number of top-ranked attributes used by the search.
     *
     * @param k the number of attributes; must be at least 2
     * @throws Exception if {@code k < 2}
     */
    public void setNumUsedAttributes(int k) throws Exception {
        if (k < 2) {
            throw new Exception("Value of -K must be >= 2.");
        }
        this.m_numUsedAttributes = k;
    }

    /**
     * @return the number of top-ranked attributes used by the search
     */
    public int getNumUsedAttributes() {
        return this.m_numUsedAttributes;
    }

    /**
     * @return tip text for the type property
     */
    public String typeTipText() {
        return "Set the type of the search.";
    }

    /**
     * Sets the search type. Tags that do not belong to {@link #TAGS_TYPE}
     * are ignored.
     *
     * @param t the selected type tag
     */
    public void setType(SelectedTag t) {
        if (t.getTags() == TAGS_TYPE) {
            this.m_linearSelectionType = t.getSelectedTag().getID();
        }
    }

    /**
     * @return the current search type as a tag over {@link #TAGS_TYPE}
     */
    public SelectedTag getType() {
        return new SelectedTag(this.m_linearSelectionType, TAGS_TYPE);
    }

    /**
     * @return tip text for the subset-size evaluator property
     */
    public String subsetSizeEvaluatorTipText() {
        return "Subset evaluator to use for subset size determination.";
    }

    /**
     * Sets the evaluator used for subset-size determination.
     *
     * @param eval a subset evaluator, or {@code null} to fall back to the
     *             evaluator that is passed to {@link #search}
     * @throws Exception if {@code eval} is non-null and not a {@link SubsetEvaluator}
     */
    public void setSubsetSizeEvaluator(ASEvaluation eval) throws Exception {
        // null is a supported state (setOptions and search already handle it),
        // so accept it here instead of failing with a NullPointerException.
        if (eval != null && !(eval instanceof SubsetEvaluator)) {
            throw new Exception(eval.getClass().getName() + " is no subset evaluator.");
        }
        this.m_setSizeEval = eval;
    }

    /**
     * @return the subset-size evaluator; may be {@code null}
     */
    public ASEvaluation getSubsetSizeEvaluator() {
        return this.m_setSizeEval;
    }

    /**
     * @return tip text for the number-of-folds property
     */
    public String numSubsetSizeCVFoldsTipText() {
        return "Number of cross validation folds for subset size determination";
    }

    /**
     * @param f the number of interior cross-validation folds
     */
    public void setNumSubsetSizeCVFolds(int f) {
        this.m_numFolds = f;
    }

    /**
     * @return the number of interior cross-validation folds
     */
    public int getNumSubsetSizeCVFolds() {
        return this.m_numFolds;
    }

    /**
     * @return tip text for the seed property
     */
    public String seedTipText() {
        return "Seed for cross validation subset size determination. (default = 1)";
    }

    /**
     * @param s the seed for the interior cross-validation
     */
    public void setSeed(int s) {
        this.m_seed = s;
    }

    /**
     * @return the seed for the interior cross-validation
     */
    public int getSeed() {
        return this.m_seed;
    }

    /**
     * @return tip text for the verbose property
     */
    public String verboseTipText() {
        return "Turn on verbose output for monitoring the search's progress.";
    }

    /**
     * @param b {@code true} to print progress information
     */
    public void setVerbose(boolean b) {
        this.m_verbose = b;
    }

    /**
     * @return whether progress information is printed
     */
    public boolean getVerbose() {
        return this.m_verbose;
    }

    /**
     * Returns the current settings in a form that {@link #setOptions} parses
     * back to the same configuration: {@code -S} carries the lookup cache
     * size, {@code -R} the seed, and {@code -Z} is emitted as a bare flag
     * only when verbose output is enabled.
     *
     * @return the option array; unused trailing slots hold empty strings
     */
    public String[] getOptions() {
        String[] evaluatorOptions = new String[]{};
        if (this.m_setSizeEval instanceof OptionHandler) {
            evaluatorOptions = ((OptionHandler) this.m_setSizeEval).getOptions();
        }
        // Worst case: -I, -K x, -T x, -S x, -F x, -R x, -Z, -E x, -- = 15 slots.
        String[] options = new String[15 + evaluatorOptions.length];
        int current = 0;
        if (this.m_performRanking) {
            options[current++] = "-I";
        }
        options[current++] = "-K";
        options[current++] = "" + this.m_numUsedAttributes;
        options[current++] = "-T";
        options[current++] = "" + this.m_linearSelectionType;
        options[current++] = "-S";
        options[current++] = "" + this.m_cacheSize;
        options[current++] = "-F";
        options[current++] = "" + this.m_numFolds;
        options[current++] = "-R";
        options[current++] = "" + this.m_seed;
        if (this.m_verbose) {
            options[current++] = "-Z";
        }
        if (this.m_setSizeEval != null) {
            options[current++] = "-E";
            options[current++] = this.m_setSizeEval.getClass().getName();
        }
        options[current++] = "--";
        System.arraycopy(evaluatorOptions, 0, options, current, evaluatorOptions.length);
        current += evaluatorOptions.length;
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Describes the search configuration and the result of the last search.
     *
     * @return a multi-line description
     */
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("\tSubset Size Forward Selection.\n");
        text.append("\tLinear Forward Selection Type: ");
        if (this.m_linearSelectionType == TYPE_FIXED_SET) {
            text.append("fixed-set\n");
        } else {
            text.append("fixed-width\n");
        }
        text.append("\tNumber of top-ranked attributes that are used: " + this.m_numUsedAttributes + "\n");
        text.append("\tNumber of cross validation folds for subset size determination: " + this.m_numFolds + "\n");
        text.append("\tSeed for cross validation subset size determination: " + this.m_seed + "\n");
        text.append("\tTotal number of subsets evaluated: " + this.m_totalEvals + "\n");
        text.append("\tMerit of best subset found: " + Utils.doubleToString(Math.abs(this.m_bestMerit), 8, 3) + "\n");
        return text.toString();
    }

    /**
     * Runs the subset-size forward selection.
     *
     * <p>Side effects visible in this method: if no subset-size evaluator is
     * set, {@code ASEval} is stored as the subset-size evaluator; if the
     * configured number of used attributes exceeds the number of attributes
     * in {@code data}, it is lowered to that number.
     *
     * @param ASEval the evaluator for the final search; must be a {@link SubsetEvaluator}
     * @param data   the data to select attributes from
     * @return the indices of the selected attributes, in ascending order
     * @throws Exception if {@code ASEval} is not a subset evaluator, or if
     *                   building an evaluator or searching fails
     */
    public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
        this.m_totalEvals = 0;
        if (!(ASEval instanceof SubsetEvaluator)) {
            throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
        }
        if (this.m_setSizeEval == null) {
            this.m_setSizeEval = ASEval;
        }
        SubsetEvaluator finalEvaluator = (SubsetEvaluator) ASEval;
        boolean fixedWidth = this.m_linearSelectionType == TYPE_FIXED_WIDTH;

        this.m_numAttribs = data.numAttributes();
        if (this.m_numUsedAttributes > this.m_numAttribs) {
            System.out.println("Decreasing number of top-ranked attributes to total number of attributes: " + data.numAttributes());
            this.m_numUsedAttributes = this.m_numAttribs;
        }

        // Split a shuffled (and, for a nominal class, stratified) copy of the
        // data into the interior cross-validation folds.
        Instances[] trainData = new Instances[this.m_numFolds];
        Instances[] testData = new Instances[this.m_numFolds];
        LFSMethods[] searchResults = new LFSMethods[this.m_numFolds];
        Random random = new Random(this.m_seed);
        Instances dataCopy = new Instances(data);
        dataCopy.randomize(random);
        if (dataCopy.classAttribute().isNominal()) {
            dataCopy.stratify(this.m_numFolds);
        }
        for (int fold = 0; fold < this.m_numFolds; ++fold) {
            trainData[fold] = dataCopy.trainCV(this.m_numFolds, fold, random);
            testData[fold] = dataCopy.testCV(this.m_numFolds, fold);
        }

        // Either rank the attributes on the full data or keep their natural order.
        LFSMethods LSF = new LFSMethods();
        int[] ranking;
        if (this.m_performRanking) {
            ASEval.buildEvaluator(data);
            ranking = LSF.rankAttributes(data, finalEvaluator, this.m_verbose);
        } else {
            ranking = new int[this.m_numAttribs];
            for (int i = 0; i < ranking.length; ++i) {
                ranking[i] = i;
            }
        }

        // First pass: a forward search per fold, recording the largest best-subset size.
        // NOTE(review): the -1 argument presumably means "no forced result size" - confirm against LFSMethods.
        int maxSubsetSize = 0;
        for (int fold = 0; fold < this.m_numFolds; ++fold) {
            if (this.m_verbose) {
                System.out.println("perform search on internal fold: " + (fold + 1) + "/" + this.m_numFolds);
            }
            this.m_setSizeEval.buildEvaluator(trainData[fold]);
            searchResults[fold] = new LFSMethods();
            searchResults[fold].forwardSearch(this.m_cacheSize, new BitSet(this.m_numAttribs), ranking, this.m_numUsedAttributes, fixedWidth, 1, -1, trainData[fold], (SubsetEvaluator) this.m_setSizeEval, this.m_verbose);
            maxSubsetSize = Math.max(maxSubsetSize, searchResults[fold].getBestGroup().cardinality());
        }

        // Second pass: continue the folds whose best subset is smaller than
        // maxSubsetSize, starting from their current best group.
        if (this.m_verbose) {
            System.out.println("continue searches on internal folds to maxSubsetSize (" + maxSubsetSize + ")");
        }
        for (int fold = 0; fold < this.m_numFolds; ++fold) {
            if (this.m_verbose) {
                System.out.print("perform search on internal fold: " + (fold + 1) + "/" + this.m_numFolds + " with starting set ");
                LFSMethods.printGroup(searchResults[fold].getBestGroup(), trainData[fold].numAttributes());
            }
            if (searchResults[fold].getBestGroup().cardinality() >= maxSubsetSize) {
                continue;
            }
            this.m_setSizeEval.buildEvaluator(trainData[fold]);
            searchResults[fold].forwardSearch(this.m_cacheSize, searchResults[fold].getBestGroup(), ranking, this.m_numUsedAttributes, fixedWidth, 1, maxSubsetSize, trainData[fold], (SubsetEvaluator) this.m_setSizeEval, this.m_verbose);
        }

        // Measure, per fold and subset size, the merit of the best subset of that size.
        // Hold-out evaluators are built on the training part and evaluated on the test
        // part; all other evaluators are built on the test part directly.
        double[][] testMerit = new double[this.m_numFolds][maxSubsetSize + 1];
        boolean holdOut = this.m_setSizeEval instanceof HoldOutSubsetEvaluator;
        for (int fold = 0; fold < this.m_numFolds; ++fold) {
            for (int size = 1; size <= maxSubsetSize; ++size) {
                BitSet group = searchResults[fold].getBestGroupOfSize(size);
                if (holdOut) {
                    this.m_setSizeEval.buildEvaluator(trainData[fold]);
                    testMerit[fold][size] = ((HoldOutSubsetEvaluator) this.m_setSizeEval).evaluateSubset(group, testData[fold]);
                } else {
                    this.m_setSizeEval.buildEvaluator(testData[fold]);
                    testMerit[fold][size] = ((SubsetEvaluator) this.m_setSizeEval).evaluateSubset(group);
                }
            }
        }

        // Pick the subset size with the highest average merit (running mean over
        // the folds); ties keep the smaller size. Stays -1 when maxSubsetSize is 0.
        double[] avgTestMerit = new double[maxSubsetSize + 1];
        int finalSubsetSize = -1;
        for (int size = 1; size <= maxSubsetSize; ++size) {
            for (int fold = 0; fold < this.m_numFolds; ++fold) {
                avgTestMerit[size] = (avgTestMerit[size] * (double) fold + testMerit[fold][size]) / (double) (fold + 1);
            }
            if (finalSubsetSize == -1 || avgTestMerit[size] > avgTestMerit[finalSubsetSize]) {
                finalSubsetSize = size;
            }
            if (this.m_verbose) {
                System.out.println("average merit for subset-size " + size + ": " + avgTestMerit[size]);
            }
        }

        // Final search on the complete data with the chosen subset size.
        if (this.m_verbose) {
            System.out.println("performing final forward selection to subset-size: " + finalSubsetSize);
        }
        ASEval.buildEvaluator(data);
        LSF.forwardSearch(this.m_cacheSize, new BitSet(this.m_numAttribs), ranking, this.m_numUsedAttributes, fixedWidth, 1, finalSubsetSize, data, finalEvaluator, this.m_verbose);
        this.m_totalEvals = LSF.getNumEvalsTotal();
        this.m_bestMerit = LSF.getBestMerit();
        return this.attributeList(LSF.getBestGroup());
    }

    /**
     * Restores the default option values: ranking on, 50 used attributes,
     * fixed-set type, a {@link ClassifierSubsetEval} as subset-size
     * evaluator, 5 folds, seed 1, cache size 1 and verbose off.
     */
    protected void resetOptions() {
        this.m_performRanking = true;
        this.m_numUsedAttributes = 50;
        this.m_linearSelectionType = TYPE_FIXED_SET;
        this.m_setSizeEval = new ClassifierSubsetEval();
        this.m_numFolds = 5;
        this.m_seed = 1;
        this.m_totalEvals = 0;
        this.m_cacheSize = 1;
        this.m_verbose = false;
    }

    /**
     * Converts a subset bit set into an array of attribute indices. Only
     * bits below the number of attributes seen by the last search are
     * considered.
     *
     * @param group the subset, one bit per attribute
     * @return the indices of the set bits, in ascending order
     */
    protected int[] attributeList(BitSet group) {
        int count = 0;
        for (int i = 0; i < this.m_numAttribs; ++i) {
            if (group.get(i)) {
                ++count;
            }
        }
        int[] list = new int[count];
        int next = 0;
        for (int i = 0; i < this.m_numAttribs; ++i) {
            if (group.get(i)) {
                list[next++] = i;
            }
        }
        return list;
    }

    /**
     * @return the revision string
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 5604 $");
    }
}

