/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.supervised.attribute;

import java.util.Enumeration;
import java.util.Vector;
import weka.core.AbstractInstance;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.ContingencyTables;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.SpecialFunctions;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

public class Discretize
extends Filter
implements SupervisedFilter,
OptionHandler,
WeightedInstancesHandler,
TechnicalInformationHandler {
    /** Serialization version identifier for this class. */
    static final long serialVersionUID = -3141006402280129097L;
    /** Attribute indices selected for discretization (the selection may be inverted). */
    protected Range m_DiscretizeCols = new Range();
    /**
     * Cut points per input attribute, indexed by attribute position. The whole
     * array is {@code null} until cut points have been calculated; an individual
     * row is {@code null} when the attribute is not discretized or no cut point
     * was accepted for it.
     */
    protected double[][] m_CutPoints = null;
    /** When true, each cut point yields its own two-valued output attribute. */
    protected boolean m_MakeBinary = false;
    /** When true, output labels are bin numbers ("BXofY") instead of value ranges. */
    protected boolean m_UseBinNumbers = false;
    /**
     * When true, the MDL test is given the number of candidate boundaries actually
     * evaluated; when false it is given (subset size - 1).
     */
    protected boolean m_UseBetterEncoding = false;
    /** When true, Kononenko's MDL test is used instead of Fayyad and Irani's. */
    protected boolean m_UseKononenko = false;

    /**
     * Creates the filter with every attribute ("first-last") selected as a
     * candidate for discretization.
     */
    public Discretize() {
        setAttributeIndices("first-last");
    }

    /**
     * Lists the command-line options understood by this filter.
     *
     * @return an enumeration of the {@link Option} descriptions for
     *         -R, -V, -D, -Y, -E and -K
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(6);
        // The documented default matches what the constructor and setOptions apply
        // when -R is absent: the full "first-last" range.
        newVector.addElement(new Option("\tSpecifies list of columns to Discretize. First and last are valid indexes.\n\t(default first-last)", "R", 1, "-R <col1,col2-col4,...>"));
        newVector.addElement(new Option("\tInvert matching sense of column indexes.", "V", 0, "-V"));
        newVector.addElement(new Option("\tOutput binary attributes for discretized attributes.", "D", 0, "-D"));
        newVector.addElement(new Option("\tUse bin numbers rather than ranges for discretized attributes.", "Y", 0, "-Y"));
        newVector.addElement(new Option("\tUse better encoding of split point for MDL.", "E", 0, "-E"));
        newVector.addElement(new Option("\tUse Kononenko's MDL criterion.", "K", 0, "-K"));
        return newVector.elements();
    }

    /**
     * Applies the given command-line options: the flags -D, -Y, -E, -K and -V,
     * and the column list -R (falling back to "first-last" when it is absent or
     * empty). If an input format is already set, it is applied again afterwards.
     *
     * @param options the option strings to parse
     * @throws Exception if option parsing or re-applying the input format fails
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        setMakeBinary(Utils.getFlag('D', options));
        setUseBinNumbers(Utils.getFlag('Y', options));
        setUseBetterEncoding(Utils.getFlag('E', options));
        setUseKononenko(Utils.getFlag('K', options));
        setInvertSelection(Utils.getFlag('V', options));

        final String columns = Utils.getOption('R', options);
        setAttributeIndices(columns.length() == 0 ? "first-last" : columns);

        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }
    }

    /**
     * Returns the current settings as option strings.
     *
     * @return a 12-element array holding the active options first, with all
     *         remaining slots set to the empty string
     */
    @Override
    public String[] getOptions() {
        final String[] result = new String[12];
        // Start with every slot empty; the active options overwrite the leading slots.
        java.util.Arrays.fill(result, "");

        int next = 0;
        if (getMakeBinary()) {
            result[next++] = "-D";
        }
        if (getUseBinNumbers()) {
            result[next++] = "-Y";
        }
        if (getUseBetterEncoding()) {
            result[next++] = "-E";
        }
        if (getUseKononenko()) {
            result[next++] = "-K";
        }
        if (getInvertSelection()) {
            result[next++] = "-V";
        }
        if (!getAttributeIndices().equals("")) {
            result[next++] = "-R";
            result[next++] = getAttributeIndices();
        }
        return result;
    }

    /**
     * Describes what data this filter accepts: starting from the superclass
     * capabilities, everything is disabled and then all attribute types,
     * missing values and a nominal class are enabled.
     *
     * @return the capabilities of this filter
     */
    @Override
    public Capabilities getCapabilities() {
        final Capabilities caps = super.getCapabilities();
        caps.disableAll();
        caps.enableAllAttributes();
        caps.enable(Capabilities.Capability.MISSING_VALUES);
        caps.enable(Capabilities.Capability.NOMINAL_CLASS);
        return caps;
    }

    /**
     * Sets the input format, bounds the column selection by the last attribute
     * index and discards any previously computed cut points.
     *
     * @param instanceInfo header describing the incoming instances
     * @return always {@code false}
     * @throws Exception if the superclass rejects the format
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);
        final int lastAttribute = instanceInfo.numAttributes() - 1;
        m_DiscretizeCols.setUpper(lastAttribute);
        m_CutPoints = null;
        return false;
    }

    /**
     * Accepts one instance. If cut points are already known the instance is
     * converted immediately; otherwise it is buffered.
     *
     * @param instance the instance to process
     * @return {@code true} if the instance was converted right away,
     *         {@code false} if it was only buffered
     * @throws IllegalStateException if no input format has been set
     */
    @Override
    public boolean input(Instance instance) {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }

        final boolean cutPointsKnown = m_CutPoints != null;
        if (cutPointsKnown) {
            convertInstance(instance);
        } else {
            bufferInput(instance);
        }
        return cutPointsKnown;
    }

    /**
     * Signals the end of a batch. On the first batch (no cut points yet) the cut
     * points are calculated, the output format is set and every instance held by
     * the input format is converted. The input is then flushed and the next
     * batch is marked as new.
     *
     * @return {@code true} if output instances are pending
     * @throws IllegalStateException if no input format has been set
     */
    @Override
    public boolean batchFinished() {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_CutPoints == null) {
            calculateCutPoints();
            setOutputFormat();
            int pending = 0;
            while (pending < getInputFormat().numInstances()) {
                convertInstance(getInputFormat().instance(pending));
                pending++;
            }
        }
        flushInput();
        m_NewBatch = true;
        return numPendingOutput() != 0;
    }

    /**
     * Returns a description of this filter followed by its literature references.
     *
     * @return the descriptive text
     */
    public String globalInfo() {
        final String description =
            "An instance filter that discretizes a range of numeric attributes in the dataset into nominal attributes."
            + " Discretization is by Fayyad & Irani's MDL method (the default).\n\n"
            + "For more information, see:\n\n";
        return description + getTechnicalInformation().toString();
    }

    /**
     * Returns the bibliographic references for the two MDL criteria this filter
     * offers: Fayyad and Irani (1993) as the primary entry and Kononenko (1995)
     * as an additional entry.
     *
     * @return the technical information describing both publications
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Usama M. Fayyad and Keki B. Irani");
        // Title and conference names corrected ("continuous-valued", "Artificial").
        result.setValue(TechnicalInformation.Field.TITLE, "Multi-interval discretization of continuous-valued attributes for classification learning");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "Thirteenth International Joint Conference on Artificial Intelligence");
        result.setValue(TechnicalInformation.Field.YEAR, "1993");
        result.setValue(TechnicalInformation.Field.VOLUME, "2");
        result.setValue(TechnicalInformation.Field.PAGES, "1022-1027");
        result.setValue(TechnicalInformation.Field.PUBLISHER, "Morgan Kaufmann Publishers");
        TechnicalInformation additional = result.add(TechnicalInformation.Type.INPROCEEDINGS);
        additional.setValue(TechnicalInformation.Field.AUTHOR, "Igor Kononenko");
        additional.setValue(TechnicalInformation.Field.TITLE, "On Biases in Estimating Multi-Valued Attributes");
        additional.setValue(TechnicalInformation.Field.BOOKTITLE, "14th International Joint Conference on Artificial Intelligence");
        additional.setValue(TechnicalInformation.Field.YEAR, "1995");
        additional.setValue(TechnicalInformation.Field.PAGES, "1034-1040");
        additional.setValue(TechnicalInformation.Field.PS, "http://ai.fri.uni-lj.si/papers/kononenko95-ijcai.ps.gz");
        return result;
    }

    /**
     * Returns the tip text for the makeBinary property.
     *
     * @return a short description of the property
     */
    public String makeBinaryTipText() {
        return "Make resulting attributes binary.";
    }

    /**
     * Reports whether binary output attributes are produced.
     *
     * @return the current makeBinary setting
     */
    public boolean getMakeBinary() {
        return m_MakeBinary;
    }

    /**
     * Chooses whether binary output attributes are produced.
     *
     * @param makeBinary the new makeBinary setting
     */
    public void setMakeBinary(boolean makeBinary) {
        m_MakeBinary = makeBinary;
    }

    /**
     * Returns the tip text for the useBinNumbers property.
     *
     * @return a short description of the property
     */
    public String useBinNumbersTipText() {
        // Removed the duplicated word ("for for") from the original text.
        return "Use bin numbers (eg BXofY) rather than ranges for discretized attributes";
    }

    /**
     * Reports whether bin numbers are used as labels instead of ranges.
     *
     * @return the current useBinNumbers setting
     */
    public boolean getUseBinNumbers() {
        return m_UseBinNumbers;
    }

    /**
     * Chooses whether bin numbers are used as labels instead of ranges.
     *
     * @param useBinNumbers the new useBinNumbers setting
     */
    public void setUseBinNumbers(boolean useBinNumbers) {
        m_UseBinNumbers = useBinNumbers;
    }

    /**
     * Returns the tip text for the useKononenko property.
     *
     * @return a short description of the property
     */
    public String useKononenkoTipText() {
        return "Use Kononenko's MDL criterion."
            + " If set to false uses the Fayyad & Irani criterion.";
    }

    /**
     * Reports whether Kononenko's MDL criterion is selected.
     *
     * @return the current useKononenko setting
     */
    public boolean getUseKononenko() {
        return m_UseKononenko;
    }

    /**
     * Chooses whether Kononenko's MDL criterion is used.
     *
     * @param useKon the new useKononenko setting
     */
    public void setUseKononenko(boolean useKon) {
        m_UseKononenko = useKon;
    }

    /**
     * Returns the tip text for the useBetterEncoding property.
     *
     * @return a short description of the property
     */
    public String useBetterEncodingTipText() {
        return "Uses a more efficient split point encoding.";
    }

    /**
     * Reports whether the alternative split point encoding is selected.
     *
     * @return the current useBetterEncoding setting
     */
    public boolean getUseBetterEncoding() {
        return m_UseBetterEncoding;
    }

    /**
     * Chooses whether the alternative split point encoding is used.
     *
     * @param useBetterEncoding the new useBetterEncoding setting
     */
    public void setUseBetterEncoding(boolean useBetterEncoding) {
        m_UseBetterEncoding = useBetterEncoding;
    }

    /**
     * Returns the tip text for the invertSelection property.
     *
     * @return a short description of the property
     */
    public String invertSelectionTipText() {
        return "Set attribute selection mode."
            + " If false, only selected (numeric) attributes in the range will be discretized;"
            + " if true, only non-selected attributes will be discretized.";
    }

    /**
     * Reports whether the column selection is inverted.
     *
     * @return the invert flag of the underlying column range
     */
    public boolean getInvertSelection() {
        return m_DiscretizeCols.getInvert();
    }

    /**
     * Sets whether the column selection is inverted.
     *
     * @param invert the new invert flag for the underlying column range
     */
    public void setInvertSelection(boolean invert) {
        m_DiscretizeCols.setInvert(invert);
    }

    /**
     * Returns the tip text for the attributeIndices property.
     *
     * @return a short description of the property
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on."
            + " This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values."
            + " Specify an inclusive range with \"-\"."
            + " E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * Returns the current column selection as a range string.
     *
     * @return the range list held by the underlying column range
     */
    public String getAttributeIndices() {
        return m_DiscretizeCols.getRanges();
    }

    /**
     * Sets the column selection from a range string.
     *
     * @param rangeList the range list to hand to the underlying column range
     */
    public void setAttributeIndices(String rangeList) {
        m_DiscretizeCols.setRanges(rangeList);
    }

    /**
     * Sets the column selection from an array of attribute indices by first
     * converting the array to a range string.
     *
     * @param attributes the attribute indices to select
     */
    public void setAttributeIndicesArray(int[] attributes) {
        final String asRangeList = Range.indicesToRangeList(attributes);
        setAttributeIndices(asRangeList);
    }

    /**
     * Returns the cut points stored for one attribute.
     *
     * @param attributeIndex index of the attribute
     * @return the stored cut-point array for that attribute (not a copy), or
     *         {@code null} if no cut points have been calculated yet or none
     *         are stored for that attribute
     */
    public double[] getCutPoints(int attributeIndex) {
        return m_CutPoints == null ? null : m_CutPoints[attributeIndex];
    }

    /**
     * Returns the bin ranges of one attribute as a comma-separated string.
     *
     * @param attributeIndex index of the attribute
     * @return {@code null} if no cut points have been calculated yet, "All" if
     *         none are stored for the attribute, otherwise one range per bin
     *         joined by commas
     */
    public String getBinRangesString(int attributeIndex) {
        if (m_CutPoints == null) {
            return null;
        }
        final double[] cuts = m_CutPoints[attributeIndex];
        if (cuts == null) {
            return "All";
        }
        final StringBuilder ranges = new StringBuilder();
        // k cut points delimit k + 1 bins, hence the inclusive upper bound.
        for (int bin = 0; bin <= cuts.length; bin++) {
            if (bin > 0) {
                ranges.append(',');
            }
            ranges.append(Discretize.binRangeString(cuts, bin));
        }
        return ranges.toString();
    }

    /**
     * Formats the value range of bin {@code j}: "(-inf-c0]" for the first bin,
     * "(c[n-1]-inf)" for the last bin and "(c[j-1]-c[j]]" for bins in between,
     * with cut points printed to six decimal places.
     *
     * @param cutPoints the cut points delimiting the bins; must not be null
     * @param j the bin index, between 0 and {@code cutPoints.length} inclusive
     * @return the formatted range
     */
    private static String binRangeString(double[] cutPoints, int j) {
        assert (cutPoints != null);
        final int lastBin = cutPoints.length;
        assert (0 <= j && j <= lastBin);
        if (j == 0) {
            return "(-inf-" + Utils.doubleToString(cutPoints[0], 6) + "]";
        }
        final String lowerBound = Utils.doubleToString(cutPoints[j - 1], 6);
        if (j == lastBin) {
            return "(" + lowerBound + "-inf)";
        }
        return "(" + lowerBound + "-" + Utils.doubleToString(cutPoints[j], 6) + "]";
    }

    /**
     * Allocates the cut-point table and fills in the row of every selected
     * numeric attribute, visiting attributes from last to first. A working copy
     * of the input data is created once, when the first such attribute is found.
     */
    protected void calculateCutPoints() {
        m_CutPoints = new double[getInputFormat().numAttributes()][];
        Instances workingCopy = null;
        for (int att = getInputFormat().numAttributes() - 1; att >= 0; att--) {
            final boolean selectedNumeric =
                m_DiscretizeCols.isInRange(att) && getInputFormat().attribute(att).isNumeric();
            if (selectedNumeric) {
                if (workingCopy == null) {
                    workingCopy = new Instances(getInputFormat());
                }
                calculateCutPointsByMDL(att, workingCopy);
            }
        }
    }

    /**
     * Sorts the data on one attribute and stores that attribute's cut points,
     * computed over the instances that precede the first missing value.
     *
     * @param index index of the attribute to discretize
     * @param data the instances to sort and analyse (sorted in place)
     */
    protected void calculateCutPointsByMDL(int index, Instances data) {
        data.sort(data.attribute(index));
        // Advance to the first instance whose value is missing (or the end).
        int firstMissing = 0;
        while (firstMissing < data.numInstances() && !data.instance(firstMissing).isMissing(index)) {
            firstMissing++;
        }
        m_CutPoints[index] = cutPointsForSubset(data, index, 0, firstMissing);
    }

    /**
     * Kononenko's MDL acceptance test for a candidate split. A cost is computed
     * for the unsplit class counts and for the per-side counts of the split
     * (plus log2 of the number of cut points); the split passes when the unsplit
     * cost is larger.
     *
     * @param priorCounts class counts before splitting
     * @param bestCounts class counts for each side of the split
     * @param numInstances total weight of the instances considered
     * @param numCutPoints number of cut points to encode
     * @return {@code true} if the split is accepted
     */
    private boolean KononenkosMDL(double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) {
        // Only classes with a positive count take part.
        int classesPresent = 0;
        for (double count : priorCounts) {
            if (count > 0.0) {
                classesPresent++;
            }
        }

        final double distPrior = SpecialFunctions.log2Binomial(numInstances + (double)classesPresent - 1.0, classesPresent - 1);
        final double instPrior = SpecialFunctions.log2Multinomial(numInstances, priorCounts);
        final double before = instPrior + distPrior;

        double distAfter = 0.0;
        double instAfter = 0.0;
        for (double[] sideCounts : bestCounts) {
            final double sideTotal = Utils.sum(sideCounts);
            distAfter += SpecialFunctions.log2Binomial(sideTotal + (double)classesPresent - 1.0, classesPresent - 1);
            instAfter += SpecialFunctions.log2Multinomial(sideTotal, sideCounts);
        }
        final double after = Utils.log2(numCutPoints) + distAfter + instAfter;

        return before > after;
    }

    /**
     * Fayyad and Irani's MDL acceptance test for a candidate binary split. The
     * split passes when the entropy gain exceeds
     * {@code (log2(numCutPoints) + delta) / numInstances}, where delta is built
     * from the number of classes present overall and on each side, and from the
     * corresponding entropies.
     *
     * @param priorCounts class counts before splitting
     * @param bestCounts class counts for the left (row 0) and right (row 1) side
     * @param numInstances total weight of the instances considered
     * @param numCutPoints number of cut points to encode
     * @return {@code true} if the split is accepted
     */
    private boolean FayyadAndIranisMDL(double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) {
        final double priorEntropy = ContingencyTables.entropy(priorCounts);
        final double splitEntropy = ContingencyTables.entropyConditionedOnRows(bestCounts);
        final double gain = priorEntropy - splitEntropy;

        int classesTotal = 0;
        for (double count : priorCounts) {
            if (count > 0.0) {
                classesTotal++;
            }
        }
        int classesLeft = 0;
        for (double count : bestCounts[0]) {
            if (count > 0.0) {
                classesLeft++;
            }
        }
        int classesRight = 0;
        for (double count : bestCounts[1]) {
            if (count > 0.0) {
                classesRight++;
            }
        }

        final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
        final double entropyRight = ContingencyTables.entropy(bestCounts[1]);
        final double entropyTerm = (double)classesTotal * priorEntropy - (double)classesRight * entropyRight - (double)classesLeft * entropyLeft;
        final double delta = Utils.log2(Math.pow(3.0, classesTotal) - 2.0) - entropyTerm;
        final double threshold = (Utils.log2(numCutPoints) + delta) / numInstances;
        return gain > threshold;
    }

    /**
     * Recursively selects cut points for the instances in
     * {@code [first, lastPlusOne)}. The caller is expected to have sorted
     * {@code instances} on {@code attIndex}: candidate boundaries are taken only
     * between neighbours whose values strictly increase.
     *
     * <p>The boundary with the lowest class entropy is chosen. If it passes the
     * configured MDL test, both halves are processed recursively and the results
     * are merged in ascending order.
     *
     * @param instances the sorted instances
     * @param attIndex index of the attribute being discretized
     * @param first index of the first instance of the subset (inclusive)
     * @param lastPlusOne index just past the last instance of the subset
     * @return the accepted cut points in ascending order, or {@code null} if the
     *         subset has fewer than two instances, no boundary lowers the
     *         entropy, or the MDL test rejects the best boundary
     */
    private double[] cutPointsForSubset(Instances instances, int attIndex, int first, int lastPlusOne) {
        // A subset with fewer than two instances cannot be split.
        if (lastPlusOne - first < 2) {
            return null;
        }
        final int numClasses = instances.numClasses();

        // Initially every instance is on the "right" side (counts[1]).
        // The total weight is accumulated as a double: an int accumulator would
        // truncate after every addition and lose fractional instance weights.
        double numInstances = 0.0;
        double[][] counts = new double[2][numClasses];
        for (int i = first; i < lastPlusOne; ++i) {
            Instance inst = instances.instance(i);
            numInstances += inst.weight();
            counts[1][(int)inst.classValue()] += inst.weight();
        }
        double[] priorCounts = new double[numClasses];
        System.arraycopy(counts[1], 0, priorCounts, 0, numClasses);
        final double priorEntropy = ContingencyTables.entropy(priorCounts);

        // Move instances one by one to the "left" side and evaluate each boundary.
        double bestEntropy = priorEntropy;
        double bestCutPoint = -1.0;
        int bestIndex = -1;
        int numCutPoints = 0;
        double[][] bestCounts = new double[2][numClasses];
        for (int i = first; i < lastPlusOne - 1; ++i) {
            Instance current = instances.instance(i);
            int classIdx = (int)current.classValue();
            counts[0][classIdx] += current.weight();
            counts[1][classIdx] -= current.weight();

            double lower = current.value(attIndex);
            double upper = instances.instance(i + 1).value(attIndex);
            if (!(lower < upper)) {
                // Equal neighbours do not form a boundary.
                continue;
            }
            double currentEntropy = ContingencyTables.entropyConditionedOnRows(counts);
            if (currentEntropy < bestEntropy) {
                bestCutPoint = (lower + upper) / 2.0;
                bestEntropy = currentEntropy;
                bestIndex = i;
                System.arraycopy(counts[0], 0, bestCounts[0], 0, numClasses);
                System.arraycopy(counts[1], 0, bestCounts[1], 0, numClasses);
            }
            ++numCutPoints;
        }

        // Default encoding charges for every position between instances, not
        // only for the boundaries that were actually evaluated.
        if (!m_UseBetterEncoding) {
            numCutPoints = lastPlusOne - first - 1;
        }

        // No boundary lowered the entropy.
        if (priorEntropy - bestEntropy <= 0.0) {
            return null;
        }

        final boolean accepted = m_UseKononenko
            ? KononenkosMDL(priorCounts, bestCounts, numInstances, numCutPoints)
            : FayyadAndIranisMDL(priorCounts, bestCounts, numInstances, numCutPoints);
        if (!accepted) {
            return null;
        }

        // Recurse into both halves and merge: left cuts, this cut, right cuts.
        double[] left = cutPointsForSubset(instances, attIndex, first, bestIndex + 1);
        double[] right = cutPointsForSubset(instances, attIndex, bestIndex + 1, lastPlusOne);
        final int leftLength = left == null ? 0 : left.length;
        final int rightLength = right == null ? 0 : right.length;
        double[] cutPoints = new double[leftLength + 1 + rightLength];
        if (left != null) {
            System.arraycopy(left, 0, cutPoints, 0, leftLength);
        }
        cutPoints[leftLength] = bestCutPoint;
        if (right != null) {
            System.arraycopy(right, 0, cutPoints, leftLength + 1, rightLength);
        }
        return cutPoints;
    }

    /**
     * Builds the output format from the input format and the calculated cut
     * points. If no cut points have been calculated, the output format is set to
     * {@code null}.
     *
     * <p>Attributes that are not selected or not numeric are copied unchanged.
     * A discretized attribute becomes:
     * <ul>
     *   <li>a single-valued nominal attribute 'All' when it has no cut points;</li>
     *   <li>one nominal attribute with one label per bin in the default mode;</li>
     *   <li>one two-valued attribute per cut point, named
     *       {@code <name>_<k>}, in binary mode. The class index is shifted when
     *       such an attribute precedes the class.</li>
     * </ul>
     */
    protected void setOutputFormat() {
        if (m_CutPoints == null) {
            setOutputFormat(null);
            return;
        }
        final Instances inputFormat = getInputFormat();
        final int numInputAttributes = inputFormat.numAttributes();
        FastVector<Attribute> attributes = new FastVector<Attribute>(numInputAttributes);
        int classIndex = inputFormat.classIndex();

        for (int i = 0; i < numInputAttributes; ++i) {
            Attribute source = inputFormat.attribute(i);
            if (!m_DiscretizeCols.isInRange(i) || !source.isNumeric()) {
                attributes.addElement((Attribute)source.copy());
                continue;
            }

            double[] cutPoints = m_CutPoints[i];
            if (cutPoints == null) {
                // No cut points: a single 'All' value, in both modes.
                FastVector<String> all = new FastVector<String>(1);
                all.addElement("'All'");
                attributes.addElement(Discretize.newDiscretizedAttribute(source, source.name(), all));
            } else if (!m_MakeBinary) {
                attributes.addElement(Discretize.newDiscretizedAttribute(source, source.name(), multiBinLabels(cutPoints)));
            } else {
                // One input attribute becomes cutPoints.length output attributes,
                // so a class located after it moves by (length - 1).
                if (i < inputFormat.classIndex()) {
                    classIndex += cutPoints.length - 1;
                }
                for (int j = 0; j < cutPoints.length; ++j) {
                    attributes.addElement(Discretize.newDiscretizedAttribute(source, source.name() + "_" + (j + 1), binaryLabels(cutPoints[j])));
                }
            }
        }

        Instances outputFormat = new Instances(inputFormat.relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    }

    /** Creates a nominal attribute with the given name and labels, carrying over the source attribute's weight. */
    private static Attribute newDiscretizedAttribute(Attribute source, String name, FastVector<String> labels) {
        Attribute discretized = new Attribute(name, labels);
        discretized.setWeight(source.weight());
        return discretized;
    }

    /** Builds one quoted label per bin (bin numbers or value ranges) for the default, non-binary mode. */
    private FastVector<String> multiBinLabels(double[] cutPoints) {
        final int n = cutPoints.length;
        FastVector<String> labels = new FastVector<String>(n + 1);
        for (int j = 0; j <= n; ++j) {
            if (m_UseBinNumbers) {
                labels.addElement("'B" + (j + 1) + "of" + (n + 1) + "'");
            } else {
                labels.addElement("'" + Discretize.binRangeString(cutPoints, j) + "'");
            }
        }
        return labels;
    }

    /** Builds the two quoted labels (bin numbers or value ranges) of a binary attribute for a single cut point. */
    private FastVector<String> binaryLabels(double cutPoint) {
        FastVector<String> labels = new FastVector<String>(2);
        if (m_UseBinNumbers) {
            labels.addElement("'B1of2'");
            labels.addElement("'B2of2'");
        } else {
            double[] binaryCutPoint = new double[]{cutPoint};
            labels.addElement("'" + Discretize.binRangeString(binaryCutPoint, 0) + "'");
            labels.addElement("'" + Discretize.binRangeString(binaryCutPoint, 1) + "'");
        }
        return labels;
    }

    /**
     * Converts one input instance to the output format and pushes it onto the
     * output queue.
     *
     * <p>Values of attributes that are not discretized are copied. For a
     * discretized attribute, a missing value stays missing; otherwise the value
     * becomes 0 when the attribute has no cut points, the index of the first
     * cut point that is greater than or equal to the value (or the number of
     * cut points if there is none) in the default mode, or one 0/1 value per cut
     * point in binary mode.
     *
     * @param instance the instance to convert
     */
    protected void convertInstance(Instance instance) {
        final double[] vals = new double[outputFormatPeek().numAttributes()];
        int out = 0;
        for (int att = 0; att < getInputFormat().numAttributes(); att++) {
            final boolean discretized =
                m_DiscretizeCols.isInRange(att) && getInputFormat().attribute(att).isNumeric();
            if (!discretized) {
                vals[out++] = instance.value(att);
                continue;
            }

            final double value = instance.value(att);
            final double[] cuts = m_CutPoints[att];
            if (cuts == null) {
                // Single 'All' bin.
                vals[out++] = instance.isMissing(att) ? Utils.missingValue() : 0.0;
            } else if (m_MakeBinary) {
                // One 0/1 indicator per cut point.
                for (int c = 0; c < cuts.length; c++) {
                    vals[out++] = instance.isMissing(att) ? Utils.missingValue() : (value <= cuts[c] ? 0.0 : 1.0);
                }
            } else if (instance.isMissing(att)) {
                vals[out++] = Utils.missingValue();
            } else {
                // Find the first cut point that is >= value.
                int bin = 0;
                while (bin < cuts.length && !(value <= cuts[bin])) {
                    bin++;
                }
                vals[out++] = bin;
            }
        }

        final AbstractInstance converted;
        if (instance instanceof SparseInstance) {
            converted = new SparseInstance(instance.weight(), vals);
        } else {
            converted = new DenseInstance(instance.weight(), vals);
        }
        // NOTE(review): the dataset is assigned both before and after copyValues,
        // as in the original; the reason is not visible here - confirm against
        // Filter.copyValues before simplifying.
        converted.setDataset(getOutputFormat());
        copyValues(converted, false, instance.dataset(), getOutputFormat());
        converted.setDataset(getOutputFormat());
        push(converted);
    }

    /**
     * Returns the revision string of this class.
     *
     * @return the value produced by {@code RevisionUtils.extract} for the
     *         embedded revision keyword
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8964 $");
    }

    /**
     * Command-line entry point: runs a new {@code Discretize} filter with the
     * given arguments.
     *
     * @param argv the command-line arguments
     */
    public static void main(String[] argv) {
        final Discretize filter = new Discretize();
        Discretize.runFilter(filter, argv);
    }
}

