package edu.cmu.minorthird.classify.transform;

import edu.cmu.minorthird.classify.BasicFeatureIndex;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.ExampleSchema;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/* loaded from: input_file:edu/cmu/minorthird/classify/transform/InfoGainTransformLearner2.class */
/**
 * Learns an {@link InstanceTransform} that keeps only the features with the highest
 * information gain with respect to a binary class label.
 *
 * <p>Only the {@code "document"} frequency model is implemented: counts are taken from a
 * {@link BasicFeatureIndex} built over the training data. The {@code "word"} model is
 * recognised but not implemented.
 */
public class InfoGainTransformLearner2 implements InstanceTransformLearner {
    private static final String DOCUMENT_MODEL = "document";
    private static final String WORD_MODEL = "word";
    private static final int DEFAULT_NUM_FEATURES = 100;
    private static final double LOG_2 = Math.log(2.0d);

    private final String frequencyModel;
    private final int numFeatures;

    /**
     * A feature paired with its information gain. The natural ordering is by descending gain,
     * with ties broken by the feature's own ordering.
     */
    private static final class IGPair implements Comparable<IGPair> {
        final double value;
        final Feature feature;

        IGPair(double value, Feature feature) {
            this.value = value;
            this.feature = feature;
        }

        @Override
        public int compareTo(IGPair other) {
            // Inverted on purpose: larger gains sort first.
            if (this.value < other.value) {
                return 1;
            }
            if (this.value > other.value) {
                return -1;
            }
            return this.feature.compareTo(other.feature);
        }

        @Override
        public String toString() {
            return "[ " + this.value + "," + this.feature + " ]";
        }
    }

    /** Creates a learner that keeps the top 100 features under the "document" model. */
    public InfoGainTransformLearner2() {
        this(DEFAULT_NUM_FEATURES, DOCUMENT_MODEL);
    }

    /**
     * Creates a learner under the "document" model.
     *
     * @param i maximum number of features to keep
     */
    public InfoGainTransformLearner2(int i) {
        this(i, DOCUMENT_MODEL);
    }

    /**
     * Creates a learner.
     *
     * @param i   maximum number of features to keep
     * @param str frequency model name; it is validated when {@link #batchTrain(Dataset)} runs,
     *            not here
     */
    public InfoGainTransformLearner2(int i, String str) {
        this.frequencyModel = str;
        this.numFeatures = i;
    }

    /** Does nothing; the schema is read from the dataset passed to {@link #batchTrain(Dataset)}. */
    @Override // edu.cmu.minorthird.classify.transform.InstanceTransformLearner
    public void setSchema(ExampleSchema exampleSchema) {
    }

    /**
     * Ranks every indexed feature by information gain and returns a transform that masks
     * instances down to the best-ranked ones.
     *
     * @param dataset training data; its schema must equal
     *                {@link ExampleSchema#BINARY_EXAMPLE_SCHEMA}
     * @return a transform wrapping each instance in a {@link MaskedInstance} restricted to at
     *         most {@code numFeatures} features (fewer if the data has fewer features)
     * @throws IllegalArgumentException      if the dataset is not binary, or the frequency model
     *                                       is neither "document" nor "word"
     * @throws UnsupportedOperationException if the frequency model is "word"
     */
    @Override // edu.cmu.minorthird.classify.transform.InstanceTransformLearner
    public InstanceTransform batchTrain(Dataset dataset) {
        if (!dataset.getSchema().equals(ExampleSchema.BINARY_EXAMPLE_SCHEMA)) {
            throw new IllegalArgumentException("only works for binary data!");
        }
        if (WORD_MODEL.equals(this.frequencyModel)) {
            throw new UnsupportedOperationException("not implemented");
        }
        if (!DOCUMENT_MODEL.equals(this.frequencyModel)) {
            // Fail with an exception rather than terminating the whole JVM from library code.
            throw new IllegalArgumentException(
                    "'" + this.frequencyModel + "' is an unknown model for frequency!");
        }

        ArrayList<IGPair> ranked = scoreFeatures(dataset, new BasicFeatureIndex(dataset));
        Collections.sort(ranked);

        // The data may contain fewer distinct features than requested.
        int keep = Math.min(this.numFeatures, ranked.size());
        final Set<Feature> activeFeatureSet = new HashSet<Feature>();
        for (int i = 0; i < keep; i++) {
            activeFeatureSet.add(ranked.get(i).feature);
        }

        return new AbstractInstanceTransform() {
            @Override
            public Instance transform(Instance instance) {
                return new MaskedInstance(instance, activeFeatureSet);
            }

            @Override
            public String toString() {
                return "[InstanceTransform: model = " + InfoGainTransformLearner2.this.frequencyModel
                        + ", top " + InfoGainTransformLearner2.this.numFeatures + " by InfoGain]";
            }
        };
    }

    /** Computes the information gain of every feature in the index (unsorted). */
    private ArrayList<IGPair> scoreFeatures(Dataset dataset, BasicFeatureIndex index) {
        ArrayList<IGPair> scored = new ArrayList<IGPair>();

        // index.size(label) is used as the positive-example count; the negatives are the rest.
        // NOTE(review): presumably size(label) counts examples with that label - confirm
        // against BasicFeatureIndex.
        double totalCount = dataset.size();
        double posCount = index.size(ExampleSchema.POS_CLASS_NAME);
        double negCount = totalCount - posCount;
        double classEntropy = entropyOfCounts(posCount, negCount);

        for (Feature.Looper features = index.featureIterator(); features.hasNext();) {
            Feature feature = features.nextFeature();

            // Counts among examples that contain the feature ...
            double negWith = index.size(feature, ExampleSchema.NEG_CLASS_NAME);
            double posWith = index.size(feature) - negWith;
            // ... and among those that do not.
            double negWithout = negCount - negWith;
            double posWithout = posCount - posWith;

            double entropyWith = entropyOfCounts(posWith, negWith);
            double entropyWithout = entropyOfCounts(posWithout, negWithout);
            double fractionWith = (negWith + posWith) / totalCount;

            double gain = (classEntropy - (fractionWith * entropyWith))
                    - ((1.0d - fractionWith) * entropyWithout);
            scored.add(new IGPair(gain, feature));
        }
        return scored;
    }

    /**
     * Entropy of a two-way split given raw counts. An empty split contributes 0 rather than
     * the NaN that 0/0 would produce, which keeps the gains totally ordered for sorting.
     */
    private double entropyOfCounts(double posCount, double negCount) {
        double total = posCount + negCount;
        if (total == 0.0d) {
            return 0.0d;
        }
        return entropy(posCount / total, negCount / total);
    }

    /**
     * Base-2 entropy of a two-outcome distribution.
     *
     * @param d  probability of the first outcome
     * @param d2 probability of the second outcome
     * @return {@code -d*log2(d) - d2*log2(d2)}, or 0 when either argument is exactly 0;
     *         the arguments are not validated, so NaN or negative inputs yield NaN
     */
    public double entropy(double d, double d2) {
        if (d == 0.0d || d2 == 0.0d) {
            return 0.0d;
        }
        return (((-d) * Math.log(d)) / LOG_2) - ((d2 * Math.log(d2)) / LOG_2);
    }
}
