package edu.cmu.minorthird.classify;

import cern.colt.matrix.impl.AbstractFormatter;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.semisupervised.SemiSupervisedDataset;
import edu.cmu.minorthird.classify.sequential.SequenceDataset;
import edu.cmu.minorthird.util.MathUtil;
import edu.cmu.minorthird.util.gui.ViewerFrame;
import edu.cmu.minorthird.util.gui.Visible;
import java.util.Random;
import java.util.StringTokenizer;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/cmu/minorthird/classify/SampleDatasets.class */
/**
 * Factory for small, hard-coded or pseudo-randomly generated datasets that are
 * handy for smoke-testing learners.
 *
 * <p>The datasets are reachable either through the individual factory methods
 * or by name through {@link #sampleData(String, boolean)}. {@link #main(String[])}
 * is a small command-line driver that trains a learner on one of them.
 */
public class SampleDatasets {
    private static final Logger log = Logger.getLogger(SampleDatasets.class);

    /** Seed used by {@link #sampleData(String, boolean)} for generated test splits. */
    private static final long TEST_SEED = 666L;

    /** Seed used by {@link #sampleData(String, boolean)} for generated training splits. */
    private static final long TRAIN_SEED = 999L;

    public static final String[] posTrain = {"a pricy doll house", "a little red fire truck", "a red wagon", "a pricy red sports car", "punk queen barbie and ken", "a little red bike"};
    public static final String[] negTrain = {"a a a a big 7-seater minivan with an automatic transmission", "a big house in the suburbs with a crushing mortgage", "a job for life at IBM", "a huge pile of tax forms, due yesterday", "huge pile of junk mail, bills, and catalogs"};
    public static final String[] posTest = {"a pricy barbie doll", "a little yellow toy car", "a red 10 speed bike", "a red convertible porshe"};
    public static final String[] negTest = {"a big pile of paperwork", "a huge backlog of email", "a life of woe and trouble"};
    private static String[] posBayesTrain = {"a a pricy doll house", "a a little red red fire truck", "a red wagon", "a pricy red sports car", "punk queen barbie and and ken", "a little red bike"};
    private static String[] negBayesTrain = {"a big big 7-seater minivan with with an an automatic transmission", "a big house in the suburbs with a a crushing mortgage", "a job for for life at at IBM", "a huge pile of of tax forms, due yesterday", "huge pile of of junk mail, bills, and catalogs"};
    private static String[] posBayesTest = {"a a pricy barbie doll", "a little yellow toy car", "a a red 10 speed bike", "a red convertible porshe"};
    private static String[] negBayesTest = {"a big pile of of paperwork", "a huge backlog of email", "a life of woe and and trouble"};
    private static String[] posBayesExtremeTrain = {"p1 p1 p1 p2 p2 p3 p3 p4 p4 p5 p5 n1 n2 n3 n4 n5", "p1 p1 p2 p2 p2 p3 p3 p4 p4 p5 p5 n1 n2 n3 n4 n5", "p1 p1 p2 p2 p3 p3 p3 p4 p4 p5 p5 n1 n2 n3 n4 n5", "p1 p1 p2 p2 p3 p3 p4 p4 p4 p5 p5 n1 n2 n3 n4 n5", "p1 p1 p2 p2 p3 p3 p4 p4 p5 p5 p5 n1 n2 n3 n4 n5"};
    private static String[] negBayesExtremeTrain = {"p1 p2 p3 p4 p5 n1 n1 n1 n2 n2 n3 n3 n4 n4 n5 n5", "p1 p2 p3 p4 p5 n1 n1 n2 n2 n2 n3 n3 n4 n4 n5 n5", "p1 p2 p3 p4 p5 n1 n1 n2 n2 n3 n3 n3 n4 n4 n5 n5", "p1 p2 p3 p4 p5 n1 n1 n2 n2 n3 n3 n4 n4 n4 n5 n5", "p1 p2 p3 p4 p5 n1 n1 n2 n2 n3 n3 n4 n4 n5 n5 n5"};
    private static String[] posBayesExtremeTest = {"p1 p1 n1", "p2 p2 n2", "p3 p3 n3", "p4 p4 n4", "p5 p5 n5"};
    private static String[] negBayesExtremeTest = {"p1 n1 n1", "p2 n2 n2", "p3 n3 n3", "p4 n4 n4", "p5 n5 n5"};
    private static String[] unlabeledBayesExtreme = {"p1 n1 n1", "p2 n2 n2", "p3 n3 n3", "p1 p1 n1", "p2 p2 n2", "p3 p3 n3"};

    /**
     * Builds a binary dataset of bag-of-words examples with binary features.
     *
     * @param positives texts labelled via {@code ClassLabel.binaryLabel(1.0)}
     * @param negatives texts labelled via {@code ClassLabel.binaryLabel(-1.0)}
     * @return dataset holding the positive examples followed by the negative ones
     */
    private static Dataset makeData(String[] positives, String[] negatives) {
        BasicDataset dataset = new BasicDataset();
        for (String text : positives) {
            dataset.add(makeExample(1.0d, text));
        }
        for (String text : negatives) {
            dataset.add(makeExample(-1.0d, text));
        }
        return dataset;
    }

    /**
     * Turns a text into an example with one binary feature per whitespace-delimited
     * token. The text itself is handed to the {@code MutableInstance} constructor.
     *
     * @param numericLabel value passed to {@code ClassLabel.binaryLabel}
     * @param text         text to tokenize
     */
    private static Example makeExample(double numericLabel, String text) {
        MutableInstance instance = new MutableInstance(text);
        StringTokenizer tokens = new StringTokenizer(text);
        while (tokens.hasMoreTokens()) {
            instance.addBinary(new Feature(tokens.nextToken()));
        }
        return new Example(instance, ClassLabel.binaryLabel(numericLabel));
    }

    /** @return the toy training set built from {@link #posTrain} and {@link #negTrain} */
    public static Dataset toyTrain() {
        return makeData(posTrain, negTrain);
    }

    /** @return the toy test set built from {@link #posTest} and {@link #negTest} */
    public static Dataset toyTest() {
        return makeData(posTest, negTest);
    }

    /** @return the "extreme" Bayes training set (token-count features) */
    public static Dataset toyBayesExtremeTrain() {
        return makeBayesData(posBayesExtremeTrain, negBayesExtremeTrain);
    }

    /** @return the "extreme" Bayes test set (token-count features) */
    public static Dataset toyBayesExtremeTest() {
        return makeBayesData(posBayesExtremeTest, negBayesExtremeTest);
    }

    /**
     * @return the "extreme" Bayes training examples together with the unlabeled
     *         instances, wrapped in a {@code SemiSupervisedDataset}
     */
    public static Dataset toyBayesExtremeUnlabeledTrain() {
        return makeUnlabeledBayesData(posBayesExtremeTrain, negBayesExtremeTrain, unlabeledBayesExtreme);
    }

    /**
     * Builds a semi-supervised dataset from labeled and unlabeled texts.
     *
     * @param positives texts labelled {@code ExampleSchema.POS_CLASS_NAME}
     * @param negatives texts labelled {@code ExampleSchema.NEG_CLASS_NAME}
     * @param unlabeled texts added through {@code addUnlabeled}
     */
    private static Dataset makeUnlabeledBayesData(String[] positives, String[] negatives, String[] unlabeled) {
        SemiSupervisedDataset dataset = new SemiSupervisedDataset();
        for (String text : positives) {
            dataset.add(makeLabeledBayesExample(new ClassLabel(ExampleSchema.POS_CLASS_NAME), text));
        }
        for (String text : negatives) {
            dataset.add(makeLabeledBayesExample(new ClassLabel(ExampleSchema.NEG_CLASS_NAME), text));
        }
        for (String text : unlabeled) {
            dataset.addUnlabeled(makeUnlabeledBayesExample(text));
        }
        return dataset;
    }

    /**
     * Tokenizes a text on whitespace and builds the instance shared by all the
     * "Bayes" example factories.
     *
     * <p>A token seen for the first time (current weight 0) is added as a binary
     * feature; every further occurrence re-adds it as a numeric feature with the
     * current weight plus one.
     * NOTE(review): this presumably makes the weight equal to the token count,
     * assuming {@code addNumeric} overwrites the previous weight - confirm against
     * {@code MutableInstance}.
     */
    private static MutableInstance makeBayesInstance(String text) {
        MutableInstance instance = new MutableInstance();
        StringTokenizer tokens = new StringTokenizer(text);
        while (tokens.hasMoreTokens()) {
            Feature feature = new Feature(tokens.nextToken());
            double current = instance.getWeight(feature);
            if (current == 0.0d) {
                instance.addBinary(feature);
            } else {
                instance.addNumeric(feature, current + 1.0d);
            }
        }
        return instance;
    }

    /** Wraps the Bayes instance for {@code text} in an example carrying {@code label}. */
    private static Example makeLabeledBayesExample(ClassLabel label, String text) {
        return new Example(makeBayesInstance(text), label);
    }

    /** Builds the Bayes instance for {@code text} without attaching a label. */
    private static Instance makeUnlabeledBayesExample(String text) {
        return makeBayesInstance(text);
    }

    /**
     * Builds a binary dataset whose examples come from {@link #makeBayesExample}.
     *
     * @param positives texts labelled via {@code ClassLabel.binaryLabel(1.0)}
     * @param negatives texts labelled via {@code ClassLabel.binaryLabel(-1.0)}
     */
    private static Dataset makeBayesData(String[] positives, String[] negatives) {
        BasicDataset dataset = new BasicDataset();
        for (String text : positives) {
            dataset.add(makeBayesExample(1.0d, text));
        }
        for (String text : negatives) {
            dataset.add(makeBayesExample(-1.0d, text));
        }
        return dataset;
    }

    /** Wraps the Bayes instance for {@code text} in an example labelled via {@code ClassLabel.binaryLabel}. */
    private static Example makeBayesExample(double numericLabel, String text) {
        return new Example(makeBayesInstance(text), ClassLabel.binaryLabel(numericLabel));
    }

    /** @return the Bayes toy training set (token-count features) */
    public static Dataset toyBayesTrain() {
        return makeBayesData(posBayesTrain, negBayesTrain);
    }

    /** @return the Bayes toy test set (token-count features) */
    public static Dataset toyBayesTest() {
        return makeBayesData(posBayesTest, negBayesTest);
    }

    /**
     * Generates examples over a single feature "x": when the random draw exceeds
     * 0.7 the example gets {@code x = 1} and a positive label, otherwise it has no
     * features at all and a negative label.
     *
     * @param random source of randomness (one {@code nextDouble()} per example)
     * @param i      number of examples to generate
     */
    public static Dataset makeSparseNumericData(Random random, int i) {
        BasicDataset dataset = new BasicDataset();
        Feature x = new Feature("x");
        for (int n = 0; n < i; n++) {
            MutableInstance instance = new MutableInstance();
            double label = -1.0d;
            if (random.nextDouble() > 0.7d) {
                instance.addNumeric(x, 1.0d);
                label = 1.0d;
            }
            dataset.add(new Example(instance, ClassLabel.binaryLabel(label)));
        }
        return dataset;
    }

    /**
     * Generates examples with up to seven numeric features ("x", "y", "z", "t",
     * "u", "v", "w"), each drawn as {@code random.nextDouble() * 10}.
     *
     * <p>An example is positive when its "x" and "y" weights are both below 3 or
     * both above 7, and negative otherwise.
     *
     * @param random source of randomness
     * @param i      number of features per example (the dimension), at most 7
     * @param i2     number of examples to generate
     * @throws IllegalArgumentException if {@code i} exceeds the number of feature names
     */
    public static Dataset makeNumericData(Random random, int i, int i2) {
        Feature x = new Feature("x");
        Feature y = new Feature("y");
        BasicDataset dataset = new BasicDataset();
        String[] featureNames = {"x", "y", "z", "t", "u", "v", "w"};
        if (i > featureNames.length) {
            throw new IllegalArgumentException("dim to big!");
        }
        for (int exampleIndex = 0; exampleIndex < i2; exampleIndex++) {
            MutableInstance instance = new MutableInstance();
            for (int featureIndex = 0; featureIndex < i; featureIndex++) {
                // The feature whose index equals the example's index is left out,
                // and no random number is consumed for it.
                if (featureIndex != exampleIndex) {
                    instance.addNumeric(new Feature(featureNames[featureIndex]), random.nextDouble() * 10.0d);
                }
            }
            double xWeight = instance.getWeight(x);
            double yWeight = instance.getWeight(y);
            boolean bothLow = xWeight < 3.0d && yWeight < 3.0d;
            boolean bothHigh = xWeight > 7.0d && yWeight > 7.0d;
            double label = (bothLow || bothHigh) ? 1.0d : -1.0d;
            dataset.add(new Example(instance, ClassLabel.binaryLabel(label)));
        }
        return dataset;
    }

    /**
     * Generates examples with a numeric feature "x" drawn from
     * {@code random.nextDouble()} and a binary feature "bias". An example is
     * positive when {@code MathUtil.logistic(d * x + d2)} exceeds a second random
     * draw. A summary of the class counts is printed to standard output.
     *
     * @param random source of randomness (two {@code nextDouble()} calls per example)
     * @param i      number of examples to generate
     * @param d      coefficient applied to "x"
     * @param d2     constant term added inside the logistic
     */
    public static Dataset makeLogisticRegressionData(Random random, int i, double d, double d2) {
        int numPos = 0;
        int numNeg = 0;
        BasicDataset dataset = new BasicDataset();
        for (int n = 0; n < i; n++) {
            double x = random.nextDouble();
            double probability = MathUtil.logistic((d * x) + d2);
            double threshold = random.nextDouble();
            ClassLabel label;
            if (probability > threshold) {
                label = ClassLabel.positiveLabel(1.0d);
                numPos++;
            } else {
                label = ClassLabel.negativeLabel(-1.0d);
                numNeg++;
            }
            MutableInstance instance = new MutableInstance();
            instance.addNumeric(new Feature("x"), x);
            instance.addBinary(new Feature("bias"));
            dataset.add(new Example(instance, label));
        }
        System.out.println(i + " examples: " + numPos + " pos, " + numNeg + " neg");
        return dataset;
    }

    /** @return the toy sequence training data (three fixed sentences) */
    public static SequenceDataset makeToySequenceData() {
        return makeToySequenceData(new String[]{"you're a good man Charlie Brown", "where's Waldo?", "alas dear Yorick, I knew him well"});
    }

    /** @return the toy sequence test data (two fixed sentences) */
    public static SequenceDataset makeToySequenceTestData() {
        return makeToySequenceData(new String[]{"hello, World War III", "to be or 2B, that is a question"});
    }

    /**
     * Converts each string into one sequence, with one example per token obtained
     * by splitting on single spaces. A token is labelled
     * {@code ExampleSchema.POS_CLASS_NAME} when its first character is upper case
     * and {@code ExampleSchema.NEG_CLASS_NAME} otherwise; an empty token (which
     * {@code split(" ")} can produce for an empty string or doubled spaces) counts
     * as negative instead of raising an exception.
     *
     * @param strArr the sentences to convert, one sequence per element
     */
    public static SequenceDataset makeToySequenceData(String[] strArr) {
        SequenceDataset dataset = new SequenceDataset();
        for (int line = 0; line < strArr.length; line++) {
            // A plain single-space literal instead of the colt formatter constant the
            // decompiler substituted here (believed to hold the same value), so
            // tokenizing no longer depends on an unrelated library.
            String[] tokens = strArr[line].split(" ");
            Example[] sequence = new Example[tokens.length];
            for (int pos = 0; pos < tokens.length; pos++) {
                String token = tokens[pos];
                boolean capitalized = token.length() > 0 && Character.isUpperCase(token.charAt(0));
                ClassLabel label = new ClassLabel(capitalized ? ExampleSchema.POS_CLASS_NAME : ExampleSchema.NEG_CLASS_NAME);
                MutableInstance instance = new MutableInstance(strArr[line] + ":" + pos, "line" + line);
                instance.addBinary(new Feature("here " + token));
                // NOTE(review): "pos > 1" means the token at index 1 never gets a
                // "prev" feature; this looks like an off-by-one (expected "pos > 0")
                // but is kept as-is so the generated toy data does not change.
                if (pos > 1) {
                    instance.addBinary(new Feature("prev " + tokens[pos - 1]));
                }
                if (pos < tokens.length - 1) {
                    instance.addBinary(new Feature("next " + tokens[pos + 1]));
                }
                instance.addBinary(new Feature("casePattern " + token.replaceAll("[A-Z]+", "A").replaceAll("[a-z]+", "a")));
                sequence[pos] = new Example(instance, label);
            }
            dataset.addSequence(sequence);
        }
        return dataset;
    }

    /**
     * Generates a three-class dataset ("homer", "marge", "bart"). Each example
     * picks a class at random, then draws between 2 and 4 words (with replacement)
     * from that class's vocabulary and adds each as a binary feature named
     * {@code {"word", <word>}}.
     *
     * @param random source of randomness
     * @param i      number of examples to generate
     */
    public static Dataset makeToy3ClassData(Random random, int i) {
        BasicDataset dataset = new BasicDataset();
        // One vocabulary per class, in the same order as classNames.
        String[][] vocabularies = {
            {"money", "cash", "sleep", "booze", "chocolate", "fun", "beer", "pizza"},
            {"stocks", "bonds", "money", "cash", "influence", "power", "fame"},
            {"chocolate", "beer", "pizza", "pringles", "popcorn", "spam", "crisco"}
        };
        String[] classNames = {"homer", "marge", "bart"};
        for (int n = 0; n < i; n++) {
            int classIndex = random.nextInt(3);
            int numWords = random.nextInt(3) + 2;
            String[] vocabulary = vocabularies[classIndex];
            MutableInstance instance = new MutableInstance();
            for (int w = 0; w < numWords; w++) {
                String word = vocabulary[random.nextInt(vocabulary.length)];
                instance.addBinary(new Feature(new String[]{"word", word}));
            }
            dataset.add(new Example(instance, new ClassLabel(classNames[classIndex])));
        }
        return dataset;
    }

    /** Returns a freshly seeded generator: one fixed seed for test splits, another for training splits. */
    private static Random seededRandom(boolean isTest) {
        return new Random(isTest ? TEST_SEED : TRAIN_SEED);
    }

    /**
     * Looks up a sample dataset by name.
     *
     * <p>Recognized names: "toy", "bayes", "bayesExtreme", "bayesUnlabeled", "num",
     * "logistic", "bigLogistic", "sparseNum", "toy3" and "toySeq". Randomly
     * generated datasets use a fixed seed that differs between the training and
     * the test split.
     *
     * @param str name of the dataset
     * @param z   {@code true} for the test split, {@code false} for the training split
     * @throws IllegalArgumentException if the name is not recognized
     */
    public static Dataset sampleData(String str, boolean z) {
        if ("toy".equals(str)) {
            return z ? toyTest() : toyTrain();
        }
        if ("bayes".equals(str)) {
            return z ? toyBayesTest() : toyBayesTrain();
        }
        if ("bayesExtreme".equals(str)) {
            return z ? toyBayesExtremeTest() : toyBayesExtremeTrain();
        }
        if ("bayesUnlabeled".equals(str)) {
            return z ? toyBayesExtremeTest() : toyBayesExtremeUnlabeledTrain();
        }
        if ("num".equals(str)) {
            return makeNumericData(seededRandom(z), 2, 20);
        }
        if ("logistic".equals(str)) {
            return makeLogisticRegressionData(seededRandom(z), 50, 2.0d, -2.0d);
        }
        if ("bigLogistic".equals(str)) {
            return makeLogisticRegressionData(seededRandom(z), 1000, 2.0d, -2.0d);
        }
        if ("sparseNum".equals(str)) {
            return makeSparseNumericData(seededRandom(z), 20);
        }
        if ("toy3".equals(str)) {
            return makeToy3ClassData(seededRandom(z), 50);
        }
        if ("toySeq".equals(str)) {
            return z ? makeToySequenceTestData() : makeToySequenceData();
        }
        throw new IllegalArgumentException("illegal dataset name '" + str + "'");
    }

    /**
     * Command-line driver.
     *
     * <p>{@code strArr[0]} names the dataset; both splits are logged at debug
     * level. If {@code strArr[1]} is present it is taken as the class name of a
     * {@code ClassifierLearner}, which is trained on the training split and traced
     * on both splits. A third argument equal to "active" is passed on to
     * {@code DatasetClassifierTeacher} as its boolean flag.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        final String usage = "usage: [toy|num] edu.cmu.minorthird.classify.SomeLearner [active]";
        if (strArr.length == 0) {
            // Nothing to look up: report usage instead of failing on strArr[0].
            System.out.println(usage);
            return;
        }
        try {
            Dataset trainData = sampleData(strArr[0], false);
            Dataset testData = sampleData(strArr[0], true);
            log.debug("Train dataset is: ");
            log.debug(trainData.toString());
            log.debug("Test dataset is:");
            log.debug(testData.toString());
            // The learner class name is strArr[1], so it needs at least two arguments.
            if (strArr.length > 1) {
                boolean active = strArr.length >= 3 && "active".equals(strArr[2]);
                ClassifierLearner learner = (ClassifierLearner) Class.forName(strArr[1]).newInstance();
                Classifier classifier = new DatasetClassifierTeacher(trainData, active).train(learner);
                log.info("Classifier: " + classifier);
                traceClassifier("Train", classifier, trainData);
                traceClassifier("Test", classifier, testData);
                if (classifier instanceof Visible) {
                    new ViewerFrame(strArr[1] + " on " + strArr[0], ((Visible) classifier).toGUI());
                }
            }
        } catch (Exception e) {
            System.out.println(usage);
            e.printStackTrace();
        }
    }

    /**
     * Logs, for every example in the dataset, whether the classifier's prediction
     * agrees with the actual label ("Y") or not ("N").
     *
     * <p>For a {@code BinaryClassifier} agreement means the predicted positive
     * weight and the numeric label do not have opposite signs; for any other
     * classifier it is decided by {@code ClassLabel.isCorrect}.
     *
     * @param name       name of the dataset, used in the log header
     * @param classifier classifier to evaluate
     * @param dataset    examples to classify
     */
    private static void traceClassifier(String name, Classifier classifier, Dataset dataset) {
        log.info("");
        log.info("Performance on dataset " + name + ":");
        boolean binary = classifier instanceof BinaryClassifier;
        Example.Looper examples = dataset.iterator();
        while (examples.hasNext()) {
            Example example = examples.nextExample();
            if (binary) {
                double actual = example.getLabel().numericLabel();
                double predicted = classifier.classification(example).posWeight();
                String verdict = predicted * actual >= 0.0d ? "Y" : "N";
                log.info(verdict + "\tpred=" + predicted + "\tactual=" + actual + "\t" + example);
            } else {
                ClassLabel actual = example.getLabel();
                ClassLabel predicted = classifier.classification(example);
                String verdict = predicted.isCorrect(actual) ? "Y" : "N";
                log.info(verdict + "\tpred=" + predicted + "\tactual=" + actual + "\t" + example);
            }
        }
    }
}
