package edu.cmu.minorthird.text.learn;

import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.SampleDatasets;
import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.EmptyLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.util.StringUtil;
import edu.cmu.minorthird.util.gui.ViewerFrame;
import java.io.Serializable;
import java.util.Set;

/* loaded from: input_file:edu/cmu/minorthird/text/learn/SampleFE.class */
public class SampleFE {
    /** Shared, pre-built instance of {@link BagOfWordsFE}. */
    public static final AnnotatedSpanFE BAG_OF_WORDS = new BagOfWordsFE();
    /** Shared, pre-built instance of {@link BagOfLowerCaseWordsFE}. */
    public static final AnnotatedSpanFE BAG_OF_LC_WORDS = new BagOfLowerCaseWordsFE();

    /* loaded from: input_file:edu/cmu/minorthird/text/learn/SampleFE$AnnotatedSpanFE.class */
    /**
     * Abstract base type shared by the sample feature extractors declared in this class.
     * It extends {@link SpanFE} and declares no members of its own here.
     */
    public static abstract class AnnotatedSpanFE extends SpanFE {
    }

    /* loaded from: input_file:edu/cmu/minorthird/text/learn/SampleFE$BagOfLowerCaseWordsFE.class */
    /**
     * Serializable feature extractor that emits the result of the
     * {@code tokens().eq().lc()} pipeline applied to the span.
     * NOTE(review): judging by the class name this yields one lower-cased
     * bag-of-words feature per token; the exact semantics of {@code eq()} and
     * {@code lc()} are defined in SpanFE, which is not visible here.
     */
    public static class BagOfLowerCaseWordsFE extends AnnotatedSpanFE implements Serializable {
        /**
         * Emits features for the tokens of {@code span}.
         *
         * @param textLabels labels accompanying the span; not used by this implementation
         * @param span the span to extract features from
         */
        @Override // edu.cmu.minorthird.text.learn.SpanFE
        public void extractFeatures(TextLabels textLabels, Span span) {
            from(span).tokens().eq().lc().emit();
        }
    }

    /* loaded from: input_file:edu/cmu/minorthird/text/learn/SampleFE$BagOfWordsFE.class */
    /**
     * Serializable feature extractor that emits the result of the
     * {@code tokens()} pipeline applied to the span, with no further
     * transformation step in between.
     * NOTE(review): judging by the class name this yields bag-of-words
     * features; what {@code emit()} produces for a raw token set is defined
     * in SpanFE, which is not visible here.
     */
    public static class BagOfWordsFE extends AnnotatedSpanFE implements Serializable {
        /**
         * Emits features for the tokens of {@code span}.
         *
         * @param textLabels labels accompanying the span; not used by this implementation
         * @param span the span to extract features from
         */
        @Override // edu.cmu.minorthird.text.learn.SpanFE
        public void extractFeatures(TextLabels textLabels, Span span) {
            from(span).tokens().emit();
        }
    }

    /* loaded from: input_file:edu/cmu/minorthird/text/learn/SampleFE$ExtractionFE.class */
    /**
     * Configurable feature extractor that emits features for the tokens inside
     * a span and for a window of tokens on either side of it.
     *
     * <p>For every token position considered, the extractor emits the
     * {@code eq().lc()} value, optionally the {@code charTypePattern()} and
     * {@code charTypes()} values, and one feature per configured token
     * property. The window covers {@link #getFeatureWindowSize()} positions on
     * each side of the span.
     *
     * <p>The set of token properties may be given explicitly or as the
     * wildcard {@code "*"}; the wildcard is resolved on the first call to
     * {@link #extractFeatures(TextLabels, Span)} from the token properties
     * reported by the labels passed to that call, and stays fixed afterwards
     * until a setter is called again.
     */
    public static class ExtractionFE extends AnnotatedSpanFE {
        /** Wildcard meaning "use every token property known to the labels". */
        private static final String ALL_TOKEN_PROPERTIES = "*";

        /** Number of token positions examined on each side of the span. */
        protected int windowSize;
        /** Whether {@code charTypes()} features are emitted. */
        protected boolean useCharType = true;
        /** Whether {@code charTypePattern()} features are emitted. */
        protected boolean useCompressedCharType = true;
        /**
         * Token properties to emit features for; {@code null} means the
         * wildcard was requested and has not been resolved yet.
         */
        protected String[] tokenPropertyFeatures = new String[0];

        /** Creates an extractor with a window of 3 tokens on each side. */
        public ExtractionFE() {
            this(3);
        }

        /**
         * Creates an extractor with the given window size.
         *
         * @param i number of token positions to examine on each side of the span
         */
        public ExtractionFE(int i) {
            this.windowSize = i;
        }

        /**
         * @param i number of token positions to examine on each side of the span
         */
        public void setFeatureWindowSize(int i) {
            this.windowSize = i;
        }

        /**
         * @return number of token positions examined on each side of the span
         */
        public int getFeatureWindowSize() {
            return this.windowSize;
        }

        /**
         * @param z whether {@code charTypes()} features should be emitted
         */
        public void setUseCharType(boolean z) {
            this.useCharType = z;
        }

        /**
         * @return whether {@code charTypes()} features are emitted
         */
        public boolean getUseCharType() {
            return this.useCharType;
        }

        /**
         * @param z whether {@code charTypePattern()} features should be emitted
         */
        public void setUseCompressedCharType(boolean z) {
            this.useCompressedCharType = z;
        }

        /**
         * @return whether {@code charTypePattern()} features are emitted
         */
        public boolean getUseCompressedCharType() {
            return this.useCompressedCharType;
        }

        /**
         * Sets the token properties from a comma-separated list.
         *
         * <p>Surrounding whitespace is ignored. The value {@code "*"} selects
         * every token property of the labels seen at extraction time. A
         * {@code null}, empty or blank value selects no token properties;
         * without this guard {@code "".split(...)} would produce a single
         * property with an empty name.
         *
         * @param str comma-separated property names, {@code "*"}, or empty/{@code null} for none
         */
        public void setTokenPropertyFeatures(String str) {
            String spec = (str == null) ? "" : str.trim();
            if (ALL_TOKEN_PROPERTIES.equals(spec)) {
                this.tokenPropertyFeatures = null;
            } else if (spec.length() == 0) {
                this.tokenPropertyFeatures = new String[0];
            } else {
                this.tokenPropertyFeatures = spec.split(",\\s*");
            }
        }

        /**
         * Returns the configured token properties as a string.
         *
         * @return {@code "*"} while the wildcard is pending resolution, otherwise
         *         the result of {@code StringUtil.toString} on the property array
         */
        public String getTokenPropertyFeatures() {
            // Mirror the setter: a null array is the unresolved wildcard, so do
            // not hand it to StringUtil.toString (its null handling is not visible here).
            if (this.tokenPropertyFeatures == null) {
                return ALL_TOKEN_PROPERTIES;
            }
            return StringUtil.toString(this.tokenPropertyFeatures);
        }

        /**
         * Sets the token properties from a set of property names.
         *
         * @param set the property names; every element must be a {@code String}
         */
        public void setTokenPropertyFeatures(Set set) {
            this.tokenPropertyFeatures = (String[]) set.toArray(new String[set.size()]);
        }

        /**
         * Extracts features for {@code span} using a fresh {@link EmptyLabels}.
         *
         * @param span the span to extract features from
         */
        @Override // edu.cmu.minorthird.text.learn.SpanFE
        public void extractFeatures(Span span) {
            extractFeatures(new EmptyLabels(), span);
        }

        /**
         * Emits the features for the tokens of {@code span} and for the
         * surrounding window, resolving a pending wildcard property list first.
         *
         * @param textLabels labels supplying the token properties
         * @param span the span to extract features from
         */
        @Override // edu.cmu.minorthird.text.learn.SpanFE
        public void extractFeatures(TextLabels textLabels, Span span) {
            requireMyAnnotation(textLabels);
            if (this.tokenPropertyFeatures == null) {
                // Wildcard requested: fix the property list to whatever these labels report.
                Set knownProperties = textLabels.getTokenProperties();
                System.out.println("setTokenPropertyFeatures to the set " + knownProperties);
                setTokenPropertyFeatures(knownProperties);
            }

            // Features of the tokens inside the span.
            from(span).tokens().eq().lc().emit();
            if (this.useCompressedCharType) {
                from(span).tokens().eq().charTypePattern().emit();
            }
            if (this.useCharType) {
                from(span).tokens().eq().charTypes().emit();
            }
            for (String property : this.tokenPropertyFeatures) {
                from(span).tokens().prop(property).emit();
            }

            // Features of the window: position `offset` on the right is paired
            // with position `-offset - 1` on the left.
            for (int offset = 0; offset < this.windowSize; offset++) {
                int leftIndex = -offset - 1;
                from(span).left().token(leftIndex).eq().lc().emit();
                from(span).right().token(offset).eq().lc().emit();
                for (String property : this.tokenPropertyFeatures) {
                    from(span).left().token(leftIndex).prop(property).emit();
                    from(span).right().token(offset).prop(property).emit();
                }
                if (this.useCompressedCharType) {
                    from(span).left().token(leftIndex).eq().charTypePattern().emit();
                    from(span).right().token(offset).eq().charTypePattern().emit();
                }
                if (this.useCharType) {
                    from(span).left().token(leftIndex).eq().charTypes().emit();
                    from(span).right().token(offset).eq().charTypes().emit();
                }
            }
        }
    }

    /**
     * Factory for an {@link ExtractionFE} whose feature window size is {@code i}.
     *
     * @param i number of token positions to examine on each side of a span
     * @return a new extractor configured with that window size
     */
    public static final AnnotatedSpanFE makeExtractionFE(int i) {
        // The int constructor stores the window size directly, which leaves the
        // extractor in the same state as default-constructing and then setting it.
        return new ExtractionFE(i);
    }

    /**
     * Demo entry point: loads the positive and negative training strings from
     * {@link SampleDatasets} into a text base as documents named
     * {@code pos<i>} / {@code neg<i>}, extracts an instance from each document
     * span with {@link #BAG_OF_LC_WORDS}, labels it +1 or -1 by document-id
     * prefix, and opens a {@link ViewerFrame} on the resulting dataset.
     * Any exception is caught and its stack trace printed.
     *
     * @param strArr command-line arguments; not used
     */
    public static void main(String[] strArr) {
        try {
            AnnotatedSpanFE extractor = BAG_OF_LC_WORDS;
            BasicTextBase textBase = new BasicTextBase();

            int posIndex = 0;
            while (posIndex < SampleDatasets.posTrain.length) {
                textBase.loadDocument("pos" + posIndex, SampleDatasets.posTrain[posIndex]);
                posIndex++;
            }
            int negIndex = 0;
            while (negIndex < SampleDatasets.negTrain.length) {
                textBase.loadDocument("neg" + negIndex, SampleDatasets.negTrain[negIndex]);
                negIndex++;
            }

            BasicDataset dataset = new BasicDataset();
            for (Span.Looper documents = textBase.documentSpanIterator(); documents.hasNext(); ) {
                Span document = documents.nextSpan();
                // Documents loaded from posTrain carry the "pos" id prefix and get the positive label.
                dataset.add(new Example(
                        extractor.extractInstance(document),
                        ClassLabel.binaryLabel(document.getDocumentId().startsWith("pos") ? 1.0d : -1.0d)));
            }

            new ViewerFrame("Toy data", dataset.toGUI());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
