package edu.cmu.minorthird.text.learn;

import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.ExampleSchema;
import edu.cmu.minorthird.classify.sequential.BatchSegmenterLearner;
import edu.cmu.minorthird.classify.sequential.MutableCandidateSegmentGroup;
import edu.cmu.minorthird.classify.sequential.SegmentCollinsPerceptronLearner;
import edu.cmu.minorthird.classify.sequential.SegmentDataset;
import edu.cmu.minorthird.classify.sequential.Segmentation;
import edu.cmu.minorthird.classify.sequential.Segmenter;
import edu.cmu.minorthird.text.AbstractAnnotator;
import edu.cmu.minorthird.text.Annotator;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.ui.Recommended;
import edu.cmu.minorthird.util.ProgressCounter;
import edu.cmu.minorthird.util.gui.ComponentViewer;
import edu.cmu.minorthird.util.gui.SmartVanillaViewer;
import edu.cmu.minorthird.util.gui.Viewer;
import edu.cmu.minorthird.util.gui.ViewerFrame;
import edu.cmu.minorthird.util.gui.Visible;
import java.io.Serializable;
import java.util.Iterator;
import javax.swing.JComponent;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.border.TitledBorder;
import org.apache.log4j.Logger;

/**
 * Learns an {@link Annotator} by training a {@link BatchSegmenterLearner} on
 * groups of candidate segments.
 *
 * <p>For every answered document (see {@link #setAnswer(AnnotationExample)}) each
 * sub-span of length {@code 1..maxWindowSize} is turned into an instance by the
 * configured {@link SpanFeatureExtractor}, labelled with the class name reported
 * by the example, and stored in a {@link SegmentDataset}. {@link #getAnnotator()}
 * trains the learner on that dataset and wraps the resulting {@link Segmenter}
 * in a {@link SegmentAnnotator}.
 */
public class SegmentAnnotatorLearner extends AnnotatorLearner {
    private static final Logger log = Logger.getLogger(SegmentAnnotatorLearner.class);

    /** Debug switch; sampled once, when this class is initialized. */
    private static final boolean DEBUG = log.isDebugEnabled();

    /** Window size used by the constructors that do not take one explicitly. */
    private static final int DEFAULT_MAX_WINDOW_SIZE = 4;

    /** Type name under which the learned annotator records its spans. */
    protected String annotationType;

    /** Training data accumulated by {@link #setAnswer(AnnotationExample)}. */
    protected SegmentDataset dataset;

    /** Learner trained by {@link #getAnnotator()}. */
    protected BatchSegmenterLearner learner;

    /** Converts candidate sub-spans into instances. */
    protected SpanFeatureExtractor fe;

    /** Longest candidate segment length that is considered. */
    protected int maxWindowSize;

    private boolean displayDatasetBeforeLearning;
    private boolean compressDataset;

    /** Source of query documents; {@code null} until {@link #setDocumentPool} is called. */
    private Span.Looper documentLooper;

    /**
     * Annotator that applies a trained {@link Segmenter} to every document of a
     * text base and records each segment that has a non-null class name as a
     * span of the configured annotation type.
     */
    public static class SegmentAnnotator extends AbstractAnnotator implements Serializable, Visible, ExtractorAnnotator {
        private static final long serialVersionUID = 1L;
        private Segmenter segmenter;
        private SpanFeatureExtractor fe;
        private String annotationType;
        private int maxWindowSize;

        /**
         * @param segmenter            trained segmenter used to pick segments
         * @param spanFeatureExtractor extractor applied to every candidate sub-span
         * @param i                    maximum candidate segment length (window size)
         * @param str                  annotation type assigned to the extracted spans
         */
        public SegmentAnnotator(Segmenter segmenter, SpanFeatureExtractor spanFeatureExtractor, int i, String str) {
            this.segmenter = segmenter;
            this.fe = spanFeatureExtractor;
            this.maxWindowSize = i;
            this.annotationType = str;
        }

        /** @return the annotation type this annotator adds spans to */
        @Override
        public String getSpanType() {
            return this.annotationType;
        }

        /**
         * Segments every document span of the labels' text base and adds each
         * segment whose class name is non-null to {@code annotationType}.
         *
         * @param monotonicTextLabels labels to read features from and to add spans to
         */
        @Override
        protected void doAnnotate(MonotonicTextLabels monotonicTextLabels) {
            Span.Looper documents = monotonicTextLabels.getTextBase().documentSpanIterator();
            ProgressCounter progress = new ProgressCounter("tagging with segmenter", "document", documents.estimatedSize());
            while (documents.hasNext()) {
                Span document = documents.nextSpan();
                int docSize = document.size();
                MutableCandidateSegmentGroup candidates = new MutableCandidateSegmentGroup(this.maxWindowSize, docSize);
                for (int start = 0; start < docSize; start++) {
                    // Candidate lengths are capped by the window size and by the end of the document.
                    for (int len = 1; len <= this.maxWindowSize && start + len <= docSize; len++) {
                        candidates.setSubsequence(start, start + len, this.fe.extractInstance(monotonicTextLabels, document.subSpan(start, len)));
                    }
                }
                Segmentation segmentation = this.segmenter.segmentation(candidates);
                if (SegmentAnnotatorLearner.DEBUG) {
                    SegmentAnnotatorLearner.log.debug("slidingWindowGroup: " + candidates);
                    SegmentAnnotatorLearner.log.debug("segmentation: " + segmentation);
                }
                Iterator it = segmentation.iterator();
                while (it.hasNext()) {
                    Segmentation.Segment segment = (Segmentation.Segment) it.next();
                    // Segments without a class name are not recorded.
                    if (segmentation.className(segment) != null) {
                        // subSpan takes (start, length); the segment stores (lo, hi).
                        Span extracted = document.subSpan(segment.lo, segment.hi - segment.lo);
                        monotonicTextLabels.addToType(extracted, this.annotationType);
                        if (SegmentAnnotatorLearner.DEBUG) {
                            SegmentAnnotatorLearner.log.debug("span of type: " + this.annotationType + ": " + extracted);
                        }
                    }
                }
                progress.progress();
            }
            progress.finished();
        }

        /** Explanations are not supported; always returns a fixed placeholder string. */
        @Override
        public String explainAnnotation(TextLabels textLabels, Span span) {
            return "not implemented";
        }

        @Override
        public String toString() {
            return "[SegmentAnnotator " + this.annotationType + ":\n" + this.segmenter + "]";
        }

        /**
         * Builds a viewer showing the wrapped segmenter inside a titled,
         * scrollable panel.
         */
        @Override
        public Viewer toGUI() {
            ComponentViewer viewer = new ComponentViewer() {
                @Override
                public JComponent componentFor(Object obj) {
                    JPanel panel = new JPanel();
                    panel.setBorder(new TitledBorder("Segmenter Annotator"));
                    SmartVanillaViewer segmenterViewer = new SmartVanillaViewer(((SegmentAnnotator) obj).segmenter);
                    segmenterViewer.setSuperView(this);
                    panel.add(segmenterViewer);
                    return new JScrollPane(panel);
                }
            };
            viewer.setContent(this);
            return viewer;
        }
    }

    /**
     * Creates a learner backed by a {@link SegmentCollinsPerceptronLearner} and
     * a {@link Recommended.MultitokenSpanFE} feature extractor.
     */
    public SegmentAnnotatorLearner() {
        this(new SegmentCollinsPerceptronLearner(), new Recommended.MultitokenSpanFE());
    }

    /**
     * Creates a learner with the default maximum window size.
     *
     * @param batchSegmenterLearner learner to train
     * @param spanFeatureExtractor  extractor applied to candidate sub-spans
     */
    public SegmentAnnotatorLearner(BatchSegmenterLearner batchSegmenterLearner, SpanFeatureExtractor spanFeatureExtractor) {
        this(batchSegmenterLearner, spanFeatureExtractor, DEFAULT_MAX_WINDOW_SIZE);
    }

    /**
     * @param batchSegmenterLearner learner to train
     * @param spanFeatureExtractor  extractor applied to candidate sub-spans
     * @param i                     maximum candidate segment length (window size)
     */
    public SegmentAnnotatorLearner(BatchSegmenterLearner batchSegmenterLearner, SpanFeatureExtractor spanFeatureExtractor, int i) {
        this.annotationType = "_prediction";
        this.displayDatasetBeforeLearning = false;
        this.compressDataset = true;
        this.learner = batchSegmenterLearner;
        this.fe = spanFeatureExtractor;
        this.maxWindowSize = i;
        // NOTE: reset() is overridable; a subclass override runs before the
        // subclass's own field initializers have executed.
        reset();
    }

    /**
     * Discards all collected training data by installing a fresh dataset,
     * configured with the current {@code compressDataset} setting.
     */
    @Override
    public void reset() {
        this.dataset = new SegmentDataset();
        this.dataset.setDataCompression(this.compressDataset);
    }

    public boolean getDisplayDatasetBeforeLearning() {
        return this.displayDatasetBeforeLearning;
    }

    /** @param z whether {@link #getAnnotator()} should pop up a dataset viewer before training */
    public void setDisplayDatasetBeforeLearning(boolean z) {
        this.displayDatasetBeforeLearning = z;
    }

    public String getDisplayDatasetBeforeLearningHelp() {
        return "Pop up an interactive viewer of the sequential dataset before learning.";
    }

    public boolean getCompressDataset() {
        return this.compressDataset;
    }

    /**
     * Records the compression preference. The value is only handed to a dataset
     * in {@link #reset()}; the dataset that currently exists is not modified here.
     *
     * @param z whether datasets created by {@link #reset()} should be compressed
     */
    public void setCompressDataset(boolean z) {
        this.compressDataset = z;
    }

    public String getCompressDatasetHelp() {
        return "If set, try and compress the data. This leads to longer loading and <br>learning times but less memory usage.";
    }

    /** @return always 1 */
    public int getHistorySize() {
        return 1;
    }

    public BatchSegmenterLearner getSemiMarkovLearner() {
        return this.learner;
    }

    public void setSemiMarkovLearner(BatchSegmenterLearner batchSegmenterLearner) {
        this.learner = batchSegmenterLearner;
    }

    public String getSemiMarkovLearnerHelp() {
        return "Set the SemiMarkovLearner to be used";
    }

    @Override
    public SpanFeatureExtractor getSpanFeatureExtractor() {
        return this.fe;
    }

    @Override
    public void setSpanFeatureExtractor(SpanFeatureExtractor spanFeatureExtractor) {
        this.fe = spanFeatureExtractor;
    }

    @Override
    public void setAnnotationType(String str) {
        this.annotationType = str;
    }

    @Override
    public String getAnnotationType() {
        return this.annotationType;
    }

    /** @param looper source of the documents returned by {@link #nextQuery()} */
    @Override
    public void setDocumentPool(Span.Looper looper) {
        this.documentLooper = looper;
    }

    /**
     * @return {@code true} if the document pool has another document;
     *         {@code false} if it is exhausted or no pool has been set
     */
    @Override
    public boolean hasNextQuery() {
        return this.documentLooper != null && this.documentLooper.hasNext();
    }

    /**
     * @return the next document span from the pool
     * @throws IllegalStateException if no document pool has been set
     */
    @Override
    public Span nextQuery() {
        if (this.documentLooper == null) {
            throw new IllegalStateException("no document pool set; call setDocumentPool before nextQuery");
        }
        return this.documentLooper.nextSpan();
    }

    /**
     * Adds one answered document to the training data: every sub-span of length
     * {@code 1..maxWindowSize} becomes a candidate, labelled with the class name
     * the example reports for it.
     *
     * @param annotationExample the answered document
     */
    @Override
    public void setAnswer(AnnotationExample annotationExample) {
        Span document = annotationExample.getDocumentSpan();
        int docSize = document.size();
        MutableCandidateSegmentGroup candidates = new MutableCandidateSegmentGroup(this.maxWindowSize, docSize);
        for (int start = 0; start < docSize; start++) {
            // Candidate lengths are capped by the window size and by the end of the document.
            for (int len = 1; len <= this.maxWindowSize && start + len <= docSize; len++) {
                Span candidate = document.subSpan(start, len);
                candidates.setSubsequence(start, start + len, this.fe.extractInstance(annotationExample.getLabels(), candidate), new ClassLabel(annotationExample.getClassName(candidate)));
            }
        }
        this.dataset.addCandidateSegmentGroup(candidates);
    }

    /**
     * Trains the learner on the collected dataset, using the binary example
     * schema, and wraps the resulting segmenter in a {@link SegmentAnnotator}.
     * If {@code displayDatasetBeforeLearning} is set, a viewer frame for the
     * dataset is opened first.
     *
     * @return an annotator backed by the newly trained segmenter
     */
    @Override
    public Annotator getAnnotator() {
        this.learner.setSchema(ExampleSchema.BINARY_EXAMPLE_SCHEMA);
        if (this.displayDatasetBeforeLearning) {
            new ViewerFrame("Sequential Dataset", this.dataset.toGUI());
        }
        Segmenter trained = this.learner.batchTrain(this.dataset);
        if (DEBUG) {
            log.debug("learned segmenter: " + trained);
        }
        return new SegmentAnnotator(trained, this.fe, this.maxWindowSize, this.annotationType);
    }
}
