/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.tokenizer.pipe;

import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.tokenizer.pipe.Pipe;
import java.util.List;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Pipe that cuts unlabeled text segments around regular-expression matches and
 * tags every matched piece with a fixed label.
 *
 * <p>Elements of the incoming list that already carry a label are left alone.
 * Every element whose label is {@code null} is removed from the list and
 * replaced, in order, by new {@link Word}s: unmatched stretches keep a
 * {@code null} label, matched stretches receive {@link #label}.
 */
public class RegexRecognizePipe
implements Pipe<List<IWord>, List<IWord>> {
    /** Pattern whose matches are cut out of unlabeled segments. */
    protected Pattern pattern;
    /** Label assigned to every word created from a match of {@link #pattern}. */
    protected String label;

    /**
     * Creates a pipe that labels matches of {@code pattern} with {@code label}.
     *
     * @param pattern the regular expression to search for in unlabeled segments
     * @param label   the label given to each matched piece
     */
    public RegexRecognizePipe(Pattern pattern, String label) {
        this.pattern = pattern;
        this.label = label;
    }

    /**
     * Splits every unlabeled element of {@code input} around the matches of
     * {@link #pattern}.
     *
     * <p>The list is modified in place through its {@link ListIterator}, so it
     * must support {@code remove} and {@code add} on that iterator. Empty
     * pieces are never emitted: a match that starts right where the previous
     * piece ended produces no empty unlabeled word, and zero-length matches
     * are ignored altogether.
     *
     * @param input the words/segments to process; mutated by this call
     * @return the same {@code input} instance after modification
     */
    @Override
    public List<IWord> flow(List<IWord> input) {
        ListIterator<IWord> listIterator = input.listIterator();
        while (listIterator.hasNext()) {
            IWord wordOrSentence = listIterator.next();
            if (wordOrSentence.getLabel() != null) {
                // Already labeled: keep the element untouched.
                continue;
            }
            // Unlabeled: drop the element and re-insert its pieces at the same position.
            listIterator.remove();
            String sentence = wordOrSentence.getValue();
            Matcher matcher = this.pattern.matcher(sentence);
            int begin = 0;
            while (matcher.find()) {
                int matchStart = matcher.start();
                int matchEnd = matcher.end();
                if (matchStart == matchEnd) {
                    // A zero-length match carries no text; skipping it (without moving
                    // begin) keeps the surrounding unmatched text in one piece.
                    continue;
                }
                if (matchStart > begin) {
                    // Unmatched text between the previous piece and this match.
                    listIterator.add(new Word(sentence.substring(begin, matchStart), null));
                }
                listIterator.add(new Word(matcher.group(), this.label));
                begin = matchEnd;
            }
            if (begin < sentence.length()) {
                // Unmatched remainder after the last match (or the whole text if nothing matched).
                listIterator.add(new Word(sentence.substring(begin), null));
            }
        }
        return input;
    }
}

