package com.snda.in.svpa.analysis;

import com.snda.in.svpa.lingpipe.Chunk;
import com.snda.in.svpa.lingpipe.ChunkFactory;
import com.snda.in.svpa.lingpipe.Chunking;
import com.snda.in.svpa.lingpipe.ChunkingImpl;
import com.snda.in.svpa.nlp.ner.ASRLabelChunker;
import com.snda.in.svpa.nlp.ner.ChunkingUtil;
import com.snda.in.svpa.nlp.ner.CompositeChunker;
import com.snda.in.svpa.nlp.ner.DictionaryBasedTagger;
import com.snda.in.svpa.nlp.ner.NamedEntityType;
import com.snda.in.svpa.request.UserContext;
import com.snda.in.svpa.request.VoiceRequest;
import com.snda.in.svpa.util.StringUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
/**
 * {@link RequestAnalyzer} implementation that cleans up the text of a
 * {@link VoiceRequest} ({@link #doNormalization}) and expands it into one
 * {@link AnalyzedRequest} per chunking returned by the composite chunker
 * ({@link #doAnalysis}).
 */
public class StandardAnalyzer implements RequestAnalyzer {
    static final String TAG = "StandardAnalyzer";

    /** Words stripped from the head of the input text by {@link #doNormalization}. */
    static final String[] polite = {"啊", "呢", "吧", "哦", "啦", "现在", "，", "我要", "我想", "请帮我", "帮我", "麻烦", "请", "赶快", "帮忙"};

    /** Suffixes that make {@link #doNormalization} drop the last character of the text. */
    static final String[] postPolite = {"啊", "呢", "吧", "哦", "啦", "。你", "，你", "。我", "，我"};

    /** Chunk texts that are never recorded as keywords by {@link #doAnalysis}. */
    static final List<String> stopWords = Arrays.asList("把", "给", "一个", "跟", "让", "一下", "的", "，");

    /** Matches the {@code <P_}, {@code <N_}, {@code <O_} (either case) openers and the closing {@code >}. */
    private static final Pattern ASR_LABEL_MARKUP = Pattern.compile("(<[PpNnOo]_)|>");

    /** First characters rewritten to "搜" — presumably ASR mis-hearings of it; confirm with the ASR team. */
    private static final String FIRST_CHARS_REWRITTEN_TO_SOU = "收首受手售瘦寿守";

    /** First characters rewritten to "打" — presumably ASR mis-hearings of it; confirm with the ASR team. */
    private static final String FIRST_CHARS_REWRITTEN_TO_DA = "大搭达答到";

    CompositeChunker compositeChunker = new CompositeChunker();
    DictionaryBasedTagger dictTagger = new DictionaryBasedTagger();
    ASRLabelChunker asrChunker = new ASRLabelChunker();

    /**
     * Manual smoke test: analyzes one hard-coded labelled request and prints the
     * tagged text of every resulting {@link AnalyzedRequest}.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        VoiceRequest request = new VoiceRequest("打电话给<P_刘升平>和<n_98776>,还有<O_联想>客服", new UserContext("liusp", "BEIJING"));
        for (AnalyzedRequest analyzed : new StandardAnalyzer().doAnalysis(request)) {
            System.out.println(analyzed.getTextAfterAnalysis());
        }
    }

    /**
     * Builds the candidate chunkings for the request text and converts every
     * chunking produced by the composite chunker into an {@link AnalyzedRequest}.
     *
     * <p>Candidates are, in order: the ASR-label chunking (only when the ASR
     * chunker returns one, in which case the label markup is also removed from
     * the text), the dictionary tagger's chunking, and a single chunk of type
     * {@code NamedEntityType.WORD} spanning the whole text.
     *
     * @param voiceRequest request whose input text is analyzed; it is not modified here
     * @return one entry per chunking returned by the composite chunker; empty when
     *         the input text is null or empty
     */
    @Override
    public List<AnalyzedRequest> doAnalysis(VoiceRequest voiceRequest) {
        List<AnalyzedRequest> results = new ArrayList<AnalyzedRequest>();
        String inputText = voiceRequest.getInputText();
        if (StringUtil.isNullorEmpty(inputText)) {
            return results;
        }

        ArrayList<Chunking> candidates = new ArrayList<Chunking>();
        Chunking asrChunking = this.asrChunker.chunk(inputText);
        if (asrChunking != null) {
            // The remaining chunkers work on the text with the ASR label markup removed.
            inputText = ASR_LABEL_MARKUP.matcher(inputText).replaceAll("");
            candidates.add(asrChunking);
        }
        candidates.add(this.dictTagger.chunk(inputText));

        ChunkingImpl wholeText = new ChunkingImpl(inputText);
        wholeText.add(ChunkFactory.createChunk(0, inputText.length(), NamedEntityType.WORD, 1.0d));
        candidates.add(wholeText);

        for (Chunking chunking : this.compositeChunker.doChunk(candidates, inputText)) {
            results.add(toAnalyzedRequest(voiceRequest, chunking));
        }
        return results;
    }

    /** Converts one chunking into an AnalyzedRequest carrying its tagged text, keywords and NER tags. */
    private AnalyzedRequest toAnalyzedRequest(VoiceRequest voiceRequest, Chunking chunking) {
        AnalyzedRequest analyzed = new AnalyzedRequest();
        analyzed.setOriginalRequest(voiceRequest);
        // Locale.ROOT keeps lower-casing independent of the device's default locale.
        analyzed.setTextAfterAnalysis(ChunkingUtil.getTaggedText(chunking).toLowerCase(Locale.ROOT));

        CharSequence text = chunking.charSequence();
        for (Chunk chunk : chunking.chunkSet()) {
            String chunkText = text.subSequence(chunk.start(), chunk.end()).toString();
            if (!stopWords.contains(chunkText)) {
                analyzed.getKeywords().add(chunkText.toLowerCase(Locale.ROOT));
            }
            String type = chunk.type();
            boolean plainWord = type.equals(NamedEntityType.WORD) || type.equals(NamedEntityType.KEYWORD);
            if (!plainWord) {
                analyzed.getNERTags().add(type);
            }
        }
        return analyzed;
    }

    /**
     * Normalizes the request's input text in place: strips leading {@link #polite}
     * words and trailing fillers, unifies "。" to "，", applies
     * {@code DigitLetterNor.letterNormalize}, removes spaces, trims trailing comma
     * fragments and rewrites a few leading characters/phrases. A null or empty
     * input text is left untouched.
     *
     * @param voiceRequest request whose input text is replaced by the normalized text
     */
    @Override
    public void doNormalization(VoiceRequest voiceRequest) {
        String inputText = voiceRequest.getInputText();
        if (StringUtil.isNullorEmpty(inputText)) {
            return;
        }

        String text = removeHeadWords(polite, inputText);
        if (text.endsWith("。")) {
            text = text.substring(0, text.length() - 1);
        }
        for (String suffix : postPolite) {
            if (text.endsWith(suffix)) {
                // Deliberately kept from the original: only the final character is dropped,
                // even for two-character suffixes. The punctuation left behind is expected
                // to be removed by the trailing-comma cleanup below (assuming
                // letterNormalize leaves it in place).
                text = text.substring(0, text.length() - 1);
            }
        }

        text = DigitLetterNor.letterNormalize(text.replace("。", "，")).replace(" ", "");
        if (text.endsWith("，")) {
            text = text.substring(0, text.length() - 1);
        }
        // Drop a trailing comma together with the single character that follows it.
        int lastComma = text.lastIndexOf("，");
        if (lastComma != -1 && lastComma == text.length() - 2) {
            text = text.substring(0, text.length() - 2);
        }

        if (!StringUtil.isNullorEmpty(text)) {
            if (FIRST_CHARS_REWRITTEN_TO_SOU.indexOf(text.charAt(0)) >= 0) {
                text = "搜" + text.substring(1);
            }
            if (FIRST_CHARS_REWRITTEN_TO_DA.indexOf(text.charAt(0)) >= 0) {
                text = "打" + text.substring(1);
            }
            if (text.startsWith("其实我")) {
                text = "提醒我" + text.substring(3);
            }
        }
        voiceRequest.setInputText(text);
    }

    /**
     * Repeatedly strips the given words from the head of {@code str}.
     *
     * <p>Each pass walks {@code strArr} in order and removes every entry that the
     * current text starts with; passes repeat until one removes nothing. This
     * yields the same result as the former recursive implementation without
     * re-processing the same suffix many times. Null or empty entries are skipped
     * (an empty entry used to recurse without bound).
     *
     * @param strArr words to strip; may be null, in which case {@code str} is returned as is
     * @param str    text to clean; may be null, in which case null is returned
     * @return {@code str} with all leading occurrences of the words removed
     */
    public String removeHeadWords(String[] strArr, String str) {
        if (strArr == null || str == null) {
            return str;
        }
        String remaining = str;
        boolean strippedInPass;
        do {
            strippedInPass = false;
            for (String word : strArr) {
                if (word == null || word.isEmpty()) {
                    continue;
                }
                if (remaining.startsWith(word)) {
                    remaining = remaining.substring(word.length());
                    strippedInPass = true;
                }
            }
        } while (strippedInPass);
        return remaining;
    }
}
