package com.snda.in.svpa.corpus;

import com.snda.in.svpa.analysis.DigitLetterNor;
import com.snda.in.svpa.util.StringUtil;
import com.snda.in.svpa.util.TextFileReaderMob;
import com.snda.in.svpa.util.TextFileWriterMob;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.antlr.runtime.debug.Profiler;

/* loaded from: classes.dex */
/**
 * Cleans raw application names read from a text corpus.
 *
 * <p>{@link #cleanName(String)} strips punctuation, version markers, bracketed
 * annotations and configured suffixes from a raw name and rejects names that
 * are too short, Latin-only, or match a keyword list.
 * {@link #doFilter(String, String, String)} applies that cleaning to a
 * two-column input file and writes the de-duplicated results.
 */
public class AppNameDataClean {
    /** Substrings that disqualify a name wherever they occur in it. */
    private static final String[] BLOCKED_SUBSTRINGS = {"壁纸", "桌面主题", "壁紙", "主题桌面"};

    /** Names that are rejected when the cleaned name equals one of them exactly. */
    private static final Set<String> BLOCKED_NAMES = new HashSet<String>(Arrays.asList(
            "电话", "短信", "音乐", "应用", "网站", "电影", "网络", "一下", "启动", "游戏", "中国", "信息", "屏幕亮度"));

    /** Punctuation removed outright (first character class of the cleaning chain). */
    private static final Pattern PUNCTUATION = Pattern.compile("[,，.。_%\\$\\*#@\\^!;:/\\|\\[\\]&]");

    /** Quotation marks, brackets and similar symbols removed outright (second character class). */
    private static final Pattern SYMBOLS = Pattern.compile("[“”！？、・•〈〉『』「」《》·：～]");

    /** Version marker with at least one dotted component, e.g. {@code v1.2}. */
    private static final Pattern DOTTED_VERSION = Pattern.compile("(v|ver)\\s*(\\d+)(\\.(\\d+))+");

    /** Any remaining version marker, e.g. {@code v2} or {@code ver 10}. */
    private static final Pattern PLAIN_VERSION = Pattern.compile("(v|ver)+\\s*(\\d+)(\\.?(\\d+))*");

    /** A non-empty run enclosed in ASCII parentheses. */
    private static final Pattern PARENTHESIZED = Pattern.compile("\\([^\\)]+\\)");

    /** A non-empty run enclosed in 【 】 brackets. */
    private static final Pattern BRACKETED = Pattern.compile("【[^】]+】");

    /** A run of CJK ideographs (U+4E00..U+9FA5), ASCII digits and ASCII letters. */
    private static final Pattern NORMAL_NAME = Pattern.compile("[\\u4e00-\\u9fa50-9a-zA-Z]+");

    // NOTE(review): inside the character class " -_" is a RANGE from space (0x20) to
    // underscore (0x5F), so this also accepts digits and most ASCII punctuation, not just
    // letters, space, hyphen, underscore and dot. Left unchanged because cleanName's
    // current filtering (e.g. rejecting all-digit names) depends on it - confirm intent.
    private static final Pattern ENGLISH = Pattern.compile("^[A-Za-z -_\\.]+$");

    /** Matches lines ending in the character 版. */
    private static final Pattern VERSION_SUFFIX = Pattern.compile("版$");

    /** Suffixes stripped by {@link #removeXXVer(String)}, loaded once per instance. */
    List<String> terms = TextFileReaderMob.readLines("data/temp/XX版.txt");

    /**
     * Returns whether any character of {@code str} satisfies {@link #isJapCharacter(char)}.
     *
     * @param str the text to scan; must not be null
     */
    public static boolean hasJapChar(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (isJapCharacter(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether {@code str} contains any of the blocked substrings. */
    private boolean hasKeywords(String str) {
        for (String blocked : BLOCKED_SUBSTRINGS) {
            if (str.contains(blocked)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether any character of {@code str} satisfies {@link #isKoreanCharacter(char)}.
     *
     * @param str the text to scan; must not be null
     */
    public static boolean hasKorenChar(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (isKoreanCharacter(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Heuristic test for a Japanese character: the character lies outside
     * U+4E00..U+9FA5 and its Shift_JIS encoding has an even number of bytes.
     *
     * <p>The character is encoded directly. The previous implementation first passed it
     * through the platform default charset and re-decoded the bytes as UTF-8, which
     * corrupted the character on any platform whose default charset is not UTF-8.
     *
     * <p>NOTE(review): any non-ideograph character that Shift_JIS encodes in two bytes is
     * accepted, which may include symbols that are not specifically Japanese.
     *
     * @param c the character to test
     * @return {@code true} if the heuristic matches; {@code false} otherwise, including
     *         when the Shift_JIS charset is unavailable
     */
    public static boolean isJapCharacter(char c) {
        // Ideographs in U+4E00..U+9FA5 are never reported as Japanese.
        if (c >= 0x4E00 && c <= 0x9FA5) {
            return false;
        }
        try {
            return String.valueOf(c).getBytes("SHIFT_JIS").length % 2 == 0;
        } catch (UnsupportedEncodingException e) {
            // Without the charset the heuristic cannot run; treat as "not Japanese".
            return false;
        }
    }

    /** Returns whether {@code str} is exactly one of the blocked names. */
    private boolean isKeywords(String str) {
        return BLOCKED_NAMES.contains(str);
    }

    /** Returns whether {@code c} lies in U+AC00..U+D7AF. */
    public static boolean isKoreanCharacter(char c) {
        return c >= 0xAC00 && c <= 0xD7AF;
    }

    /** Runs {@link #doFilter(String, String, String)} on fixed paths under {@code data/temp}. */
    public static void main(String[] strArr) {
        new AppNameDataClean().doFilter("data/temp/speech_training_set.txt", "data/temp/app_name.txt", "data/temp/app_name_pak.txt");
    }

    /**
     * Replaces every Unicode space character with an ASCII space.
     *
     * <p>Despite the name nothing is removed: spaces are normalized so that
     * {@link #extractNormalName(String)} later stops at them.
     */
    private String removeSpace(String str) {
        char[] chars = str.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            if (Character.isSpaceChar(chars[i])) {
                chars[i] = ' ';
            }
        }
        return new String(chars);
    }

    /**
     * Strips the first configured suffix from {@link #terms} that {@code str} ends with.
     *
     * @param str the name to trim; may be null
     * @return {@code null} for null or empty input, otherwise {@code str} with at most one
     *         suffix removed
     */
    private String removeXXVer(String str) {
        if (str == null || StringUtil.isNullorEmpty(str)) {
            return null;
        }
        if (this.terms == null) {
            return str;
        }
        for (String suffix : this.terms) {
            // A blank term matches every string via endsWith("") and would stop the
            // search before any real suffix is tried, so skip it.
            if (suffix == null || suffix.length() == 0) {
                continue;
            }
            if (str.endsWith(suffix)) {
                return str.substring(0, str.length() - suffix.length());
            }
        }
        return str;
    }

    /**
     * Prints every line of the given file whose length is exactly two characters.
     *
     * @param str path of the file to read
     */
    public void checkLength(String str) {
        for (String line : TextFileReaderMob.readLines(str)) {
            if (line.length() == 2) {
                System.out.println(line);
            }
        }
    }

    /**
     * Normalizes a raw application name and decides whether to keep it.
     *
     * <p>Steps, in order: letter normalization, punctuation and symbol removal,
     * full-width parenthesis conversion, space normalization, lower-casing,
     * version-marker removal, removal of parenthesized and 【】-bracketed text,
     * extraction of the first CJK/alphanumeric run, and suffix stripping.
     *
     * @param str the raw name; may be null
     * @return the cleaned name, or {@code null} if the input is null or the result has
     *         at most one character, matches {@link #isEnglish(String)}, contains
     *         Korean or Japanese characters, or matches one of the keyword lists
     */
    public String cleanName(String str) {
        if (str == null) {
            return null;
        }
        String name = DigitLetterNor.letterNormalize(str);
        name = PUNCTUATION.matcher(name).replaceAll("");
        name = SYMBOLS.matcher(name).replaceAll("");
        name = name.replace("\\", "")
                .replace("-", "")
                .replace("\"", "")
                .replace("—", "")
                .replace("（", "(")
                .replace("）", ")");
        // Locale.ROOT keeps case folding independent of the default locale, so the
        // ASCII-only patterns below behave the same everywhere.
        name = removeSpace(name).toLowerCase(Locale.ROOT);
        name = DOTTED_VERSION.matcher(name).replaceAll("");
        name = PLAIN_VERSION.matcher(name).replaceAll("");
        name = PARENTHESIZED.matcher(name).replaceAll("");
        name = BRACKETED.matcher(name).replaceAll("");

        String cleaned = removeXXVer(extractNormalName(name));
        if (cleaned == null || cleaned.length() <= 1) {
            return null;
        }
        if (isEnglish(cleaned) || hasKorenChar(cleaned) || hasJapChar(cleaned)) {
            return null;
        }
        if (isKeywords(cleaned) || hasKeywords(cleaned)) {
            return null;
        }
        return cleaned;
    }

    /**
     * Cleans the first column of a two-column file and writes the distinct results.
     *
     * <p>Each input line is split on {@code Profiler.DATA_SEP}; lines that do not
     * yield exactly two fields, and lines whose first field is rejected by
     * {@link #cleanName(String)}, are skipped.
     *
     * @param str  path of the input file
     * @param str2 output path for the distinct cleaned names
     * @param str3 output path for the distinct "cleaned name + separator + second field" lines
     */
    public void doFilter(String str, String str2, String str3) {
        Set<String> names = new HashSet<String>();
        Set<String> namesWithSecondField = new HashSet<String>();
        int lineIndex = 0;
        for (String line : TextFileReaderMob.readLines(str)) {
            System.out.println("Processing line: " + lineIndex);
            lineIndex++;
            String[] fields = line.split(Profiler.DATA_SEP);
            if (fields.length != 2) {
                continue;
            }
            String cleaned = cleanName(fields[0]);
            if (cleaned != null) {
                names.add(cleaned);
                namesWithSecondField.add(cleaned + Profiler.DATA_SEP + fields[1]);
            }
        }
        TextFileWriterMob.writeLines(str3, namesWithSecondField);
        TextFileWriterMob.writeLines(str2, names);
    }

    /**
     * Returns the first run of CJK ideographs, ASCII digits and ASCII letters in {@code str}.
     *
     * @param str the text to search; must not be null
     * @return the first matching run, or {@code null} if there is none
     */
    public String extractNormalName(String str) {
        Matcher matcher = NORMAL_NAME.matcher(str);
        return matcher.find() ? matcher.group() : null;
    }

    /**
     * Returns whether {@code str} consists solely of characters accepted by the
     * {@code ENGLISH} pattern (see the review note on that constant).
     *
     * @param str the text to test; must not be null
     */
    public boolean isEnglish(String str) {
        return ENGLISH.matcher(str).find();
    }

    /**
     * Prints, for each distinct three-character ending found on lines that end in 版
     * and are at least four characters long, the ending and the first line showing it.
     *
     * @param str path of the file to read
     */
    public void statSuffix(String str) {
        Set<String> seen = new HashSet<String>();
        for (String line : TextFileReaderMob.readLines(str)) {
            if (line.length() < 4 || !VERSION_SUFFIX.matcher(line).find()) {
                continue;
            }
            String suffix = line.substring(line.length() - 3);
            // add() returns false for an ending that was already reported.
            if (seen.add(suffix)) {
                System.out.println(suffix + ":" + line);
            }
        }
    }

    /** Prints the result of {@link #extractNormalName(String)} for a fixed set of sample names. */
    public void testCleanName2() {
        String[] samples = {"剪贴板助理 Clipboard Contents", "宽带山(KDS)", "快速重启专业版 Fast_Reboot_Pro【木蚂蚁汉化】", "悦T", "飞飞Q信", "360手机卫士", "钓鱼高手2完美版 Fishin2 GO", "钛备份捐赠完全版 Titanium Backup 【木蚂蚁汉化】", "呀！彩票", "宠物犬系列--泰迪"};
        for (String sample : samples) {
            System.out.println(extractNormalName(sample));
        }
    }
}
