/*
 * Decompiled with CFR 0.152.
 */
package org.tip.flatdb4geonames.model.index;

import fr.devinsy.util.StringSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Locale;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tip.flatdb4geonames.model.GeoNamesLine;
import org.tip.flatdb4geonames.model.index.WordTriage;
import org.tip.flatdb4geonames.util.Chronometer;
import org.tip.flatdb4geonames.util.Shrinker;
import org.tip.flatdb4geonames.util.StringFileSorter;

public class IndexOfWordSeeksBuilder {
    /** Class logger, used for the debug progress/memory reports and for the error raised when the intermediate file cannot be deleted. */
    private static Logger logger = LoggerFactory.getLogger(IndexOfWordSeeksBuilder.class);
    /** Charset name passed to the readers and writers that this class opens on the index files. */
    public static final String DEFAULT_CHARSET_NAME = "UTF-8";
    /** Name of the intermediate file created by buildIndex0 in the output directory; buildIndex deletes it after buildIndex12 has run. */
    public static final String INDEX0_FILENAME = "word_seeks.index";
    /** Name of the file to which buildIndex12 writes the second column (the seeks) of each intermediate line. */
    public static final String INDEX1_FILENAME = "seeks.index";
    /** Name of the file to which buildIndex12 writes padded "word TAB offset" lines, the offset being the running position of the matching line in the seeks file. */
    public static final String INDEX2_FILENAME = "word_seek.index";
    /** Same value as the split limit that the two-argument buildIndex passes to the four-argument one. */
    public static final int DEFAULT_SPLIT_LINE = 2000000;
    /** Same value as the byte-length limit above which nameToWords rejects a word. */
    public static final int WORD_MAX_LENGTH = 30;
    /** Same value as the padding length that the two-argument buildIndex passes to the four-argument one. */
    public static final int DEFAULT_PADDING_LENGTH = 40;
    /**
     * Separator character list. nameToRawWords and nameToWords split names on a regex character class
     * containing exactly this character sequence.
     *
     * NOTE(review): inside a regex character class the sequence "*-_" is a range (U+002A to U+005F), so
     * ASCII digits, ',', '&lt;', '=', '&gt;', '^' and the remaining upper-case letters are separators too.
     * Confirm whether a literal '-' was intended before changing it: the tokenisation determines the
     * content of the generated index files.
     */
    public static final String SEPARATOR_CHARACTERS = " \u30fb+*-_()?'\u2019\u2018`\u201d|\\[\\]!\u00ab\u00bb%{}\\:;/\\.#@&\"";

    /**
     * Builds the word index files with the default settings.
     *
     * Delegates to the four-argument overload with a split limit of 2000000 and a padding length of 40.
     *
     * @param geonamesFile    the GeoNames source file to index
     * @param outputDirectory the existing directory receiving the index files
     * @throws IOException if the delegate fails while reading or writing files
     * @throws IllegalArgumentException if the delegate rejects one of the parameters
     */
    public static void buildIndex(File geonamesFile, File outputDirectory) throws IOException {
        final int splitLimit = 2000000;
        final int paddingLength = 40;
        IndexOfWordSeeksBuilder.buildIndex(geonamesFile, outputDirectory, splitLimit, paddingLength);
    }

    /**
     * Builds the word index files: first the intermediate file (buildIndex0), then the two final
     * files derived from it (buildIndex12); the intermediate file is deleted afterwards.
     *
     * @param geonamesFile    the GeoNames source file to index, must not be null
     * @param outputDirectory the directory receiving the index files, must be an existing directory
     * @param splitLimit      split limit handed to both build steps, must be at least 100000
     * @param paddingLength   padding length handed to buildIndex12
     * @throws IllegalArgumentException if a file parameter is null, the output directory is not a
     *                                  directory, or the split limit is below 100000
     * @throws IOException if one of the build steps fails
     */
    public static void buildIndex(File geonamesFile, File outputDirectory, int splitLimit, int paddingLength) throws IOException {
        if (geonamesFile == null || outputDirectory == null) {
            throw new IllegalArgumentException("Null parameter.");
        }
        if (!outputDirectory.isDirectory()) {
            throw new IllegalArgumentException("Output directory is not a directory.");
        }
        if (splitLimit < 100000) {
            throw new IllegalArgumentException("Invalid split value [" + splitLimit + "]");
        }

        logger.debug("build index start...");
        String workingDirectory = new File(".").getAbsolutePath();
        logger.debug("currentDirectory={}", workingDirectory);
        Runtime runtime = Runtime.getRuntime();
        long maxMemoryMo = runtime.maxMemory() / 1024L / 1024L;
        logger.debug("Max   memory= {} Mo", maxMemoryMo);
        long totalMemoryMo = runtime.totalMemory() / 1024L / 1024L;
        logger.debug("Total memory= {} Mo", totalMemoryMo);

        File intermediateFile = IndexOfWordSeeksBuilder.buildIndex0(geonamesFile, outputDirectory, splitLimit);
        IndexOfWordSeeksBuilder.buildIndex12(intermediateFile, outputDirectory, splitLimit, paddingLength);

        // The intermediate file is only needed to derive the two final index files.
        if (!intermediateFile.delete()) {
            logger.error("Failed to delete file [{}]", intermediateFile.getAbsolutePath());
        }
        logger.debug("build index done.");
    }

    /**
     * Builds the intermediate index file in the output directory.
     *
     * Three steps run on the same file: the GeoNames file is converted to "word TAB seek" lines
     * (geonamesToWordSeek), the file is handed to StringFileSorter.sortBigStringFile, then to
     * Shrinker.shrinkStringFile. The duration of each step is logged at debug level.
     * NOTE(review): the sorter and the shrinker are presumably in-place operations producing one
     * "word TAB seeks" line per word (that is what buildIndex12 reads) -- confirm against those classes.
     *
     * @param geonamesFile    the GeoNames source file, must not be null
     * @param outputDirectory the directory receiving the file, must be an existing directory
     * @param splitLimit      split limit passed to the sorter, must be at least 100000
     * @return the intermediate file (INDEX0_FILENAME inside the output directory)
     * @throws IllegalArgumentException if a file parameter is null, the output directory is not a
     *                                  directory, or the split limit is below 100000
     * @throws IOException if one of the steps fails
     */
    public static File buildIndex0(File geonamesFile, File outputDirectory, int splitLimit) throws IOException {
        if (geonamesFile == null || outputDirectory == null) {
            throw new IllegalArgumentException("Null parameter.");
        }
        if (!outputDirectory.isDirectory()) {
            throw new IllegalArgumentException("Output directory is not a directory.");
        }
        if (splitLimit < 100000) {
            throw new IllegalArgumentException("Invalid split value [" + splitLimit + "]");
        }
        logger.debug("build index0 file start...");
        // Measures the whole method; chronoStep below measures each individual step.
        Chronometer chrono = new Chronometer();

        logger.debug("geonames file to word seek file start...");
        File index0File = new File(outputDirectory.getAbsoluteFile() + File.separator + INDEX0_FILENAME);
        Chronometer chronoStep = new Chronometer();
        IndexOfWordSeeksBuilder.geonamesToWordSeek(geonamesFile, index0File);
        logger.debug("geonames file to word seek file done. {}", chronoStep.stop().interval());
        System.gc();
        logger.debug(" memory={} Mo", Runtime.getRuntime().totalMemory() / 1024L / 1024L);

        logger.debug("sort big string file start...");
        chronoStep.reset();
        StringFileSorter.sortBigStringFile(index0File, splitLimit);
        logger.debug("sort big string file done. {}", chronoStep.stop().interval());
        System.gc();
        logger.debug(" memory={} Mo", Runtime.getRuntime().totalMemory() / 1024L / 1024L);

        logger.debug("shrink string file start...");
        chronoStep.reset();
        Shrinker.shrinkStringFile(index0File);
        // Report the step timer here (it was reset just above), like the two previous steps do.
        logger.debug("shrink string file done. {}", chronoStep.stop().interval());
        System.gc();
        logger.debug(" memory={} Mo", Runtime.getRuntime().totalMemory() / 1024L / 1024L);

        // The overall duration belongs to the closing message.
        logger.debug("build index0 file done. {}", chrono.stop().interval());
        return index0File;
    }

    /**
     * Splits the intermediate "word TAB seeks" file into the two final index files.
     *
     * For every source line, the seeks column is written as one line of INDEX1_FILENAME, and a line
     * "word TAB offset" padded with spaces up to {@code paddingLength} bytes is written to
     * INDEX2_FILENAME, where offset is the byte position of the matching line in INDEX1_FILENAME.
     *
     * Byte lengths are measured with DEFAULT_CHARSET_NAME (the charset the files are written with)
     * and lines are terminated by a single '\n', so that the offsets match the bytes actually written
     * whatever the platform default charset and line separator are.
     *
     * A "word TAB offset" entry longer than {@code paddingLength} bytes is written without padding.
     *
     * @param source          the intermediate file, one "word TAB seeks" entry per line
     * @param outputDirectory the directory receiving the two index files
     * @param splitLimit      not used by this step
     * @param paddingLength   byte width to which each "word TAB offset" entry is padded
     * @throws IOException if reading or writing fails, or if a source line has no TAB-separated
     *                     second column
     */
    public static void buildIndex12(File source, File outputDirectory, int splitLimit, int paddingLength) throws IOException {
        logger.debug("build index12 files start...");
        Chronometer chronoStep = new Chronometer();
        long currentSeekValue = 0L;
        File index1File = new File(outputDirectory.getAbsoluteFile() + File.separator + INDEX1_FILENAME);
        File index2File = new File(outputDirectory.getAbsoluteFile() + File.separator + INDEX2_FILENAME);
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(source), DEFAULT_CHARSET_NAME));
                PrintWriter out1 = new PrintWriter(index1File, DEFAULT_CHARSET_NAME);
                PrintWriter out2 = new PrintWriter(index2File, DEFAULT_CHARSET_NAME)) {
            int lineCount = 0;
            while (true) {
                // Progress report before the first line and then every million lines.
                if (lineCount % 1000000 == 0) {
                    System.gc();
                    logger.debug("\tlineCount=" + lineCount + "\tcurrentSeekValue=" + currentSeekValue + " \tmemory=" + Runtime.getRuntime().totalMemory() / 1024L / 1024L + " Mo \t" + (int)(chronoStep.stop().interval() / 1000L) + " s");
                    chronoStep.reset();
                }
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                ++lineCount;
                String[] tokens = line.split("\t");
                if (tokens.length < 2) {
                    throw new IOException("Malformed line " + lineCount + " in [" + source.getAbsolutePath() + "]: expected \"word<TAB>seeks\".");
                }
                String word = tokens[0];
                String seeks = tokens[1];

                // Explicit '\n': the offset arithmetic below counts exactly one terminator byte.
                out1.print(seeks);
                out1.print('\n');

                String data = word + "\t" + currentSeekValue;
                int padding = paddingLength - data.getBytes(DEFAULT_CHARSET_NAME).length;
                out2.print(data);
                // StringUtils.repeat returns "" for a non-positive count.
                out2.print(StringUtils.repeat(" ", padding));
                out2.print('\n');

                currentSeekValue += (long)(seeks.getBytes(DEFAULT_CHARSET_NAME).length + 1);
            }
            // PrintWriter never throws on write failures; surface them instead of leaving truncated files.
            if (out1.checkError() || out2.checkError()) {
                throw new IOException("Failed to write index files in [" + outputDirectory.getAbsolutePath() + "]");
            }
        }
        logger.debug("build index12 files done.");
    }

    /**
     * Extracts the raw words of a GeoNames line: the names of the line (lineToNames) split into
     * lower-cased, non-blank words (namesToRawWords), without any further filtering.
     *
     * @param line a TAB-separated GeoNames line
     * @return the set of raw words found in the names of the line
     */
    public static StringSet geoNameLineToRawWords(String line) {
        return IndexOfWordSeeksBuilder.namesToRawWords(IndexOfWordSeeksBuilder.lineToNames(line));
    }

    /**
     * Extracts and triages the words of a GeoNames line: the names of the line (lineToNames) are
     * split into words that are sorted into selected and rejected ones (namesToWords).
     *
     * @param line a TAB-separated GeoNames line
     * @return the triage holding the selected and the rejected words
     */
    public static WordTriage geoNameLineToWords(String line) {
        return IndexOfWordSeeksBuilder.namesToWords(IndexOfWordSeeksBuilder.lineToNames(line));
    }

    /**
     * Converts a GeoNames file into a "word TAB seek" file.
     *
     * For every source line, one target line is written per selected word of that line, the seek
     * being the byte offset of the source line. Rejected words are collected and written to a
     * sibling file named after the target with the ".rejection" suffix.
     *
     * The offset is computed from the line encoded with DEFAULT_CHARSET_NAME plus one terminator
     * byte, independently of the platform default charset.
     * NOTE(review): this assumes the source file uses single-byte line terminators and has no BOM --
     * confirm against the GeoNames dump format.
     *
     * @param source the GeoNames file to read
     * @param target the "word TAB seek" file to write
     * @throws IOException if reading or writing fails
     */
    public static void geonamesToWordSeek(File source, File target) throws IOException {
        Chronometer chronoStep = new Chronometer();
        long currentSeekValue = 0L;
        StringSet rejectedWords = new StringSet();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(source), DEFAULT_CHARSET_NAME));
                PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(target), DEFAULT_CHARSET_NAME))) {
            int lineCount = 0;
            while (true) {
                // Progress report before the first line and then every million lines.
                if (lineCount % 1000000 == 0) {
                    System.gc();
                    logger.debug("\tlineCount=" + lineCount + "\tcurrentSeekValue=" + currentSeekValue + " \tmemory=" + Runtime.getRuntime().totalMemory() / 1024L / 1024L + " Mo \t" + (int)(chronoStep.stop().interval() / 1000L) + " s");
                    chronoStep.reset();
                }
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                ++lineCount;
                WordTriage words = IndexOfWordSeeksBuilder.geoNameLineToWords(line);
                for (String word : words.getSelection()) {
                    out.println(word + "\t" + currentSeekValue);
                }
                rejectedWords.addAll((Collection)words.getRejection());
                currentSeekValue += (long)(line.getBytes(DEFAULT_CHARSET_NAME).length + 1);
            }
            // PrintWriter never throws on write failures; surface them instead of leaving a truncated file.
            if (out.checkError()) {
                throw new IOException("Failed to write file [" + target.getAbsolutePath() + "]");
            }
        }
        // Same charset as the index file, instead of the platform default one.
        FileUtils.writeLines(new File(target.getAbsolutePath() + ".rejection"), DEFAULT_CHARSET_NAME, (Collection)rejectedWords);
    }

    /**
     * Collects the non-blank names of a parsed GeoNames line: its name, its ASCII name and each of
     * its alternate names, in that order.
     *
     * @param line the parsed GeoNames line
     * @return the set of non-blank names of the line
     */
    public static StringSet lineToNames(GeoNamesLine line) {
        StringSet names = new StringSet();

        String primary = line.getName();
        if (StringUtils.isNotBlank((CharSequence)primary)) {
            names.add(primary);
        }

        String ascii = line.getAsciiName();
        if (StringUtils.isNotBlank((CharSequence)ascii)) {
            names.add(ascii);
        }

        for (String alternate : line.getAlternateNames()) {
            if (StringUtils.isNotBlank((CharSequence)alternate)) {
                names.add(alternate);
            }
        }

        return names;
    }

    /**
     * Collects the non-blank names of a raw GeoNames line.
     *
     * The line is split on TAB; column 1 and column 2 are taken as names, and column 3 is split on
     * ',' into further names. Blank values are skipped.
     *
     * Columns that are missing (short, empty or null line) are ignored instead of raising an
     * exception, so that a single malformed line does not abort a whole index build.
     *
     * @param line a TAB-separated GeoNames line, may be null
     * @return the set of non-blank names found, empty if the line holds none
     */
    public static StringSet lineToNames(String line) {
        StringSet result = new StringSet();
        if (line == null) {
            return result;
        }
        String[] tokens = line.split("\t");

        // Columns 1 and 2 each hold a single name.
        for (int column = 1; column <= 2 && column < tokens.length; column++) {
            String name = tokens[column];
            if (StringUtils.isNotBlank((CharSequence)name)) {
                result.add(name);
            }
        }

        // Column 3 holds a comma-separated list of names.
        if (tokens.length > 3) {
            for (String alternateName : tokens[3].split(",")) {
                if (StringUtils.isNotBlank((CharSequence)alternateName)) {
                    result.add(alternateName);
                }
            }
        }
        return result;
    }

    /**
     * Splits every name of a set into raw words (nameToRawWords) and gathers them in one set.
     *
     * @param source the names to split
     * @return the union of the raw words of all names
     */
    public static StringSet namesToRawWords(StringSet source) {
        StringSet rawWords = new StringSet();
        for (String current : source) {
            StringSet pieces = IndexOfWordSeeksBuilder.nameToRawWords(current);
            rawWords.addAll((Collection)pieces);
        }
        return rawWords;
    }

    /**
     * Splits and triages every name of a set (nameToWords) and merges the results in one triage.
     *
     * @param source the names to process
     * @return the triage accumulating the selected and rejected words of all names
     */
    public static WordTriage namesToWords(StringSet source) {
        WordTriage merged = new WordTriage();
        for (String current : source) {
            WordTriage triaged = IndexOfWordSeeksBuilder.nameToWords(current);
            merged.addAll(triaged);
        }
        return merged;
    }

    /**
     * Splits a name into raw words: the name is lower-cased with the root locale, split on the
     * separator character class, and every non-blank piece is kept.
     *
     * NOTE(review): inside the character class "*-_" is a range (U+002A to U+005F), so ASCII digits
     * and a few more characters act as separators as well -- confirm this is intended.
     *
     * @param source the name to split, may be null
     * @return the set of non-blank pieces, empty when the source is null
     */
    public static StringSet nameToRawWords(String source) {
        StringSet rawWords = new StringSet();
        if (source == null) {
            return rawWords;
        }
        String lowered = source.toLowerCase(Locale.ROOT);
        for (String piece : lowered.split("[ \u30fb+*-_()?'\u2019\u2018`\u201d|\\[\\]!\u00ab\u00bb%{}\\:;/\\.#@&\"]")) {
            if (StringUtils.isNotBlank((CharSequence)piece)) {
                rawWords.add(piece);
            }
        }
        return rawWords;
    }

    /**
     * Splits a name into words and triages them.
     *
     * The name is lower-cased with the root locale and split on the separator character class. A
     * piece is selected when it is not blank, has at least two characters, does not start with a
     * digit and is at most WORD_MAX_LENGTH bytes long once encoded in UTF-8; every other piece,
     * including empty ones produced by consecutive separators, goes to the rejection.
     *
     * The byte length is measured in UTF-8 (the value of DEFAULT_CHARSET_NAME, used to write the
     * index files) rather than in the platform default charset, so the triage is the same on every
     * platform.
     *
     * NOTE(review): inside the character class "*-_" is a range (U+002A to U+005F), so ASCII digits
     * and a few more characters act as separators as well -- confirm this is intended.
     *
     * @param source the name to split, may be null
     * @return the triage of the pieces, empty when the source is null
     */
    public static WordTriage nameToWords(String source) {
        WordTriage result = new WordTriage();
        if (source == null) {
            return result;
        }
        String name = source.toLowerCase(Locale.ROOT);
        for (String word : name.split("[ \u30fb+*-_()?'\u2019\u2018`\u201d|\\[\\]!\u00ab\u00bb%{}\\:;/\\.#@&\"]")) {
            boolean selected = StringUtils.isNotBlank((CharSequence)word)
                    && word.length() >= 2
                    && !IndexOfWordSeeksBuilder.startsWithDigit(word)
                    && word.getBytes(StandardCharsets.UTF_8).length <= IndexOfWordSeeksBuilder.WORD_MAX_LENGTH;
            if (selected) {
                result.getSelection().add(word);
            } else {
                result.getRejection().add(word);
            }
        }
        return result;
    }

    /**
     * Tells whether a string starts with a digit, as defined by {@link Character#isDigit(char)}.
     *
     * @param source the string to test, may be null
     * @return true if the first character is a digit; false for a null or empty string
     */
    public static boolean startsWithDigit(String source) {
        if (source == null || source.length() == 0) {
            return false;
        }
        return Character.isDigit(source.charAt(0));
    }

    /**
     * Converts a "word TAB seeks" file into a "word TAB offset" file, the offset being the byte
     * position of the word's line in the source file.
     *
     * Each target entry is right-padded with spaces to 41 characters (character count, not byte
     * count). The offset is computed from the source line encoded with DEFAULT_CHARSET_NAME plus one
     * terminator byte, independently of the platform default charset.
     * NOTE(review): this assumes the source file uses single-byte line terminators -- confirm
     * against whatever produces it.
     *
     * @param source the "word TAB seeks" file to read
     * @param target the "word TAB offset" file to write
     * @throws IOException if reading or writing fails
     */
    public static void wordSeeksToWordSeekSeeks(File source, File target) throws IOException {
        Chronometer chronoStep = new Chronometer();
        long currentSeekValue = 0L;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(source), DEFAULT_CHARSET_NAME));
                PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(target), DEFAULT_CHARSET_NAME))) {
            int lineCount = 0;
            while (true) {
                // Progress report before the first line and then every million lines.
                if (lineCount % 1000000 == 0) {
                    System.gc();
                    logger.debug("\tlineCount=" + lineCount + "\tcurrentSeekValue=" + currentSeekValue + " \tmemory=" + Runtime.getRuntime().totalMemory() / 1024L / 1024L + " Mo \t" + (int)(chronoStep.stop().interval() / 1000L) + " s");
                    chronoStep.reset();
                }
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                ++lineCount;
                // Text before the first TAB; unlike split("\t")[0] this cannot fail on a line made of tabs only.
                int tabIndex = line.indexOf('\t');
                String word = tabIndex < 0 ? line : line.substring(0, tabIndex);
                out.println(StringUtils.rightPad(word + "\t" + currentSeekValue, 41));
                currentSeekValue += (long)(line.getBytes(DEFAULT_CHARSET_NAME).length + 1);
            }
            // PrintWriter never throws on write failures; surface them instead of leaving a truncated file.
            if (out.checkError()) {
                throw new IOException("Failed to write file [" + target.getAbsolutePath() + "]");
            }
        }
    }
}

