package com.dassault_systemes.doc.search.core;

import com.dassault_systemes.doc.search.filter.FilterConstants;
import com.dassault_systemes.doc.search.mapping.query.TokenSet;
import com.dassault_systemes.doc.search.nls.NlsHandler;
import com.dassault_systemes.doc.search.trace.TraceHandler;
import java.util.Iterator;
import java.util.StringTokenizer;

/* loaded from: input_file:com/dassault_systemes/doc/search/core/QueryTokeniser.class */
/**
 * Turns a raw search query string into a {@link TokenSet} of prefixed search terms.
 *
 * <p>All language-dependent values (common words, quote and separator characters) are read
 * once from the supplied {@link NlsHandler} at construction time. The main entry point is
 * {@link #parseQueryText()}.
 */
public class QueryTokeniser {
    /** Words rejected by {@link #isCommonWord(String)}; value of {@code NlsHandler.getCommonWords()}. */
    protected final TokenSet NLS_COMMON_WORDS;
    /** Token recognised as a double quote; value of {@code NlsHandler.getDoubleQuote()}. */
    protected final String NLS_DOUBLE_QUOTE;
    /** Delimiter set used outside quoted text; value of {@code NlsHandler.getWhitespaceAndQuotes()}. */
    protected final String NLS_WHITESPACE_AND_QUOTES;
    /** Delimiter set used inside quoted text; value of {@code NlsHandler.getQuotesOnly()}. */
    protected final String NLS_QUOTES_ONLY;
    /** Value of {@code NlsHandler.getWordSeparator()}; not used by this class itself. */
    protected final String NLS_WORD_SEPARATOR;
    /** The raw query as passed to the constructor; may be {@code null}. */
    protected String queryText;
    /** When {@code true}, common-word filtering is disabled (see {@link #isCommonWord(String)}). */
    protected boolean glossarySearchOnly;
    protected NlsHandler nlsHandler;
    protected TraceHandler traceHandler;

    /**
     * Creates a tokeniser for one query.
     *
     * @param nlsHandler         source of the language-dependent constants and word normalisation
     * @param traceHandler       sink for trace messages
     * @param queryText          the raw query; {@code null} is accepted and yields an empty result
     * @param glossarySearchOnly when {@code true}, common words are not filtered out
     */
    public QueryTokeniser(NlsHandler nlsHandler, TraceHandler traceHandler, String queryText,
            boolean glossarySearchOnly) {
        this.nlsHandler = nlsHandler;
        this.traceHandler = traceHandler;
        this.glossarySearchOnly = glossarySearchOnly;
        if (queryText == null) {
            traceHandler.trace(3, "QueryTokeniser, QueryTokeniser : Query text is empty");
        }
        this.queryText = queryText;
        this.NLS_COMMON_WORDS = nlsHandler.getCommonWords();
        this.NLS_DOUBLE_QUOTE = nlsHandler.getDoubleQuote();
        this.NLS_WHITESPACE_AND_QUOTES = nlsHandler.getWhitespaceAndQuotes();
        this.NLS_QUOTES_ONLY = nlsHandler.getQuotesOnly();
        this.NLS_WORD_SEPARATOR = nlsHandler.getWordSeparator();
    }

    /**
     * Tells whether a word should be dropped as a common word.
     *
     * @param str the (already trimmed) word to test
     * @return {@code false} whenever {@link #glossarySearchOnly} is set; otherwise whether
     *         {@link #NLS_COMMON_WORDS} contains the word
     */
    protected boolean isCommonWord(String str) {
        if (this.glossarySearchOnly) {
            return false;
        }
        return this.NLS_COMMON_WORDS.contains(str);
    }

    /**
     * @param str a token; must not be {@code null}
     * @return {@code true} if the token equals {@link #NLS_DOUBLE_QUOTE}
     */
    protected boolean isDoubleQuote(String str) {
        return str.equals(this.NLS_DOUBLE_QUOTE);
    }

    /**
     * @param str text to test; may be {@code null}
     * @return {@code true} if the text is non-null and non-empty after {@link String#trim()}
     */
    protected boolean textHasContent(String str) {
        return str != null && !str.trim().isEmpty();
    }

    /**
     * Adds the trimmed word to the token set unless it is blank or a common word.
     *
     * @param str      candidate word; may be {@code null} or blank, in which case nothing is added
     * @param tokenSet set receiving the trimmed word
     */
    protected void addNonTrivialWordToResult(String str, TokenSet tokenSet) {
        if (!textHasContent(str)) {
            return;
        }
        String word = str.trim();
        if (!isCommonWord(word)) {
            tokenSet.add(word);
        }
    }

    /**
     * Switches between the two delimiter sets used while scanning a query.
     *
     * @param str the delimiter set currently in use
     * @return {@link #NLS_QUOTES_ONLY} if {@code str} equals {@link #NLS_WHITESPACE_AND_QUOTES},
     *         otherwise {@link #NLS_WHITESPACE_AND_QUOTES}
     */
    protected String flipDelimiters(String str) {
        return str.equals(this.NLS_WHITESPACE_AND_QUOTES) ? this.NLS_QUOTES_ONLY : this.NLS_WHITESPACE_AND_QUOTES;
    }

    /**
     * Builds the final token set from the raw tokens.
     *
     * <ul>
     *   <li>A token ending with {@code FilterConstants.FILTER_DELIMITER} is concatenated with the
     *       token that follows it, and both are consumed.</li>
     *   <li>A token merely containing the delimiter is copied unchanged. This also covers a
     *       trailing token that ends with the delimiter but has no successor; previously that
     *       case read past the end of the set.</li>
     *   <li>Any other token is prefixed with {@code "qst:"} when the query text contains
     *       {@link #NLS_DOUBLE_QUOTE}, and with {@code "st:"} otherwise.</li>
     * </ul>
     *
     * @param tokenSet raw tokens, in query order; not modified
     * @return a new token set holding the normalised tokens
     */
    protected TokenSet normalizeTokens(TokenSet tokenSet) {
        TokenSet normalized = new TokenSet();
        // Resolved on first use only: the prefix is the same for every plain token, and the
        // query text is not consulted at all when there are no plain tokens.
        String plainPrefix = null;
        int i = 0;
        while (i < tokenSet.size()) {
            String token = tokenSet.get(i);
            boolean hasNext = i + 1 < tokenSet.size();
            if (hasNext && token.endsWith(FilterConstants.FILTER_DELIMITER)) {
                normalized.add(token + tokenSet.get(i + 1));
                i += 2;
                continue;
            }
            if (token.contains(FilterConstants.FILTER_DELIMITER)) {
                normalized.add(token);
            } else {
                if (plainPrefix == null) {
                    plainPrefix = this.queryText.contains(this.NLS_DOUBLE_QUOTE) ? "qst:" : "st:";
                }
                normalized.add(plainPrefix + token);
            }
            i++;
        }
        return normalized;
    }

    /**
     * Tokenises {@link #queryText} and returns the normalised tokens.
     *
     * <p>A {@code null} query produces an empty result. For the language codes JP, JA, CN, ZH
     * and KO (case-insensitive) the query is handled by a dedicated path, unless it contains
     * both {@code "xreftag"} and {@code ".xml"}; every other query is split on the NLS
     * delimiter sets.
     *
     * @return the result of {@link #normalizeTokens(TokenSet)} applied to the collected tokens
     */
    public TokenSet parseQueryText() {
        TokenSet tokenSet = new TokenSet();
        this.traceHandler.trace(2, "QueryTokeniser, parseQueryText : begin");
        if (this.queryText != null) {
            boolean isXrefQuery = this.queryText.contains("xreftag") && this.queryText.contains(".xml");
            if (isCjkLanguage(this.nlsHandler.getLanguage()) && !isXrefQuery) {
                collectCjkTokens(tokenSet);
            } else {
                collectDelimitedTokens(tokenSet);
            }
        }
        this.traceHandler.trace(3, "QueryTokeniser, parseQueryText : found :" + tokenSet.toString());
        this.traceHandler.trace(2, "QueryTokeniser, parseQueryText : done");
        return normalizeTokens(tokenSet);
    }

    /** Returns whether the language code is one of JP, JA, CN, ZH or KO; {@code null} is not. */
    private boolean isCjkLanguage(String language) {
        if (language == null) {
            return false;
        }
        return language.equalsIgnoreCase("JP")
                || language.equalsIgnoreCase("JA")
                || language.equalsIgnoreCase("CN")
                || language.equalsIgnoreCase("ZH")
                || language.equalsIgnoreCase("KO");
    }

    /** Collects tokens for the JP/JA/CN/ZH/KO path: quoted segments as-is, else NLS word splitting. */
    private void collectCjkTokens(TokenSet tokenSet) {
        // The tokenizer splits on the literal '"' character while isDoubleQuote() compares
        // against NLS_DOUBLE_QUOTE.
        StringTokenizer segments = new StringTokenizer(this.queryText, "\"", true);
        boolean quoteSeen = false;
        while (segments.hasMoreTokens()) {
            String segment = segments.nextToken();
            if (isDoubleQuote(segment)) {
                // NOTE(review): the flag is never reset on a closing quote, so text before the
                // first quote is dropped and text after a closing quote is still added as a
                // phrase. Behaviour kept as found - confirm whether this is intended.
                quoteSeen = true;
            } else if (quoteSeen) {
                addNonTrivialWordToResult(segment, tokenSet);
            }
        }
        if (quoteSeen) {
            return;
        }
        // No quote anywhere in the query: let the NLS handler split it into words.
        Iterator<String> words = this.nlsHandler.stringSplitIntoWords(this.queryText, "UTF-8").iterator();
        while (words.hasNext()) {
            String normalizedWord = this.nlsHandler.normalizeWord(words.next());
            if (normalizedWord != null) {
                addNonTrivialWordToResult(normalizedWord, tokenSet);
            }
        }
    }

    /** Collects tokens by splitting on the NLS delimiters, flipping the delimiter set at each quote. */
    private void collectDelimitedTokens(TokenSet tokenSet) {
        String delimiters = this.NLS_WHITESPACE_AND_QUOTES;
        StringTokenizer tokenizer = new StringTokenizer(this.queryText, delimiters, true);
        while (tokenizer.hasMoreTokens()) {
            // nextToken(delims) installs the current delimiter set before reading the token.
            String token = tokenizer.nextToken(delimiters);
            if (isDoubleQuote(token)) {
                delimiters = flipDelimiters(delimiters);
                continue;
            }
            String normalizedWord = this.nlsHandler.normalizeWord(token);
            if (normalizedWord != null) {
                addNonTrivialWordToResult(normalizedWord, tokenSet);
            }
        }
    }
}
