From 27773cd1154ce0396fe2ae42faad3e803a8eb5cd Mon Sep 17 00:00:00 2001 From: Alexander Zagniotov Date: Mon, 15 Jan 2024 16:24:17 -0800 Subject: [PATCH] Reading the dictionary locations from ENV vars with a fallback --- .../tokenizer/SudachiTokenizerFactory.java | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/tokenizer/SudachiTokenizerFactory.java b/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/tokenizer/SudachiTokenizerFactory.java index f983895..c473086 100644 --- a/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/tokenizer/SudachiTokenizerFactory.java +++ b/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/tokenizer/SudachiTokenizerFactory.java @@ -16,14 +16,13 @@ package io.github.azagniotov.lucene.analysis.ja.sudachi.tokenizer; import static com.worksap.nlp.sudachi.Tokenizer.SplitMode; -import static io.github.azagniotov.lucene.analysis.ja.sudachi.cache.DictionaryCache.SYSTEM_DICT_LOCAL_PATH; -import static io.github.azagniotov.lucene.analysis.ja.sudachi.cache.DictionaryCache.USER_DICT_LOCAL_PATH; import com.worksap.nlp.sudachi.Config; import com.worksap.nlp.sudachi.Dictionary; import com.worksap.nlp.sudachi.DictionaryFactory; import io.github.azagniotov.lucene.analysis.ja.sudachi.cache.DictionaryCache; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Map; import org.apache.lucene.analysis.Tokenizer; @@ -38,6 +37,11 @@ public class SudachiTokenizerFactory extends TokenizerFactory implements Resourc private static final Logger LOGGER = LoggerFactory.getLogger(SudachiTokenizerFactory.class); + private static final String SYSTEM_DICT_ENV_VAR = "SUDACHI_SYSTEM_DICT"; + private static final String USER_DICT_ENV_VAR = "SUDACHI_USER_DICT"; + private static final String SYSTEM_DICT_LOCAL_PATH = "/tmp/sudachi/system-dict/system.dict"; + private static final String USER_DICT_LOCAL_PATH = "/tmp/sudachi/user_lexicon.dict"; + private static final String MODE = "mode"; private static final String DISCARD_PUNCTUATION = "discardPunctuation"; private final SplitMode mode; @@ -73,8 +77,8 @@ public void inform(ResourceLoader loader) throws IOException { final Config currentConfig = this.config == null ? Config.defaultConfig() : this.config; final Config config = currentConfig - .systemDictionary(Paths.get(SYSTEM_DICT_LOCAL_PATH)) - .addUserDictionary(Paths.get(USER_DICT_LOCAL_PATH)); + .systemDictionary(getEnv(SYSTEM_DICT_ENV_VAR, SYSTEM_DICT_LOCAL_PATH)) + .addUserDictionary(getEnv(USER_DICT_ENV_VAR, USER_DICT_LOCAL_PATH)); LOGGER.info("Sudachi: Created config from system and user dictionaries"); final Dictionary dictionary = new DictionaryFactory().create(config); @@ -95,4 +99,15 @@ private SplitMode getMode(final String input) { } throw new IllegalArgumentException("Tokenization input mode is null"); } + + private static Path getEnv(final String name, final String defaultValue) { + final Path defaultValuePath = Paths.get(defaultValue); + try { + final String value = System.getenv(name); + return (value == null || value.trim().isEmpty()) ? defaultValuePath : Paths.get(value); + } catch (final SecurityException ex) { + // System.err.println("SecurityException when reading the env variable: " + name); + return defaultValuePath; + } + } }