diff --git a/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/attributes/SudachiMorphemeAttributeImpl.java b/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/attributes/SudachiMorphemeAttributeImpl.java index 7df0b8d..94e4d8b 100644 --- a/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/attributes/SudachiMorphemeAttributeImpl.java +++ b/src/main/java/io/github/azagniotov/lucene/analysis/ja/sudachi/attributes/SudachiMorphemeAttributeImpl.java @@ -41,7 +41,21 @@ public void clear() { @Override public void reflectWith(AttributeReflector attributeReflector) { - attributeReflector.reflect(SudachiAttribute.class, "morpheme", getMorpheme()); + // AttributeReflector is used by Solr and Elasticsearch to provide analysis output. + // + // The following call is intentionally commented out, for the two reasons listed below: + // attributeReflector.reflect(SudachiMorphemeAttribute.class, "morpheme", getMorpheme()); + // + // 1. Reflecting the Morpheme object is not needed to provide the analysis output mentioned above. + // 2. com.worksap.nlp.sudachi.MorphemeImpl has package-private (default) visibility, which makes Solr throw: + // Caused by: java.lang.IllegalAccessException: access violation: class com.worksap.nlp.sudachi.MorphemeImpl, + // from public Lookup + // at java.lang.invoke.MethodHandles$Lookup.makeAccessException(Unknown Source) ~[?:?] + // at java.lang.invoke.MethodHandles$Lookup.accessClass(Unknown Source) ~[?:?] + // at org.apache.solr.common.util.Utils.addTraditionalFieldWriters(Utils.java:953) ~[?:?] + // at org.apache.solr.common.util.Utils.getReflectData(Utils.java:912) ~[?:?] + // at org.apache.solr.common.util.Utils.getReflectWriter(Utils.java:847) ~[?:?] + // ... 
81 more } @Override diff --git a/src/smokeTest/solr_9.x.x/solr_9_4_0/Dockerfile b/src/smokeTest/solr_9.x.x/solr_9_4_0/Dockerfile new file mode 100644 index 0000000..e23d3df --- /dev/null +++ b/src/smokeTest/solr_9.x.x/solr_9_4_0/Dockerfile @@ -0,0 +1,64 @@ +# A few useful Docker commands to build an image and run the Solr container. +# +# Build (run with '--no-cache' to ensure that Git repo new tags will be pulled down, as Docker caches RUN layers): +# docker build -t solr-sudachi . --progress=plain --no-cache (the presence of '--progress' may give an error, depending on your version of docker) +# +# Run: +# docker run -p 8983:8983 --rm solr-sudachi:latest solr-precreate example +# +# cURL requests: +# 1. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=ちいかわ' | jq '.analysis.field_types.text_ja.index[1][].text' +# 2. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=すもももももももものうち' | jq '.analysis.field_types.text_ja.index[1][].text' +# 3. 
curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=聖川真斗' | jq '.analysis.field_types.text_ja.index[1][].text' +# +# See: https://github.com/apache/lucene/pull/12517 +# + +######################################################################################## +# Stage 1 : Solr Lucene Analyzer Sudachi JAR +######################################################################################## +FROM gradle:8.1.1-jdk11@sha256:681c18e70745546bf66949861d18019b979810ac151a5e0933d4ff83c76b4f5f AS BUILD_JAR_STAGE + +ARG PLUGIN_GIT_TAG=9.4.0 + +ENV GRADLE_USER_HOME=/home/gradle +WORKDIR $GRADLE_USER_HOME + +RUN git clone -b v$PLUGIN_GIT_TAG https://github.com/azagniotov/solr-lucene-analyzer-sudachi.git --depth 1 && \ + git config --global user.name "Alexander Zagniotov" && \ + git config --global user.email "azagniotov@gmail.com" + +# Download the dictionary and assemble the JAR that Stage 2 copies into the Solr lib directory (/opt/solr/lib/) +RUN cd solr-lucene-analyzer-sudachi && \ + gradle configureDictionariesLocally && \ + gradle -PsolrVersion=$PLUGIN_GIT_TAG assemble && \ + ls -al ./build/libs/ + + +######################################################################################## +# Stage 2 : Run Solr +######################################################################################## +FROM solr:9.4.0@sha256:4e9b6794895f1f6685a61d587b63e2f0caa2b6f9a5a2c0e67613d4f160d9540d + +MAINTAINER Alexander Zagniotov + +ENV SOLR_JAVA_MEM="-Xms4g -Xmx4g" +ENV SOLR_INSTALL_DIR=/opt/solr +ENV SOLR_SERVER_HOME=$SOLR_INSTALL_DIR/server + +# https://solr.apache.org/guide/solr/9_4/configuration-guide/libs.html#lib-directories +ENV SOLR_INSTALL_LIB_DIR=$SOLR_INSTALL_DIR/lib + +ENV SUDACHI_DICT_HOME=/tmp/sudachi +ENV SUDACHI_SYSTEM_DICT=$SOLR_INSTALL_DIR/system.dict +ENV SUDACHI_USER_DICT=$SOLR_INSTALL_DIR/user_lexicon.dict + +USER root + +# 
https://solr.apache.org/guide/solr/9_4/configuration-guide/libs.html#lib-directories +COPY --from=BUILD_JAR_STAGE /home/gradle/solr-lucene-analyzer-sudachi/build/libs/solr-lucene-analyzer-sudachi*.jar $SOLR_INSTALL_LIB_DIR/ +COPY --from=BUILD_JAR_STAGE $SUDACHI_DICT_HOME/system-dict/system.dict $SOLR_INSTALL_DIR/system.dict +COPY --from=BUILD_JAR_STAGE $SUDACHI_DICT_HOME/user_lexicon.dict $SOLR_INSTALL_DIR/user_lexicon.dict +COPY schema.xml $SOLR_SERVER_HOME/solr/configsets/_default/conf/managed-schema.xml + +USER solr diff --git a/src/smokeTest/solr_9.x.x/solr_9_4_0/Dockerfile.arm64 b/src/smokeTest/solr_9.x.x/solr_9_4_0/Dockerfile.arm64 new file mode 100644 index 0000000..e1e8459 --- /dev/null +++ b/src/smokeTest/solr_9.x.x/solr_9_4_0/Dockerfile.arm64 @@ -0,0 +1,64 @@ +# A few useful Docker commands to build an image on Apple Silicon (ARM64) and run the Solr container. +# +# Build (run with '--no-cache' to ensure that Git repo new tags will be pulled down, as Docker caches RUN layers): +# docker build -t solr-sudachi -f Dockerfile.arm64 . --no-cache --platform linux/arm64 +# +# Run: +# docker run -p 8983:8983 --rm solr-sudachi:latest solr-precreate example +# +# cURL requests: +# 1. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=ちいかわ' | jq '.analysis.field_types.text_ja.index[1][].text' +# 2. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=すもももももももものうち' | jq '.analysis.field_types.text_ja.index[1][].text' +# 3. 
curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=聖川真斗' | jq '.analysis.field_types.text_ja.index[1][].text' +# +# See: https://github.com/apache/lucene/pull/12517 +# + +######################################################################################## +# Stage 1 : Solr Lucene Analyzer Sudachi JAR +######################################################################################## +FROM gradle:8.1.1-jdk11@sha256:085ef38a5cc3c2d0f65fbe15fcf75164ef4e610ffba0bb66c3ba9bbdb72e30b2 AS BUILD_JAR_STAGE + +ARG PLUGIN_GIT_TAG=9.4.0 + +ENV GRADLE_USER_HOME=/home/gradle +WORKDIR $GRADLE_USER_HOME + +RUN git clone -b v$PLUGIN_GIT_TAG https://github.com/azagniotov/solr-lucene-analyzer-sudachi.git --depth 1 && \ + git config --global user.name "Alexander Zagniotov" && \ + git config --global user.email "azagniotov@gmail.com" + +# Download the dictionary and assemble the JAR that Stage 2 copies into the Solr lib directory (/opt/solr/lib/) +RUN cd solr-lucene-analyzer-sudachi && \ + gradle configureDictionariesLocally && \ + gradle -PsolrVersion=$PLUGIN_GIT_TAG assemble && \ + ls -al ./build/libs/ + + +######################################################################################## +# Stage 2 : Run Solr +######################################################################################## +FROM solr:9.4.0@sha256:fbb40ba32b4092aa7e81f49b66dea2628b66950121853ac6792674ee82ce75b7 + +MAINTAINER Alexander Zagniotov + +ENV SOLR_JAVA_MEM="-Xms4g -Xmx4g" +ENV SOLR_INSTALL_DIR=/opt/solr +ENV SOLR_SERVER_HOME=$SOLR_INSTALL_DIR/server + +# https://solr.apache.org/guide/solr/9_4/configuration-guide/libs.html#lib-directories +ENV SOLR_INSTALL_LIB_DIR=$SOLR_INSTALL_DIR/lib + +ENV SUDACHI_DICT_HOME=/tmp/sudachi +ENV SUDACHI_SYSTEM_DICT=$SOLR_INSTALL_DIR/system.dict +ENV SUDACHI_USER_DICT=$SOLR_INSTALL_DIR/user_lexicon.dict + +USER root + +# 
https://solr.apache.org/guide/solr/9_4/configuration-guide/libs.html#lib-directories +COPY --from=BUILD_JAR_STAGE /home/gradle/solr-lucene-analyzer-sudachi/build/libs/solr-lucene-analyzer-sudachi*.jar $SOLR_INSTALL_LIB_DIR/ +COPY --from=BUILD_JAR_STAGE $SUDACHI_DICT_HOME/system-dict/system.dict $SOLR_INSTALL_DIR/system.dict +COPY --from=BUILD_JAR_STAGE $SUDACHI_DICT_HOME/user_lexicon.dict $SOLR_INSTALL_DIR/user_lexicon.dict +COPY schema.xml $SOLR_SERVER_HOME/solr/configsets/_default/conf/managed-schema.xml + +USER solr diff --git a/src/smokeTest/solr_9.x.x/solr_9_4_0/schema.xml b/src/smokeTest/solr_9.x.x/solr_9_4_0/schema.xml new file mode 100644 index 0000000..f9865db --- /dev/null +++ b/src/smokeTest/solr_9.x.x/solr_9_4_0/schema.xml @@ -0,0 +1,1003 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +