From 43546b1ea1d5c30cd2c2ad0176bce6053d1b08de Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Tue, 10 Sep 2024 17:24:17 +0100
Subject: [PATCH] Updated the SparseVectorStrategy class to use sparse_vector query

---
 .../helpers/vectorstore/_async/strategies.py | 24 +++++++++----------
 .../helpers/vectorstore/_sync/strategies.py | 24 +++++++++----------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/elasticsearch/helpers/vectorstore/_async/strategies.py b/elasticsearch/helpers/vectorstore/_async/strategies.py
index 10524e243..2a52bcb4b 100644
--- a/elasticsearch/helpers/vectorstore/_async/strategies.py
+++ b/elasticsearch/helpers/vectorstore/_async/strategies.py
@@ -96,7 +96,7 @@ def needs_inference(self) -> bool:
 
 
 class AsyncSparseVectorStrategy(AsyncRetrievalStrategy):
-    """Sparse retrieval strategy using the `text_expansion` processor."""
+    """Sparse retrieval strategy using the `sparse_vector` processor."""
 
     def __init__(self, model_id: str = ".elser_model_2"):
         self.model_id = model_id
@@ -127,11 +127,10 @@ def es_query(
                 "bool": {
                     "must": [
                         {
-                            "text_expansion": {
-                                f"{vector_field}.{self._tokens_field}": {
-                                    "model_id": self.model_id,
-                                    "model_text": query,
-                                }
+                            "sparse_vector": {
+                                "field": f"{vector_field}.{self._tokens_field}",
+                                "inference_id": self.model_id,
+                                "query": query,
                             }
                         }
                     ],
@@ -150,7 +149,7 @@ def es_mappings_settings(
         mappings: Dict[str, Any] = {
             "properties": {
                 vector_field: {
-                    "properties": {self._tokens_field: {"type": "rank_features"}}
+                    "properties": {self._tokens_field: {"type": "sparse_vector"}}
                 }
             }
         }
@@ -172,11 +171,12 @@ async def before_index_creation(
                     {
                         "inference": {
                             "model_id": self.model_id,
-                            "target_field": vector_field,
-                            "field_map": {text_field: "text_field"},
-                            "inference_config": {
-                                "text_expansion": {"results_field": self._tokens_field}
-                            },
+                            "input_output": [
+                                {
+                                    "input_field": text_field,
+                                    "output_field": f"{vector_field}.{self._tokens_field}",
+                                },
+                            ],
                         }
                     }
                 ],
diff --git a/elasticsearch/helpers/vectorstore/_sync/strategies.py b/elasticsearch/helpers/vectorstore/_sync/strategies.py
index 99c9baec2..af89edf8d 100644
--- a/elasticsearch/helpers/vectorstore/_sync/strategies.py
+++ b/elasticsearch/helpers/vectorstore/_sync/strategies.py
@@ -96,7 +96,7 @@ def needs_inference(self) -> bool:
 
 
 class SparseVectorStrategy(RetrievalStrategy):
-    """Sparse retrieval strategy using the `text_expansion` processor."""
+    """Sparse retrieval strategy using the `sparse_vector` processor."""
 
     def __init__(self, model_id: str = ".elser_model_2"):
         self.model_id = model_id
@@ -127,11 +127,10 @@ def es_query(
                 "bool": {
                     "must": [
                         {
-                            "text_expansion": {
-                                f"{vector_field}.{self._tokens_field}": {
-                                    "model_id": self.model_id,
-                                    "model_text": query,
-                                }
+                            "sparse_vector": {
+                                "field": f"{vector_field}.{self._tokens_field}",
+                                "inference_id": self.model_id,
+                                "query": query,
                             }
                         }
                     ],
@@ -150,7 +149,7 @@ def es_mappings_settings(
         mappings: Dict[str, Any] = {
             "properties": {
                 vector_field: {
-                    "properties": {self._tokens_field: {"type": "rank_features"}}
+                    "properties": {self._tokens_field: {"type": "sparse_vector"}}
                 }
             }
         }
@@ -172,11 +171,12 @@ def before_index_creation(
                     {
                         "inference": {
                             "model_id": self.model_id,
-                            "target_field": vector_field,
-                            "field_map": {text_field: "text_field"},
-                            "inference_config": {
-                                "text_expansion": {"results_field": self._tokens_field}
-                            },
+                            "input_output": [
+                                {
+                                    "input_field": text_field,
+                                    "output_field": f"{vector_field}.{self._tokens_field}",
+                                },
+                            ],
                         }
                     }
                 ],