-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #51 from bukosabino/develop-multiple-collections
Working with multiple collections/modules
- Loading branch information
Showing 20 changed files with 259 additions and 349 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,11 +5,11 @@ chunk_overlap: 100 | |
admin_email: [email protected] | ||
|
||
embeddings_model_name: dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn | ||
embeddings_model_size: 768 | ||
|
||
vector_store: 'qdrant' # {'qdrant', 'pinecone', 'supabase'} | ||
top_k_results: 10 | ||
date_start: 2000/01/01 | ||
date_end: 2004/12/12 | ||
distance_type: 'Cosine' # {'Cosine', 'Euclid', 'Dot'} | ||
|
||
# Prompts | ||
prompt_system: | | ||
|
@@ -24,21 +24,20 @@ prompt_system_context: | | |
En la respuesta no menciones nada sobre el contexto o los scores. | ||
# Qdrant | ||
collection_name: justicio | ||
collections: | ||
- justicio | ||
- bocm | ||
- bopz | ||
|
||
# Openai | ||
llm_model_name: 'gpt-3.5-turbo-1106' # 'gpt-4-1106-preview' | ||
llm_model_name: 'gpt-3.5-turbo-0125' # 'gpt-3.5-turbo-1106', 'gpt-4-1106-preview' | ||
temperature: 0 | ||
seed: 42 | ||
max_tokens: 1024 | ||
|
||
# Deprecated | ||
|
||
# llm_api: 'llama2' # {'llama2', 'openai'} | ||
|
||
# Pinecone | ||
# Not used | ||
## Pinecone | ||
vector_store_index_name: justicio | ||
|
||
# Supabase | ||
## Supabase | ||
table_name: 'documents' | ||
query_name: 'match_documents' | ||
query_name: 'match_documents' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import time | ||
|
||
import schedule | ||
|
||
from src.etls.boe.load import today | ||
|
||
|
||
# Register `today` to run once a day at 11:00 with the placeholder
# collection name, then poll the scheduler forever.
daily_at_eleven = schedule.every().day.at("11:00")
daily_at_eleven.do(today, collection_name="<template>")

while True:
    # Execute any job whose scheduled time has arrived, then wait a second
    # before checking again.
    schedule.run_pending()
    time.sleep(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from datetime import date, datetime | ||
|
||
import typer | ||
|
||
from src.email.send_email import send_email | ||
from src.etls.boe.scrapper import BOEScrapper | ||
from src.etls.common.etl import ETL | ||
from src.initialize import initialize_app | ||
|
||
|
||
app = typer.Typer() | ||
INIT_OBJECTS = initialize_app() | ||
|
||
|
||
@app.command()
def today(collection_name: str):
    """Download today's documents, run the ETL on them, and email a summary.

    Args:
        collection_name: Key used to pick the vector store out of
            ``INIT_OBJECTS.vector_store``.
    """
    config = INIT_OBJECTS.config_loader
    target_store = INIT_OBJECTS.vector_store[collection_name]
    pipeline = ETL(config_loader=config, vector_store=target_store)

    scrapper = BOEScrapper()
    day = date.today()
    docs = scrapper.download_day(day)

    # The ETL only runs when the scrapper returned something.
    if docs:
        pipeline.run(docs)

    # NOTE(review): len(docs) below assumes download_day returns a sized
    # collection even when nothing was found - confirm it never returns None.
    email_subject = "[BOE] Daily ETL executed"
    email_body = f"""
Daily ETL executed
- Date: {day}
- Documents loaded: {len(docs)}
- Database used: {INIT_OBJECTS.config_loader['vector_store']}
"""
    send_email(INIT_OBJECTS.config_loader, email_subject, email_body)
|
||
|
||
@app.command()
def dates(collection_name: str, date_start: str, date_end: str):
    """Download documents for a date range, run the ETL, and email a summary.

    Args:
        collection_name: Key used to pick the vector store out of
            ``INIT_OBJECTS.vector_store``.
        date_start: First day of the range, formatted as ``YYYY/MM/DD``.
        date_end: Last day passed to the scrapper, formatted as ``YYYY/MM/DD``.

    Raises:
        ValueError: If either date string does not match ``%Y/%m/%d``
            (raised by ``datetime.strptime``).
    """
    config = INIT_OBJECTS.config_loader
    target_store = INIT_OBJECTS.vector_store[collection_name]
    pipeline = ETL(config_loader=config, vector_store=target_store)

    scrapper = BOEScrapper()
    date_format = "%Y/%m/%d"
    first_day = datetime.strptime(date_start, date_format).date()
    last_day = datetime.strptime(date_end, date_format).date()
    docs = scrapper.download_days(date_start=first_day, date_end=last_day)

    # The ETL only runs when the scrapper returned something.
    if docs:
        pipeline.run(docs)

    # NOTE(review): len(docs) below assumes download_days returns a sized
    # collection even when nothing was found - confirm it never returns None.
    email_subject = "[BOE] Load ETL executed"
    email_body = f"""
Load ETL executed
- Date start: {date_start}
- Date end: {date_end}
- Documents loaded: {len(docs)}
- Database used: {INIT_OBJECTS.config_loader['vector_store']}
"""
    send_email(INIT_OBJECTS.config_loader, email_subject, email_body)
|
||
|
||
if __name__ == "__main__": | ||
app() |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,7 +98,7 @@ | |
"BOE-A-1999-637", | ||
"BOE-A-1999-6568", | ||
"BOE-A-1999-8910", | ||
"BOE-A-1999-8994" | ||
"BOE-A-1999-8994", | ||
] | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.