Compare revisions

Changes are shown as if the source revision was being merged into the target revision (projects: dgfip/projets-ia/caradoc, blenzi/caradoc).

Commits on Source (10) — showing 920 additions and 646 deletions
.PHONY: api web proxy-args base help
.PHONY: api web proxy-args base launch-docker-compose help
SHELL := /bin/bash
EMBEDDING_DIM ?= 1024
use_ollama = 0
use_proxy = 0
LLM_OLLAMA ?=mistral
EMBEDDING_OLLAMA ?=mxbai-embed-large
base_api_img_tag = base-r1
api_img_tag = 1.0
web_img_tag = 1.0
......@@ -57,5 +64,37 @@ else
-t ${BASE_API_IMG_NAME} .
endif
launch-docker-compose: ## launch application with docker containers
@echo "use_proxy: $(use_proxy)";
ifeq ($(use_proxy), 1)
@cd docker_compose && \
docker compose -f external_components/docker_compose_mlflow.yml build \
--build-arg http_proxy_arg=$(HTTP_PROXY) \
--build-arg https_proxy_arg=$(HTTPS_PROXY) && \
docker compose -f docker_compose_caradoc.yml build \
--build-arg http_proxy_arg=$(HTTP_PROXY) \
--build-arg https_proxy_arg=$(HTTPS_PROXY)
else
@cd docker_compose && \
docker compose -f external_components/docker_compose_mlflow.yml build \
--build-arg http_proxy_arg="" \
--build-arg https_proxy_arg="" && \
docker compose -f docker_compose_caradoc.yml build \
--build-arg http_proxy_arg="" \
--build-arg https_proxy_arg=""
endif
ifeq ($(use_ollama), 1)
@ollama serve & \
ollama pull $(LLM_OLLAMA) & \
ollama pull $(EMBEDDING_OLLAMA) &
endif
@cd docker_compose && \
docker compose -f external_components/docker_compose_mlflow.yml --env-file external_components/config.env up -d && \
docker compose -f docker_compose_caradoc.yml up -d && \
sleep 3 && \
./external_components/init_qdrant.sh $(EMBEDDING_DIM)
help: ## print this help
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z0-9_-]+:.*?## / {gsub("\\\\n",sprintf("\n%22c",""), $$2);printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
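For reference, a typical invocation of the new `launch-docker-compose` target behind a corporate proxy might look like the following; it assumes `HTTP_PROXY` and `HTTPS_PROXY` are already set in the environment, and the values are otherwise illustrative:

```bash
# Build the MLflow and CARADOC images through the proxy, start the stack,
# then let init_qdrant.sh create a 1024-dimensional Qdrant collection
make launch-docker-compose use_proxy=1 EMBEDDING_DIM=1024
```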
......@@ -79,6 +79,7 @@ api build API docker image
web build Web docker image
proxy-args print used proxy env vars if needed (@see use_proxy)
base build API base docker image
launch-docker-compose launch application with docker containers
help print this help
```
......@@ -107,8 +108,8 @@ Dans un premier temps, il faut configurer les services externes i.e Minio, Qdran
Minio and MLflow are installed with the following commands:
```bash
docker compose -f docker_compose_mlflow.yml build --build-arg http_proxy_arg=$http_proxy --build-arg https_proxy_arg=$https_proxy
docker compose -f docker_compose_mlflow.yml --env-file config.env up -d
docker compose -f external_components/docker_compose_mlflow.yml build --build-arg http_proxy_arg="" --build-arg https_proxy_arg=""
docker compose -f external_components/docker_compose_mlflow.yml --env-file external_components/config.env up -d
```
Next, we configure Minio by creating two buckets plus an access key and a secret key, to be filled in the
......@@ -124,12 +125,17 @@ Ensuite, il sera possible de lancer le Minio et le MLFlow.
Then, to launch the application, simply run the following:
```bash
chmod +x start_caradoc.sh
./start_caradoc.sh
```
make launch-docker-compose EMBEDDING_DIM=1024
# make launch-docker-compose use_ollama=1 LLM_OLLAMA=mistral EMBEDDING_OLLAMA=mxbai-embed-large EMBEDDING_DIM=1024
```
This launches the application as well as the script that creates a Qdrant collection named `caradoc`.
NB: The application can also be launched with an Ollama server directly, using the use_ollama=1 flag.
The models can be selected with the two arguments LLM_OLLAMA and EMBEDDING_OLLAMA.
The available models are listed in [the Ollama documentation](https://ollama.com/library).
### 3. Installation via Helm
Helm charts [here](./k8s/charts/README.md)
......@@ -164,6 +170,9 @@ d'environnement suivantes :
- `REDIS_HOST` (optional): URL of the Redis service
- `REDIS_PORT` (optional): Port of the Redis service
- `REDIS_PASSWORD` (optional): Password of the Redis database
- `LLM_CLIENT_TYPE`: Client type used to handle LLMs: custom -> OpenAILike, openai, ollama
- `EMBEDDING_CLIENT_TYPE`: Client type used to handle embedding models: custom -> OpenAILike, openai, ollama
- `OLLAMA_BASE_URL`: Ollama API URL
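For a quick local test, these variables can simply be exported before starting the API. The values below are purely illustrative; in particular, the Ollama address is an assumption that depends on how your Ollama server is exposed:

```bash
# Illustrative values only, adapt to your deployment
export LLM_CLIENT_TYPE=ollama              # one of: custom (OpenAILike), openai, ollama
export EMBEDDING_CLIENT_TYPE=ollama
export OLLAMA_BASE_URL=http://localhost:11434   # assumed local Ollama endpoint
```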
#### The `prompt.yaml` file
......@@ -183,6 +192,8 @@ Il est structuré de la manière suivante :
```
NB: Whenever a new model is added, it is important to add the corresponding prompts.
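As a rough illustration, an entry for a new model could look like the sketch below; the model name `my-new-model` and the prompt text are placeholders, and the keys mirror the existing `rag.classique` entries visible later in this diff:

```yaml
rag:
  classique:
    my-new-model:            # placeholder model name
      prompt: "[INST] ... {context_str} ... {query_str} ... [/INST]"  # generation template
      temperature: "0.0"
      top_p: "0.1"
      max_tokens: "1024"
      comment: "RAG prompt template for 'Classique' response generation"
```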
#### The `models_config.json` file
This configuration file sets the models used in the application. It also makes it possible to add
......@@ -200,11 +211,16 @@ Pour plus d'informations sur la méthode de contribution, voir [ici](doc/CONTRIB
### Available LLM integrations
`CARADOC` currently only supports OpenAI-type servers, whether hosted on Azure or not.
`CARADOC` currently supports OpenAI-type servers (including Azure) and Ollama.
For a quick test, a single OpenAI key is enough; otherwise you can provide the URL of your LLM API (or Azure
endpoint) together with the associated key.
An Ollama integration will be available in the future.
For more information on the Ollama configuration, see [Ollama](doc/Ollama.md).
To integrate new client types, simply edit the `api/app/ds/ai_models` file and add the clients to the following functions (see the sketch after this list):
- `get_llm_model`
- `get_embedding_model`
- `llm_batch_inference`
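As a minimal sketch of such an extension, the branch below adds a hypothetical `vllm` client type pointing at a self-hosted OpenAI-compatible endpoint. The `vllm` label and the `VLLM_BASE_URL` / `VLLM_API_KEY` variables are assumptions for illustration only; `OpenAILike` is the class already used for the `custom` type, and the parameters mirror that branch:

```python
import os

from llama_index.llms.openai_like import OpenAILike


def get_llm_model_extended(client_type: str, model: str, max_tokens: int,
                           temperature: float, top_p: float):
    """Sketch of an extra get_llm_model branch for a hypothetical OpenAI-compatible backend."""
    if client_type == "vllm":
        return OpenAILike(
            model=model,
            api_base=os.getenv("VLLM_BASE_URL", "http://localhost:8000/v1"),  # assumed endpoint
            api_key=os.getenv("VLLM_API_KEY", "EMPTY"),                       # assumed credential
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            timeout=600,
        )
    raise ValueError(f"Unsupported client type: {client_type}")
```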
### ⚡ Available pipelines
......@@ -318,7 +334,7 @@ De plus, le jeu d'évaluation est généré automatiquement à l'aide d'un LLM.
quickly on specific user databases. Indeed, each use case has different needs that
may be better served by certain pipelines.
NB: Although it is often criticized, the "LLM-as-a-Judge" approach is the only "label-free" method for evaluating the
outputs of RAG pipelines. The metrics therefore depend on the judge LLMs used and can carry significant
noise. It is thus recommended to use the most capable LLM available for this kind of task, while
complying with your security and confidentiality requirements.
......@@ -329,7 +345,8 @@ Cet indice est évalué en deux étapes :
- **Classic evaluation**: For a query associated with a given document, we evaluate the ability of the search
engine to retrieve the document associated with that query at precision k. The similarity search is
configured to return the k closest documents.
- **LLM evaluation**: For a query, we evaluate the ability of the search engine to retrieve the information
needed to answer the query, regardless of which document it comes from. This evaluation mode avoids assuming that a piece of
information appears in only one document of the corpus. A query may be answered using one or
......
FROM python:3.10.13-slim
FROM python:3.10.13
ARG http_proxy_arg
ARG https_proxy_arg
......@@ -7,21 +7,18 @@ ENV HTTP_PROXY=${http_proxy_arg:-""}
ENV HTTPS_PROXY=${https_proxy_arg:-""}
RUN set -eux; \
if [ -n "${http_proxy_arg}" ]; then \
if [[ -n "$http_proxy_arg" ]]; then \
echo "[global]" > pip.conf; \
echo "proxy=${HTTP_PROXY}" >> pip.conf; \
fi; \
\
\
touch /etc/apt/apt.conf; \
if [ -n "$http_proxy_arg" ]; then \
echo 'Acquire::http::proxy "$HTTP_PROXY";' >> /etc/apt/apt.conf; \
sed -i "s|\$HTTP_PROXY|${HTTP_PROXY}|g" /etc/apt/apt.conf; \
if [[ -n "$http_proxy_arg" ]]; then \
echo "Acquire::http::proxy '${HTTP_PROXY}';" >> /etc/apt/apt.conf; \
fi; \
if [ -n "$https_proxy_arg" ]; then \
echo 'Acquire::https::proxy "$HTTPS_PROXY";' >> /etc/apt/apt.conf; \
sed -i "s|\$HTTPS_PROXY|${HTTPS_PROXY}|g" /etc/apt/apt.conf; \
if [[ -n "$https_proxy_arg" ]]; then \
echo "Acquire::https::proxy '${HTTPS_PROXY}';" >> /etc/apt/apt.conf; \
fi; \
find /etc/apt/ -maxdepth 1 -size 0c -name "apt.conf" -delete
......
......@@ -28,6 +28,8 @@ motor = "==3.4.0"
beanie = "==1.26.0"
pytest = "8.2.2"
llama-index-embeddings-openai = "*"
llama-index-llms-ollama = "*"
llama-index-embeddings-ollama = "*"
[dev-packages]
black = "*"
......
This diff is collapsed.
import os
from openai import AzureOpenAI, OpenAI
client, OPENAI_TYPE = (
(
AzureOpenAI(
api_key=os.environ.get("OPENAI_API_KEY"),
azure_endpoint=os.environ.get("OPENAI_API_BASE"),
api_version=os.environ.get("OPENAI_API_VERSION"),
),
"custom",
)
if os.environ.get("OPENAI_API_BASE")
else (OpenAI(api_key=os.environ.get("OPENAI_API_KEY")), "openai")
)
from typing import Any, List
from typing import Any, List, Dict
from llama_index.llms.openai import OpenAI
from llama_index.llms.openai_like import OpenAILike
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings import BaseEmbedding
from openai import AzureOpenAI, OpenAI as OpenAIClient  # aliased so it does not shadow llama_index's OpenAI LLM class
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
import os
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
class CustomOpenAIEmbedding(BaseEmbedding):
"""Embedding model class for LlamaIndex
......@@ -56,3 +63,92 @@ class CustomOpenAIEmbedding(BaseEmbedding):
async def _aget_text_embedding(self, text: str) -> List[float]:
return self._get_text_embedding(text)
def get_llm_model(client_type : str, model: str, max_tokens : int, temperature : float, top_p : float):
"""Get llm model according to client type and parameters
Args:
client_type (str): Client type, e.g. openai, custom, ollama
model (str): LLM's name
max_tokens (int): max tokens parameter for llm completion
temperature (float): temperature parameter for llm completion
top_p (float): top_p parameter for llm completion
Returns:
LLM llamaindex model
"""
if client_type == "custom" :
return OpenAILike(model=model, max_tokens=max_tokens, temperature=temperature, top_p=top_p, timeout=600)
elif client_type == "openai":
return OpenAI(model=model, max_tokens=max_tokens, temperature=temperature, top_p=top_p, timeout=600)
elif client_type == "ollama":
return Ollama(model="mistral", base_url = OLLAMA_BASE_URL, max_tokens=max_tokens, temperature=temperature, top_p=top_p, request_timeout=120.0)
else :
return "Please provide a right client type"
def get_embedding_model(client_type : str, model: str):
"""Get embedding model according to a client type
Args:
client_type (str): Client type, e.g. openai, custom, ollama
model (str): embedding model's name
Returns:
Embedding llamaindex model
"""
if client_type == "custom" :
openai_client = AzureOpenAI(
api_key=os.environ.get("OPENAI_API_KEY"),
azure_endpoint=os.environ.get("OPENAI_API_BASE"),
api_version=os.environ.get("OPENAI_API_VERSION"),
)
return CustomOpenAIEmbedding(model_name=model, openai_client=openai_client)
elif client_type == "openai":
return OpenAIEmbedding(model=model, timeout=60)
elif client_type == "ollama":
return OllamaEmbedding(
model_name=model,
base_url=OLLAMA_BASE_URL,
ollama_additional_kwargs={"mirostat": 0},
)
else:
return "Please provide a valid client type"
def llm_batch_inference(prompts : List[str], client_type : str, model : str, max_tokens : int, temperature : float, top_p : float) -> List[str] | str:
""" Batch inference LLM according to client type
Args:
prompts (List[str]): inputs for LLM
client_type (str): Client type, e.g. openai, custom, ollama
model (str): LLM's name
max_tokens (int): max tokens parameter for llm inference
temperature (float): temperature parameter for llm inference
top_p (float): top_p parameter for llm inference
Returns:
List[str] | str: Outputs from LLM batch inference
"""
if client_type == "custom":
openai_client = AzureOpenAI(
api_key=os.environ.get("OPENAI_API_KEY"),
azure_endpoint=os.environ.get("OPENAI_API_BASE"),
api_version=os.environ.get("OPENAI_API_VERSION"),
)
return [t.text for t in openai_client.completions.create(
prompt=prompts, model=model, max_tokens=max_tokens, temperature=temperature, top_p=top_p
).choices]
elif client_type == "openai" :
openai_client = OpenAIClient(api_key=os.environ.get("OPENAI_API_KEY"))
return [t.text for t in openai_client.completions.create(
prompt=prompts, model=model, max_tokens=max_tokens, temperature=temperature, top_p=top_p
).choices]
elif client_type == "ollama" :
llm = get_llm_model(client_type=client_type, model=model,max_tokens=max_tokens, temperature=temperature, top_p=top_p )
output = []
for prompt in prompts :
output.append(llm.complete(prompt).text)
return output
else:
return "Please provide a valid client type"
\ No newline at end of file
from typing import Any, List
import os
from llama_index.core.schema import NodeWithScore
from app.config.logger import logger
from app.config.openai import client as openai_client
from app.ds.ai_models import get_embedding_model
def compute_embedding(docs: List[str], model: str) -> List[Any]:
def compute_embedding(docs: List[str], model: str) -> List[List[float]]:
"""This function computes embeddings for a given list of texts
Args:
......@@ -14,10 +13,10 @@ def compute_embedding(docs: List[str], model: str) -> List[Any]:
model (str): Embedding model to used
Returns:
_type_: _description_
(List[List[float]]): Embedding vectors corresponding to docs
"""
embeddings = openai_client.embeddings.create(model=model, input=docs)
return [e.embedding for e in embeddings.data]
embedding_model = get_embedding_model(client_type=os.getenv("EMBEDDING_CLIENT_TYPE"), model=model)
return embedding_model.get_text_embedding_batch(texts=docs)
def node_parser(nodes: List[NodeWithScore]) -> str:
......
import time
from abc import ABC, abstractmethod
from typing import Any, Dict, List
import os
import numpy as np
import pandas as pd
from fastapi import HTTPException, status
......@@ -9,11 +9,10 @@ from tqdm import tqdm
import app.ds.ds_utils as ds_utils
from app.config.logger import logger
from app.config.openai import client as openai_client
from app.config.prompts import prompts_config
from app.ds.rag_pipeline import RAGPipeline
from app.utils.mlflow import log_rag_metrics
from app.ds.ai_models import llm_batch_inference
class EvalRAGPipeline(ABC):
"""
......@@ -48,7 +47,8 @@ def classifiy_pipeline_response(
embeddings_pr = ds_utils.compute_embedding(
[str(response) for response in pipeline_response], model=embed_model
)
return clf_pr.predict(embeddings_pr)
reduced_embs = [row[:1024] for row in embeddings_pr]
return clf_pr.predict(reduced_embs)
def correctness_parsing(correctness_eval: str) -> float | int:
......@@ -90,10 +90,8 @@ def evaluate_correctness(
eval_template.format(query=eq, reference_answer=er, generated_answer=pr)
for eq, er, pr in zip(eval_question, eval_response, pipeline_response)
]
eval_completions = openai_client.completions.create(
prompt=eval_prompts, model=model_name, temperature=0.0, max_tokens=2048
)
eval_res = [e.text.lower() for e in eval_completions.choices]
eval_completions = llm_batch_inference(prompts=eval_prompts, client_type=os.getenv('LLM_CLIENT_TYPE'), model=model_name, max_tokens=2048, temperature=0.0, top_p=0.01)
eval_res = [e.lower() for e in eval_completions]
return [correctness_parsing(e) for e in eval_res]
......@@ -160,10 +158,8 @@ def get_faithfulness(responses: str, contexts: str, model_name: str) -> List[str
eval_template.format(query_str=q, context_str=c)
for q, c in zip(responses, contexts)
]
eval_completions = openai_client.completions.create(
prompt=eval_prompts, model=model_name, temperature=0.0, max_tokens=2048
)
eval_res = [e.text.lower() for e in eval_completions.choices]
eval_completions = llm_batch_inference(prompts=eval_prompts, client_type=os.getenv('LLM_CLIENT_TYPE'), model=model_name, max_tokens=2048, temperature=0.0, top_p=0.01)
eval_res = [e.lower() for e in eval_completions]
return [1 if "yes" in e[:20] else 0 for e in eval_res]
......@@ -209,10 +205,8 @@ def llm_retriever_evaluator(
for n in search_res:
sources_docs += n.text + " /n/n "
prompts.append(check_pt.format(query=query, context=sources_docs))
check_completions = openai_client.completions.create(
prompt=prompts, model=llm_checker_name, temperature=0.0, max_tokens=2048
)
check = [c.text.lower() for c in check_completions.choices]
check_completions = llm_batch_inference(prompts=prompts, client_type=os.getenv('LLM_CLIENT_TYPE'), model=llm_checker_name, max_tokens=2048, temperature=0.0, top_p=0.01)
check = [c.lower() for c in check_completions]
return [1 if "yes" in c[:20] else 0 for c in check]
......@@ -232,17 +226,12 @@ def generate_qa(texts: List[str], model_name: str) -> tuple[List[str], List[str]
prompt_a = prompts_config["rag"]["classique"][model_name]["prompt"]
inputs_q = [prompt_q.format(content=t) for t in texts]
q_completions = openai_client.completions.create(
prompt=inputs_q, model=model_name, temperature=0.0, top_p=0.01, max_tokens=2048
)
question = [q.text for q in q_completions.choices]
question = llm_batch_inference(prompts=inputs_q, client_type=os.getenv('LLM_CLIENT_TYPE'), model=model_name, max_tokens=4096, temperature=0.0, top_p=0.01)
inputs_a = [
prompt_a.format(context_str=t, query_str=q) for t, q in zip(texts, question)
]
a_completions = openai_client.completions.create(
prompt=inputs_a, model=model_name, temperature=0.0, max_tokens=4096
)
answer = [a.text for a in a_completions.choices]
answer = llm_batch_inference(prompts=inputs_a, client_type=os.getenv('LLM_CLIENT_TYPE'), model=model_name, max_tokens=4096, temperature=0.0, top_p=0.01)
return question, answer
......
......@@ -18,10 +18,9 @@ from unstructured.partition.md import partition_md
from unstructured.partition.odt import partition_odt
from unstructured.partition.text import partition_text
from unstructured.staging.base import convert_to_dataframe
from app.ds.ai_models import llm_batch_inference
import app.ds.ds_utils as ds_utils
from app.config.logger import logger
from app.config.openai import client as openai_client
from app.config.prompts import prompts_config
from app.config.qdrant import BASE_COLLECTION_NAME
from app.config.qdrant import client as qdrant_client
......@@ -166,10 +165,8 @@ def fiab_document(texts: List[str], model: str) -> List[str]:
"""
prompt = prompts_config["fiab"]["process"][model]["prompt"]
inputs = [prompt.format(document=t) for t in texts]
output = openai_client.completions.create(
prompt=inputs, model=model, max_tokens=1024, temperature=0.0, top_p=0.01
)
return [re.sub(r"Document fiabilisé:?", "", t.text) for t in output.choices]
output = llm_batch_inference(prompts=inputs, client_type=os.getenv('LLM_CLIENT_TYPE'),model=model, max_tokens=1024, temperature=0.0, top_p=0.01)
return [re.sub(r"Document fiabilisé:?", "", t) for t in output]
def check_voc(
......
from abc import ABC, abstractmethod
from typing import Any, Dict, Tuple
import os
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_pipeline import (FnComponent, InputComponent,
......@@ -8,14 +8,9 @@ from llama_index.core.query_pipeline import (FnComponent, InputComponent,
from llama_index.core.vector_stores import FilterCondition
from llama_index.core.vector_stores.types import (MetadataFilter,
MetadataFilters)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.llms.openai_like import OpenAILike
from llama_index.vector_stores.qdrant import QdrantVectorStore
import app.ds.ai_models as ai_models
from app.config.openai import OPENAI_TYPE
from app.config.openai import client as openai_client
from app.ds.ai_models import get_embedding_model, get_llm_model
from app.config.prompts import prompts_config
from app.config.qdrant import client as qdrant_client
from app.ds.ds_utils import node_parser
......@@ -78,33 +73,15 @@ class NaiveRAGPipeline(RAGPipeline):
filters: dict,
) -> None:
self.collection_name = collection_name
if OPENAI_TYPE == "custom":
self.embed_model = ai_models.CustomOpenAIEmbedding(
openai_client=openai_client, model_name=embed_model_name
)
if OPENAI_TYPE == "openai":
self.embed_model = OpenAIEmbedding(model=embed_model_name, timeout=60)
self.embed_model = get_embedding_model(client_type=os.getenv("EMBEDDING_CLIENT_TYPE"), model=embed_model_name)
self.filters = filters
self.params = prompts_config["rag"]["classique"][llm_model_name]
if OPENAI_TYPE == "custom":
self.llm_model = OpenAILike(
model=llm_model_name,
max_tokens=self.params["max_tokens"],
temperature=self.params["temperature"],
top_p=self.params["top_p"],
timeout=600,
)
if OPENAI_TYPE == "openai":
self.llm_model = OpenAI(
model=llm_model_name,
timeout=60,
temperature=self.params["temperature"],
top_p=self.params["top_p"],
)
self.llm_model = get_llm_model(client_type=os.getenv("LLM_CLIENT_TYPE"), model=llm_model_name,
max_tokens=self.params["max_tokens"],
temperature=self.params["temperature"],
top_p=self.params["top_p"]
)
def get_index(self):
vector_store = QdrantVectorStore(
client=qdrant_client, collection_name=self.collection_name
......@@ -166,30 +143,15 @@ class CheckerRAGPipeline(RAGPipeline):
filters: dict,
) -> None:
self.collection_name = collection_name
if OPENAI_TYPE == "custom":
self.embed_model = ai_models.CustomOpenAIEmbedding(
openai_client=openai_client, model_name=embed_model_name
)
if OPENAI_TYPE == "openai":
self.embed_model = OpenAIEmbedding(model=embed_model_name, timeout=60)
self.embed_model = get_embedding_model(client_type=os.getenv("EMBEDDING_CLIENT_TYPE"), model=embed_model_name)
self.filters = filters
self.qa_params = prompts_config["rag"]["classique"][llm_model_name]
self.check_params = prompts_config["rag"]["check"][llm_model_name]
if OPENAI_TYPE == "custom":
self.llm_model = OpenAILike(
model=llm_model_name,
max_tokens=self.qa_params["max_tokens"],
temperature=self.qa_params["temperature"],
top_p=self.qa_params["top_p"],
timeout=600,
)
if OPENAI_TYPE == "openai":
self.llm_model = OpenAI(
model=llm_model_name,
timeout=60,
temperature=self.qa_params["temperature"],
top_p=self.qa_params["top_p"],
)
self.llm_model = get_llm_model(client_type=os.getenv("LLM_CLIENT_TYPE"), model=llm_model_name,
max_tokens=self.qa_params["max_tokens"],
temperature=self.qa_params["temperature"],
top_p=self.qa_params["top_p"]
)
def get_index(self):
vector_store = QdrantVectorStore(
......
{
"embed_model": "multilingual-e5-large",
"llm_model": "mixtral-instruct",
"fiab_llm_model": "mixtral-instruct",
"llm_judge": "mixtral-instruct",
"eval_generation_llm": "mixtral-instruct"
"embed_model": "mxbai-embed-large",
"llm_model": "mistral",
"fiab_llm_model": "mistral",
"llm_judge": "mistral",
"eval_generation_llm": "mistral"
}
\ No newline at end of file
rag:
classique:
mistral:
prompt: "<s> [INST]
Tu es un assistant documentaire. Ta mission est de répondre à des questions à partir uniquement du contexte suivant. Si tu ne connais pas l'information, tu diras que tu ne connais pas la réponse. Ta réponse sera concise. Tu utiliseras uniquement l'information pertinente.
Ta réponse devra obligatoirement être en français, c'est un point très important.
Voici un exemple de réponse :
- L'IA générative se base sur l'architecture transformer développée par Google.
[/INST]
</s>
[INST]
Contexte :
{context_str}
Question :
{query_str}
Réponds à la question en te basant sur le contexte ci-dessus
[/INST]"
temperature: "0.0"
top_p: "0.1"
max_tokens: "1024" # For performance reason
comment: "RAG prompt template for 'Classique' response generation"
mixtral-instruct:
prompt: "<s> [INST]
Tu es un assistant documentaire. Ta missions est de répondre à des questions à partir uniquement du contexte suivant. Si tu ne connais pas l'information, tu diras que tu ne connais pas la réponse. Ta réponse sera concise. Tu utiliseras uniquement l'information pertiente.
......@@ -22,6 +43,21 @@ rag:
max_tokens: "1024" # For performance reason
comment: "RAG prompt template for 'Classique' response generation"
check:
mistral:
prompt: "<s> [INST] You are a helpful code assistant.
Your task is to reformulate the question in French using the following pieces of context.
[/INST]</s>
[INST]
Context : {context_str}
Question :{query_str}
Just rephrase the question without explanation and only with element from the context in French.
If elements are not in the context, just assume '''Je ne peux pas formuler de question dans ce contexte'''.
[/INST]"
temperature: "0.0"
top_p: "0.1"
max_tokens: "2048"
comment: "Prompt to check if the next LLM could answer to the question with the element of context"
mixtral-instruct:
prompt: "<s> [INST] You are a helpful code assistant.
Your task is to reformulate the question in French using the following pieces of context.
......@@ -40,8 +76,7 @@ rag:
faithfulness:
eval:
mixtral-instruct:
mistral:
prompt: "[INST]
Please tell if a given piece of information is supported by the context.
You need to answer with either YES or NO.
......@@ -65,62 +100,79 @@ faithfulness:
top_p: "0.1"
max_tokens: "512"
comment: "Prompt to evaluate the Faithfulness metric"
refine:
mixtral-instruct:
prompt: "[INST]
We want to understand if the following information is present in the context information: {query_str}
We have provided an existing YES/NO answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
If the existing answer was already YES, still answer YES. If the information is present in the new context, answer YES. Otherwise answer NO.
[/INST]
"
Please tell if a given piece of information is supported by the context.
You need to answer with either YES or NO.
Answer YES if any of the context supports the information, even if most of the context is unrelated. Some examples are provided below.
Information: Apple pie is generally double-crusted.
Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.
Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard or cheddar cheese.
It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
Answer: YES
Information: Apple pies tastes bad.
Context: An apple pie is a fruit pie in which the principal filling ingredient is apples.
Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard or cheddar cheese.
It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
Answer: NO
Information: {query_str}
Context: {context_str}
Answer:
[/INST]"
temperature: "0.0"
top_p: "0.1"
max_tokens: "512"
comment: "Prompt to refine the Faithfulness metric if necessary"
relevancy:
comment: "Prompt to evaluate the Faithfulness metric"
correctness:
eval:
mixtral-instruct:
mistral:
prompt: "[INST]
Your task is to evaluate if the response for the query is in line with the context information provided.
You have two options to answer. Either YES/ NO.
Answer - YES, if the response for the query is in line with context information otherwise NO.
Query and Response:
{query_str}
Context:
{context_str}
Answer:
[/INST]
system:
You are an expert evaluation system for a question answering chatbot.
You are given the following information:
- a user query, and
- a generated answer
You may also be given a reference answer to use for reference in your evaluation.
Your job is to judge the relevance and correctness of the generated answer.
Output a single score that represents a holistic evaluation.
You must return your response in a line with only the score.
Do not return answers in any other format.
On a separate line provide your reasoning for the score as well.
Follow these guidelines for scoring:
- Your score has to be between 1 and 5, where 1 is the worst and 5 is the best.
- If the generated answer is not relevant to the user query, you should give a score of 1.
- If the generated answer is relevant but contains mistakes, you should give a score between 2 and 3.
- If the generated answer is relevant and fully correct, you should give a score between 4 and 5.
Example Response:
4.0
The generated answer has the exact same metrics as the reference answer, but it is not as concise.
user:
## User Query
{query}
## Reference Answer
{reference_answer}
## Generated Answer
{generated_answer}
assistant:
[/INST]
"
temperature: "0.0"
top_p: "0.1"
max_tokens: "512"
comment: "Prompt to evaluate the Relevancy metric"
refine:
mixtral-instruct:
prompt: "[INST]
We want to understand if the following query and response isin line with the context information:
{query_str}
We have provided an existing YES/NO answer:
{existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
If the existing answer was already YES, still answer YES. If the information is present in the new context, answer YES. Otherwise answer NO.
[/INST]"
temperature: "0.0"
top_p: "0.1"
max_tokens: "512"
comment: "Prompt to refine the Relevancy metric if necessary"
correctness:
eval:
comment: "Prompt to evaluate the Correctness metric"
mixtral-instruct:
prompt: "[INST]
system:
You are an expert evaluation system for a question answering chatbot.
......@@ -167,6 +219,31 @@ correctness:
comment: "Prompt to evaluate the Correctness metric"
retrieval_evaluation:
eval:
mistral:
prompt: "<s> [INST]
You are a helpful assistant. Your task is to classify whether the information asked for in the question is present in the following pieces of context.
Answer with YES or NO.
For example :
- Question : What is the size of the Eiffel Tower?
- Context : The Eiffel Tower is the most visited monument in Paris \n\n
The Eiffel Tower is the symbol of Paris
Your answer will be : No, query cannot be answered by the given context
- Question : What is the birthday of Barack Obama ?
- Context : Obama was the 44th president of the United States of America \n\n
Barack Obama was the first black president of the United States \n\n
Barack Obama has received the Nobel Peace Prize \n\n
Obama was born on the 4th of August 1961
[/INST]
</s>
[INST]
Context : {context}
Question :{query}
[/INST]"
temperature: "0.0"
top_p: "0.1"
max_tokens: "128"
comment: "This prompt is used for retriever evaluation by using LLM to cross information between chunks"
mixtral-instruct:
prompt: "<s> [INST]
You are a helpful assistant. Your task is to classify if the informations asked into the question are in the following pieces of context.
......@@ -194,8 +271,24 @@ retrieval_evaluation:
comment: "This prompt is used for retriever evaluation by using LLM to cross information between chunks"
fiab:
process:
mixtral-instruct:
mistral:
prompt: "[INST]
Ta mission est de fiabiliser le contenu du document issu d'une extraction via un parser PDF. En effet, lors du parsing, il est possible que certains mots soient interchangés.
Par exemple, si dans le document, on retrouve :
Qu'est que ce soir ?\n\nOn mange
Alors, tu retourneras :
Qu'est qu'on mange ce soir.
Fiabilise le document suivant afin qu'il puisse être intégré dans une base documentaire : {document}
Tu dois commencer ta réponse par les mots suivants :
Document fiabilisé :
[/INST]"
temperature: "0.0"
top_p: "0.01"
max_tokens: "32000"
comment: "This prompt is used to increase parsing quality by using LLM"
mixtral-instruct:
prompt: "[INST]
Ta mission est fiabiliser le contenu du document issu d'une extraction via un parser PDF. En effet, lors du parsing, il est possible que certains mots soient interchangés.
Par exemple, si dans le document, on retrouve :
......@@ -214,6 +307,25 @@ fiab:
comment: "This prompt is used to increase parsing quality by using LLM"
generation_eval:
generate_question:
mistral:
prompt: "<s> [INST]
Ta mission est de générer une question en français à partir d'un extrait d'un document. Tu généreras une question spécifique à partir du paragraphe donné.
Par exemple :
- Quelle est l'information clé dans le paragraphe donné ?
- Combien d'emplois ont été implantés dans les services relocalisés en 2022 ?
[/INST]
</s>
[INST]
Paragraphe :
{content}
Génère une question en français à partir du paragraphe ci-dessus :
[/INST]
"
temperature: "0.0"
top_p: "0.01"
max_tokens: "1024"
comment: "This prompt is used to create a dataset of query based on document's chunks"
mixtral-instruct:
prompt: "<s> [INST]
Ta mission est de générer une question en français à partir d'un extrait d'un document. Tu généreras une question spécifique à partir du paragraphe donné.
......
......@@ -2,7 +2,7 @@
aiofiles==23.2.1; python_version >= '3.7'
aiohttp==3.9.5; python_version >= '3.8'
aiosignal==1.3.1; python_version >= '3.7'
alembic==1.13.1; python_version >= '3.8'
alembic==1.13.2; python_version >= '3.8'
aniso8601==9.0.1
annotated-types==0.7.0; python_version >= '3.8'
antlr4-python3-runtime==4.9.3
......@@ -52,14 +52,14 @@ flask==3.0.3; python_version >= '3.8'
flatbuffers==24.3.25
fonttools==4.53.0; python_version >= '3.8'
frozenlist==1.4.1; python_version >= '3.8'
fsspec==2024.6.0; python_version >= '3.8'
fsspec==2024.6.1; python_version >= '3.8'
fuzzysearch==0.7.3
gitdb==4.0.11; python_version >= '3.7'
gitpython==3.1.43; python_version >= '3.7'
google-api-core[grpc]==2.19.0; python_version >= '3.7'
google-auth==2.30.0; python_version >= '3.7'
google-api-core[grpc]==2.19.1; python_version >= '3.7'
google-auth==2.31.0; python_version >= '3.7'
google-cloud-vision==3.7.2
googleapis-common-protos==1.63.1; python_version >= '3.7'
googleapis-common-protos==1.63.2; python_version >= '3.7'
graphene==3.3
graphql-core==3.2.3; python_version >= '3.6' and python_version < '4'
graphql-relay==3.2.0; python_version >= '3.6' and python_version < '4'
......@@ -93,8 +93,10 @@ language-data==1.2.0
layoutparser==0.3.4; python_version >= '3.6'
lazy-model==0.2.0; python_version >= '3.7' and python_version < '4.0'
llama-index-core==0.10.44; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-embeddings-ollama==0.1.2; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-embeddings-openai==0.1.10; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-llms-openai==0.1.22; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-llms-ollama==0.1.5; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-llms-openai==0.1.24; python_version < '4.0' and python_full_version >= '3.8.1'
llama-index-llms-openai-like==0.1.2; python_version < '3.12' and python_full_version >= '3.8.1'
llama-index-vector-stores-qdrant==0.2.9; python_version < '3.13' and python_version >= '3.9'
llamaindex-py-client==0.1.19; python_version >= '3.8' and python_version < '4'
......@@ -119,7 +121,7 @@ mypy-extensions==1.0.0; python_version >= '3.5'
nest-asyncio==1.6.0; python_version >= '3.5'
networkx==3.3; python_version >= '3.10'
nltk==3.8.1; python_version >= '3.7'
numpy==1.26.4; python_version < '3.12' and python_version >= '3.8'
numpy==1.26.4; python_version >= '3.9'
nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64'
nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
......@@ -130,15 +132,15 @@ nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine
nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64'
nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64'
nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64'
nvidia-nvjitlink-cu12==12.5.40; python_version >= '3'
nvidia-nvjitlink-cu12==12.5.82; python_version >= '3'
nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
olefile==0.47; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
omegaconf==2.3.0; python_version >= '3.6'
onnx==1.16.1
onnxruntime==1.18.0
onnxruntime==1.18.1
openai==1.12.0; python_full_version >= '3.7.1'
opencv-python==4.10.0.84; python_version >= '3.6'
openpyxl==3.1.4
openpyxl==3.1.5
opentelemetry-api==1.25.0; python_version >= '3.8'
opentelemetry-sdk==1.25.0; python_version >= '3.8'
opentelemetry-semantic-conventions==0.46b0; python_version >= '3.8'
......@@ -149,10 +151,10 @@ pandoc==2.3
pdf2image==1.17.0
pdfminer.six==20231228
pdfplumber==0.11.1; python_version >= '3.8'
phonenumbers==8.13.39
phonenumbers==8.13.40
pikepdf==9.0.0
pillow==10.3.0; python_version >= '3.8'
pillow-heif==0.16.0
pillow==10.4.0; python_version >= '3.8'
pillow-heif==0.17.0
pluggy==1.5.0; python_version >= '3.8'
plumbum==1.8.3; python_version >= '3.6'
ply==3.11
......@@ -168,8 +170,8 @@ pyasn1-modules==0.4.0; python_version >= '3.8'
pycocotools==2.0.8; python_version >= '3.9'
pycparser==2.22; python_version >= '3.8'
pycryptodome==3.20.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pydantic==2.7.4; python_version >= '3.8'
pydantic-core==2.18.4; python_version >= '3.8'
pydantic==2.8.0; python_version >= '3.8'
pydantic-core==2.20.0; python_version >= '3.8'
pyenchant==3.2.2; python_version >= '3.5'
pygments==2.18.0; python_version >= '3.8'
pymongo==4.7.3; python_version >= '3.7'
......@@ -191,7 +193,7 @@ pytz==2024.1
pyyaml==6.0.1; python_version >= '3.6'
qdrant-client==1.9.1; python_version >= '3.8'
querystring-parser==1.2.4
rapidfuzz==3.9.3; python_version >= '3.8'
rapidfuzz==3.9.4; python_version >= '3.8'
redis==5.0.3; python_version >= '3.7'
regex==2024.4.28; python_version >= '3.8'
requests==2.32.3; python_version >= '3.8'
......@@ -201,8 +203,8 @@ rich==13.7.1; python_full_version >= '3.7.0'
rsa==4.9; python_version >= '3.6' and python_version < '4'
safetensors==0.4.3; python_version >= '3.7'
scikit-learn==1.5.0; python_version >= '3.9'
scipy==1.13.1; python_version >= '3.9'
setuptools==70.1.0; python_version >= '3.8'
scipy==1.14.0; python_version >= '3.10'
setuptools==70.2.0; python_version >= '3.8'
shellingham==1.5.4; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
skops==0.9.0; python_version >= '3.8'
......@@ -221,7 +223,7 @@ streaming-form-data==1.15.0; python_version >= '3.8'
structlog==24.2.0; python_version >= '3.8'
sympy==1.12.1; python_version >= '3.8'
tabulate==0.9.0; python_version >= '3.7'
tenacity==8.4.1; python_version >= '3.8'
tenacity==8.4.2; python_version >= '3.8'
thinc==8.2.5; python_version >= '3.6'
threadpoolctl==3.5.0; python_version >= '3.8'
tiktoken==0.6.0; python_version >= '3.8'
......@@ -240,9 +242,9 @@ typing-extensions==4.12.2; python_version >= '3.8'
typing-inspect==0.9.0
tzdata==2024.1; python_version >= '2'
unidiff==0.7.5
unstructured[all-docs]==0.14.7; python_version < '3.13' and python_full_version >= '3.9.0'
unstructured-client==0.23.7; python_version >= '3.8'
unstructured-inference==0.7.35
unstructured[all-docs]==0.14.9; python_version < '3.13' and python_full_version >= '3.9.0'
unstructured-client==0.23.8; python_version >= '3.8'
unstructured-inference==0.7.36
unstructured.pytesseract==0.3.12
urllib3==2.2.2; python_version >= '3.8'
uvicorn[standard]==0.30.1; python_version >= '3.8'
......
# Ollama
Ollama is a solution for deploying language models (LLMs / embeddings) locally.
## Installation and configuration (macOS)
First, install Ollama from the [official site](https://ollama.com).
Then configure Ollama so that it listens on all network interfaces of the machine:
```bash
launchctl setenv OLLAMA_HOST 0.0.0.0:8080
```
The following command starts the Ollama server:
```bash
ollama serve
```
To download a model, run for example:
```bash
ollama pull mistral
```
Once downloaded, models are loaded into memory by the Ollama server on demand, based on the requests it receives from users.
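To quickly check that the server is reachable at the configured address, the standard Ollama REST endpoints can be queried; the port below matches the `OLLAMA_HOST` value set above:

```bash
# List the models known to the local Ollama server (should include "mistral" after the pull above)
curl http://localhost:8080/api/tags

# One-off generation request against the pulled model
curl http://localhost:8080/api/generate -d '{"model": "mistral", "prompt": "Bonjour"}'
```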
doc/logo-caradoc.png: image replaced (54.6 KiB → 55.3 KiB)
......@@ -88,6 +88,10 @@ services:
- MLFLOW_URI=<MLFLOW_URI>
- RAG_PRECISION=5
- REDIS_HOST=redis-service
- LLM_CLIENT_TYPE=custom
- EMBEDDING_CLIENT_TYPE=custom
- OLLAMA_BASE_URL=xxxx
clean-file-cron:
......
......@@ -65,7 +65,12 @@ services:
tracking_server:
restart: always
build: ./mlflow
build:
context: ./mlflow
dockerfile: Dockerfile
args:
- http_proxy_arg
- https_proxy_arg
image: mlflow_server
container_name: mlflow_server
depends_on:
......
#!/bin/bash
# Check if the size argument is provided
if [ -z "$1" ]; then
echo "Usage: $0 <size>"
exit 1
fi
# Assign the first argument to the size variable
SIZE=$1
curl -X PUT http://0.0.0.0:6333/collections/qdr \
-H 'Content-Type: application/json' \
--data-raw '{
"vectors": {
"size": 1024,
"distance": "Cosine"
},
"hnsw_config": {
"payload_m": 16,
"m": 0
}
}'
--data-raw "{
\"vectors\": {
\"size\": $SIZE,
\"distance\": \"Cosine\"
},
\"hnsw_config\": {
\"payload_m\": 16,
\"m\": 0
}
}"
File mode changed from 100644 to 100755