feat: Update makefile to the correct Ollama behaviour

3d94c257 · Hugo Simon · 3d5b0a02 · 3d94c257 · 3d94c257 · 3d94c257
Commit 3d94c257 authored 9 months ago by Hugo Simon
--- a/Makefile
+++ b/Makefile
@@ -86,7 +86,7 @@ endif

 ifeq ($(use_ollama), 1)
 	@ollama serve & \
-	ollama run $(LLM_OLLAMA) & \
+	ollama pull $(LLM_OLLAMA) & \
 	ollama pull $(EMBEDDING_OLLAMA) &
 endif


--- a/api/app/ds/ai_models.py
+++ b/api/app/ds/ai_models.py
@@ -9,6 +9,8 @@ from llama_index.llms.ollama import Ollama
 from llama_index.embeddings.ollama import OllamaEmbedding
 import os

+OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
+
 class CustomOpenAIEmbedding(BaseEmbedding):
    """Embedding model class for LlamaIndex

@@ -82,7 +84,7 @@ def get_llm_model(client_type : str, model: str, max_tokens : int, temperature :
    elif client_type == "openai":
        return OpenAI(model=model, max_tokens=max_tokens, temperature=temperature, top_p=top_p, timeout=600)
    elif client_type == "ollama":
-        return Ollama(model="mistral", base_url = os.getenv("OLLAMA_BASE_URL"), max_tokens=max_tokens, temperature=temperature, top_p=top_p, request_timeout=120.0)
+        return Ollama(model="mistral", base_url = OLLAMA_BASE_URL, max_tokens=max_tokens, temperature=temperature, top_p=top_p, request_timeout=120.0)
    else :
        return "Please provide a right client type"
    
@@ -108,7 +110,7 @@ def get_embedding_model(client_type : str, model: str):
    elif client_type == "ollama":
        return OllamaEmbedding(
                model_name=model,
-                base_url=os.getenv("OLLAMA_BASE_URL"),
+                base_url=OLLAMA_BASE_URL,
                ollama_additional_kwargs={"mirostat": 0},
                )
    else :
@@ -143,7 +145,7 @@ def llm_batch_inference(prompts : List[str], client_type : str, model : str, max
        prompt=prompts, model=model, max_tokens=max_tokens, temperature=temperature, top_p=top_p
    ).choices]
    elif client_type == "ollama" :
-        llm = get_llm_model(client_type=os.getenv('LLM_CLIENT_TYPE'), model=model,max_tokens=max_tokens, temperature=temperature, top_p=top_p )
+        llm = get_llm_model(client_type=client_type, model=model,max_tokens=max_tokens, temperature=temperature, top_p=top_p )
        output = []
        for prompt in prompts :
            output.append(llm.complete(prompt).text)

--- a/doc/Ollama.md
+++ b/doc/Ollama.md
@@ -12,14 +12,16 @@ Dès lors, nous configurerons Ollama afin qu'il écoute sur tous les ports rése
 launchctl setenv OLLAMA_HOST 0.0.0.0:8080 
 ```

-Pour télécharger et lancer un modèle LLM, nous ferons par exemple :
+La commande suivante permet de lancer le serveur Ollama :

 ```bash
-ollama run mistral
+ollama serve
 ```

-Pour un modèle d'embedding :
+Pour télécharger un modèle, nous ferons par exemple :

 ```bash
-ollama pull mxbai-embed-large
-```
\ No newline at end of file
+ollama pull mistral
+```
+
+Une fois les modèles téléchargés, le serveur Ollama les chargera en mémoire en fonction des requêtes envoyées par les utilisateurs.