julep-ai · whiterabbit1983 · Aug 12, 2024 · Aug 10, 2024 · Aug 10, 2024 · Aug 11, 2024
diff --git a/.env.example b/.env.example
@@ -6,25 +6,18 @@ COZO_HOST=http://memory-store:9070
 COZO_PORT=9070
 COZO_ROCKSDB_DIR=cozo.db
 DTYPE=float16
-EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
+EMBEDDING_SERVICE_BASE=http://text-embeddings-inference
+EMBEDDING_SERVICE_URL=${EMBEDDING_SERVICE_BASE}/embed
 GATEWAY_PORT=80
 GPU_MEMORY_UTILIZATION=0.90
 
-HF_TOKEN=""
-HUGGING_FACE_HUB_TOKEN=""
+HF_TOKEN=
+HUGGING_FACE_HUB_TOKEN=
 JWT_SHARED_KEY=
 
 MAX_MODEL_LEN=8192
 MAX_NUM_SEQS=1
 MNT_DIR=/data
-MODEL_API_KEY=myauthkey
-MODEL_API_KEY_HEADER_NAME=Authorization
-MODEL_API_URL=http://model-serving:8000
-MODEL_INFERENCE_URL=http://model-serving:8000/v1
-MODEL_ID=BAAI/bge-m3
-
-# MODEL_NAME="OpenPipe/Hermes-2-Theta-Llama-3-8B-32k"
-MODEL_NAME="julep-ai/Hermes-2-Theta-Llama-3-8B"
 
 SKIP_CHECK_DEVELOPER_HEADERS=true
 SUMMARIZATION_TOKENS_THRESHOLD=2048
@@ -40,4 +33,22 @@ WORKER_URL=temporal:7233
 
 AGENTS_API_DEBUG=false
 OPENAI_API_KEY=
-ANTHROPIC_API_KEY=
+ANTHROPIC_API_KEY=
+GROQ_API_KEY=
+CLOUDFLARE_API_KEY=
+CLOUDFLARE_ACCOUNT_ID=
+NVIDIA_NIM_API_KEY=
+GITHUB_API_KEY=
+VOYAGE_API_KEY=
+GOOGLE_APPLICATION_CREDENTIALS=
+
+LITELLM_URL=http://litellm:4000
+POSTGRES_DB=litellm
+POSTGRES_USER=llmproxy
+POSTGRES_PASSWORD=
+LITELLM_DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@litellm-db:5432/${POSTGRES_DB}
+LITELLM_MASTER_KEY=
+LITELLM_REDIS_HOST=litellm-redis
+LITELLM_REDIS_PORT=6379
+LITELLM_REDIS_PASSWORD=
+REDIS_ARGS="--requirepass ${LITELLM_REDIS_PASSWORD}"
diff --git a/.github/workflows/lint-and-format.yml b/.github/workflows/lint-and-format.yml
@@ -9,7 +9,7 @@ jobs:
 
     strategy:
       matrix:
-        directory: [agents-api, model-serving, sdks/python]
+        directory: [agents-api, sdks/python]
 
     steps:
       - uses: actions/checkout@v4

diff --git a/.github/workflows/push-to-hub.yml b/.github/workflows/push-to-hub.yml
@@ -131,7 +131,6 @@ jobs:
         service-directory:
           - gateway
           - memory-store
-          # - model-serving
 
     steps:
       - uses: actions/checkout@v4

diff --git a/agents-api/agents_api/activities/embed_docs.py b/agents-api/agents_api/activities/embed_docs.py
@@ -1,20 +1,16 @@
 from pydantic import UUID4
 from temporalio import activity
 
-from agents_api.embed_models_registry import EmbeddingModel
-from agents_api.env import embedding_model_id
-from agents_api.models.docs.embed_docs import (
-    embed_docs_snippets_query,
-)
+from agents_api.clients.embed import embed
+from agents_api.models.docs.embed_snippets import embed_snippets as embed_snippets_query
 
 snippet_embed_instruction = "Encode this passage for retrieval: "
 
 
 @activity.defn
 async def embed_docs(doc_id: UUID4, title: str, content: list[str]) -> None:
     indices, snippets = list(zip(*enumerate(content)))
-    model = EmbeddingModel.from_model_name(embedding_model_id)
-    embeddings = await model.embed(
+    embeddings = await embed(
         [
             {
                 "instruction": snippet_embed_instruction,
@@ -24,7 +20,7 @@ async def embed_docs(doc_id: UUID4, title: str, content: list[str]) -> None:
         ]
     )
 
-    embed_docs_snippets_query(
+    embed_snippets_query(
         doc_id=doc_id,
         snippet_indices=indices,
         embeddings=embeddings,

diff --git a/agents-api/agents_api/activities/summarization.py b/agents-api/agents_api/activities/summarization.py
@@ -6,7 +6,6 @@
 from uuid import UUID
 
 import pandas as pd
-from litellm import acompletion
 from temporalio import activity
 
 from agents_api.common.protocol.entries import Entry
@@ -19,8 +18,8 @@
 from agents_api.rec_sum.summarize import summarize_messages
 from agents_api.rec_sum.trim import trim_messages
 
-from ..env import model_api_key, model_inference_url, summarization_model_name
-from ..model_registry import LOCAL_MODELS
+from ..clients.litellm import acompletion
+from ..env import summarization_model_name
 
 
 # TODO: remove stubs
@@ -149,12 +148,6 @@ async def run_prompt(
     parser: Callable[[str], str] = lambda x: x,
     **kwargs,
 ) -> str:
-    api_base = None
-    api_key = None
-    if model in LOCAL_MODELS:
-        api_base = model_inference_url
-        api_key = model_api_key
-        model = f"openai/{model}"
     prompt = make_prompt(dialog, previous_memories, **kwargs)
     response = await acompletion(
         model=model,
@@ -168,8 +161,6 @@ async def run_prompt(
         temperature=temperature,
         stop=["<", "<|"],
         stream=False,
-        api_base=api_base,
-        api_key=api_key,
     )
 
     content = response.choices[0].message.content

diff --git a/agents-api/agents_api/autogen/Agents.py b/agents-api/agents_api/autogen/Agents.py
@@ -8,7 +8,7 @@
 
 from pydantic import AwareDatetime, BaseModel, ConfigDict, Field
 
-from .Chat import GenerationPresetSettings, OpenAISettings, VLLMSettings
+from .Chat import DefaultChatSettings
 
 
 class Agent(BaseModel):
@@ -47,9 +47,7 @@ class Agent(BaseModel):
     """
     Instructions for the agent
     """
-    default_settings: (
-        GenerationPresetSettings | OpenAISettings | VLLMSettings | None
-    ) = None
+    default_settings: DefaultChatSettings | None = None
     """
     Default settings for all sessions created by this agent
     """
@@ -86,9 +84,41 @@ class CreateAgentRequest(BaseModel):
     """
     Instructions for the agent
     """
-    default_settings: (
-        GenerationPresetSettings | OpenAISettings | VLLMSettings | None
-    ) = None
+    default_settings: DefaultChatSettings | None = None
+    """
+    Default settings for all sessions created by this agent
+    """
+
+
+class CreateOrUpdateAgentRequest(CreateAgentRequest):  # request model: subclasses CreateAgentRequest and declares a required `id`
+    model_config = ConfigDict(
+        populate_by_name=True,  # NOTE(review): pydantic config flag; presumably allows population by field name as well as alias - confirm
+    )
+    id: UUID  # no default is given here, so a value must be supplied
+    metadata: dict[str, Any] | None = None  # optional; defaults to None
+    name: Annotated[  # str validated against the regex pattern passed to Field below
+        str,
+        Field(
+            "",  # first positional argument of Field is the default: empty string
+            pattern="^[\\p{L}\\p{Nl}\\p{Pattern_Syntax}\\p{Pattern_White_Space}]+[\\p{ID_Start}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}\\p{Pattern_Syntax}\\p{Pattern_White_Space}]*$",
+        ),
+    ]
+    """
+    Name of the agent
+    """
+    about: str = ""  # defaults to the empty string
+    """
+    About the agent
+    """
+    model: str = ""  # defaults to the empty string
+    """
+    Model name to use (gpt-4-turbo, gemini-nano etc)
+    """
+    instructions: str | list[str] = ""  # a single string or a list of strings; defaults to ""
+    """
+    Instructions for the agent
+    """
+    default_settings: DefaultChatSettings | None = None  # optional; defaults to None
     """
     Default settings for all sessions created by this agent
     """
@@ -125,9 +155,7 @@ class PatchAgentRequest(BaseModel):
     """
     Instructions for the agent
     """
-    default_settings: (
-        GenerationPresetSettings | OpenAISettings | VLLMSettings | None
-    ) = None
+    default_settings: DefaultChatSettings | None = None
     """
     Default settings for all sessions created by this agent
     """
@@ -164,9 +192,7 @@ class UpdateAgentRequest(BaseModel):
     """
     Instructions for the agent
     """
-    default_settings: (
-        GenerationPresetSettings | OpenAISettings | VLLMSettings | None
-    ) = None
+    default_settings: DefaultChatSettings | None = None
     """
     Default settings for all sessions created by this agent
     """