Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(agents-api): Preliminary implementation of session.chat #453

Merged
merged 9 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,18 @@ COZO_HOST=http://memory-store:9070
COZO_PORT=9070
COZO_ROCKSDB_DIR=cozo.db
DTYPE=float16
EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
EMBEDDING_SERVICE_BASE=http://text-embeddings-inference
EMBEDDING_SERVICE_URL=${EMBEDDING_SERVICE_BASE}/embed
GATEWAY_PORT=80
GPU_MEMORY_UTILIZATION=0.90

HF_TOKEN=""
HUGGING_FACE_HUB_TOKEN=""
HF_TOKEN=
HUGGING_FACE_HUB_TOKEN=
JWT_SHARED_KEY=

MAX_MODEL_LEN=8192
MAX_NUM_SEQS=1
MNT_DIR=/data
MODEL_API_KEY=myauthkey
MODEL_API_KEY_HEADER_NAME=Authorization
MODEL_API_URL=http://model-serving:8000
MODEL_INFERENCE_URL=http://model-serving:8000/v1
MODEL_ID=BAAI/bge-m3

# MODEL_NAME="OpenPipe/Hermes-2-Theta-Llama-3-8B-32k"
MODEL_NAME="julep-ai/Hermes-2-Theta-Llama-3-8B"

SKIP_CHECK_DEVELOPER_HEADERS=true
SUMMARIZATION_TOKENS_THRESHOLD=2048
Expand All @@ -40,4 +33,22 @@ WORKER_URL=temporal:7233

AGENTS_API_DEBUG=false
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
ANTHROPIC_API_KEY=
GROQ_API_KEY=
CLOUDFLARE_API_KEY=
CLOUDFLARE_ACCOUNT_ID=
NVIDIA_NIM_API_KEY=
GITHUB_API_KEY=
VOYAGE_API_KEY=
GOOGLE_APPLICATION_CREDENTIALS=

LITELLM_URL=http://litellm:4000
POSTGRES_DB=litellm
POSTGRES_USER=llmproxy
POSTGRES_PASSWORD=
LITELLM_DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@litellm-db:5432/${POSTGRES_DB}
LITELLM_MASTER_KEY=
LITELLM_REDIS_HOST=litellm-redis
LITELLM_REDIS_PORT=6379
LITELLM_REDIS_PASSWORD=
REDIS_ARGS="--requirepass ${LITELLM_REDIS_PASSWORD}"
2 changes: 1 addition & 1 deletion .github/workflows/lint-and-format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:

strategy:
matrix:
directory: [agents-api, model-serving, sdks/python]
directory: [agents-api, sdks/python]

steps:
- uses: actions/checkout@v4
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/push-to-hub.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ jobs:
service-directory:
- gateway
- memory-store
# - model-serving

steps:
- uses: actions/checkout@v4
Expand Down
12 changes: 4 additions & 8 deletions agents-api/agents_api/activities/embed_docs.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
from pydantic import UUID4
from temporalio import activity

from agents_api.embed_models_registry import EmbeddingModel
from agents_api.env import embedding_model_id
from agents_api.models.docs.embed_docs import (
embed_docs_snippets_query,
)
from agents_api.clients.embed import embed
from agents_api.models.docs.embed_snippets import embed_snippets as embed_snippets_query

snippet_embed_instruction = "Encode this passage for retrieval: "


@activity.defn
async def embed_docs(doc_id: UUID4, title: str, content: list[str]) -> None:
indices, snippets = list(zip(*enumerate(content)))
model = EmbeddingModel.from_model_name(embedding_model_id)
embeddings = await model.embed(
embeddings = await embed(
[
{
"instruction": snippet_embed_instruction,
Expand All @@ -24,7 +20,7 @@ async def embed_docs(doc_id: UUID4, title: str, content: list[str]) -> None:
]
)

embed_docs_snippets_query(
embed_snippets_query(
doc_id=doc_id,
snippet_indices=indices,
embeddings=embeddings,
Expand Down
13 changes: 2 additions & 11 deletions agents-api/agents_api/activities/summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from uuid import UUID

import pandas as pd
from litellm import acompletion
from temporalio import activity

from agents_api.common.protocol.entries import Entry
Expand All @@ -19,8 +18,8 @@
from agents_api.rec_sum.summarize import summarize_messages
from agents_api.rec_sum.trim import trim_messages

from ..env import model_api_key, model_inference_url, summarization_model_name
from ..model_registry import LOCAL_MODELS
from ..clients.litellm import acompletion
from ..env import summarization_model_name


# TODO: remove stubs
Expand Down Expand Up @@ -149,12 +148,6 @@ async def run_prompt(
parser: Callable[[str], str] = lambda x: x,
**kwargs,
) -> str:
api_base = None
api_key = None
if model in LOCAL_MODELS:
api_base = model_inference_url
api_key = model_api_key
model = f"openai/{model}"
prompt = make_prompt(dialog, previous_memories, **kwargs)
response = await acompletion(
model=model,
Expand All @@ -168,8 +161,6 @@ async def run_prompt(
temperature=temperature,
stop=["<", "<|"],
stream=False,
api_base=api_base,
api_key=api_key,
)

content = response.choices[0].message.content
Expand Down
52 changes: 39 additions & 13 deletions agents-api/agents_api/autogen/Agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pydantic import AwareDatetime, BaseModel, ConfigDict, Field

from .Chat import GenerationPresetSettings, OpenAISettings, VLLMSettings
from .Chat import DefaultChatSettings


class Agent(BaseModel):
Expand Down Expand Up @@ -47,9 +47,7 @@ class Agent(BaseModel):
"""
Instructions for the agent
"""
default_settings: (
GenerationPresetSettings | OpenAISettings | VLLMSettings | None
) = None
default_settings: DefaultChatSettings | None = None
"""
Default settings for all sessions created by this agent
"""
Expand Down Expand Up @@ -86,9 +84,41 @@ class CreateAgentRequest(BaseModel):
"""
Instructions for the agent
"""
default_settings: (
GenerationPresetSettings | OpenAISettings | VLLMSettings | None
) = None
default_settings: DefaultChatSettings | None = None
"""
Default settings for all sessions created by this agent
"""


class CreateOrUpdateAgentRequest(CreateAgentRequest):
model_config = ConfigDict(
populate_by_name=True,
)
id: UUID
metadata: dict[str, Any] | None = None
name: Annotated[
str,
Field(
"",
pattern="^[\\p{L}\\p{Nl}\\p{Pattern_Syntax}\\p{Pattern_White_Space}]+[\\p{ID_Start}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}\\p{Pattern_Syntax}\\p{Pattern_White_Space}]*$",
),
]
"""
Name of the agent
"""
about: str = ""
"""
About the agent
"""
model: str = ""
"""
Model name to use (gpt-4-turbo, gemini-nano etc)
"""
instructions: str | list[str] = ""
"""
Instructions for the agent
"""
default_settings: DefaultChatSettings | None = None
"""
Default settings for all sessions created by this agent
"""
Expand Down Expand Up @@ -125,9 +155,7 @@ class PatchAgentRequest(BaseModel):
"""
Instructions for the agent
"""
default_settings: (
GenerationPresetSettings | OpenAISettings | VLLMSettings | None
) = None
default_settings: DefaultChatSettings | None = None
"""
Default settings for all sessions created by this agent
"""
Expand Down Expand Up @@ -164,9 +192,7 @@ class UpdateAgentRequest(BaseModel):
"""
Instructions for the agent
"""
default_settings: (
GenerationPresetSettings | OpenAISettings | VLLMSettings | None
) = None
default_settings: DefaultChatSettings | None = None
"""
Default settings for all sessions created by this agent
"""
Loading
Loading