-
Notifications
You must be signed in to change notification settings - Fork 183
Expand file tree
/
Copy pathllamaindex.py
More file actions
109 lines (87 loc) · 3.72 KB
/
llamaindex.py
File metadata and controls
109 lines (87 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# https://docs.llamaindex.ai/en/stable/examples/agent/react_agent_with_query_engine/
import os
from pathlib import Path
import azure.identity
from dotenv import load_dotenv
from llama_index.core import Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index.core.agent.workflow import AgentStream, ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.core.workflow import Context
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai_like import OpenAILike
# Configure the global LlamaIndex Settings (LLM + embedding model) for the
# selected API host: Azure OpenAI with keyless Entra ID auth, or a local Ollama.
load_dotenv(override=True)

API_HOST = os.getenv("API_HOST", "azure")

if API_HOST == "azure":
    token_provider = azure.identity.get_bearer_token_provider(
        azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    azure_api_base = os.environ["AZURE_OPENAI_ENDPOINT"] + "/openai/v1"
    # NOTE(review): the bearer token is fetched once at startup and passed as the
    # api_key; long-running processes would need a refresh — fine for this script.
    Settings.llm = OpenAILike(
        model=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
        api_base=azure_api_base,
        api_key=token_provider(),
        is_chat_model=True,
    )
    Settings.embed_model = OpenAIEmbedding(
        model=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
        api_base=azure_api_base,
        api_key=token_provider(),
    )
elif API_HOST == "ollama":
    ollama_api_base = os.environ.get("OLLAMA_ENDPOINT", "http://localhost:11434/v1")
    # Ollama's OpenAI-compatible endpoint ignores the API key, but the client
    # requires a non-empty value.
    Settings.llm = OpenAILike(
        model=os.environ.get("OLLAMA_MODEL", "gemma4:e4b"),
        api_base=ollama_api_base,
        api_key="none",
        is_chat_model=True,
    )
    Settings.embed_model = OpenAIEmbedding(
        model="nomic-embed-text",
        api_base=ollama_api_base,
        api_key="none",
    )
# Persist locations for the two vector indexes. Defining them ONCE and using
# them for both load and persist fixes a bug in the original code: indexes were
# loaded from "./storage/docsN" but persisted to
# "example_data/.llama_index_storage/docsN", so a rebuilt index was never found
# on the next run and the PDFs were re-embedded every time.
_ROOT_DIR = Path(__file__).parent.parent
_PERSIST_DIR1 = _ROOT_DIR / "example_data/.llama_index_storage/docs1"
_PERSIST_DIR2 = _ROOT_DIR / "example_data/.llama_index_storage/docs2"

# Try to load the indexes from storage; on a cache miss, rebuild from the PDFs.
try:
    index1 = load_index_from_storage(StorageContext.from_defaults(persist_dir=_PERSIST_DIR1))
    index2 = load_index_from_storage(StorageContext.from_defaults(persist_dir=_PERSIST_DIR2))
    index_loaded = True
except FileNotFoundError:
    index_loaded = False

if not index_loaded:
    docs1 = SimpleDirectoryReader(input_files=[_ROOT_DIR / "example_data/employee_handbook.pdf"]).load_data()
    docs2 = SimpleDirectoryReader(input_files=[_ROOT_DIR / "example_data/PerksPlus.pdf"]).load_data()
    index1 = VectorStoreIndex.from_documents(docs1)
    index2 = VectorStoreIndex.from_documents(docs2)
    # Persist to the same directories we attempt to load from above.
    index1.storage_context.persist(persist_dir=_PERSIST_DIR1)
    index2.storage_context.persist(persist_dir=_PERSIST_DIR2)

engine1 = index1.as_query_engine(similarity_top_k=3)
engine2 = index2.as_query_engine(similarity_top_k=3)

# Wrap each query engine as a named tool the ReAct agent can choose between;
# the descriptions are what the LLM uses to route a question to a tool.
query_engine_tools = [
    QueryEngineTool.from_defaults(
        query_engine=engine1,
        name="engine1",
        description=(
            "Provides information about Contoso employee handbook"
            " - covering basic job roles, policies, workplace safety, HR, etc."
        ),
    ),
    QueryEngineTool.from_defaults(
        query_engine=engine2,
        name="engine2",
        description=("Provides information about Contoso PerksPlus program, including what can be reimbursed. "),
    ),
]
async def main():
    """Run the ReAct agent on a sample question, streaming tokens as it thinks."""
    agent = ReActAgent(tools=query_engine_tools, llm=Settings.llm)
    ctx = Context(agent)

    handler = agent.run("can i get my gardening tools reimbursed?", ctx=ctx)
    # Echo each streamed token delta immediately, then await the final answer.
    async for event in handler.stream_events():
        if isinstance(event, AgentStream):
            print(event.delta, end="", flush=True)

    final_response = await handler
    print(str(final_response))


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())