Skip to main content
Open In Colab
ArangoDB 是一个可扩展的图数据库系统,可以更快地从关联数据中挖掘价值。它通过单一查询语言支持原生图、集成搜索引擎和 JSON。ArangoDB 支持本地部署或云端部署。
本 notebook 展示了如何使用 LLM 为 ArangoDB 数据库提供自然语言接口。

环境准备

您可以通过 ArangoDB Docker 镜像 在本地运行 ArangoDB 实例:
docker run -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb
另一种选择是使用 ArangoDB Cloud Connector 包 来获取一个临时云实例:
pip install -qU  python-arango # ArangoDB Python 驱动
pip install -qU  adb-cloud-connector # ArangoDB 云实例配置器
pip install -qU  langchain-openai
pip install -qU  langchain
# 实例化 ArangoDB 数据库
import json

from adb_cloud_connector import get_temp_credentials
from arango import ArangoClient

con = get_temp_credentials()

db = ArangoClient(hosts=con["url"]).db(
    con["dbName"], con["username"], con["password"], verify=True
)

print(json.dumps(con, indent=2))
Log: requesting new credentials...
Succcess: new credentials acquired
{
  "dbName": "TUT3sp29s3pjf1io0h4cfdsq",
  "username": "TUTo6nkwgzkizej3kysgdyeo8",
  "password": "x",
  "hostname": "tutorials.arangodb.cloud",
  "port": 8529,
  "url": "https://tutorials.arangodb.cloud:8529"
}
# 实例化 ArangoDB-LangChain 图
from langchain_community.graphs import ArangoGraph

graph = ArangoGraph(db)

填充数据库

我们将使用 Python DriverGameOfThrones 数据导入数据库。
if db.has_graph("GameOfThrones"):
    db.delete_graph("GameOfThrones", drop_collections=True)

db.create_graph(
    "GameOfThrones",
    edge_definitions=[
        {
            "edge_collection": "ChildOf",
            "from_vertex_collections": ["Characters"],
            "to_vertex_collections": ["Characters"],
        },
    ],
)

documents = [
    {
        "_key": "NedStark",
        "name": "Ned",
        "surname": "Stark",
        "alive": True,
        "age": 41,
        "gender": "male",
    },
    {
        "_key": "CatelynStark",
        "name": "Catelyn",
        "surname": "Stark",
        "alive": False,
        "age": 40,
        "gender": "female",
    },
    {
        "_key": "AryaStark",
        "name": "Arya",
        "surname": "Stark",
        "alive": True,
        "age": 11,
        "gender": "female",
    },
    {
        "_key": "BranStark",
        "name": "Bran",
        "surname": "Stark",
        "alive": True,
        "age": 10,
        "gender": "male",
    },
]

edges = [
    {"_to": "Characters/NedStark", "_from": "Characters/AryaStark"},
    {"_to": "Characters/NedStark", "_from": "Characters/BranStark"},
    {"_to": "Characters/CatelynStark", "_from": "Characters/AryaStark"},
    {"_to": "Characters/CatelynStark", "_from": "Characters/BranStark"},
]

db.collection("Characters").import_bulk(documents)
db.collection("ChildOf").import_bulk(edges)
{'error': False,
 'created': 4,
 'errors': 0,
 'empty': 0,
 'updated': 0,
 'ignored': 0,
 'details': []}

获取和设置 ArangoDB 模式

在实例化 ArangoDBGraph 对象时会生成初始的 ArangoDB Schema。以下是模式的 getter 和 setter 方法,供您查看或修改模式时使用:
# 此处模式应为空,
# 因为 `graph` 在 ArangoDB 数据摄取之前初始化(见上文)。

import json

print(json.dumps(graph.schema, indent=4))
{
    "Graph Schema": [],
    "Collection Schema": []
}
graph.set_schema()
# 现在可以查看生成的模式

import json

print(json.dumps(graph.schema, indent=4))
{
    "Graph Schema": [
        {
            "graph_name": "GameOfThrones",
            "edge_definitions": [
                {
                    "edge_collection": "ChildOf",
                    "from_vertex_collections": [
                        "Characters"
                    ],
                    "to_vertex_collections": [
                        "Characters"
                    ]
                }
            ]
        }
    ],
    "Collection Schema": [
        {
            "collection_name": "ChildOf",
            "collection_type": "edge",
            "edge_properties": [
                {
                    "name": "_key",
                    "type": "str"
                },
                {
                    "name": "_id",
                    "type": "str"
                },
                {
                    "name": "_from",
                    "type": "str"
                },
                {
                    "name": "_to",
                    "type": "str"
                },
                {
                    "name": "_rev",
                    "type": "str"
                }
            ],
            "example_edge": {
                "_key": "266218884025",
                "_id": "ChildOf/266218884025",
                "_from": "Characters/AryaStark",
                "_to": "Characters/NedStark",
                "_rev": "_gVPKGSq---"
            }
        },
        {
            "collection_name": "Characters",
            "collection_type": "document",
            "document_properties": [
                {
                    "name": "_key",
                    "type": "str"
                },
                {
                    "name": "_id",
                    "type": "str"
                },
                {
                    "name": "_rev",
                    "type": "str"
                },
                {
                    "name": "name",
                    "type": "str"
                },
                {
                    "name": "surname",
                    "type": "str"
                },
                {
                    "name": "alive",
                    "type": "bool"
                },
                {
                    "name": "age",
                    "type": "int"
                },
                {
                    "name": "gender",
                    "type": "str"
                }
            ],
            "example_document": {
                "_key": "NedStark",
                "_id": "Characters/NedStark",
                "_rev": "_gVPKGPi---",
                "name": "Ned",
                "surname": "Stark",
                "alive": true,
                "age": 41,
                "gender": "male"
            }
        }
    ]
}

查询 ArangoDB 数据库

现在我们可以使用 ArangoDB Graph QA 链查询数据
import os

os.environ["OPENAI_API_KEY"] = "your-key-here"
from langchain_classic.chains import ArangoGraphQAChain
from langchain_openai import ChatOpenAI

chain = ArangoGraphQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True
)
chain.run("Is Ned Stark alive?")
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters
FOR character IN Characters
FILTER character.name == "Ned" AND character.surname == "Stark"
RETURN character.alive

AQL Result:
[True]

> Finished chain.
'Yes, Ned Stark is alive.'
chain.run("How old is Arya Stark?")
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters
FOR character IN Characters
FILTER character.name == "Arya" && character.surname == "Stark"
RETURN character.age

AQL Result:
[11]

> Finished chain.
'Arya Stark is 11 years old.'
chain.run("Are Arya Stark and Ned Stark related?")
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters, ChildOf
FOR v, e, p IN 1..1 OUTBOUND 'Characters/AryaStark' ChildOf
    FILTER p.vertices[-1]._key == 'NedStark'
    RETURN p

AQL Result:
[{'vertices': [{'_key': 'AryaStark', '_id': 'Characters/AryaStark', '_rev': '_gVPKGPi--B', 'name': 'Arya', 'surname': 'Stark', 'alive': True, 'age': 11, 'gender': 'female'}, {'_key': 'NedStark', '_id': 'Characters/NedStark', '_rev': '_gVPKGPi---', 'name': 'Ned', 'surname': 'Stark', 'alive': True, 'age': 41, 'gender': 'male'}], 'edges': [{'_key': '266218884025', '_id': 'ChildOf/266218884025', '_from': 'Characters/AryaStark', '_to': 'Characters/NedStark', '_rev': '_gVPKGSq---'}], 'weights': [0, 1]}]

> Finished chain.
'Yes, Arya Stark and Ned Stark are related. According to the information retrieved from the database, there is a relationship between them. Arya Stark is the child of Ned Stark.'
chain.run("Does Arya Stark have a dead parent?")
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
WITH Characters, ChildOf
FOR v, e IN 1..1 OUTBOUND 'Characters/AryaStark' ChildOf
FILTER v.alive == false
RETURN e

AQL Result:
[{'_key': '266218884027', '_id': 'ChildOf/266218884027', '_from': 'Characters/AryaStark', '_to': 'Characters/CatelynStark', '_rev': '_gVPKGSu---'}]

> Finished chain.
'Yes, Arya Stark has a dead parent. The parent is Catelyn Stark.'

链修饰符

您可以修改以下 ArangoDBGraphQAChain 类变量的值来改变链的结果行为
# 指定要返回的最大 AQL 查询结果数
chain.top_k = 10

# 指定是否在输出字典中返回 AQL 查询
chain.return_aql_query = True

# 指定是否在输出字典中返回 AQL JSON 结果
chain.return_aql_result = True

# 指定最大 AQL 生成尝试次数
chain.max_aql_generation_attempts = 5

# 指定一组 AQL 查询示例,这些示例传递给
# AQL 生成提示模板以促进少样本学习。
# 默认为空字符串。
chain.aql_examples = """
# Is Ned Stark alive?
RETURN DOCUMENT('Characters/NedStark').alive

# Is Arya Stark the child of Ned Stark?
FOR e IN ChildOf
    FILTER e._from == "Characters/AryaStark" AND e._to == "Characters/NedStark"
    RETURN e
"""
chain.run("Is Ned Stark alive?")

# chain("Is Ned Stark alive?") # 返回包含 AQL 查询和 AQL 结果的字典
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
RETURN DOCUMENT('Characters/NedStark').alive

AQL Result:
[True]

> Finished chain.
'Yes, according to the information in the database, Ned Stark is alive.'
chain.run("Is Bran Stark the child of Ned Stark?")
> Entering new ArangoGraphQAChain chain...
AQL Query (1):
FOR e IN ChildOf
    FILTER e._from == "Characters/BranStark" AND e._to == "Characters/NedStark"
    RETURN e

AQL Result:
[{'_key': '266218884026', '_id': 'ChildOf/266218884026', '_from': 'Characters/BranStark', '_to': 'Characters/NedStark', '_rev': '_gVPKGSq--_'}]

> Finished chain.
'Yes, according to the information in the ArangoDB database, Bran Stark is indeed the child of Ned Stark.'