These models define the graph structure that remember() should extract. Metadata is recommended because it tells Cognee which fields to embed and index for retrieval.
text = "The Python programming language is widely used in data analysis, web development, and machine learning."await remember( text, graph_model=ProgrammingLanguage, custom_prompt=CUSTOM_PROMPT, self_improvement=False,)
This ingests the text and builds the graph in one call. The custom graph_model acts as the extraction schema, and CUSTOM_PROMPT tells the LLM what relationships to look for.
"""Complete example: ingest one sentence with a custom graph model via remember()."""

import os
import asyncio
from typing import Any, List, Optional

from pydantic import SkipValidation

from cognee import prune, remember, visualize_graph
from cognee.low_level import DataPoint

# Instruction passed to remember() as custom_prompt; it tells the LLM what
# relationships to look for.
CUSTOM_PROMPT = (
    "Extract a simple graph containing Programming Language and Fields that it is used in."
)


# Define a custom graph model for programming languages.
# NOTE: the models are documented with comments rather than docstrings so the
# schema derived from them stays exactly as the fields define it.
class FieldType(DataPoint):
    # Type node for fields; the name defaults to the fixed label "Field".
    name: str = "Field"


class Field(DataPoint):
    # An area in which a programming language is used.
    name: str
    is_type: FieldType
    # Tells Cognee which fields to embed and index for retrieval.
    metadata: dict = {"index_fields": ["name"]}


class ProgrammingLanguageType(DataPoint):
    # Type node for languages; the name defaults to "Programming Language".
    name: str = "Programming Language"


class ProgrammingLanguage(DataPoint):
    # Root model handed to remember() as graph_model (the extraction schema).
    name: str
    # The default is None, so the annotation has to admit None as well.
    used_in: Optional[List[Field]] = None
    is_type: ProgrammingLanguageType
    # Tells Cognee which fields to embed and index for retrieval.
    metadata: dict = {"index_fields": ["name"]}


async def visualize_data():
    """Call ``visualize_graph`` with an HTML path under ``.artifacts`` next to this script."""
    graph_file_path = os.path.join(
        os.path.dirname(__file__), ".artifacts", "custom_graph_model_entity_schema_definition.html"
    )
    # Make sure the target folder exists before the path is handed over.
    os.makedirs(os.path.dirname(graph_file_path), exist_ok=True)
    await visualize_graph(graph_file_path)


async def main():
    """Prune existing state, ingest the sample text with the custom model, then visualize."""
    # Prune data and system metadata before running, only if we want "fresh" state.
    await prune.prune_data()
    await prune.prune_system(metadata=True)

    text = "The Python programming language is widely used in data analysis, web development, and machine learning."

    # Ingests the text and builds the graph in one call.
    await remember(
        text,
        graph_model=ProgrammingLanguage,
        custom_prompt=CUSTOM_PROMPT,
        self_improvement=False,
    )

    await visualize_data()


if __name__ == "__main__":
    asyncio.run(main())
Legacy guide
"""Legacy example: ingest one sentence with a custom graph model via add() + cognify()."""

import os
import asyncio
from typing import Any, List, Optional

from pydantic import SkipValidation

from cognee import add, cognify, prune, visualize_graph
from cognee.low_level import DataPoint

# Instruction passed to cognify() as custom_prompt.
CUSTOM_PROMPT = (
    "Extract a simple graph containing Programming Language and Fields that it is used in."
)


# Define a custom graph model for programming languages.
# NOTE: the models are documented with comments rather than docstrings so the
# schema derived from them stays exactly as the fields define it.
class FieldType(DataPoint):
    # Type node for fields; the name defaults to the fixed label "Field".
    name: str = "Field"


class Field(DataPoint):
    # An area in which a programming language is used.
    name: str
    is_type: FieldType
    # Tells Cognee which fields to embed and index for retrieval.
    metadata: dict = {"index_fields": ["name"]}


class ProgrammingLanguageType(DataPoint):
    # Type node for languages; the name defaults to "Programming Language".
    name: str = "Programming Language"


class ProgrammingLanguage(DataPoint):
    # Root model handed to cognify() as graph_model.
    name: str
    # The default is None, so the annotation has to admit None as well.
    used_in: Optional[List[Field]] = None
    is_type: ProgrammingLanguageType
    # Tells Cognee which fields to embed and index for retrieval.
    metadata: dict = {"index_fields": ["name"]}


async def visualize_data():
    """Call ``visualize_graph`` with an HTML path under ``.artifacts`` next to this script."""
    graph_file_path = os.path.join(
        os.path.dirname(__file__), ".artifacts", "custom_graph_model_entity_schema_definition.html"
    )
    # Make sure the target folder exists before the path is handed over.
    os.makedirs(os.path.dirname(graph_file_path), exist_ok=True)
    await visualize_graph(graph_file_path)


async def main():
    """Prune existing state, add the sample text, cognify it with the custom model, then visualize."""
    # Prune data and system metadata before running, only if we want "fresh" state.
    await prune.prune_data()
    await prune.prune_system(metadata=True)

    text = "The Python programming language is widely used in data analysis, web development, and machine learning."

    # Legacy two-step flow: add the text first, then cognify with the custom
    # graph model and prompt.
    await add(text)
    await cognify(graph_model=ProgrammingLanguage, custom_prompt=CUSTOM_PROMPT)

    await visualize_data()


if __name__ == "__main__":
    asyncio.run(main())
This example shows the complete workflow with metadata for indexing. In practice, you can create complex nested models with multiple relationships and sophisticated data structures.