import asyncio
import os
from typing import Any, List, Optional

from pydantic import SkipValidation

from cognee import add, cognify, prune, visualize_graph
from cognee.low_level import DataPoint

# Prompt steering the LLM extraction toward our two node categories.
CUSTOM_PROMPT = (
    "Extract a simple graph containing Programming Language and Fields that it is used in."
)


# --- Custom graph model for programming languages -------------------------
# Each model inherits from DataPoint so cognify() can use it as the target
# schema for extraction. The `metadata["index_fields"]` entry tells Cognee
# which fields to embed and store in the vector database for search.


class FieldType(DataPoint):
    # Type node; the fixed name doubles as the node's label in the graph.
    name: str = "Field"


class Field(DataPoint):
    name: str
    # SkipValidation[Any] avoids forward-reference/validation issues for
    # fields that hold other DataPoints (see cognee low-level docs).
    is_type: SkipValidation[Any]
    metadata: dict = {"index_fields": ["name"]}


class ProgrammingLanguageType(DataPoint):
    name: str = "Programming Language"


class ProgrammingLanguage(DataPoint):
    name: str
    # Optional: the extractor may not find any field for a language.
    used_in: Optional[List[Field]] = None
    is_type: SkipValidation[Any]
    metadata: dict = {"index_fields": ["name"]}


async def visualize_data():
    """Render the generated graph to an HTML file for manual inspection."""
    graph_file_path = os.path.join(
        os.path.dirname(__file__),
        ".artifacts",
        "custom_graph_model_entity_schema_definition.html",
    )
    # visualize_graph writes the file; make sure the target directory exists.
    os.makedirs(os.path.dirname(graph_file_path), exist_ok=True)
    await visualize_graph(graph_file_path)


async def main():
    """Run the full add -> cognify -> visualize workflow on a sample text."""
    # Prune data and system metadata before running, only if we want "fresh" state.
    await prune.prune_data()
    await prune.prune_system(metadata=True)

    text = "The Python programming language is widely used in data analysis, web development, and machine learning."

    await add(text)
    # Extraction is shaped by our custom model and steered by the prompt.
    await cognify(graph_model=ProgrammingLanguage, custom_prompt=CUSTOM_PROMPT)

    await visualize_data()


if __name__ == "__main__":
    asyncio.run(main())
This example shows the complete workflow with metadata for indexing. In practice, you can create complex nested models with multiple relationships and sophisticated data structures.
Create a Pydantic model that inherits from DataPoint. Use SkipValidation[Any] for fields that will hold other DataPoints to avoid forward-reference issues. Setting metadata is recommended — its "index_fields" entry tells Cognee which fields to embed and store in the vector database for search.
text = "The Python programming language is widely used in data analysis, web development, and machine learning." await add(text) await cognify(graph_model=ProgrammingLanguage, custom_prompt=CUSTOM_PROMPT)
This step ingests the unstructured text with add(…), then runs extraction with cognify(…), using the custom graph_model as the target schema and the custom_prompt to steer what the LLM extracts.
The model acts as a schema for extraction, so Cognee structures output according to the defined domain types.
This renders the generated graph to an HTML file so you can verify nodes, relationships, and overall schema behavior.
Use this as a quick validation step to confirm that your custom model is extracting the structure you expect.