Skip to main content

cognee.datasets

Static class for managing datasets and their data.

Methods

datasets.list_datasets()

await cognee.datasets.list_datasets(user=None)
Returns a list of all datasets accessible to the user.

datasets.list_data()

await cognee.datasets.list_data(dataset_id, user=None)
Returns all data items in a dataset. This is the API to use when you want to read back DataItem fields stored during cognee.add(), such as label and external_metadata.

datasets.has_data()

await cognee.datasets.has_data(dataset_id, user=None) -> bool
Check whether a dataset contains any data.

datasets.get_status()

await cognee.datasets.get_status(dataset_ids) -> dict
Get the pipeline processing status for one or more datasets.

datasets.delete_data()

await cognee.datasets.delete_data(
    dataset_id,
    data_id,
    user=None,
    mode="soft",
    delete_dataset_if_empty=False,
)
Delete a specific data item from a dataset.
dataset_id
UUID
required
UUID of the dataset.
data_id
UUID
required
UUID of the data item to delete.
mode
str
default: "soft"
Deletion mode: "soft" marks as deleted, "hard" removes permanently.
delete_dataset_if_empty
bool
default: False
If True, delete the dataset itself when its last data item is removed.

datasets.empty_dataset()

await cognee.datasets.empty_dataset(dataset_id, user=None)
Remove all data from a dataset without deleting the dataset itself.

datasets.delete_all()

await cognee.datasets.delete_all(user=None)
Delete all datasets the user has permission to delete.

datasets.discover_datasets()

cognee.datasets.discover_datasets(directory_path)
Discover datasets from files in a local directory.

Examples

import cognee

# List all datasets
datasets = await cognee.datasets.list_datasets()
for ds in datasets:
    print(ds.name, ds.id)

# Check dataset contents
data = await cognee.datasets.list_data(dataset_id=ds.id)

# Delete a specific item (`item` is one of the data items returned by list_data)
await cognee.datasets.delete_data(
    dataset_id=ds.id,
    data_id=item.id,
)

# Wipe everything
await cognee.datasets.delete_all()

# Example: store DataItem fields with cognee.add() and read them back with list_data()
import cognee
from cognee.tasks.ingestion.data_item import DataItem

await cognee.add(
    DataItem(
        "/path/to/report.pdf",
        label="q4-report",
        external_metadata={"author": "Jane Smith", "quarter": "Q4-2024"},
    ),
    dataset_name="finance",
)

datasets = await cognee.datasets.list_datasets()
data_items = await cognee.datasets.list_data(dataset_id=datasets[0].id)

for item in data_items:
    print(item.label, item.external_metadata)
    # q4-report  {"author": "Jane Smith", "quarter": "Q4-2024"}
external_metadata is stored on the relational Data record only. It is not placed into the vector store or knowledge graph and is not returned by cognee.search(). If you need metadata to be vector-searchable, define a custom DataPoint subclass and list the fields to embed in metadata.index_fields. See DataPoints.