Dataset_service

This module contains functionality related to the dataset_service module for augmentation.langfuse.

Dataset_service

LangfuseDatasetService

Service for managing Langfuse datasets.

This service provides methods to create, manage, and retrieve datasets within the Langfuse platform. It handles the communication with the Langfuse API for all dataset-related operations.

Source code in src/augmentation/langfuse/dataset_service.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class LangfuseDatasetService:
    """Thin service layer over a Langfuse client for dataset operations.

    The service wraps a Langfuse client and exposes two operations: making
    sure a dataset exists (creating it when the lookup reports it missing)
    and fetching a dataset by name.
    """

    def __init__(
        self,
        langfuse_client: Langfuse,
        logger: logging.Logger = LoggerConfiguration.get_logger(__name__),
    ):
        """Store the collaborators used by the service.

        Args:
            langfuse_client: Authenticated Langfuse client that every dataset
                call is delegated to.
            logger: Logger used for progress messages. When omitted, the
                logger obtained from LoggerConfiguration for this module
                is used.
        """
        self.logger = logger
        self.langfuse_client = langfuse_client

    def create_if_does_not_exist(
        self, dataset: LangfuseDatasetConfiguration
    ) -> None:
        """Ensure the dataset described by ``dataset`` exists in Langfuse.

        The dataset is looked up by name first. A NotFoundError from that
        lookup is treated as "missing", and the dataset is then created
        from the name, description and metadata of the configuration.

        Args:
            dataset: Configuration supplying the dataset name, description
                and metadata.

        Note:
            According to the original author's note, Langfuse itself may
            still log the NotFoundError even though it is handled here.
        """
        client = self.langfuse_client
        try:
            # The lookup result is not needed; only whether it raises.
            client.get_dataset(dataset.name)
            self.logger.info(f"Dataset {dataset.name} exists.")
        except NotFoundError:
            missing_message = (
                f"Dataset {dataset.name} does not exist. Creating..."
            )
            self.logger.info(missing_message)
            creation_arguments = {
                "name": dataset.name,
                "description": dataset.description,
                "metadata": dataset.metadata,
            }
            client.create_dataset(**creation_arguments)

    def get_dataset(self, dataset_name: str) -> DatasetClient:
        """Fetch the Langfuse dataset client for the given dataset name.

        The call is delegated unchanged to the wrapped Langfuse client.

        Args:
            dataset_name: Name of the dataset to look up.

        Returns:
            DatasetClient: The object returned by the Langfuse client for
                the requested dataset.

        Raises:
            NotFoundError: Propagated from the Langfuse client when no
                dataset with the given name exists.
        """
        dataset_client = self.langfuse_client.get_dataset(dataset_name)
        return dataset_client

__init__(langfuse_client, logger=LoggerConfiguration.get_logger(__name__))

Initialize the Langfuse dataset service.

Parameters:
  • langfuse_client (Langfuse) –

    Authenticated client for Langfuse API interactions.

  • logger (Logger, default: get_logger(__name__) ) –

    Logger instance for recording operations. Defaults to module logger.

Source code in src/augmentation/langfuse/dataset_service.py
24
25
26
27
28
29
30
31
32
33
34
35
36
def __init__(
    self,
    langfuse_client: Langfuse,
    logger: logging.Logger = LoggerConfiguration.get_logger(__name__),
):
    """Store the collaborators used by the service.

    Args:
        langfuse_client: Authenticated Langfuse client that every dataset
            call is delegated to.
        logger: Logger used for progress messages. When omitted, the
            logger obtained from LoggerConfiguration for this module
            is used.
    """
    self.logger = logger
    self.langfuse_client = langfuse_client

create_if_does_not_exist(dataset)

Create a dataset in Langfuse if it doesn't already exist.

Checks if a dataset with the specified name exists in Langfuse. If not found, creates a new dataset with the provided configuration.

Parameters:
  • dataset (LangfuseDatasetConfiguration) –

    Configuration object containing dataset name, description, and metadata for creation.

Note

The NotFoundError exception from Langfuse is caught and used as an indicator to create a new dataset, but is still logged due to Langfuse implementation details.

Source code in src/augmentation/langfuse/dataset_service.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def create_if_does_not_exist(
    self, dataset: LangfuseDatasetConfiguration
) -> None:
    """Ensure the dataset described by ``dataset`` exists in Langfuse.

    The dataset is looked up by name first. A NotFoundError from that
    lookup is treated as "missing", and the dataset is then created
    from the name, description and metadata of the configuration.

    Args:
        dataset: Configuration supplying the dataset name, description
            and metadata.

    Note:
        According to the original author's note, Langfuse itself may
        still log the NotFoundError even though it is handled here.
    """
    client = self.langfuse_client
    try:
        # The lookup result is not needed; only whether it raises.
        client.get_dataset(dataset.name)
        self.logger.info(f"Dataset {dataset.name} exists.")
    except NotFoundError:
        missing_message = (
            f"Dataset {dataset.name} does not exist. Creating..."
        )
        self.logger.info(missing_message)
        creation_arguments = {
            "name": dataset.name,
            "description": dataset.description,
            "metadata": dataset.metadata,
        }
        client.create_dataset(**creation_arguments)

get_dataset(dataset_name)

Retrieve a dataset client by name.

Provides a client instance for interacting with a specific dataset in the Langfuse platform.

Parameters:
  • dataset_name (str) –

    The unique name identifier of the dataset to retrieve.

Returns:
  • DatasetClient –

    A client object for performing operations on the specified dataset (such as adding examples, querying data).

Raises:
  • NotFoundError

    If a dataset with the specified name doesn't exist.

Source code in src/augmentation/langfuse/dataset_service.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def get_dataset(self, dataset_name: str) -> DatasetClient:
    """Fetch the Langfuse dataset client for the given dataset name.

    The call is delegated unchanged to the wrapped Langfuse client.

    Args:
        dataset_name: Name of the dataset to look up.

    Returns:
        DatasetClient: The object returned by the Langfuse client for
            the requested dataset.

    Raises:
        NotFoundError: Propagated from the Langfuse client when no
            dataset with the given name exists.
    """
    dataset_client = self.langfuse_client.get_dataset(dataset_name)
    return dataset_client

LangfuseDatasetServiceFactory

Bases: Factory

Factory for creating LangfuseDatasetService instances.

Creates and configures LangfuseDatasetService instances with the appropriate client based on provided configuration.

Attributes:
  • _configuration_class (Type) –

    The configuration class type used by this factory.

Source code in src/augmentation/langfuse/dataset_service.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
class LangfuseDatasetServiceFactory(Factory):
    """Factory that builds LangfuseDatasetService objects.

    Given a Langfuse configuration, the factory obtains a client from
    LangfuseClientFactory and wraps it in a LangfuseDatasetService.

    Attributes:
        _configuration_class: Configuration type associated with this
            factory (LangfuseConfiguration).
    """

    _configuration_class: Type = LangfuseConfiguration

    @classmethod
    def _create_instance(
        cls, configuration: LangfuseConfiguration
    ) -> LangfuseDatasetService:
        """Build a LangfuseDatasetService from a Langfuse configuration.

        Args:
            configuration: Langfuse configuration handed to
                LangfuseClientFactory.create to obtain the client.

        Returns:
            A LangfuseDatasetService wrapping the client created from the
            configuration; the service's logger is left at its default.
        """
        return LangfuseDatasetService(
            LangfuseClientFactory.create(configuration)
        )