Default_embedder

This module contains functionality related to the default_embedder module for embedding.embedders.

Default_embedder

Embedder

Bases: BaseEmbedder

Implementation of text node embedding operations.

Handles batch embedding generation and vector store persistence for text nodes.

Source code in src/embedding/embedders/default_embedder.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class Embedder(BaseEmbedder):
    """Default embedder for text nodes.

    Provides two operations: computing embeddings for a batch of nodes
    and handing nodes over to the configured vector store.
    """

    def save(self, nodes: List[TextNode]) -> None:
        """Write the given nodes into the configured vector store.

        Args:
            nodes: Text nodes, expected to already carry embeddings.

        Note:
            A fresh storage context and vector store index are built on
            every call; neither is returned nor kept on the instance.
        """
        logging.info("Saving nodes...")
        context = StorageContext.from_defaults(vector_store=self.vector_store)
        # The index is constructed only for its effect on the vector store,
        # so the resulting object is deliberately not bound to a name.
        VectorStoreIndex(
            nodes, storage_context=context, embed_model=self.embedding_model
        )

    def embed(self, nodes: List[TextNode]) -> None:
        """Compute embeddings for all nodes in a single batch call.

        Args:
            nodes: Text nodes whose embedding-mode content is embedded.

        Note:
            Returns nothing; each node's ``embedding`` attribute is
            overwritten in place.
        """
        logging.info(f"Embedding {len(nodes)} nodes...")
        texts = []
        for item in nodes:
            texts.append(item.get_content(metadata_mode=MetadataMode.EMBED))
        vectors = self.embedding_model.get_text_embedding_batch(
            texts, show_progress=True
        )
        # Pair nodes and vectors by position; zip stops at the shorter one.
        for item, vector in zip(nodes, vectors):
            item.embedding = vector

embed(nodes)

Generate embeddings for text nodes in batch.

Parameters:
  • nodes (List[TextNode]) –

    Collection of text nodes to embed

Note

Modifies nodes in-place by setting embedding attribute

Source code in src/embedding/embedders/default_embedder.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def embed(self, nodes: List[TextNode]) -> None:
    """Compute embeddings for all nodes in a single batch call.

    Args:
        nodes: Text nodes whose embedding-mode content is embedded.

    Note:
        Returns nothing; each node's ``embedding`` attribute is
        overwritten in place.
    """
    logging.info(f"Embedding {len(nodes)} nodes...")
    texts = []
    for item in nodes:
        texts.append(item.get_content(metadata_mode=MetadataMode.EMBED))
    vectors = self.embedding_model.get_text_embedding_batch(
        texts, show_progress=True
    )
    # Pair nodes and vectors by position; zip stops at the shorter one.
    for item, vector in zip(nodes, vectors):
        item.embedding = vector

save(nodes)

Save embedded nodes to vector store.

Parameters:
  • nodes (List[TextNode]) –

    Collection of text nodes with embeddings

Note

Creates new storage context and vector store index

Source code in src/embedding/embedders/default_embedder.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def save(self, nodes: List[TextNode]) -> None:
    """Write the given nodes into the configured vector store.

    Args:
        nodes: Text nodes, expected to already carry embeddings.

    Note:
        A fresh storage context and vector store index are built on
        every call; neither is returned nor kept on the instance.
    """
    logging.info("Saving nodes...")
    context = StorageContext.from_defaults(vector_store=self.vector_store)
    # The index is constructed only for its effect on the vector store,
    # so the resulting object is deliberately not bound to a name.
    VectorStoreIndex(
        nodes, storage_context=context, embed_model=self.embedding_model
    )