Bases: VectorIndexAutoRetriever
Extended auto-retriever with Notion metadata fallback handling.
Extends VectorIndexAutoRetriever to handle missing metadata in Notion documents.
When no documents are found using creation_date filter, falls back to
last_update_date to maximize retrieval coverage.
Attributes:
- retriever – Internal retriever instance for filter manipulation
Source code in src/common/retrievers.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class CustomVectorIndexAutoRetriever(VectorIndexAutoRetriever):
    """Extended auto-retriever with Notion metadata fallback handling.

    Extends VectorIndexAutoRetriever to handle missing metadata in Notion
    documents. When a retrieval returns no nodes and the generated filters
    include one keyed on ``creation_date``, that filter is re-keyed to
    ``last_update_date`` and the retrieval is attempted once more.

    Attributes:
        retriever: The retriever most recently returned by
            ``_build_retriever_from_spec``. It is overwritten on every build
            and is kept so that ``_retrieve`` can edit its filters.
    """

    def _build_retriever_from_spec(
        self, spec: VectorStoreQuerySpec
    ) -> Tuple[BaseRetriever, QueryBundle]:
        """Build the retriever for ``spec`` and remember it on the instance.

        Args:
            spec: Query specification including filters and parameters.

        Returns:
            Tuple containing:
                - Configured retriever instance
                - Modified query bundle
        """
        retriever, new_query_bundle = super()._build_retriever_from_spec(spec)
        # Keep a handle on the freshly built retriever so the fallback in
        # _retrieve can reach (and rewrite) its metadata filters.
        self.retriever = retriever
        return retriever, new_query_bundle

    def _retrieve(
        self,
        query_bundle: QueryBundle,
    ) -> List[NodeWithScore]:
        """Retrieve relevant nodes with a metadata fallback strategy.

        Runs the parent retrieval first. If it yields nothing and one of the
        retriever's filters is keyed on ``creation_date``, that filter is
        re-keyed in place to ``last_update_date`` and the stored retriever is
        queried again.

        Args:
            query_bundle: Bundle containing query and additional info.

        Returns:
            Retrieved nodes with relevance scores. This is the parent's
            (empty) result when no ``creation_date`` filter is present.
        """
        nodes = super()._retrieve(query_bundle)
        if nodes:
            return nodes

        # Presumably super()._retrieve() went through
        # _build_retriever_from_spec and therefore set self.retriever; the
        # getattr chain just avoids an AttributeError if it did not, or if
        # the retriever carries no filters at all.
        filter_group = getattr(getattr(self, "retriever", None), "_filters", None)
        candidates = getattr(filter_group, "filters", None) or ()

        # Look at every filter, not only the first one: the creation_date
        # condition is not guaranteed to come first. getattr() skips entries
        # that have no `key` attribute (e.g. nested filter groups).
        creation_filter = next(
            (f for f in candidates if getattr(f, "key", None) == "creation_date"),
            None,
        )
        if creation_filter is None:
            return nodes

        logging.info(
            f"No nodes found for the given creation date - {creation_filter.value}. "
            "Replacing creation date filter with last update date filter."
        )
        # Re-key in place: the same filter value is now matched against the
        # last_update_date metadata field.
        creation_filter.key = "last_update_date"
        # NOTE(review): the retry reuses the caller's query_bundle rather than
        # the bundle returned by _build_retriever_from_spec — confirm that is
        # intended.
        return self.retriever._retrieve(query_bundle)
|
_build_retriever_from_spec(spec)
Builds retriever from vector store query specification.
Parameters:
- spec (VectorStoreQuerySpec) – Query specification including filters and parameters
Returns:
- Tuple[BaseRetriever, QueryBundle] – Tuple containing:
  - Configured retriever instance
  - Modified query bundle
Source code in src/common/retrievers.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def _build_retriever_from_spec(
    self, spec: VectorStoreQuerySpec
) -> Tuple[BaseRetriever, QueryBundle]:
    """Delegate to the parent builder and remember the resulting retriever.

    Args:
        spec: Query specification including filters and parameters.

    Returns:
        Tuple containing:
            - Configured retriever instance
            - Modified query bundle
    """
    # Unpack straight onto the instance: the built retriever is stored as
    # self.retriever so its filters can be reached after retrieval.
    self.retriever, rewritten_bundle = super()._build_retriever_from_spec(spec)
    return self.retriever, rewritten_bundle
|
_retrieve(query_bundle)
Retrieves relevant nodes with metadata fallback strategy.
Attempts retrieval with creation_date filter first. If no results found,
falls back to last_update_date filter.
Parameters:
- query_bundle (QueryBundle) – Bundle containing query and additional info
Returns:
- List[NodeWithScore] – Retrieved nodes with relevance scores
Source code in src/common/retrievers.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def _retrieve(
    self,
    query_bundle: QueryBundle,
) -> List[NodeWithScore]:
    """Retrieve relevant nodes with a metadata fallback strategy.

    Runs the parent retrieval first. If it yields nothing and one of the
    retriever's filters is keyed on ``creation_date``, that filter is
    re-keyed in place to ``last_update_date`` and the stored retriever is
    queried again.

    Args:
        query_bundle: Bundle containing query and additional info.

    Returns:
        Retrieved nodes with relevance scores. This is the parent's (empty)
        result when no ``creation_date`` filter is present.
    """
    nodes = super()._retrieve(query_bundle)
    if nodes:
        return nodes

    # Presumably super()._retrieve() has populated self.retriever by the time
    # we get here; the getattr chain just avoids an AttributeError if it has
    # not, or if the retriever carries no filters at all.
    filter_group = getattr(getattr(self, "retriever", None), "_filters", None)
    candidates = getattr(filter_group, "filters", None) or ()

    # Look at every filter, not only the first one: the creation_date
    # condition is not guaranteed to come first. getattr() skips entries
    # that have no `key` attribute (e.g. nested filter groups).
    creation_filter = next(
        (f for f in candidates if getattr(f, "key", None) == "creation_date"),
        None,
    )
    if creation_filter is None:
        return nodes

    logging.info(
        f"No nodes found for the given creation date - {creation_filter.value}. "
        "Replacing creation date filter with last update date filter."
    )
    # Re-key in place: the same filter value is now matched against the
    # last_update_date metadata field.
    creation_filter.key = "last_update_date"
    # NOTE(review): the retry reuses the caller's query_bundle rather than any
    # bundle rewritten while the retriever was built — confirm that is
    # intended.
    return self.retriever._retrieve(query_bundle)
|