@@ -63,7 +63,10 @@ def __init__(
6363 self ._resolution_result_delimiter_key = "resolution_result_delimiter"
6464 self ._input_text_key = "input_text"
6565
66- async def __call__ (self , graph : nx .Graph , prompt_variables : dict [str , Any ] | None = None , callback : Callable | None = None ) -> EntityResolutionResult :
66+ async def __call__ (self , graph : nx .Graph ,
67+ subgraph_nodes : set [str ],
68+ prompt_variables : dict [str , Any ] | None = None ,
69+ callback : Callable | None = None ) -> EntityResolutionResult :
6770 """Call method definition."""
6871 if prompt_variables is None :
6972 prompt_variables = {}
@@ -88,16 +91,19 @@ async def __call__(self, graph: nx.Graph, prompt_variables: dict[str, Any] | Non
8891
8992 candidate_resolution = {entity_type : [] for entity_type in entity_types }
9093 for k , v in node_clusters .items ():
91- candidate_resolution [k ] = [(a , b ) for a , b in itertools .combinations (v , 2 ) if self .is_similarity (a , b )]
94+ candidate_resolution [k ] = [(a , b ) for a , b in itertools .combinations (v , 2 ) if ( a in subgraph_nodes or b in subgraph_nodes ) and self .is_similarity (a , b )]
9295 num_candidates = sum ([len (candidates ) for _ , candidates in candidate_resolution .items ()])
9396 callback (msg = f"Identified { num_candidates } candidate pairs" )
9497
9598 resolution_result = set ()
99+ resolution_batch_size = 100
96100 async with trio .open_nursery () as nursery :
97101 for candidate_resolution_i in candidate_resolution .items ():
98102 if not candidate_resolution_i [1 ]:
99103 continue
100- nursery .start_soon (lambda : self ._resolve_candidate (candidate_resolution_i , resolution_result ))
104+ for i in range (0 , len (candidate_resolution_i [1 ]), resolution_batch_size ):
105+ candidate_batch = candidate_resolution_i [0 ], candidate_resolution_i [1 ][i :i + resolution_batch_size ]
106+ nursery .start_soon (lambda : self ._resolve_candidate (candidate_batch , resolution_result ))
101107 callback (msg = f"Resolved { num_candidates } candidate pairs, { len (resolution_result )} of them are selected to merge." )
102108
103109 change = GraphChange ()
@@ -118,7 +124,7 @@ async def __call__(self, graph: nx.Graph, prompt_variables: dict[str, Any] | Non
118124 change = change ,
119125 )
120126
121- async def _resolve_candidate (self , candidate_resolution_i , resolution_result ):
127+ async def _resolve_candidate (self , candidate_resolution_i : tuple [ str , list [ tuple [ str , str ]]], resolution_result : set [ str ] ):
122128 gen_conf = {"temperature" : 0.5 }
123129 pair_txt = [
124130 f'When determining whether two { candidate_resolution_i [0 ]} s are the same, you should only focus on critical properties and overlook noisy factors.\n ' ]
0 commit comments