|
@@ -48,21 +48,19 @@ def cluster_step(clusters: "List[List[str]]", addresses: "List[List[str]]"):
|
48
|
48
|
if(len(addresses) == 0):
|
49
|
49
|
return clusters
|
50
|
50
|
|
51
|
|
- #take a set of addresses
|
52
|
51
|
tx = addresses[0]
|
53
|
|
- #remove it from list candidates
|
54
|
|
- addresses = addresses[1:]
|
|
52
|
+ matching_clusters = []
|
|
53
|
+ new_clusters = []
|
55
|
54
|
|
56
|
|
- #find clusters that match these addresses
|
57
|
|
- matching_clusters = filter(lambda cluster: check_lists_overlap(tx, cluster), clusters)
|
58
|
|
-
|
59
|
|
- #remove all clusters that match these addresses
|
60
|
|
- clusters = list(filter(lambda cluster: not check_lists_overlap(tx, cluster), clusters))
|
|
55
|
+ for cluster in clusters:
|
|
56
|
+ if(check_lists_overlap(tx, cluster)):
|
|
57
|
+ matching_clusters.append(cluster)
|
|
58
|
+ else:
|
|
59
|
+ new_clusters.append(cluster)
|
61
|
60
|
|
62
|
|
- #add a new cluster that is the union of found clusters and the inspected list of addresses
|
63
|
|
- clusters.append(merge_lists_distinct(tx, *matching_clusters))
|
|
61
|
+ new_clusters.append(merge_lists_distinct(tx, *matching_clusters))
|
64
|
62
|
|
65
|
|
- return cluster_step(clusters,addresses)
|
|
63
|
+ return cluster_step(new_clusters,addresses[1:])
|
66
|
64
|
|
67
|
65
|
def cluster_partition(iter: "Iterable[Row]") -> Iterable:
    """Yield one clustering result computed over all rows of ``iter``.

    The ``'addresses'`` entry of every row is collected into a list, and
    that list is passed to ``cluster_step`` together with an empty initial
    cluster list. Whatever ``cluster_step`` returns is yielded as the single
    item of this generator.

    NOTE(review): presumably each ``row['addresses']`` is a list of address
    strings (per the ``cluster_step`` signature) -- confirm against caller.
    """
    address_lists = [row['addresses'] for row in iter]
    yield cluster_step([], address_lists)
|