You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

naive_implementation.py 1.8KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. from typing import Iterable, List, Set
  2. def merge_lists_distinct(*lists: "Iterable[List[str]]") -> List[str]:
  3. accum = set()
  4. for lst in lists:
  5. accum = accum.union(set(lst))
  6. return list(accum)
  7. def check_lists_overlap(list1, list2):
  8. return any(x in list1 for x in list2)
  9. def cluster_step(clusters: "List[List[str]]", addresses: "List[List[str]]"):
  10. #if there are no more sets of addresses to consider, we are done
  11. if(len(addresses) == 0):
  12. return clusters
  13. tx = addresses[0]
  14. matching_clusters = []
  15. new_clusters = []
  16. for cluster in clusters:
  17. if(check_lists_overlap(tx, cluster)):
  18. matching_clusters.append(cluster)
  19. else:
  20. new_clusters.append(cluster)
  21. new_clusters.append(merge_lists_distinct(tx, *matching_clusters))
  22. return cluster_step(new_clusters,addresses[1:])
  23. def cluster_step_iter(clusters: "List[List[str]]", addresses: "List[List[str]]"):
  24. clstr = clusters
  25. addrs = addresses
  26. while True:
  27. if(len(addrs) == 0):
  28. break
  29. tx = addrs[0]
  30. matching_clusters = []
  31. new_clusters = []
  32. for cluster in clstr:
  33. if(check_lists_overlap(tx, cluster)):
  34. matching_clusters.append(cluster)
  35. else:
  36. new_clusters.append(cluster)
  37. new_clusters.append(merge_lists_distinct(tx, *matching_clusters))
  38. clstr = new_clusters
  39. addrs = addrs[1:]
  40. return clstr
  41. def cluster_n(clusters: "List[List[str]]", addresses: "List[List[str]]"):
  42. tx_sets = map(set, clusters+addresses)
  43. unions = []
  44. for tx in tx_sets:
  45. temp = []
  46. for s in unions:
  47. if not s.isdisjoint(tx):
  48. tx = s.union(tx)
  49. else:
  50. temp.append(s)
  51. temp.append(tx)
  52. unions = temp
  53. return unions