mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-01-18 16:27:02 +00:00
Add graphs/frequent_pattern_graph_miner.py (#1866)
* Add files via upload * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <cclauss@me.com> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Whitespace changes * Format with psf/black Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
parent
9e5f365fed
commit
7469fb6edd
232
graphs/frequent_pattern_graph_miner.py
Normal file
232
graphs/frequent_pattern_graph_miner.py
Normal file
|
@ -0,0 +1,232 @@
|
|||
"""
|
||||
FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs
|
||||
|
||||
A novel Frequent Pattern Graph Mining algorithm, FP-GraphMiner, that compactly
|
||||
represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph).
|
||||
This graph can be used to efficiently mine frequent subgraphs including maximal
|
||||
frequent subgraphs and maximum common subgraphs.
|
||||
|
||||
URL: https://www.researchgate.net/publication/235255851
|
||||
"""
|
||||
# fmt: off
|
||||
edge_array = [
|
||||
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', 'cd-e2', 'ce-e4',
|
||||
'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3'],
|
||||
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'cd-e2', 'de-e1', 'df-e8',
|
||||
'ef-e3', 'eg-e2', 'fg-e6'],
|
||||
['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'de-e1', 'df-e8', 'dg-e5', 'ef-e3', 'eg-e2',
|
||||
'eh-e12', 'fg-e6', 'fh-e10', 'gh-e6'],
|
||||
['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'bh-e12', 'cd-e2', 'df-e8', 'dh-e10'],
|
||||
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'cd-e2', 'ce-e4', 'de-e1', 'df-e8',
|
||||
'dg-e5', 'ef-e3', 'eg-e2', 'fg-e6']
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
|
||||
def get_distinct_edge(edge_array):
|
||||
"""
|
||||
Return Distinct edges from edge array of multiple graphs
|
||||
>>> sorted(get_distinct_edge(edge_array))
|
||||
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
|
||||
"""
|
||||
distinct_edge = set()
|
||||
for row in edge_array:
|
||||
for item in row:
|
||||
distinct_edge.add(item[0])
|
||||
return list(distinct_edge)
|
||||
|
||||
|
||||
def get_bitcode(edge_array, distinct_edge):
|
||||
"""
|
||||
Return bitcode of distinct_edge
|
||||
"""
|
||||
bitcode = ["0"] * len(edge_array)
|
||||
for i, row in enumerate(edge_array):
|
||||
for item in row:
|
||||
if distinct_edge in item[0]:
|
||||
bitcode[i] = "1"
|
||||
break
|
||||
return "".join(bitcode)
|
||||
|
||||
|
||||
def get_frequency_table(edge_array):
|
||||
"""
|
||||
Returns Frequency Table
|
||||
"""
|
||||
distinct_edge = get_distinct_edge(edge_array)
|
||||
frequency_table = dict()
|
||||
|
||||
for item in distinct_edge:
|
||||
bit = get_bitcode(edge_array, item)
|
||||
# print('bit',bit)
|
||||
# bt=''.join(bit)
|
||||
s = bit.count("1")
|
||||
frequency_table[item] = [s, bit]
|
||||
# Store [Distinct edge, WT(Bitcode), Bitcode] in descending order
|
||||
sorted_frequency_table = [
|
||||
[k, v[0], v[1]]
|
||||
for k, v in sorted(frequency_table.items(), key=lambda v: v[1][0], reverse=True)
|
||||
]
|
||||
return sorted_frequency_table
|
||||
|
||||
|
||||
def get_nodes(frequency_table):
|
||||
"""
|
||||
Returns nodes
|
||||
format nodes={bitcode:edges that represent the bitcode}
|
||||
>>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'],
|
||||
... ['bd', 5, '11111'], ['bc', 5, '11111']])
|
||||
{'11111': ['ab', 'ac', 'df', 'bd', 'bc']}
|
||||
"""
|
||||
nodes = {}
|
||||
for i, item in enumerate(frequency_table):
|
||||
nodes.setdefault(item[2], []).append(item[0])
|
||||
return nodes
|
||||
|
||||
|
||||
def get_cluster(nodes):
|
||||
"""
|
||||
Returns cluster
|
||||
format cluster:{WT(bitcode):nodes with same WT}
|
||||
"""
|
||||
cluster = {}
|
||||
for key, value in nodes.items():
|
||||
cluster.setdefault(key.count("1"), {})[key] = value
|
||||
return cluster
|
||||
|
||||
|
||||
def get_support(cluster):
|
||||
"""
|
||||
Returns support
|
||||
>>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']},
|
||||
... 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']},
|
||||
... 3: {'11001': ['ad'], '10101': ['dg']},
|
||||
... 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'],
|
||||
... '10001': ['ce']},
|
||||
... 1: {'00100': ['fh', 'eh'], '10000': ['hi']}})
|
||||
[100.0, 80.0, 60.0, 40.0, 20.0]
|
||||
"""
|
||||
return [i * 100 / len(cluster) for i in cluster]
|
||||
|
||||
|
||||
def print_all() -> None:
|
||||
print("\nNodes\n")
|
||||
for key, value in nodes.items():
|
||||
print(key, value)
|
||||
print("\nSupport\n")
|
||||
print(support)
|
||||
print("\n Cluster \n")
|
||||
for key, value in sorted(cluster.items(), reverse=True):
|
||||
print(key, value)
|
||||
print("\n Graph\n")
|
||||
for key, value in graph.items():
|
||||
print(key, value)
|
||||
print("\n Edge List of Frequent subgraphs \n")
|
||||
for edge_list in freq_subgraph_edge_list:
|
||||
print(edge_list)
|
||||
|
||||
|
||||
def create_edge(nodes, graph, cluster, c1):
|
||||
"""
|
||||
create edge between the nodes
|
||||
"""
|
||||
for i in cluster[c1].keys():
|
||||
count = 0
|
||||
c2 = c1 + 1
|
||||
while c2 < max(cluster.keys()):
|
||||
for j in cluster[c2].keys():
|
||||
"""
|
||||
creates edge only if the condition satisfies
|
||||
"""
|
||||
if int(i, 2) & int(j, 2) == int(i, 2):
|
||||
if tuple(nodes[i]) in graph:
|
||||
graph[tuple(nodes[i])].append(nodes[j])
|
||||
else:
|
||||
graph[tuple(nodes[i])] = [nodes[j]]
|
||||
count += 1
|
||||
if count == 0:
|
||||
c2 = c2 + 1
|
||||
else:
|
||||
break
|
||||
|
||||
|
||||
def construct_graph(cluster, nodes):
|
||||
X = cluster[max(cluster.keys())]
|
||||
cluster[max(cluster.keys()) + 1] = "Header"
|
||||
graph = {}
|
||||
for i in X:
|
||||
if tuple(["Header"]) in graph:
|
||||
graph[tuple(["Header"])].append(X[i])
|
||||
else:
|
||||
graph[tuple(["Header"])] = [X[i]]
|
||||
for i in X:
|
||||
graph[tuple(X[i])] = [["Header"]]
|
||||
i = 1
|
||||
while i < max(cluster) - 1:
|
||||
create_edge(nodes, graph, cluster, i)
|
||||
i = i + 1
|
||||
return graph
|
||||
|
||||
|
||||
def myDFS(graph, start, end, path=[]):
|
||||
"""
|
||||
find different DFS walk from given node to Header node
|
||||
"""
|
||||
path = path + [start]
|
||||
if start == end:
|
||||
paths.append(path)
|
||||
for node in graph[start]:
|
||||
if tuple(node) not in path:
|
||||
myDFS(graph, tuple(node), end, path)
|
||||
|
||||
|
||||
def find_freq_subgraph_given_support(s, cluster, graph):
|
||||
"""
|
||||
find edges of multiple frequent subgraphs
|
||||
"""
|
||||
k = int(s / 100 * (len(cluster) - 1))
|
||||
for i in cluster[k].keys():
|
||||
myDFS(graph, tuple(cluster[k][i]), tuple(["Header"]))
|
||||
|
||||
|
||||
def freq_subgraphs_edge_list(paths):
|
||||
"""
|
||||
returns Edge list for frequent subgraphs
|
||||
"""
|
||||
freq_sub_EL = []
|
||||
for edges in paths:
|
||||
EL = []
|
||||
for j in range(len(edges) - 1):
|
||||
temp = list(edges[j])
|
||||
for e in temp:
|
||||
edge = (e[0], e[1])
|
||||
EL.append(edge)
|
||||
freq_sub_EL.append(EL)
|
||||
return freq_sub_EL
|
||||
|
||||
|
||||
def preprocess(edge_array):
|
||||
"""
|
||||
Preprocess the edge array
|
||||
>>> preprocess([['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12',
|
||||
... 'cd-e2', 'ce-e4', 'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3',
|
||||
... 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3']])
|
||||
|
||||
"""
|
||||
for i in range(len(edge_array)):
|
||||
for j in range(len(edge_array[i])):
|
||||
t = edge_array[i][j].split("-")
|
||||
edge_array[i][j] = t
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
preprocess(edge_array)
|
||||
frequency_table = get_frequency_table(edge_array)
|
||||
nodes = get_nodes(frequency_table)
|
||||
cluster = get_cluster(nodes)
|
||||
support = get_support(cluster)
|
||||
graph = construct_graph(cluster, nodes)
|
||||
find_freq_subgraph_given_support(60, cluster, graph)
|
||||
paths = []
|
||||
freq_subgraph_edge_list = freq_subgraphs_edge_list(paths)
|
||||
print_all()
|
Loading…
Reference in New Issue
Block a user