mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-24 13:31:07 +00:00
233 lines
7.2 KiB
Python
233 lines
7.2 KiB
Python
|
"""
FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs

A novel Frequent Pattern Graph Mining algorithm, FP-GraphMiner, that compactly
represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph).
This graph can be used to efficiently mine frequent subgraphs including maximal
frequent subgraphs and maximum common subgraphs.

URL: https://www.researchgate.net/publication/235255851
"""
|
||
|
# fmt: off
# Sample input: one inner list per graph. Each entry is a string of the form
# "<name>-<label>"; preprocess() splits it on "-" into [name, label].
# NOTE(review): the two-letter name looks like a pair of vertex ids (e.g. 'ab'
# is the edge a-b) and the suffix an edge label -- confirm against the paper.
edge_array = [
    ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', 'cd-e2', 'ce-e4',
     'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3'],
    ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'cd-e2', 'de-e1', 'df-e8',
     'ef-e3', 'eg-e2', 'fg-e6'],
    ['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'de-e1', 'df-e8', 'dg-e5', 'ef-e3', 'eg-e2',
     'eh-e12', 'fg-e6', 'fh-e10', 'gh-e6'],
    ['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'bh-e12', 'cd-e2', 'df-e8', 'dh-e10'],
    ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'cd-e2', 'ce-e4', 'de-e1', 'df-e8',
     'dg-e5', 'ef-e3', 'eg-e2', 'fg-e6']
]
# fmt: on
|
||
|
|
||
|
|
||
|
def get_distinct_edge(edge_array):
    """
    Return the distinct edges found in the edge arrays of multiple graphs.

    The first element (``item[0]``) of every entry in every row is collected
    into a set, so each value appears once. The order of the returned list is
    not defined.

    >>> sorted(get_distinct_edge(edge_array))
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    """
    return list({entry[0] for graph_edges in edge_array for entry in graph_edges})
|
||
|
|
||
|
|
||
|
def get_bitcode(edge_array, distinct_edge):
    """
    Return the bitcode of ``distinct_edge`` across all graphs.

    The result is a string with one character per row of ``edge_array``:
    character ``i`` is "1" when some entry of ``edge_array[i]`` has
    ``distinct_edge`` as its first element, otherwise "0".

    The comparison is an exact match. A substring test would wrongly mark an
    edge such as 'ab' as present in a graph that only contains 'abc'.

    >>> get_bitcode([[['ab', 'e1']], [['cd', 'e2']],
    ...              [['ab', 'e1'], ['cd', 'e2']]], 'ab')
    '101'
    >>> get_bitcode([[['abc', 'e1']], [['ab', 'e2']]], 'ab')
    '01'
    """
    return "".join(
        "1" if any(item[0] == distinct_edge for item in row) else "0"
        for row in edge_array
    )
|
||
|
|
||
|
|
||
|
def get_frequency_table(edge_array):
    """
    Return the frequency table of the distinct edges.

    Each row has the form ``[distinct edge, WT(bitcode), bitcode]``, where
    WT(bitcode) is the number of "1" characters in the bitcode. Rows are
    sorted by that weight in descending order; rows with equal weight keep the
    order in which ``get_distinct_edge`` produced them.
    """
    bitcode_of = {
        edge: get_bitcode(edge_array, edge) for edge in get_distinct_edge(edge_array)
    }
    ranked = sorted(
        bitcode_of.items(), key=lambda pair: pair[1].count("1"), reverse=True
    )
    return [[edge, code.count("1"), code] for edge, code in ranked]
|
||
|
|
||
|
|
||
|
def get_nodes(frequency_table):
    """
    Group the edges of a frequency table by their bitcode.

    Each row is read as ``[edge, weight, bitcode]``; only the edge (index 0)
    and the bitcode (index 2) are used.

    Result format: nodes = {bitcode: edges that share this bitcode}

    >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'],
    ...            ['bd', 5, '11111'], ['bc', 5, '11111']])
    {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}
    """
    nodes = {}
    for row in frequency_table:
        bitcode = row[2]
        if bitcode not in nodes:
            nodes[bitcode] = []
        nodes[bitcode].append(row[0])
    return nodes
|
||
|
|
||
|
|
||
|
def get_cluster(nodes):
    """
    Group nodes by the weight of their bitcode.

    The weight of a bitcode is the number of "1" characters it contains.

    Result format: cluster = {WT(bitcode): {bitcode: edges, ...}}
    """
    cluster = {}
    for bitcode, edges in nodes.items():
        weight = bitcode.count("1")
        if weight not in cluster:
            cluster[weight] = {}
        cluster[weight][bitcode] = edges
    return cluster
|
||
|
|
||
|
|
||
|
def get_support(cluster):
    """
    Return one support value per key of ``cluster``.

    For every weight key the value is ``weight * 100 / len(cluster)``, listed
    in the iteration order of ``cluster``.

    >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']},
    ...              4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']},
    ...              3: {'11001': ['ad'], '10101': ['dg']},
    ...              2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'],
    ...                  '10001': ['ce']},
    ...              1: {'00100': ['fh', 'eh'], '10000': ['hi']}})
    [100.0, 80.0, 60.0, 40.0, 20.0]
    """
    support = []
    for weight in cluster:
        support.append(weight * 100 / len(cluster))
    return support
|
||
|
|
||
|
|
||
|
def print_all() -> None:
    """
    Print the results held in the module-level names ``nodes``, ``support``,
    ``cluster``, ``graph`` and ``freq_subgraph_edge_list``.

    Clusters are printed from the highest key to the lowest.
    """
    print("\nNodes\n")
    for bitcode in nodes:
        print(bitcode, nodes[bitcode])

    print("\nSupport\n")
    print(support)

    print("\n Cluster \n")
    for weight in sorted(cluster, reverse=True):
        print(weight, cluster[weight])

    print("\n Graph\n")
    for vertex in graph:
        print(vertex, graph[vertex])

    print("\n Edge List of Frequent subgraphs \n")
    for subgraph_edges in freq_subgraph_edge_list:
        print(subgraph_edges)
|
||
|
|
||
|
|
||
|
def create_edge(nodes, graph, cluster, c1):
    """
    Create edges from every node of weight ``c1`` to heavier nodes.

    For each bitcode in ``cluster[c1]`` the weight levels ``c1 + 1`` up to,
    but not including, the largest key of ``cluster`` are scanned in
    increasing order. At the first level that holds at least one bitcode
    containing all the "1" bits of the current one, an edge is added to every
    such bitcode on that level and the scan stops for the current bitcode.

    ``graph`` is updated in place: the key is ``tuple(nodes[bitcode])`` and
    the value is a list of the linked ``nodes[...]`` entries. Nothing is
    returned.

    Weight levels that are absent from ``cluster`` (including ``c1`` itself)
    are skipped instead of raising ``KeyError``, so inputs whose weights are
    not contiguous are handled.
    """
    # The largest key is excluded from the scan; construct_graph stores the
    # "Header" marker under it.
    top_level = max(cluster, default=c1)
    for code in cluster.get(c1, {}):
        mask = int(code, 2)
        for c2 in range(c1 + 1, top_level):
            # An edge is created only if `other` has every bit of `code` set.
            supersets = [
                nodes[other]
                for other in cluster.get(c2, {})
                if (mask & int(other, 2)) == mask
            ]
            if supersets:
                graph.setdefault(tuple(nodes[code]), []).extend(supersets)
                break
|
||
|
|
||
|
|
||
|
def construct_graph(cluster, nodes):
    """
    Build the graph that links the nodes of all clusters.

    The nodes stored under the largest key of ``cluster`` are connected to a
    ``("Header",)`` vertex in both directions. ``cluster`` is modified in
    place: the string "Header" is stored under ``largest key + 1``. Then
    ``create_edge`` is called for every weight from 1 up to, but not
    including, the original largest key.

    Returns a dict that maps a tuple of edges (or ``("Header",)``) to a list
    of neighbouring nodes.
    """
    header_key = ("Header",)
    top_weight = max(cluster.keys())
    top_nodes = cluster[top_weight]
    cluster[top_weight + 1] = "Header"

    graph = {}
    # Header -> every node of the heaviest cluster.
    for edges in top_nodes.values():
        graph.setdefault(header_key, []).append(edges)
    # Every node of the heaviest cluster -> Header.
    for edges in top_nodes.values():
        graph[tuple(edges)] = [["Header"]]

    for weight in range(1, max(cluster) - 1):
        create_edge(nodes, graph, cluster, weight)
    return graph
|
||
|
|
||
|
|
||
|
def myDFS(graph, start, end, path=None):
    """
    Find the different DFS walks from a given node to the Header node.

    ``graph`` maps a tuple to a list of neighbours; each neighbour is turned
    into a tuple before it is used as a key. ``path`` is the walk taken so
    far; ``None`` (the default) starts a new walk.

    Every walk that reaches ``end`` is appended to the module-level list
    ``paths``, which must exist before this function is called. Nothing is
    returned. The search is not cut short at ``end``: its unvisited
    neighbours are still explored.
    """
    # None replaces a mutable default argument; a new list is built on every
    # call, so the caller's list is never modified.
    path = ([] if path is None else path) + [start]
    if start == end:
        paths.append(path)
    for node in graph[start]:
        successor = tuple(node)
        if successor not in path:
            myDFS(graph, successor, end, path)
|
||
|
|
||
|
|
||
|
def find_freq_subgraph_given_support(s, cluster, graph):
    """
    Find the edges of multiple frequent subgraphs for the support ``s``.

    The support is mapped to a cluster key ``k = int(s / 100 * (len(cluster) - 1))``.
    A DFS towards the ``("Header",)`` vertex is then started from every node
    stored in ``cluster[k]``. The walks are collected by ``myDFS``; nothing is
    returned.
    """
    k = int(s / 100 * (len(cluster) - 1))
    header = ("Header",)
    for edges in cluster[k].values():
        myDFS(graph, tuple(edges), header)
|
||
|
|
||
|
|
||
|
def freq_subgraphs_edge_list(paths):
    """
    Return the edge list of every frequent subgraph.

    For each walk in ``paths`` every vertex except the last one is expanded:
    each of its entries ``e`` contributes the pair ``(e[0], e[1])``. The
    result holds one list of pairs per walk, in the same order as ``paths``.
    """
    return [
        [(label[0], label[1]) for vertex in walk[:-1] for label in vertex]
        for walk in paths
    ]
|
||
|
|
||
|
|
||
|
def preprocess(edge_array):
    """
    Preprocess the edge array in place.

    Every entry of every row is replaced by the list obtained from splitting
    it on "-", e.g. 'ab-e1' becomes ['ab', 'e1']. Nothing is returned.

    >>> preprocess([['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12',
    ...              'cd-e2', 'ce-e4', 'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3',
    ...              'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3']])

    """
    for row in edge_array:
        for position, entry in enumerate(row):
            row[position] = entry.split("-")
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Demo run on the sample ``edge_array``. The names assigned here are
    # module-level globals that print_all() and myDFS() read.
    preprocess(edge_array)
    frequency_table = get_frequency_table(edge_array)
    nodes = get_nodes(frequency_table)
    cluster = get_cluster(nodes)
    support = get_support(cluster)
    graph = construct_graph(cluster, nodes)
    # myDFS() appends every walk it finds to ``paths``, so the list has to
    # exist before the search starts. Creating it afterwards raises NameError
    # inside myDFS and would discard the results in any case.
    paths = []
    find_freq_subgraph_given_support(60, cluster, graph)
    freq_subgraph_edge_list = freq_subgraphs_edge_list(paths)
    print_all()
|