Open FabianDus1002 opened 1 week ago
As discussed, here is a potential solution using networkx:
import pm4py
from pm4py.objects.conversion.log.variants import to_data_frame
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from networkx.drawing.nx_agraph import graphviz_layout
## Full Mental Map
# Load the data
log = pm4py.read_xes('/Users/luisabuck/Documents/UniMa/4_Sem/Team_Project/Test/Road Traffic Fine Management Process_1_all/Road_Traffic_Fine_Management_Process.xes')
# Convert into a dataframe
df = to_data_frame.apply(log)
# Initialize a directed graph; every edge carries a 'weight' attribute that
# counts how often the transition was observed across all cases
G = nx.DiGraph()
# Define the artificial start and end nodes that frame every case
start_node = 'Start Process'
end_node = 'End Process'
# Extract the events from the dataframe, one case at a time
# NOTE(review): assumes the rows of each case are already in chronological
# order - confirm, or sort by the timestamp column before grouping
for case_id, group in df.groupby('case:concept:name'):
    events = group['concept:name'].tolist()
    if not events:
        continue
    # The start/end transitions come first so nodes and edges are inserted in
    # the same order as before; they are now counted like every other
    # transition instead of being left without a weight.
    # NOTE(review): edge attributes may be forwarded to the layout engine, so
    # the added weights could shift the neato layout - confirm it is acceptable
    transitions = [(start_node, events[0]), (events[-1], end_node)]
    transitions.extend(zip(events, events[1:]))
    for start_event, end_event in transitions:
        if G.has_edge(start_event, end_event):
            # Seen before: bump the frequency; .get() covers a pre-existing
            # edge that was created without a weight
            G[start_event][end_event]['weight'] = G[start_event][end_event].get('weight', 0) + 1
        else:
            # First occurrence: create the edge with frequency 1
            G.add_edge(start_event, end_event, weight=1)
# Draw the full graph
# Compute node coordinates with the Graphviz "neato" engine
pos = graphviz_layout(G, prog="neato")
# Adjust the positions for the start and end event
# Collect the current coordinates to determine the extent of the layout
x_values = [x for x, _ in pos.values()]
y_values = [y for _, y in pos.values()]
# Find the min and max y-values (vertical span)
max_y = max(y_values)
min_y = min(y_values)
# Find the min and max x-values (horizontal span)
max_x = max(x_values)
min_x = min(x_values)
# Calculate the center based on the horizontal span
center_x = (max_x + min_x) / 2  # True horizontal center
# Pin the artificial nodes to the top and bottom of the drawing
pos[start_node] = (center_x, max_y + 2)  # Slightly above the max y for visibility
# Previously `min_y - min_y - 2`, which always evaluated to -2 regardless of
# where the layout actually ends
pos[end_node] = (center_x, min_y - 2)  # Slightly below the min y for visibility
# Set up a tall canvas for the full graph
plt.figure(figsize=(10, 15))
# Everything that is not one of the two artificial terminals is a regular activity
terminal_nodes = [start_node, end_node]
activity_nodes = [node for node in G.nodes if node not in terminal_nodes]
# Regular activities in light blue
nx.draw_networkx_nodes(G, pos, nodelist=activity_nodes, node_size=700, node_color='lightblue')
# Start (green) and end (salmon) markers, slightly larger and left unlabelled
nx.draw_networkx_nodes(G, pos, nodelist=[start_node], node_color='lightgreen', node_size=800)
nx.draw_networkx_nodes(G, pos, nodelist=[end_node], node_color='salmon', node_size=800)
# Label only the regular activities with their own names
activity_labels = {node: node for node in activity_nodes}
nx.draw_networkx_labels(G, pos, labels=activity_labels, font_size=6, font_weight='bold')
# Slightly curved arrows that stop short of the target node
edge_style = dict(
    arrows=True,
    width=0.5,
    arrowsize=20,
    arrowstyle='-|>',
    alpha=0.7,
    connectionstyle='arc3,rad=0.1',
    min_target_margin=15,
)
nx.draw_networkx_edges(G, pos, **edge_style)
# Optional: display the edge weights on the graph
# edge_labels = nx.get_edge_attributes(G, 'weight')
# nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=6)
# Title the figure, write it to disk, then show it
plt.title('Mental Map')
plt.savefig('Mental_Map.png')
plt.show()
## Filtered Mental Map
percentage_nodes = 0.5  # Share of the most frequent events to keep
percentage_edges = 1  # Share of the most frequent connections to keep
# Frequency of every event name, most frequent first
event_counts = df['concept:name'].value_counts()
total_events = len(event_counts)
# Translate the node percentage into an absolute number of events (rounded down)
num_events_to_display = int(total_events * percentage_nodes)
top_events = event_counts.head(num_events_to_display).index.tolist()
# The artificial start and end nodes are always part of the filtered view
subset_nodes = top_events + [start_node, end_node]
# Fresh directed graph that will hold the filtered view
G_subset = nx.DiGraph()
# Candidate edges: both endpoints must survive the node filter; an edge that
# has no 'weight' attribute counts as 1
edge_weights = {
    (source_node, target_node): edge_weight
    for source_node, target_node, edge_weight in G.edges(data='weight', default=1)
    if source_node in subset_nodes and target_node in subset_nodes
}
# Translate the edge percentage into an absolute number of edges (rounded down)
num_edges_to_display = int(len(edge_weights) * percentage_edges)
# Rank the candidates by weight, heaviest first, and keep the requested amount
ranked_edges = sorted(edge_weights.items(), key=lambda entry: entry[1], reverse=True)
most_frequent_edges = ranked_edges[:num_edges_to_display]
# Copy the selected edges, with their weights, into the filtered graph
G_subset.add_weighted_edges_from(
    (source_node, target_node, edge_weight)
    for (source_node, target_node), edge_weight in most_frequent_edges
)
# Reuse the coordinates of the full graph for every node kept in the subset
subset_pos = {node: pos[node] for node in G_subset.nodes}
# NOTE: the draw calls below receive the full `pos` mapping; subset_pos is
# built here but not passed to them
# Set up a tall canvas for the filtered graph
plt.figure(figsize=(10, 15))
# Start (green) and end (salmon) markers
nx.draw_networkx_nodes(G_subset, pos, nodelist=[start_node], node_color='lightgreen', node_size=800)
nx.draw_networkx_nodes(G_subset, pos, nodelist=[end_node], node_color='salmon', node_size=800)
# Regular activities of the subset in light blue
subset_terminals = [start_node, end_node]
subset_activities = [node for node in G_subset.nodes if node not in subset_terminals]
nx.draw_networkx_nodes(G_subset, pos, nodelist=subset_activities, node_size=700, node_color='lightblue')
# Label only the regular activities with their own names
subset_labels = {node: node for node in subset_activities}
nx.draw_networkx_labels(G_subset, pos, labels=subset_labels, font_size=6, font_weight='bold')
# Slightly curved arrows that stop short of the target node
subset_edge_style = dict(
    arrows=True,
    width=0.5,
    arrowsize=20,
    arrowstyle='-|>',
    alpha=0.7,
    connectionstyle='arc3,rad=0.1',
    min_target_margin=15,
)
nx.draw_networkx_edges(G_subset, pos, **subset_edge_style)
# Optional: display the edge weights for the subset graph
# edge_labels_subset = nx.get_edge_attributes(G_subset, 'weight')
# nx.draw_networkx_edge_labels(G_subset, pos, edge_labels=edge_labels_subset, font_size=6)
# Title the figure, write it to disk, then show it
plt.title('Filtered Mental Map')
plt.savefig('Filtered_Mental_Map.png')
plt.show()
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from graphviz import Digraph
# Read the event log from the XES file
event_log = xes_importer.apply('://Road_Traffic_Fine_Management_Process.xes')
# Discover the directly-follows graph together with its start and end activities
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)
# Build the Graphviz digraph by hand
dot = Digraph()
# Grey circular placeholder nodes for the overall "Start" and "End"
for terminal_name in ("Start", "End"):
    dot.node(terminal_name, shape="circle", style="filled", color="lightgrey")
# "Start" points at every start activity
for start_act in start_activities:
    dot.edge("Start", start_act, label="start")
# Every end activity points at "End"
for end_act in end_activities:
    dot.edge(end_act, "End", label="end")
# One edge per directly-follows pair, labelled with its frequency
for transition, freq in dfg.items():
    source, target = transition
    dot.edge(source, target, label=str(freq))
# Let the 'dot' engine compute the positions ('neato' gives a different layout)
dot.engine = 'dot'
# Render to DOT text, which contains the computed coordinates
rendered_bytes = dot.pipe(format='dot')
dot_output = rendered_bytes.decode('utf-8')
print(dot_output)
import re
# Regular expression to capture edge positions: source name, target name
# (bare word or double-quoted) and the raw value of the edge's pos attribute
edge_pos_pattern = re.compile(r'(\w+|"[^"]*")\s+->\s+(\w+|"[^"]*")\s+\[.*?pos="(.*?)".*?\];', re.DOTALL)
# Extract edge positions as (source, target, pos) string triples
edge_positions = edge_pos_pattern.findall(dot_output)
# The loop variable is named edge_pos rather than pos so that it does not
# overwrite the module-level layout dictionary `pos` built earlier
for source, target, edge_pos in edge_positions:
    # Remove quotes from node names if present
    source = source.strip('"')
    target = target.strip('"')
    print(f"{source} -> {target}: pos={edge_pos}")
# Sample DOT text containing only node statements, used to try out the node
# position extraction below
dot_output2 = r"""
digraph {
Start [color=lightgrey,
height=0.78667,
pos="1004,1105.8",
shape=circle,
style=filled,
width=0.78667];
"Create Fine" [height=0.5,
pos="1004,1007",
width=1.4975];
End [color=lightgrey,
height=0.71558,
pos="889,25.761",
shape=circle,
style=filled,
width=0.71558];
"Send Fine" [height=0.5,
pos="1431,918.52",
width=1.3411];
Payment [height=0.5,
pos="466,830.02",
width=1.2132];
"Receive Result Appeal from Prefecture" [height=0.5,
pos="525,299.02",
width=4.3551];
"Appeal to Judge" [height=0.5,
pos="1008,122.02",
width=1.9809];
"Insert Date Appeal to Prefecture" [height=0.5,
pos="584,741.52",
width=3.63];
"Send Appeal to Prefecture" [height=0.5,
pos="800,476.02",
width=3.0045];
"Insert Fine Notification" [height=0.5,
pos="490,387.52",
width=2.7202];
"Send for Credit Collection" [height=0.5,
pos="1070,564.52",
width=3.0329];
"Notify Result Appeal to Offender" [height=0.5,
pos="1115,210.52",
width=3.758];
"Add penalty" [height=0.5,
pos="782,653.02",
width=1.5686];
}"""
# Regular expression to find pos attributes of nodes.
# Groups: node name (bare word or double-quoted), x coordinate, y coordinate.
# The scan between '[' and 'pos=' may not cross a ']', so a statement without
# a numeric "x,y" pos (graph/node defaults, edges whose pos starts with "e,")
# can no longer borrow the pos of a later statement.
# NOTE(review): an attribute value containing ']' before pos would be skipped
node_pos_pattern = re.compile(r'("[^"]*"|\w+)\s+\[[^\]]*?pos=["\']?([-+]?\d*\.?\d+),([-+]?\d*\.?\d+)["\']?')
# Extract node positions as (name, x, y) string triples
node_positions = node_pos_pattern.findall(dot_output2)
for node, x, y in node_positions:
    # Remove quotes from node names if present
    node = node.strip('"')
    print(f"{node}: pos={x},{y}")
- Figure out how to create the mental map.
- Node positions should stay the same regardless of which activities or edges are added.
- Resources: https://graphviz.org/docs/layouts/