FreddysOrg / ProViBackend

0 stars 1 forks source link

Generate Mental Map functionality #3

Open FabianDus1002 opened 1 week ago

FabianDus1002 commented 1 week ago

- Figure out how to create the mental map.
- Regardless of activities or edges being added, the position of every node must stay the same.

Resources: https://graphviz.org/docs/layouts/

luisabuck commented 1 week ago

As discussed, here is a potential solution using networkx:

import pm4py
from pm4py.objects.conversion.log.variants import to_data_frame
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
from networkx.drawing.nx_agraph import graphviz_layout

## Full Mental Map
# Read the XES event log from disk
log = pm4py.read_xes('/Users/luisabuck/Documents/UniMa/4_Sem/Team_Project/Test/Road Traffic Fine Management Process_1_all/Road_Traffic_Fine_Management_Process.xes')

# Turn the log into a dataframe
df = to_data_frame.apply(log)

# Names of the two artificial nodes that frame every case
start_node = 'Start Process'
end_node = 'End Process'

# Directed graph that collects the events and the transitions between them
G = nx.DiGraph()

# Process the log case by case; events are taken in dataframe row order
for case_id, group in df.groupby('case:concept:name'):
    events = group['concept:name'].tolist()

    # Frame the case: start node -> first event, last event -> end node.
    # NOTE: these two edges are added without a 'weight' attribute.
    if events:
        G.add_edge(start_node, events[0])
        G.add_edge(events[-1], end_node)

    # Count every pair of consecutive events within the case
    for start_event, end_event in zip(events, events[1:]):
        if not G.has_edge(start_event, end_event):
            G.add_edge(start_event, end_event, weight=1)  # First occurrence of this transition
        else:
            G[start_event][end_event]['weight'] += 1  # Transition seen before: bump its counter

# Compute the node positions of the full graph with the Graphviz "neato" engine
pos = graphviz_layout(G, prog="neato")

# Adjust the positions for the start and end event
# Collect the laid-out coordinates to determine max_x, min_x, max_y, and min_y.
# NOTE(review): the extents include the positions Graphviz assigned to the
# start and end nodes themselves, since they are part of `pos` at this point.
x_values = [x for x, _ in pos.values()]
y_values = [y for _, y in pos.values()]

# Find the min and max y-values (vertical span)
max_y = max(y_values)
min_y = min(y_values)

# Find the min and max x-values (horizontal span)
max_x = max(x_values)
min_x = min(x_values)

# Calculate the center based on the horizontal span
center_x = (max_x + min_x) / 2  # True horizontal center

# Pin the start node slightly above the top of the layout
pos[start_node] = (center_x, max_y + 2)
# Pin the end node slightly below the bottom of the layout.
# Fix: this used to be `min_y - min_y - 2`, which always evaluates to -2 and
# therefore ignored the actual vertical extent of the layout.
pos[end_node] = (center_x, min_y - 2)

# Open the figure that holds the full map
plt.figure(figsize=(10, 15))

# Every node apart from the artificial start and end nodes
activity_nodes = [node for node in G.nodes if node not in (start_node, end_node)]

# Regular event nodes in lightblue
nx.draw_networkx_nodes(G, pos, nodelist=activity_nodes, node_size=700, node_color='lightblue')

# Start and end nodes get their own colour, a larger size and no label
for special_node, colour in ((start_node, 'lightgreen'), (end_node, 'salmon')):
    nx.draw_networkx_nodes(G, pos, nodelist=[special_node], node_color=colour, node_size=800)

# Label only the regular event nodes, each with its own name
nx.draw_networkx_labels(
    G,
    pos,
    labels={node: node for node in activity_nodes},
    font_size=6,
    font_weight='bold',
)

# Directed, slightly curved edges
nx.draw_networkx_edges(
    G,
    pos,
    arrows=True,
    width=0.5,
    arrowsize=20,
    arrowstyle='-|>',
    alpha=0.7,
    connectionstyle='arc3,rad=0.1',
    min_target_margin=15,
)

# # Optional: Display edge weights on the graph
# edge_labels = nx.get_edge_attributes(G, 'weight')
# nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=6)

# Title the figure, write it to disk, then display it
plt.title('Mental Map')
plt.savefig('Mental_Map.png')
plt.show()

## Filtered Mental Map
percentage_nodes = 0.5  # Share of the most frequent events to display
percentage_edges = 1  # Share of the most frequent connections to display

# How often each event name occurs in the log
event_counts = df['concept:name'].value_counts()
total_events = len(event_counts)

# Keep the leading entries of the count table, as many as the node percentage allows
num_events_to_display = int(total_events * percentage_nodes)
top_events = event_counts.index[:num_events_to_display].tolist()

# The start and end nodes are always part of the filtered node list
subset_nodes = top_events + [start_node, end_node]
allowed_nodes = set(subset_nodes)  # Same members, used for the membership tests below

# Graph that will hold the filtered map
G_subset = nx.DiGraph()

# Weights of all edges of the full graph whose two endpoints both survived the
# node filter; an edge without a 'weight' attribute counts as 1.
# NOTE(review): the start/end edges of the full graph carry no 'weight', so
# they rank as 1 here and are the first to go once percentage_edges < 1.
edge_weights = {
    (u, v): G[u][v].get('weight', 1)
    for u, v in G.edges()
    if u in allowed_nodes and v in allowed_nodes
}

# Number of edges to keep according to the edge percentage
num_edges_to_display = int(len(edge_weights) * percentage_edges)

# Rank the candidate edges by weight (highest first) and cut the list off
ranked_edges = sorted(edge_weights.items(), key=lambda item: item[1], reverse=True)
most_frequent_edges = ranked_edges[:num_edges_to_display]

# Copy the selected edges, including their weights, into the filtered graph
for (start_event, end_event), weight in most_frequent_edges:
    G_subset.add_edge(start_event, end_event, weight=weight)

# Positions of the filtered nodes, taken unchanged from the full graph.
# NOTE(review): subset_pos is built here, but the draw calls below pass `pos`.
subset_pos = {node: pos[node] for node in G_subset.nodes}

# Open the figure that holds the filtered map
plt.figure(figsize=(10, 15))

# Start and end nodes in their own colours
for special_node, colour in ((start_node, 'lightgreen'), (end_node, 'salmon')):
    nx.draw_networkx_nodes(G_subset, pos, nodelist=[special_node], node_color=colour, node_size=800)

# Every remaining node of the filtered graph in lightblue
subset_activity_nodes = [node for node in G_subset.nodes if node not in (start_node, end_node)]
nx.draw_networkx_nodes(G_subset, pos, nodelist=subset_activity_nodes, node_size=700, node_color='lightblue')

# Label only the regular event nodes
nx.draw_networkx_labels(
    G_subset,
    pos,
    labels={node: node for node in subset_activity_nodes},
    font_size=6,
    font_weight='bold',
)

# Directed, slightly curved edges of the filtered graph
nx.draw_networkx_edges(
    G_subset,
    pos,
    arrows=True,
    width=0.5,
    arrowsize=20,
    arrowstyle='-|>',
    alpha=0.7,
    connectionstyle='arc3,rad=0.1',
    min_target_margin=15,
)

# Optional: Display edge weights for the subset graph
# edge_labels_subset = nx.get_edge_attributes(G_subset, 'weight')
# nx.draw_networkx_edge_labels(G_subset, pos, edge_labels=edge_labels_subset, font_size=6)

# Title the figure, write it to disk, then display it
plt.title('Filtered Mental Map')
plt.savefig('Filtered_Mental_Map.png')
plt.show()
loy56 commented 5 days ago

Calculate the pos attributes of the full graph:

import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from graphviz import Digraph

# Read the XES event log
event_log = xes_importer.apply('://Road_Traffic_Fine_Management_Process.xes')

# Discover the DFG together with the start and end activities of the log
dfg, start_activities, end_activities = pm4py.discover_dfg(event_log)

# Assemble the Graphviz digraph by hand
dot = Digraph()

# Artificial "Start" and "End" placeholder nodes share the same appearance
for placeholder in ("Start", "End"):
    dot.node(placeholder, shape="circle", style="filled", color="lightgrey")

# Connect "Start" to every start activity ...
for start_act in start_activities:
    dot.edge("Start", start_act, label="start")

# ... and every end activity to "End"
for end_act in end_activities:
    dot.edge(end_act, "End", label="end")

# One edge per DFG entry, labelled with the value stored for that pair
for (source, target), freq in dfg.items():
    dot.edge(source, target, label=str(freq))

# Run the layout engine ('dot'; 'neato' gives a different layout) and fetch
# the rendered DOT text, which is what the position extraction works on
dot.engine = 'dot'
rendered_bytes = dot.pipe(format='dot')
dot_output = rendered_bytes.decode('utf-8')
print(dot_output)
loy56 commented 2 days ago

Extract the value of the pos attribute of nodes and edges:

import re

# Pattern for edge statements in the DOT text. It captures the source name and
# the target name (each either a bare word or a double-quoted string) plus the
# content of the pos="..." attribute inside the bracket list that follows.
edge_pos_pattern = re.compile(
    r'(\w+|"[^"]*")\s+->\s+(\w+|"[^"]*")\s+\[.*?pos="(.*?)".*?\];',
    flags=re.DOTALL,
)

# One (source, target, pos) tuple per matched edge statement
edge_positions = edge_pos_pattern.findall(dot_output)
for source, target, pos in edge_positions:
    # Quoted names are captured including their quotes; drop them for printing
    source, target = (name.strip('"') for name in (source, target))
    print(f"{source} -> {target}: pos={pos}")

# Hand-trimmed sample of laid-out DOT text that contains node statements only
dot_output2 = r"""
digraph {
    Start   [color=lightgrey,
        height=0.78667,
        pos="1004,1105.8",
        shape=circle,
        style=filled,
        width=0.78667];
    "Create Fine"   [height=0.5,
        pos="1004,1007",
        width=1.4975];
    End [color=lightgrey,
        height=0.71558,
        pos="889,25.761",
        shape=circle,
        style=filled,
        width=0.71558];
    "Send Fine" [height=0.5,
        pos="1431,918.52",
        width=1.3411];
    Payment [height=0.5,
        pos="466,830.02",
        width=1.2132];
    "Receive Result Appeal from Prefecture" [height=0.5,
        pos="525,299.02",
        width=4.3551];
    "Appeal to Judge"   [height=0.5,
        pos="1008,122.02",
        width=1.9809];
    "Insert Date Appeal to Prefecture"  [height=0.5,
        pos="584,741.52",
        width=3.63];
    "Send Appeal to Prefecture" [height=0.5,
        pos="800,476.02",
        width=3.0045];
    "Insert Fine Notification"  [height=0.5,
        pos="490,387.52",
        width=2.7202];
    "Send for Credit Collection"    [height=0.5,
        pos="1070,564.52",
        width=3.0329];
    "Notify Result Appeal to Offender"  [height=0.5,
        pos="1115,210.52",
        width=3.758];
    "Add penalty"   [height=0.5,
        pos="782,653.02",
        width=1.5686];
}"""    

# Regular expression to find the pos attribute of node statements.
# Captures (name, x, y); a quoted name is captured including its quotes.
#
# Fix: the previous pattern scanned with `\[.*?pos=` under re.DOTALL, so the
# search could run past the closing `]` of a statement. On DOT text that also
# contains `graph [...]`, `node [...]` or edge statements, those were reported
# as nodes carrying the coordinates of the next real node. The pattern now
#   * requires the name to be the first token on its line (re.MULTILINE), which
#     rules out edge targets that follow `->`, and
#   * confines the attribute scan to a single bracket list via `[^\]]*?`.
# Limitation: an attribute value that itself contains `]` ends the scan early.
node_pos_pattern = re.compile(
    r'^\s*("[^"]*"|\w+)\s+\[[^\]]*?\bpos=["\']?([-+]?\d*\.?\d+),([-+]?\d*\.?\d+)["\']?',
    re.MULTILINE,
)

# Extract node positions as (name, x, y) string tuples
node_positions = node_pos_pattern.findall(dot_output2)
for node, x, y in node_positions:
    # Remove quotes from node names if present
    node = node.strip('"')
    print(f"{node}: pos={x},{y}")