# File listing metadata: 164 lines, 5.3 KiB, Python
# Re-importing necessary libraries
|
|
import json
|
|
import matplotlib.pyplot as plt
|
|
from collections import defaultdict
|
|
import numpy as np
|
|
|
|
# Load the serialized tree data that the plots below are built from.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# depending on the platform's default text encoding.
with open('gemla/round2.json', 'r', encoding='utf-8') as file:
    simplified_json_data = json.load(file)

# Id of a node of interest.
# NOTE(review): confirm this id exists in the loaded file.
target_node_id = '0c1e64dc-6ddf-4dbb-bf6e-e8218b925194'
|
|
|
|
# Collect the chain of nodes reached by repeatedly following "left" links.
def traverse_left_nodes(node):
    """Return ``node`` and all of its successive ``"left"`` descendants.

    The walk starts at ``node`` and keeps following the ``"left"`` entry
    until it is missing or ``None``.

    Args:
        node: A dict-like tree node (anything with ``.get``), or ``None``.

    Returns:
        A list of nodes ordered from ``node`` itself down to the last node
        on the left chain. ``None`` yields an empty list.
    """
    chain = []
    # Iterative on purpose: a recursive walk fails with RecursionError on
    # chains longer than the interpreter's recursion limit, and building
    # the result with ``[node] + rest`` copies the list at every level.
    while node is not None:
        chain.append(node)
        node = node.get("left")
    return chain
|
|
|
|
# Collect the "right" child found at each step of the left chain.
def traverse_right_nodes(node):
    """Return the ``"right"`` children met while walking down the left chain.

    Starting at ``node``, each step looks up both the ``"left"`` and the
    ``"right"`` entry. While both are truthy, the right child is recorded
    and the walk continues from the left child. The walk stops at the first
    node where either child is missing, ``None`` or otherwise falsy; that
    node's right child is not recorded.

    Args:
        node: A dict-like tree node (anything with ``.get``), or ``None``.

    Returns:
        A list of right children ordered from the one attached to ``node``
        downwards. ``None`` yields an empty list.
    """
    right_children = []
    # Iterative on purpose: a recursive walk fails with RecursionError on
    # chains longer than the interpreter's recursion limit.
    while node is not None:
        right_node = node.get("right")
        left_node = node.get("left")
        if not (right_node and left_node):
            break
        right_children.append(right_node)
        node = left_node
    return right_children
|
|
|
|
|
|
# Main line: the nodes on the left chain of the first tree, deepest first.
left_nodes = traverse_left_nodes(simplified_json_data[0])
left_nodes.reverse()

# Indexing the first entry raises IndexError early if the chain is empty.
node = left_nodes[0]

# Flatten the score sets stored on the main-line nodes into a single list,
# skipping nodes without a payload and payloads without scores.
scores = []
for node in left_nodes:
    payload = node["val"]["node"]
    if not payload:
        continue
    scores.extend(payload["scores"] or [])

# One list of raw score values per score set.
scores_values = [list(entry.values()) for entry in scores]

# The same labels are used for the boxplot and for the y-axis ticks.
set_labels = [f'Set {number}' for number in range(1, len(scores_values) + 1)]

# Draw every score set as a horizontal box on one shared axis.
fig, ax = plt.subplots(figsize=(10, 6))
boxplots = ax.boxplot(
    scores_values,
    vert=False,
    patch_artist=True,
    labels=set_labels,
)

# Axis labelling.
ax.set_xlabel('Scores - Main Line')
ax.set_ylabel('Score Sets')
ax.yaxis.grid(True)  # Add horizontal grid lines for clarity

# Make sure the y-axis tick labels are shown.
ax.set_yticklabels(set_labels)
|
|
|
|
# Right branches: the right child attached at each step of the main line.
right_nodes = traverse_right_nodes(simplified_json_data[0])

# Pick the node to plot. The module-level ``target_node_id`` is honoured
# when it matches one of the right branches. It used to be reset to None at
# this point, which made the lookup below unreachable.
target_node = None
if target_node_id:
    target_node = next(
        (
            candidate
            for candidate in right_nodes
            if candidate["val"]["id"] == target_node_id
        ),
        None,
    )
    if target_node is None:
        # An unknown id previously crashed later with a TypeError on None.
        print(
            f'Node {target_node_id} not found among the right branches; '
            'falling back to the default node.'
        )
if target_node is None:
    # Default selection: the second collected right branch. This raises
    # IndexError if fewer than two right branches exist.
    target_node = right_nodes[1]
scores = target_node["val"]["node"]["scores"]

# One list of raw score values per score set of the selected node.
scores_values = [list(score_set.values()) for score_set in scores]

# Set up the figure for plotting on the same graph
fig, ax = plt.subplots(figsize=(10, 6))

# Generate a boxplot for each set of scores on the same graph
set_labels = [f'Set {i + 1}' for i in range(len(scores_values))]
boxplots = ax.boxplot(
    scores_values,
    vert=False,
    patch_artist=True,
    labels=set_labels,
)

# Labeling. The outer quotes differ from the subscript quotes because
# reusing the same quote type inside an f-string is a SyntaxError before
# Python 3.12.
ax.set_xlabel(f"Scores: {target_node['val']['id']}")
ax.set_ylabel('Score Sets')
ax.yaxis.grid(True)  # Add horizontal grid lines for clarity

# Set y-axis labels to be visible
ax.set_yticklabels(set_labels)
|
|
|
|
# Rank every (node id, generation) pair across the main line and the right
# branches by the third quartile of its scores, and plot the best ones.

# Maximum number of (node id, generation) pairs shown in the ranking plot.
# The code always selected 40 although its comments and title said 20; the
# value is kept and the title below now reports the real count.
TOP_N = 40

# Group scores by the id of the node they are stored on and by the index of
# the score set ("generation") within that node.
individual_generation_scores = defaultdict(lambda: defaultdict(list))
for node in left_nodes + right_nodes:
    payload = node["val"]["node"]
    if not payload:
        # Nodes without a payload contribute nothing.
        continue
    node_id = node["val"]["id"]
    for generation, score_set in enumerate(payload["scores"] or []):
        if score_set:
            # Only create an entry when there is at least one score, so
            # that no empty list reaches the percentile computation.
            individual_generation_scores[node_id][generation].extend(
                score_set.values()
            )

# Calculate Q3 (75th percentile) for each (node id, generation) pair.
individual_generation_q3 = {}
for node_id, generations in individual_generation_scores.items():
    for generation, generation_scores in generations.items():
        individual_generation_q3[(node_id, generation)] = np.percentile(
            generation_scores, 75
        )

# Sort by Q3 value, highest first, and keep at most TOP_N entries.
top_individual_generations = sorted(
    individual_generation_q3,
    key=individual_generation_q3.get,
    reverse=True,
)[:TOP_N]

# Raw scores of the selected pairs, in ranking order, for plotting.
top_scores = [
    individual_generation_scores[node_id][generation]
    for node_id, generation in top_individual_generations
]

# Labels show a shortened node id together with the generation index.
labels = [
    f'{node_id[:8]}... Gen {generation}'
    for node_id, generation in top_individual_generations
]

# Generate box and whisker plots for the selected pairs.
fig, ax = plt.subplots(figsize=(12, 10))
ax.boxplot(top_scores, vert=False, patch_artist=True, labels=labels)
ax.set_xlabel('Scores')
ax.set_ylabel('Individual Generation')
ax.set_title(
    f'Top {len(top_individual_generations)} Individual Generations by Q3 Value'
)

# Display the plot
plt.show()
|
|
|