#!/usr/bin/env python3
import json
import csv
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import glob
import argparse
class TargetProximityPlotter:
    """Plot the fraction of runs reaching a target percentage deviation
    from the known optimum, averaged over problem instances and targets."""

    def __init__(self, config_path):
        """Load the JSON plot configuration and the known objective values.

        Args:
            config_path: Path to a JSON configuration file providing at least
                'data_path', 'output_dir', 'algorithms', 'problem_groups',
                'plot_settings', 'plot_name' and 'target' or 'targets'.
        """
        with open(config_path, 'r') as f:
            self.config = json.load(f)
        self.data_path = Path(self.config['data_path'])
        self.output_dir = Path(self.config['output_dir'])
        # parents=True so a missing intermediate directory does not abort the
        # run (plain exist_ok=True still raises FileNotFoundError there).
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Load objectives (known optima keyed by instance name) for the
        # percentage-deviation calculation; expected next to this script.
        objectives_path = Path(__file__).parent / 'objectives.json'
        with open(objectives_path, 'r') as f:
            self.objectives = json.load(f)
def calculate_percentage_deviation(self, values, instance_name):
"""Calculate percentage deviation from optimal value - reused from original plotter"""
if instance_name not in self.objectives:
raise ValueError(f"No objective value found for instance {instance_name}")
optimal_value = self.objectives[instance_name]
# Check if any values are significantly better than the known optimum
tolerance = 1e-4 * np.abs(optimal_value)
significantly_better = values < (optimal_value - tolerance)
if np.any(significantly_better):
better_indices = np.where(significantly_better)[0]
best_found = np.min(values[better_indices])
improvement = optimal_value - best_found
improvement_pct = improvement / np.abs(optimal_value) * 100
print(f"WARNING: Found {np.sum(significantly_better)} values better than known optimum for {instance_name}!")
print(f"Known optimum: {optimal_value}")
print(f"Best found: {best_found}")
print(f"Improvement: {improvement} ({improvement_pct:.3f}%)")
print(f"Using best found value as new reference point.")
# Update the optimal value to the best found for this calculation
optimal_value = best_found
new_optimal_value = optimal_value
if optimal_value < 0:
new_optimal_value = -optimal_value
values = values + 2*new_optimal_value
# Calculate percentage deviation: (current - optimal) / |optimal| * 100
percentage_deviations = (values - new_optimal_value) / new_optimal_value * 100
return percentage_deviations
def load_runs_for_algorithm_instance(self, algorithm, instance):
"""Load all individual runs for an algorithm-instance combination"""
algorithm_path = self.data_path / algorithm / instance
csv_files = list(algorithm_path.glob('best_candidates_*.csv'))
if not csv_files:
print(f"Warning: No CSV files found for {algorithm}/{instance}")
return None
print(f"Found {len(csv_files)} files for {algorithm}/{instance}")
all_runs = []
for csv_file in csv_files:
try:
with open(csv_file, 'r') as f:
reader = csv.DictReader(f)
iterations = []
evaluations = []
for row in reader:
if 'iteration' in row and 'evaluation' in row:
iterations.append(float(row['iteration']))
evaluations.append(float(row['evaluation']))
if iterations and evaluations:
# Convert to percentage deviation
values = np.array(evaluations)
percentage_devs = self.calculate_percentage_deviation(values, instance)
run_data = {
'iteration': np.array(iterations),
'percentage_deviation': percentage_devs
}
all_runs.append(run_data)
except Exception as e:
print(f"Error reading {csv_file}: {e}")
return all_runs if all_runs else None
def calculate_target_proximity_over_time_for_algorithm_instance(self, algorithm, instance, target_pct):
"""Calculate fraction of runs within target proximity at each iteration"""
runs = self.load_runs_for_algorithm_instance(algorithm, instance)
if not runs:
return None
num_runs = len(runs)
# Find common iteration grid across all runs (similar to original plotter)
all_iterations = set()
for run in runs:
all_iterations.update(run['iteration'].tolist())
common_grid = sorted(list(all_iterations))
# For each iteration, count how many runs are within target
fractions_over_time = []
iterations = []
for eval_point in common_grid:
within_target_count = 0
for run in runs:
# Find the best deviation achieved up to this evaluation point
mask = run['iteration'] <= eval_point
if np.any(mask):
best_deviation_so_far = np.min(run['percentage_deviation'][mask])
if best_deviation_so_far <= target_pct:
within_target_count += 1
fraction = within_target_count / num_runs
fractions_over_time.append(fraction)
iterations.append(eval_point)
return {
'iterations': np.array(iterations),
'fractions': np.array(fractions_over_time)
}
def calculate_algorithm_average_over_time(self, algorithm, problem_groups, target_pct):
"""Calculate average target proximity over time across problem groups for one algorithm"""
all_group_data = []
for group in problem_groups:
group_data = []
for instance in group:
result = self.calculate_target_proximity_over_time_for_algorithm_instance(
algorithm, instance, target_pct
)
if result:
group_data.append(result)
if group_data:
all_group_data.extend(group_data)
if not all_group_data:
return None
# Find common iteration grid across all problems
all_iterations = set()
for data in all_group_data:
all_iterations.update(data['iterations'].tolist())
common_grid = sorted(list(all_iterations))
# Interpolate each problem's data to common grid and average
averaged_fractions = []
for eval_point in common_grid:
fractions_at_eval = []
for data in all_group_data:
# Find the fraction at this evaluation point (or the last known value)
mask = data['iterations'] <= eval_point
if np.any(mask):
last_known_fraction = data['fractions'][mask][-1]
fractions_at_eval.append(last_known_fraction)
else:
# Before any evaluations, fraction is 0
fractions_at_eval.append(0.0)
averaged_fractions.append(np.mean(fractions_at_eval))
return {
'iterations': np.array(common_grid),
'fractions': np.array(averaged_fractions)
}
def create_plot(self):
"""Create the target proximity plot"""
fig, ax = plt.subplots(1, 1, figsize=self.config['plot_settings']['figsize'])
# Support both single target and multiple targets
if 'targets' in self.config:
targets = self.config['targets'] # Multiple targets
else:
targets = [self.config['target']] # Single target (backward compatibility)
problem_groups = self.config['problem_groups']
# First pass: collect all algorithm data for all targets to find global max iteration
all_algorithm_data = {}
global_max_iteration = 0
for algorithm in self.config['algorithms']:
alg_name = algorithm['name']
print(f"Processing algorithm: {alg_name}")
# Collect data for each target
target_data_list = []
for target_pct in targets:
# Get average results over time across problem groups for this target
alg_data = self.calculate_algorithm_average_over_time(alg_name, problem_groups, target_pct)
if alg_data:
target_data_list.append(alg_data)
global_max_iteration = max(global_max_iteration, alg_data['iterations'].max())
if target_data_list:
all_algorithm_data[alg_name] = target_data_list
else:
print(f"No data found for algorithm {alg_name}")
# Second pass: average across targets and extend all data to global max, then plot
for algorithm in self.config['algorithms']:
alg_name = algorithm['name']
alg_label = algorithm['label']
alg_color = algorithm.get('color', '#000000')
linestyle = algorithm.get('linestyle', '-')
if alg_name not in all_algorithm_data:
continue
target_data_list = all_algorithm_data[alg_name]
# Find common iteration grid across all targets for this algorithm
all_iterations = set()
for data in target_data_list:
all_iterations.update(data['iterations'].tolist())
all_iterations.add(global_max_iteration) # Include global max
common_grid = sorted(list(all_iterations))
# Average fractions across targets at each iteration point
averaged_fractions = []
std_fractions = []
for eval_point in common_grid:
fractions_at_eval = []
for data in target_data_list:
# Find the fraction at this evaluation point (or the last known value)
mask = data['iterations'] <= eval_point
if np.any(mask):
last_known_fraction = data['fractions'][mask][-1]
fractions_at_eval.append(last_known_fraction)
else:
# Before any evaluations, fraction is 0
fractions_at_eval.append(0.0)
averaged_fractions.append(np.mean(fractions_at_eval))
std_fractions.append(np.std(fractions_at_eval))
averaged_fractions = np.array(averaged_fractions)
std_fractions = np.array(std_fractions)
# Plot the averaged line
ax.plot(common_grid, averaged_fractions,
color=alg_color,
linestyle=linestyle,
label=alg_label,
linewidth=2,
drawstyle='steps-post') # Step plot like original plotter
# Add fill_between for standard deviation bands (if enabled)
if self.config['plot_settings'].get('show_std', False):
lower_bound = averaged_fractions - std_fractions
upper_bound = averaged_fractions + std_fractions
# Ensure bounds stay within [0, 1] range
lower_bound = np.maximum(lower_bound, 0.0)
upper_bound = np.minimum(upper_bound, 1.0)
alpha_fill = self.config['plot_settings'].get('alpha_fill', 0.2)
ax.fill_between(common_grid,
lower_bound,
upper_bound,
color=alg_color,
alpha=alpha_fill,
step='post')
# Configure plot
ax.set_xlabel(self.config['plot_settings']['xlabel'], fontsize=18)
ax.set_ylabel(self.config['plot_settings']['ylabel'], fontsize=18)
ax.set_title(self.config['plot_settings']['title'], fontsize=20)
# Set logarithmic x-axis if requested
if self.config['plot_settings'].get('log_x', False):
ax.set_xscale('log')
# Set tick label font sizes
ax.tick_params(axis='both', which='major', labelsize=16)
ax.tick_params(axis='both', which='minor', labelsize=14)
# Set y-axis to [-0.1, 1.1] range for better visibility of 0.0 and 1.0 values
ax.set_ylim(-0.1, 1.1)
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.1f}'))
if self.config['plot_settings'].get('grid', True):
ax.grid(True, alpha=0.3)
if self.config['plot_settings'].get('legend', True):
ax.legend(fontsize=14)
plt.tight_layout()
# Save plot
output_file = self.output_dir / f"target_proximity_{self.config['plot_name']}.svg"
plt.savefig(output_file, format='svg', bbox_inches='tight')
print(f"Plot saved to: {output_file}")
def main():
    """Command-line entry point: parse the config path and render the plot."""
    parser = argparse.ArgumentParser(
        description='Plot target proximity analysis for constraint optimization results')
    parser.add_argument('config', help='Path to JSON configuration file')
    cli_args = parser.parse_args()
    TargetProximityPlotter(cli_args.config).create_plot()


if __name__ == '__main__':
    main()