# Source code for ramble.results_table

# Copyright 2022-2026 The Ramble Authors
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

import copy
import os

from ramble.util.file_util import create_symlink
from ramble.util.logger import logger
from ramble.util.module_utils import import_pandas
from ramble.util.naming import match_pattern


class ResultsColumn:
    """One column of a results table.

    A column takes its value either from an ``expression`` that is expanded
    against the experiment, or from a ``figure_of_merit`` that is looked up in
    the experiment's results. Configuring both, or neither, is reported
    through ``logger.die``.
    """

    _where_name = "where"
    _column_attrs = [
        "name",
        "expression",
        "figure_of_merit",
        "figure_of_merit_context",
        "figure_of_merit_origin_type",
    ]

    def __init__(self, conf_dict):
        """Build a column from a configuration dict.

        The dict is assumed to follow the column schema in
        lib/ramble/ramble/schema/tables.py

        Args:
            conf_dict (dict): dictionary structured like the column schema
                              in tables.py
        """
        # Every known attribute is stored as a string, or None when absent
        for attr_name in self._column_attrs:
            raw = conf_dict.get(attr_name)
            setattr(self, attr_name, None if raw is None else str(raw))

        if self.expression and self.figure_of_merit:
            logger.die(
                "Results columns cannot have both 'expression' and 'figure_of_merit' attributes"
            )

        if not (self.expression or self.figure_of_merit):
            logger.die(
                "One of either 'expression' or 'figure_of_merit' are required for each "
                "column definition"
            )

        self.where = list(conf_dict[self._where_name]) if self._where_name in conf_dict else []

        # Internal attributes, only populated for auto-generated context columns
        self._context_def_name = conf_dict.get("_context_def_name")
        self._context_vars = conf_dict.get("_context_vars")

    def col_name(self, app_inst):
        """Expand this column's name for the current experiment

        Args:
            app_inst: Instance of an application class to expand name with

        Returns:
            (str): Expanded column name
        """
        expander = app_inst.expander
        return expander.expand_var(self.name)

    def _context_matches(self, context, context_name):
        """Check whether a result context is one this column reads from

        Args:
            context (dict): A single entry of the experiment's result contexts
            context_name: Expanded context name to require, or None for any

        Returns:
            (bool): True if the context passes the name, definition-name and
                    context-variable checks
        """
        if context_name is not None and context_name != context["name"]:
            return False

        if self._context_def_name is not None and self._context_def_name != context.get(
            "context_def_name"
        ):
            return False

        if self._context_vars is None:
            return True

        # Only the variables recorded on the context are compared. Regex groups
        # stored in self._context_vars are not part of the context's own
        # variables, so they never need to match.
        recorded = context.get("context_vars", {})
        return not any(self._context_vars.get(key) != val for key, val in recorded.items())

    def extract_value(self, app_inst, extra_vars=None):
        """Extract this column's value for one experiment

        Args:
            app_inst: Instance of an application class
            extra_vars: Dictionary containing additional variables to expand
                        with (used when expanding ``expression``)

        Returns:
            The expanded expression or the matching figure of merit value.
            None when a ``where`` predicate fails or nothing matches.
        """
        if extra_vars is None:
            extra_vars = {}

        expander = app_inst.expander

        # All predicates are evaluated (no short-circuit) before deciding
        outcomes = [expander.evaluate_predicate(predicate) for predicate in self.where]
        if not all(outcomes):
            return None

        if self.expression:
            return expander.expand_var(self.expression, extra_vars=extra_vars)

        if not self.figure_of_merit:
            return None

        wanted_fom = self.figure_of_merit

        wanted_context = None
        if self.figure_of_merit_context:
            wanted_context = expander.expand_var(self.figure_of_merit_context)

        wanted_origin = None
        if self.figure_of_merit_origin_type:
            wanted_origin = expander.expand_var(self.figure_of_merit_origin_type)

        found = None
        for context in app_inst.result.contexts:
            if not self._context_matches(context, wanted_context):
                continue

            for fom in context["foms"]:
                if fom["name"] != wanted_fom:
                    continue
                if wanted_origin and wanted_origin != fom["origin_type"]:
                    continue

                # The last match wins; earlier matches are only warned about
                if found is not None:
                    logger.warn("Non-unique values found " f"for column {self.name}")
                found = fom["value"]

        return found
class ResultsAutoColumn:
    """Template describing a family of auto-generated columns"""

    _where_name = "where"
    _sort_by_name = "sort_by"
    _column_attrs = [
        "name",
        "context_name",
        "figure_of_merit",
        "figure_of_merit_origin_type",
    ]

    def __init__(self, conf_dict):
        """Build an auto column template from a configuration dict

        Args:
            conf_dict (dict): dictionary structured like the autocolumn schema
        """
        # Known attributes are stored as strings, or None when not configured
        for attr_name in self._column_attrs:
            raw = conf_dict.get(attr_name)
            setattr(self, attr_name, None if raw is None else str(raw))

        self.where = list(conf_dict[self._where_name]) if self._where_name in conf_dict else []

        # sort_by may be given as one entry or as a list of entries
        if self._sort_by_name not in conf_dict:
            self.sort_by = []
        else:
            requested = conf_dict[self._sort_by_name]
            self.sort_by = list(requested) if isinstance(requested, list) else [requested]
class ResultsTable:
    """A single results table based on the tables.py schema

    Rows are accumulated one experiment at a time through ``extract_row`` and
    stored column-wise in ``self._data``. ``to_csv`` converts the accumulated
    data into a pandas dataframe, applies grouping and sorting, and writes it
    out.
    """

    _default_group_method = "max"
    _group_method_name = "group_method"
    _group_by_name = "group_by"
    _sort_by_name = "sort_by"
    _columns_name = "columns"
    _autocolumns_name = "autocolumns"
    _where_name = "where"
    _transpose_name = "transpose"

    def __init__(self, conf_dict):
        """Constructor for a single table

        Args:
            conf_dict (dict): Configuration dictionary based on the table
                              schema in tables.py
        """
        self._num_rows = 0
        self._data = {}

        self.name = conf_dict["name"]
        self.group_method = conf_dict.get(self._group_method_name, self._default_group_method)

        # group_by and sort_by accept either one entry or a list of entries
        self.group_by = self._as_list(conf_dict, self._group_by_name)
        self.sort_by = self._as_list(conf_dict, self._sort_by_name)

        # Build table where statements
        self.where = []
        if self._where_name in conf_dict:
            self.where.extend(conf_dict[self._where_name])

        self.transpose = conf_dict.get(self._transpose_name, False)

        # Build table columns
        self.columns = [
            ResultsColumn(column_config) for column_config in conf_dict.get(self._columns_name, [])
        ]

        # Build table auto columns
        self.autocolumns = [
            ResultsAutoColumn(column_config)
            for column_config in conf_dict.get(self._autocolumns_name, [])
        ]

        # Columns created from autocolumn templates, keyed by expanded name
        self.generated_columns = {}

    @staticmethod
    def _as_list(conf_dict, key):
        """Return ``conf_dict[key]`` as a new list, or ``[]`` if key is absent"""
        if key not in conf_dict:
            return []
        value = conf_dict[key]
        return list(value) if isinstance(value, list) else [value]

    def render(self, app_inst):
        """Create a concrete table from this template for an experiment

        Args:
            app_inst: Instance of an application class used to expand the name

        Returns:
            (ResultsTable): Deep copy of this table whose name is expanded
        """
        new_table = copy.deepcopy(self)
        new_table.name = app_inst.expander.expand_var(self.name)
        return new_table

    def table_name(self, app_inst):
        """Determine the name for this table, based on a given experiment

        Args:
            app_inst: Instance of an application class

        Returns:
            (str): Name of table
        """
        return app_inst.expander.expand_var(self.name)

    def add_where(self, expressions):
        """Add one or more where expressions to this table

        Args:
            expressions (Union[List[str], str]): Predicate expression(s) an
                experiment has to satisfy to be included. Empty or None
                values are ignored.
        """
        if not expressions:
            return

        if isinstance(expressions, list):
            self.where.extend(expressions)
        else:
            self.where.append(expressions)

    def includes_experiment(self, app_inst):
        """Determine if an experiment is included in this table.

        Args:
            app_inst: Instance of an application class

        Returns:
            (bool): True if the app_inst is included in table, False otherwise
        """
        # Every predicate is evaluated, matching the behavior of evaluating
        # the complete where list before deciding.
        outcomes = [app_inst.expander.evaluate_predicate(expr) for expr in self.where]
        return all(outcomes)

    def _discover_autocolumns(self, app_inst):
        """Create generated columns for FOMs matching the autocolumn templates

        New columns are stored in ``self.generated_columns``. Names that
        collide with a manual column, or with an already generated column,
        are skipped.
        """
        manual_col_names = {c.col_name(app_inst) for c in self.columns}

        for template in self.autocolumns:
            outcomes = [app_inst.expander.evaluate_predicate(expr) for expr in template.where]
            if not all(outcomes):
                continue

            for context in app_inst.result.contexts:
                # Try matching against context_def_name first (the type of context)
                matched_ctx, ctx_groups = match_pattern(
                    template.context_name, context.get("context_def_name", "")
                )
                # If no match, try matching against the instance name (the output name)
                if not matched_ctx:
                    matched_ctx, ctx_groups = match_pattern(
                        template.context_name, context.get("name", "")
                    )
                if not matched_ctx:
                    continue

                for fom in context["foms"]:
                    matched_fom, fom_groups = match_pattern(
                        template.figure_of_merit, fom["name"]
                    )
                    if not matched_fom:
                        continue

                    # Variables identifying this (context, fom) pair
                    column_vars = context.get("context_vars", {}).copy()
                    column_vars.update(ctx_groups)
                    column_vars.update(fom_groups)
                    column_vars["context_name"] = context.get("name")

                    # fom_name is only used to expand the column name
                    expand_vars = {**column_vars, "fom_name": fom["name"]}
                    col_name = app_inst.expander.expand_var(
                        template.name, extra_vars=expand_vars
                    )

                    if col_name in manual_col_names or col_name in self.generated_columns:
                        continue

                    col_obj = ResultsColumn(
                        {
                            "name": col_name,
                            "figure_of_merit": fom["name"],
                            "figure_of_merit_context": context["name"],
                            "figure_of_merit_origin_type": (
                                template.figure_of_merit_origin_type
                            ),
                            "where": template.where,
                            "_context_def_name": context.get("context_def_name"),
                            "_context_vars": column_vars,
                        }
                    )
                    # Remember the originating template for per-template sorting
                    col_obj._template = template
                    self.generated_columns[col_name] = col_obj

    @staticmethod
    def _autocolumn_sort_key(col, template):
        """Build the sort key of a generated column from its context variables"""
        key = []
        for var in template.sort_by:
            val = col._context_vars.get(var) if col._context_vars else None
            try:
                val = float(val) if val is not None else float("-inf")
            except (ValueError, TypeError):
                val = str(val)
            # Use a tuple of (type_flag, value) to prevent comparison errors
            # between strings and floats in Python 3
            type_flag = 0 if isinstance(val, (int, float)) else 1
            key.append((type_flag, val))
        return tuple(key)

    def _ordered_columns(self):
        """Return manual columns followed by generated columns per template

        Generated columns of a template are sorted by the template's
        ``sort_by`` variables when any are configured.
        """
        all_columns = self.columns.copy()
        for template in self.autocolumns:
            template_cols = [
                col
                for col in self.generated_columns.values()
                if getattr(col, "_template", None) is template
            ]
            if template.sort_by:
                template_cols.sort(
                    key=lambda col, tmpl=template: self._autocolumn_sort_key(col, tmpl)
                )
            all_columns.extend(template_cols)
        return all_columns

    def extract_row(self, app_inst):
        """Extract a row of data from an experiment

        Args:
            app_inst: Instance of an application class to extract data from
        """
        # Perform discovery for auto columns
        self._discover_autocolumns(app_inst)

        column_values = {}
        remaining_columns = set(self._data.keys())
        all_columns = self._ordered_columns()

        # Re-order self._data to ensure columns appear in the correct sorted order
        new_data = {}
        for column in all_columns:
            col_name = column.col_name(app_inst)
            if col_name in self._data:
                new_data[col_name] = self._data[col_name]
        for existing_name, existing_values in self._data.items():
            if existing_name not in new_data:
                new_data[existing_name] = existing_values
        self._data = new_data

        for column in all_columns:
            # Values extracted so far are available to later expressions
            col_value = column.extract_value(app_inst, extra_vars=column_values)
            if col_value is None:
                continue

            col_name = column.col_name(app_inst)
            if col_name in remaining_columns:
                remaining_columns.remove(col_name)
                self._data[col_name].append(col_value)
            elif col_name not in self._data:
                # New column: back-fill the rows recorded before it appeared
                self._data[col_name] = [None] * self._num_rows + [col_value]
            column_values[col_name] = col_value

        # Columns without a value in this experiment get an empty cell
        for col_name in remaining_columns:
            self._data[col_name].append(None)

        self._num_rows += 1

    def _to_dataframe(self):
        """Construct a pandas dataframe from this table's data"""
        pd = import_pandas()
        self._df = pd.DataFrame(self._data)

        for column in self._df.columns:
            try:
                self._df[column] = pd.to_numeric(self._df[column])
            except (ValueError, TypeError):
                # Leave columns that cannot be converted untouched
                pass

        if self.transpose:
            # If transposed, the original columns become the index,
            # and rows become columns.
            self._df = self._df.transpose()

    def _group_dataframe(self):
        """Apply any grouping to this pandas dataframe

        Grouping is skipped for transposed tables. Unknown group methods fall
        back to ``max``. If a group_by column is missing from the data the
        dataframe is left ungrouped.
        """
        if not self.group_by or self.transpose:
            return

        try:
            # Pass the columns as one list: unpacking them would hand the
            # second and later names to groupby's other positional parameters.
            grouped_df = self._df.groupby(list(self.group_by), as_index=False)
            group_func = getattr(grouped_df, self.group_method, grouped_df.max)
            self._df = group_func()
        except KeyError:
            pass

    def _sort_dataframe(self):
        """Apply any sorting to this pandas dataframe

        Sorting is skipped for transposed tables, and when a sort_by column is
        missing from the data.
        """
        if not self.sort_by or self.transpose:
            return

        try:
            self._df = self._df.sort_values(by=self.sort_by)
        except KeyError:
            pass

    def to_csv(self, directory, timestamp):
        """CSV converter for results table

        Args:
            directory (str): Directory to write tabular data into
            timestamp (str): Timestamp to apply to table output files

        Returns:
            (tuple): Path of the written CSV file, and path of the ``latest``
                     symlink pointing at it
        """
        self._to_dataframe()
        self._group_dataframe()
        self._sort_dataframe()

        file_path = os.path.join(directory, f"{self.name}.{timestamp}.csv")
        latest_path = os.path.join(directory, f"{self.name}.latest.csv")

        # If transposed, the index holds the original column names, so it is
        # written as the first column
        self._df.to_csv(file_path, index=self.transpose)
        create_symlink(file_path, latest_path)

        return file_path, latest_path
class ResultsTables:
    """Collection of results table templates and the tables built from them"""

    def __init__(self):
        # Templates as configured; concrete tables are keyed by expanded name
        self.table_templates = []
        self.tables = {}

    @property
    def num_tables(self):
        """Number of table templates registered in this set"""
        return len(self.table_templates)

    def add_table_template(self, table_conf):
        """Construct a new results table, and add to this set of tables

        Args:
            table_conf (dict): Dictionary configuration of table, assuming
                               table schema from tables.py

        Returns:
            (ResultsTable): New table instance
        """
        template = ResultsTable(table_conf)
        self.table_templates.append(template)
        return template

    def build_tables(self, experiment_set, filters):
        """Extract data for each table in this set

        Args:
            experiment_set: Set of experiments to extract data from
            filters: Filter object to downselect experiments
        """
        for _, app_inst, _ in experiment_set.filtered_experiments(filters):
            for template in self.table_templates:
                if not template.includes_experiment(app_inst):
                    continue

                # One concrete table exists per expanded table name; it is
                # rendered the first time an experiment maps onto that name
                rendered_name = template.table_name(app_inst)
                if rendered_name not in self.tables:
                    self.tables[rendered_name] = template.render(app_inst)

                target = self.tables[rendered_name]
                target.extract_row(app_inst)

    def output_tables(self, directory, timestamp):
        """Output tabular data for each of the tables in this set

        Args:
            directory (str): Directory to write tables into
            timestamp (str): Timestamp to apply to table output files
        """
        written = [table.to_csv(directory, timestamp) for table in self.tables.values()]
        if not written:
            return

        logger.all_msg("Tables written:")
        for table_file, _ in written:
            logger.all_msg(f" {table_file}")

        logger.all_msg("Table symlinks updated:")
        for _, table_symlink in written:
            logger.all_msg(f" {table_symlink}")