Source code for cacp.winner
import typing
from pathlib import Path
import pandas as pd
from cacp.comparison import DEFAULT_METRICS
from cacp.util import to_latex
def process_comparison_result_winners_for_metric(metric: str, result_dir: Path) -> pd.DataFrame:
    """
    Processes comparison results, finds winners for metric.

    Reads ``comparison.csv`` from ``result_dir``. For every dataset the algorithms
    are ranked by their mean value of ``metric`` (descending), and the number of
    datasets on which each algorithm took 1st, 2nd and 3rd place is counted.
    The resulting table is written to ``winner/<metric lowercased>/`` inside
    ``result_dir`` as ``comparison_result.csv`` and ``comparison_result.tex``.

    :param metric: comparison metric {auc, accuracy, precision, recall, f1}
    :param result_dir: results directory
    :return: DataFrame with winners for metric
    """
    df = pd.read_csv(result_dir.joinpath('comparison.csv'))
    algorithms = df['Algorithm'].unique()
    # At most three podium places; fewer when fewer algorithms were compared.
    places = list(range(min(len(algorithms), 3)))
    winner_dir = result_dir.joinpath('winner').joinpath(metric.lower())
    winner_dir.mkdir(exist_ok=True, parents=True)

    # counts[place][algorithm] -> number of datasets where algorithm took that place.
    counts = [{a: 0 for a in algorithms} for _ in places]
    for _, df_d in df.groupby('Dataset'):
        # Rank the algorithms once per dataset and reuse the ranking for every place.
        ranking = (
            df_d.groupby('Algorithm')
            .mean(numeric_only=True)
            .sort_values(by=[metric], ascending=False)
            .index
        )
        # zip() stops at the shorter sequence, so a dataset evaluated with fewer
        # algorithms than there are places simply awards fewer places instead of
        # raising IndexError.
        for place, algorithm in zip(places, ranking):
            counts[place][algorithm] += 1

    rows = [
        [algorithm] + [counts[place][algorithm] for place in places]
        for algorithm in algorithms
    ]
    columns = ['Algorithm'] + ['1st', '2nd', '3rd'][: len(places)]
    df_r = pd.DataFrame(columns=columns, data=rows)
    if places:
        # The '1st' column exists only when at least one algorithm is present.
        df_r = df_r.sort_values(by=['1st'], ascending=False)
    df_r.reset_index(drop=True, inplace=True)
    df_r.index += 1  # 1-based ranking positions in the exported tables

    df_r.to_csv(winner_dir.joinpath('comparison_result.csv'), index=True)
    # write_text opens, writes and closes the file; the previous
    # open('w').write(...) left the handle unclosed.
    winner_dir.joinpath('comparison_result.tex').write_text(
        to_latex(
            df_r,
            caption=f'Ranking of compared algorithms for {metric}',
            label=f'tab:places_{metric}',
        )
    )
    return df_r
def process_comparison_result_winners(result_dir: Path,
                                      metrics: typing.Sequence[typing.Tuple[str, typing.Callable]] = DEFAULT_METRICS):
    """
    Processes comparison results, finds winners.

    Runs :func:`process_comparison_result_winners_for_metric` for every metric and
    joins the per-metric place counts into one table with columns named
    ``'<metric> <place>'``. The table is sorted by all count columns (descending)
    and written to ``winner/comparison.csv`` and ``winner/comparison.tex`` inside
    ``result_dir``. When ``metrics`` is empty nothing is written.

    :param result_dir: results directory
    :param metrics: metrics collection
    """
    winner_dir = result_dir.joinpath('winner')
    wins_df = None
    for metric, _ in metrics:
        # Sort by algorithm name so rows line up across metrics when the count
        # columns are copied positionally below.
        metric_wins = process_comparison_result_winners_for_metric(metric, result_dir).sort_values(by=['Algorithm'])
        if wins_df is None:
            wins_df = metric_wins[['Algorithm']].copy()
        for c in metric_wins.columns[1:]:
            wins_df[f'{metric} {c}'] = metric_wins[c].values

    if wins_df is None:
        # No metrics were supplied: there is no ranking to export. Previously this
        # fell through and failed with AttributeError on None.
        return

    winner_dir.mkdir(exist_ok=True, parents=True)
    wins_df = wins_df.sort_values(by=wins_df.columns[1:].values.tolist(), ascending=False)
    wins_df.reset_index(drop=True, inplace=True)
    wins_df.index += 1  # 1-based ranking positions in the exported tables

    wins_df.to_csv(winner_dir.joinpath('comparison.csv'), index=True)
    # write_text opens, writes and closes the file; the previous
    # open('w').write(...) left the handle unclosed.
    winner_dir.joinpath('comparison.tex').write_text(
        to_latex(
            wins_df,
            caption='Ranking of compared algorithms',
            label='tab:places',
        )
    )