Source code for cacp.info
import pathlib
import typing
from pathlib import Path
import pandas as pd
import river.datasets.base
from cacp.dataset import ClassificationDatasetBase
from cacp.util import to_latex
def dataset_info(datasets: typing.Iterable[typing.Union[ClassificationDatasetBase, river.datasets.base.Dataset]],
                 result_dir: Path) -> None:
    """
    Produces results files with list of all datasets used in experiment along with their attributes.

    Every dataset is iterated once as a stream of ``(x, y)`` pairs in order to
    count its instances, the number of features of a sample and the number of
    distinct labels. The summary is written to ``<result_dir>/info/datasets.csv``
    and ``<result_dir>/info/datasets.tex``; the ``info`` directory is created
    when it does not exist.

    :param datasets: dataset collection
    :param result_dir: results directory
    """
    columns = ['Dataset', 'Instances', 'Features', 'Classes']
    records = []
    for dataset in datasets:
        # Prefer an explicit ``name`` attribute, fall back to the lower-cased class name.
        name = dataset.__class__.__name__.lower()
        if hasattr(dataset, "name"):
            name = dataset.name

        # Defaults describe an empty dataset: no instances, no features, no classes.
        instances = 0
        x = {}
        labels = set()
        # Counting from 1 makes ``instances`` the real sample count once the loop ends.
        for instances, (x, y) in enumerate(dataset, start=1):
            labels.add(y)

        records.append({
            'Dataset': name,
            'Instances': instances,
            # Feature count is taken from the last sample seen.
            'Features': len(x),
            'Classes': len(labels),
        })

    # Explicit columns keep the header stable even when no dataset was given.
    df = pd.DataFrame(records, columns=columns)
    df.index += 1  # 1-based row numbering in the exported tables

    info_dir = result_dir.joinpath('info')
    info_dir.mkdir(exist_ok=True, parents=True)
    df.to_csv(info_dir.joinpath('datasets.csv'), index=True)

    tex = to_latex(
        df,
        caption='Datasets used to perform experiments',
        label='tab:datasets'
    )
    # Context manager guarantees the file is flushed and closed.
    with info_dir.joinpath('datasets.tex').open('w') as f:
        f.write(tex)
def classifier_info(classifiers: typing.Iterable[typing.Tuple[str, typing.Callable]],
                    result_dir: pathlib.Path) -> None:
    """
    Produces results files with list of all classifiers used in experiment along with their attributes.

    Each factory is called once to obtain an instance; the instance's class
    name and the top-level package of its module are recorded. The package
    determines the classifier type: ``sklearn`` is ``BATCH``, ``skmultiflow``
    and ``river`` are ``INCREMENTAL``, anything else is ``CUSTOM``. The
    de-duplicated table, sorted by type and name, is written to
    ``<result_dir>/info/classifiers.csv`` and
    ``<result_dir>/info/classifiers.tex``; the ``info`` directory is created
    when it does not exist.

    :param classifiers: classifiers collection
    :param result_dir: results directory
    """
    # Top-level package name -> classifier type; unknown packages are 'CUSTOM'.
    type_by_library = {
        'sklearn': 'BATCH',
        'skmultiflow': 'INCREMENTAL',
        'river': 'INCREMENTAL',
    }
    columns = ['Name', 'Class Name', 'Library', 'Type']

    records = []
    for cn, c in classifiers:
        # NOTE(review): the factory is invoked with two placeholder arguments (2, 2),
        # presumably feature and class counts - confirm against the factory contract.
        cz = c(2, 2).__class__
        library = cz.__module__.split('.')[0]
        records.append({
            'Name': cn,
            'Class Name': cz.__name__,
            'Library': library,
            'Type': type_by_library.get(library, 'CUSTOM'),
        })

    # Explicit columns keep sort_values from raising KeyError on an empty collection.
    df = pd.DataFrame(records, columns=columns)
    df = df.drop_duplicates()
    df = df.sort_values(['Type', 'Name'])
    df = df.reset_index(drop=True)
    df.index += 1  # 1-based row numbering in the exported tables

    info_dir = result_dir.joinpath('info')
    info_dir.mkdir(exist_ok=True, parents=True)
    df.to_csv(info_dir.joinpath('classifiers.csv'), index=True)

    tex = to_latex(
        df,
        caption='Classifiers used to perform experiments',
        label='tab:algorithm'
    )
    # Context manager guarantees the file is flushed and closed.
    with info_dir.joinpath('classifiers.tex').open('w') as f:
        f.write(tex)