Source code for bob.ip.binseg.utils.table
#!/usr/bin/env python
# coding=utf-8
import tabulate
from .measure import auc
def performance_table(data, fmt):
    """Tables result comparison in a given format

    One row is produced per entry of ``data``, with the columns ``Dataset``,
    ``T``, ``E(F1)``, ``CI(F1)``, ``AUC`` and ``CI(AUC)``.


    Parameters
    ----------

    data : dict
        A dictionary in which keys are strings defining dataset labels (used
        verbatim in the ``Dataset`` column) and values are dictionaries with
        two entries:

        * ``df``: :py:class:`pandas.DataFrame`

          A dataframe that is produced by our evaluator engine, indexed by
          integer "thresholds".  Only the following columns are read here:
          ``mean_f1_score``, ``lower_f1_score``, ``upper_f1_score``,
          ``mean_recall``, ``mean_precision``, ``lower_recall``,
          ``lower_precision``, ``upper_recall`` and ``upper_precision``.

        * ``threshold``: :py:class:`float`

          The threshold at which the F1-score statistics are reported.  It
          is multiplied by the number of rows in ``df`` and rounded to
          select a row *by position*; the resulting position is clamped to
          the valid range of rows.  The value is also reported, as given, in
          the ``T`` column.

    fmt : str
        One of the formats supported by tabulate.


    Returns
    -------

    table : str
        A table in a specific format


    Raises
    ------

    ValueError
        If one of the dataframes has no rows, in which case no statistics
        can be selected for it.

    """

    headers = [
        "Dataset",
        "T",
        "E(F1)",
        "CI(F1)",
        "AUC",
        "CI(AUC)",
    ]

    table = []
    for k, v in data.items():
        df = v["df"]
        threshold = v["threshold"]

        bins = len(df)
        if bins == 0:
            raise ValueError(
                f"Dataframe for '{k}' is empty - cannot select statistics "
                f"for threshold {threshold}"
            )

        # statistics based on the "assigned" threshold (a priori, less biased)
        # the position is clamped on both ends: the upper clamp avoids
        # out-of-range indexing, the lower one prevents a negative value from
        # wrapping around to the end of the dataframe
        index = int(round(bins * threshold))
        index = max(0, min(index, bins - 1))

        # ``index`` is a row position (not an index label), so use ``iloc``;
        # this does not depend on how the dataframe index is labelled
        mean_f1 = df["mean_f1_score"].iloc[index]
        lower_f1 = df["lower_f1_score"].iloc[index]
        upper_f1 = df["upper_f1_score"].iloc[index]

        # AUC PR curve
        mean_auc = auc(
            df["mean_recall"].to_numpy(), df["mean_precision"].to_numpy()
        )
        lower_auc = auc(
            df["lower_recall"].to_numpy(), df["lower_precision"].to_numpy()
        )
        upper_auc = auc(
            df["upper_recall"].to_numpy(), df["upper_precision"].to_numpy()
        )

        table.append(
            [
                k,
                threshold,
                mean_f1,
                f"{lower_f1:.3f}-{upper_f1:.3f}",
                mean_auc,
                f"{lower_auc:.3f}-{upper_auc:.3f}",
            ]
        )

    return tabulate.tabulate(
        table, headers, tablefmt=fmt, floatfmt=".3f", stralign="right"
    )