import mmcv
import numpy as np
from lyft_dataset_sdk.eval.detection.mAP_evaluation import (Box3D, get_ap,
get_class_names,
get_ious,
group_by_key,
wrap_in_box)
from mmcv.utils import print_log
from os import path as osp
from terminaltables import AsciiTable


def load_lyft_gts(lyft, data_root, eval_split, logger=None):
"""Loads ground truth boxes from database.
Args:
lyft (:obj:`LyftDataset`): Lyft class in the sdk.
data_root (str): Root of data for reading splits.
eval_split (str): Name of the split for evaluation.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
Returns:
list[dict]: List of annotation dictionaries.
"""
split_scenes = mmcv.list_from_file(
osp.join(data_root, f'{eval_split}.txt'))
# Read out all sample_tokens in DB.
sample_tokens_all = [s['token'] for s in lyft.sample]
assert len(sample_tokens_all) > 0, 'Error: Database has no samples!'
if eval_split == 'test':
# Check that you aren't trying to cheat :)
        assert len(lyft.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set ' \
            'but you do not have the annotations!'
sample_tokens = []
for sample_token in sample_tokens_all:
scene_token = lyft.get('sample', sample_token)['scene_token']
scene_record = lyft.get('scene', scene_token)
if scene_record['name'] in split_scenes:
sample_tokens.append(sample_token)
all_annotations = []
print_log('Loading ground truth annotations...', logger=logger)
# Load annotations and filter predictions and annotations.
for sample_token in mmcv.track_iter_progress(sample_tokens):
sample = lyft.get('sample', sample_token)
sample_annotation_tokens = sample['anns']
for sample_annotation_token in sample_annotation_tokens:
# Get label name in detection task and filter unused labels.
sample_annotation = \
lyft.get('sample_annotation', sample_annotation_token)
detection_name = sample_annotation['category_name']
if detection_name is None:
continue
annotation = {
'sample_token': sample_token,
'translation': sample_annotation['translation'],
'size': sample_annotation['size'],
'rotation': sample_annotation['rotation'],
'name': detection_name,
}
all_annotations.append(annotation)
return all_annotations
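
# A minimal usage sketch for load_lyft_gts (illustrative only; the SDK import
# path is real, but the data paths and the 'val' split name are assumptions
# about the local setup, and data_root must contain a '{eval_split}.txt' file
# listing the scenes of that split):
#
#     from lyft_dataset_sdk.lyftdataset import LyftDataset
#     lyft = LyftDataset(data_path='data/lyft',
#                        json_path='data/lyft/v1.01-train')
#     gts = load_lyft_gts(lyft, 'data/lyft', 'val')
#     # each entry holds sample_token, translation, size, rotation and name
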
def load_lyft_predictions(res_path):
"""Load Lyft predictions from json file.
Args:
res_path (str): Path of result json file recording detections.
Returns:
list[dict]: List of prediction dictionaries.
"""
predictions = mmcv.load(res_path)
predictions = predictions['results']
all_preds = []
for sample_token in predictions.keys():
all_preds.extend(predictions[sample_token])
return all_preds
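
# The json consumed above is expected to look like the sketch below (the token
# and numbers are placeholders): a top-level 'results' dict keyed by
# sample_token, each value being a list of box dicts in the format documented
# in get_classwise_aps further down.
#
#     {
#         "results": {
#             "<sample_token>": [
#                 {
#                     "sample_token": "<sample_token>",
#                     "translation": [971.8, 1713.7, -25.8],
#                     "size": [2.5, 7.8, 3.5],
#                     "rotation": [0.109, 0.041, 0.019, 1.029],
#                     "name": "car",
#                     "score": 0.31
#                 }
#             ]
#         }
#     }
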
def lyft_eval(lyft, data_root, res_path, eval_set, output_dir, logger=None):
"""Evaluation API for Lyft dataset.
Args:
lyft (:obj:`LyftDataset`): Lyft class in the sdk.
data_root (str): Root of data for reading splits.
res_path (str): Path of result json file recording detections.
eval_set (str): Name of the split for evaluation.
output_dir (str): Output directory for output json files.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
Returns:
dict[str, float]: The evaluation results.
"""
# evaluate by lyft metrics
gts = load_lyft_gts(lyft, data_root, eval_set, logger)
predictions = load_lyft_predictions(res_path)
class_names = get_class_names(gts)
    print_log('Calculating mAP@0.5:0.95...', logger=logger)
iou_thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
metrics = {}
average_precisions = \
get_classwise_aps(gts, predictions, class_names, iou_thresholds)
mAPs = np.mean(average_precisions, axis=0)
mAPs_cate = np.mean(average_precisions, axis=1)
final_mAP = np.mean(mAPs)
metrics['average_precisions'] = average_precisions.tolist()
metrics['mAPs'] = mAPs.tolist()
metrics['Final mAP'] = float(final_mAP)
metrics['class_names'] = class_names
metrics['mAPs_cate'] = mAPs_cate.tolist()
APs_data = [['class', 'mAP@0.5:0.95']]
for i in range(len(class_names)):
row = [class_names[i], round(mAPs_cate[i], 3)]
APs_data.append(row)
APs_data.append(['Overall', round(final_mAP, 3)])
APs_table = AsciiTable(APs_data, title='mAPs@0.5:0.95')
APs_table.inner_footing_row_border = True
print_log(APs_table.table, logger=logger)
res_path = osp.join(output_dir, 'lyft_metrics.json')
mmcv.dump(metrics, res_path)
return metrics
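
# End-to-end sketch of the evaluation API (paths, json version and split name
# are assumptions; only lyft_eval itself is defined in this module):
#
#     from lyft_dataset_sdk.lyftdataset import LyftDataset
#     lyft = LyftDataset(data_path='data/lyft',
#                        json_path='data/lyft/v1.01-train')
#     metrics = lyft_eval(lyft, 'data/lyft', 'results/results_lyft.json',
#                         'val', 'results')
#     print(metrics['Final mAP'])
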
def get_classwise_aps(gt, predictions, class_names, iou_thresholds):
"""Returns an array with an average precision per class.
Note: Ground truth and predictions should have the following format.
.. code-block::
gt = [{
'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207
fbb039a550991a5149214f98cec136ac',
'translation': [974.2811881299899, 1714.6815014457964,
-23.689857123368846],
'size': [1.796, 4.488, 1.664],
'rotation': [0.14882026466054782, 0, 0, 0.9888642620837121],
'name': 'car'
}]
predictions = [{
'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207
fbb039a550991a5149214f98cec136ac',
'translation': [971.8343488872263, 1713.6816097857359,
-25.82534357061308],
'size': [2.519726579986132, 7.810161372666739, 3.483438286096803],
'rotation': [0.10913582721095375, 0.04099572636992043,
0.01927712319721745, 1.029328402625659],
'name': 'car',
'score': 0.3077029437237213
}]
    Args:
        gt (list[dict]): list of dictionaries in the format described above.
        predictions (list[dict]): list of dictionaries in the format
            described above.
        class_names (list[str]): list of the class names.
        iou_thresholds (list[float]): IOU thresholds used to calculate
            TP / FN.

    Returns:
        np.ndarray: an array with an average precision per class.
    """
assert all([0 <= iou_th <= 1 for iou_th in iou_thresholds])
gt_by_class_name = group_by_key(gt, 'name')
pred_by_class_name = group_by_key(predictions, 'name')
average_precisions = np.zeros((len(class_names), len(iou_thresholds)))
for class_id, class_name in enumerate(class_names):
if class_name in pred_by_class_name:
recalls, precisions, average_precision = get_single_class_aps(
gt_by_class_name[class_name], pred_by_class_name[class_name],
iou_thresholds)
average_precisions[class_id, :] = average_precision
return average_precisions
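
# Toy example of the classwise AP computation (values are made up): a single
# ground-truth car and one identical prediction give an AP of 1.0 at every
# threshold, so the returned array has shape (num_classes, num_thresholds).
#
#     gt = [{'sample_token': 'token0',
#            'translation': [10.0, 5.0, -1.0],
#            'size': [1.8, 4.5, 1.6],
#            'rotation': [1.0, 0.0, 0.0, 0.0],
#            'name': 'car'}]
#     pred = [dict(gt[0], score=0.9)]
#     aps = get_classwise_aps(gt, pred, ['car'], [0.5, 0.75])  # -> [[1.0, 1.0]]
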
def get_single_class_aps(gt, predictions, iou_thresholds):
"""Compute recall and precision for all iou thresholds. Adapted from
LyftDatasetDevkit.
Args:
gt (list[dict]): list of dictionaries in the format described above.
predictions (list[dict]): list of dictionaries in the format \
described below.
iou_thresholds (list[float]): IOU thresholds used to calculate \
TP / FN
Returns:
tuple[np.ndarray]: Returns (recalls, precisions, average precisions)
for each class.
"""
num_gts = len(gt)
image_gts = group_by_key(gt, 'sample_token')
image_gts = wrap_in_box(image_gts)
sample_gt_checked = {
sample_token: np.zeros((len(boxes), len(iou_thresholds)))
for sample_token, boxes in image_gts.items()
}
predictions = sorted(predictions, key=lambda x: x['score'], reverse=True)
# go down dets and mark TPs and FPs
num_predictions = len(predictions)
tps = np.zeros((num_predictions, len(iou_thresholds)))
fps = np.zeros((num_predictions, len(iou_thresholds)))
for prediction_index, prediction in enumerate(predictions):
predicted_box = Box3D(**prediction)
sample_token = prediction['sample_token']
max_overlap = -np.inf
jmax = -1
if sample_token in image_gts:
gt_boxes = image_gts[sample_token]
# gt_boxes per sample
gt_checked = sample_gt_checked[sample_token]
# gt flags per sample
else:
gt_boxes = []
gt_checked = None
if len(gt_boxes) > 0:
overlaps = get_ious(gt_boxes, predicted_box)
max_overlap = np.max(overlaps)
jmax = np.argmax(overlaps)
for i, iou_threshold in enumerate(iou_thresholds):
if max_overlap > iou_threshold:
if gt_checked[jmax, i] == 0:
tps[prediction_index, i] = 1.0
gt_checked[jmax, i] = 1
else:
fps[prediction_index, i] = 1.0
else:
fps[prediction_index, i] = 1.0
# compute precision recall
fps = np.cumsum(fps, axis=0)
tps = np.cumsum(tps, axis=0)
recalls = tps / float(num_gts)
# avoid divide by zero in case the first detection
# matches a difficult ground truth
precisions = tps / np.maximum(tps + fps, np.finfo(np.float64).eps)
aps = []
for i in range(len(iou_thresholds)):
recall = recalls[:, i]
precision = precisions[:, i]
assert np.all(0 <= recall) & np.all(recall <= 1)
assert np.all(0 <= precision) & np.all(precision <= 1)
ap = get_ap(recall, precision)
aps.append(ap)
aps = np.array(aps)
return recalls, precisions, aps
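
# Worked sketch of the matching loop above: with two ground-truth boxes and
# three predictions sorted by score, where only the first and third predictions
# overlap a previously unmatched ground truth beyond the threshold, one column
# of the arrays becomes
#
#     tps = [1, 0, 1] -> cumsum [1, 1, 2]
#     fps = [0, 1, 0] -> cumsum [0, 1, 1]
#     recalls    = [0.5, 0.5, 1.0]   # tps / num_gts, with num_gts = 2
#     precisions = [1.0, 0.5, 2/3]   # tps / (tps + fps)
#
# and get_ap then integrates this precision-recall curve per IoU threshold.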