Coverage for src/ensae_projects/hackathon/hack2020.py: 82%
34 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-07-20 04:37 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-07-20 04:37 +0200
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Code for :ref:`l-hackathon-2020`.
5"""
6import os # pylint: disable=W0611
7import numpy # pylint: disable=W0611
8import pandas # pylint: disable=W0611
9from sklearn.metrics import accuracy_score, log_loss
def score_images(df_expected, df_prediction):
    """
    Scores the predictions for images.

    @param      df_expected     dataframe with exactly two columns: the image
                                names (unique) and the expected labels
    @param      df_prediction   dataframe with exactly three columns: the
                                image names (unique), the predicted labels
                                (the column must be named like the label
                                column of *df_expected*) and a column
                                named ``'score'``
    @return                     dictionary with keys ``'accuracy'`` and
                                ``'logloss'``

    Both dataframes are joined on their first column with a left join
    starting from *df_expected*. Predictions whose name is not in
    *df_expected* are therefore ignored, and expected images without a
    prediction get label ``0`` and the lowest observed score (capped at
    ``0``, and ``0`` when no score is available at all).

    A ``ValueError`` is raised if one of the dataframes does not have the
    expected columns or if image names are duplicated.
    """
    score = 'score'
    if df_expected.shape[1] != 2:
        raise ValueError("Expected 2 columns for the expected dataframe.")
    if df_prediction.shape[1] != 3:
        raise ValueError(
            "Expected 3 columns for the prediction dataframe not %s." % list(
                df_prediction.columns))
    # The score column is read by name below: checking it here gives a clear
    # error instead of a KeyError raised after the merge.
    if (df_expected.columns[1] != df_prediction.columns[1] or
            df_prediction.columns[2] != score):
        raise ValueError(
            "Predictions must have the following columns %r but "
            "has %r." % (list(df_expected.columns) + [score],
                         list(df_prediction.columns)))

    name_left = df_expected.columns[0]
    name_right = df_prediction.columns[0]
    label = df_prediction.columns[1]
    if len(set(df_expected[name_left])) != df_expected.shape[0]:
        raise ValueError("Names in expected values are not unique.")
    if len(set(df_prediction[name_right])) != df_prediction.shape[0]:
        raise ValueError("Names in predictions are not unique.")

    # Left join: one row per expected image, NaN where no prediction matches.
    merged = df_expected.merge(df_prediction, left_on=name_left,
                               right_on=name_right, how='left')

    # Both frames share the label column name, pandas disambiguates it with
    # its default suffixes: "_x" is the expected label, "_y" the predicted one.
    lx = label + "_x"
    ly = label + "_y"
    merged[ly] = merged[ly].fillna(0)

    # Missing scores receive the smallest known score, never above 0.
    # Without any known score, the minimum would be NaN, 0 is used instead.
    known = merged[score].dropna()
    mini = min(known.min(), 0) if len(known) > 0 else 0
    merged[score] = merged[score].fillna(mini)

    acc = accuracy_score(merged[lx], merged[ly])
    ll = log_loss(merged[lx], merged[score])
    return dict(accuracy=acc, logloss=ll)