Coverage for src/ensae_projects/hackathon/hack2020.py: 82%

34 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-07-20 04:37 +0200

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Code for :ref:`l-hackathon-2020`. 

5""" 

6import os # pylint: disable=W0611 

7import numpy # pylint: disable=W0611 

8import pandas # pylint: disable=W0611 

9from sklearn.metrics import accuracy_score, log_loss 

10 

11 

def score_images(df_expected, df_prediction):
    """
    Scores the predictions for images.

    @param      df_expected     dataframe with exactly two columns,
                                the image name followed by the expected label
    @param      df_prediction   dataframe with exactly three columns,
                                the image name, the predicted label (same
                                column name as in *df_expected*) and a
                                column named ``'score'``
    @return                     dictionary with keys ``'accuracy'`` and
                                ``'logloss'``

    Raises *ValueError* if one of the dataframes does not have the
    expected columns or if image names are duplicated in either of them.

    Both dataframes are joined on the image name with a left join from
    *df_expected*: predictions about images which are not part of the
    expected set are ignored, expected images without any prediction
    receive label 0 and the lowest observed score (capped at 0).
    """
    score = 'score'
    if df_expected.shape[1] != 2:
        raise ValueError("Expected 2 columns for the expected dataframe.")
    if df_prediction.shape[1] != 3:
        raise ValueError(
            "Expected 3 columns for the prediction dataframe not %s." % list(
                df_prediction.columns))
    # The label column must have the same name on both sides and the last
    # column of the predictions must be the score, otherwise the columns
    # read after the merge do not exist.
    if (df_expected.columns[1] != df_prediction.columns[1] or
            df_prediction.columns[2] != score):
        raise ValueError(
            "Predictions must have the following columns %r but "
            "has %r." % (list(df_expected.columns) + [score],
                         list(df_prediction.columns)))
    name_left = df_expected.columns[0]
    name_right = df_prediction.columns[0]
    label = df_prediction.columns[1]
    if len(set(df_expected[name_left])) != df_expected.shape[0]:
        raise ValueError("Names in expected values are not unique.")
    if len(set(df_prediction[name_right])) != df_prediction.shape[0]:
        raise ValueError("Names in predictions are not unique.")

    merged = df_expected.merge(df_prediction, left_on=name_left,
                               right_on=name_right, how='left')
    # The label column exists in both dataframes, pandas suffixes it with
    # _x (expected side) and _y (prediction side).
    lx = label + "_x"
    ly = label + "_y"
    merged[ly] = merged[ly].fillna(0)

    # Missing scores are replaced by the lowest known score, never above 0.
    # Series.min skips missing values and returns NaN when there is no
    # score at all, in which case 0 is used as well.
    mini = merged[score].min()
    if numpy.isnan(mini) or mini > 0:
        mini = 0
    merged[score] = merged[score].fillna(mini)

    acc = accuracy_score(merged[lx], merged[ly])
    ll = log_loss(merged[lx], merged[score])
    return dict(accuracy=acc, logloss=ll)