Skip to content

Commit 1916ea0

Browse files
Merge pull request #2119 from recommenders-team/fix-eval
Revert and fix python evaluation
2 parents: 5569376 + 66ace3e — commit 1916ea0

2 files changed

Lines changed: 12 additions & 12 deletions

File tree

recommenders/evaluation/python_evaluation.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -680,14 +680,14 @@ def ndcg_at_k(
680680
df_idcg["idcg"] = df_idcg["rel"] / discfun(1 + df_idcg["irank"])
681681

682682
# Calculate the actual DCG for each user
683-
df_user = df_dcg.groupby(col_user, as_index=False, sort=False).agg(dcg="sum")
683+
df_user = df_dcg.groupby(col_user, as_index=False, sort=False).agg({"dcg": "sum"})
684684

685685
# Calculate the ideal DCG for each user
686686
df_user = df_user.merge(
687687
df_idcg.groupby(col_user, as_index=False, sort=False)
688688
.head(k)
689689
.groupby(col_user, as_index=False, sort=False)
690-
.agg(idcg="sum"),
690+
.agg({"idcg": "sum"}),
691691
on=col_user,
692692
)
693693

@@ -726,7 +726,7 @@ def _get_reciprocal_rank(
726726
df_hit_sorted["rr"] = (
727727
df_hit_sorted.groupby(col_user).cumcount() + 1
728728
) / df_hit_sorted["rank"]
729-
df_hit_sorted = df_hit_sorted.groupby(col_user).agg(rr="sum").reset_index()
729+
df_hit_sorted = df_hit_sorted.groupby(col_user).agg({"rr": "sum"}).reset_index()
730730

731731
return pd.merge(df_hit_sorted, df_hit_count, on=col_user), n_users
732732

@@ -1235,7 +1235,7 @@ def _get_intralist_similarity(
12351235
item_pair_sim["i1"] != item_pair_sim["i2"]
12361236
].reset_index(drop=True)
12371237
df_intralist_similarity = (
1238-
item_pair_sim.groupby([col_user]).agg(**{col_sim: "mean"}).reset_index()
1238+
item_pair_sim.groupby([col_user]).agg({col_sim: "mean"}).reset_index()
12391239
)
12401240
df_intralist_similarity.columns = [col_user, "avg_il_sim"]
12411241

@@ -1345,7 +1345,7 @@ def diversity(
13451345
col_item,
13461346
col_sim,
13471347
)
1348-
avg_diversity = df_user_diversity.agg(user_diversity="mean")[0]
1348+
avg_diversity = df_user_diversity.agg({"user_diversity": "mean"})[0]
13491349
return avg_diversity
13501350

13511351

@@ -1432,7 +1432,7 @@ def novelty(train_df, reco_df, col_user=DEFAULT_USER_COL, col_item=DEFAULT_ITEM_
14321432
reco_item_novelty["product"] = (
14331433
reco_item_novelty["count"] * reco_item_novelty["item_novelty"]
14341434
)
1435-
avg_novelty = reco_item_novelty.agg(product="sum")[0] / n_recommendations
1435+
avg_novelty = reco_item_novelty.agg({"product": "sum"})[0] / n_recommendations
14361436

14371437
return avg_novelty
14381438

@@ -1512,7 +1512,7 @@ def user_item_serendipity(
15121512

15131513
reco_user_item_avg_sim = (
15141514
reco_train_user_item_sim.groupby([col_user, col_item])
1515-
.agg(**{col_sim: "mean"})
1515+
.agg({col_sim: "mean"})
15161516
.reset_index()
15171517
)
15181518
reco_user_item_avg_sim.columns = [
@@ -1582,7 +1582,7 @@ def user_serendipity(
15821582
)
15831583
df_user_serendipity = (
15841584
df_user_item_serendipity.groupby(col_user)
1585-
.agg(user_item_serendipity="mean")
1585+
.agg({"user_item_serendipity": "mean"})
15861586
.reset_index()
15871587
)
15881588
df_user_serendipity.columns = [col_user, "user_serendipity"]
@@ -1636,7 +1636,7 @@ def serendipity(
16361636
col_sim,
16371637
col_relevance,
16381638
)
1639-
avg_serendipity = df_user_serendipity.agg(user_serendipity="mean")[0]
1639+
avg_serendipity = df_user_serendipity.agg({"user_serendipity": "mean"})[0]
16401640
return avg_serendipity
16411641

16421642

@@ -1711,6 +1711,6 @@ def distributional_coverage(
17111711
df_entropy["p(i)"] = df_entropy["count"] / count_row_reco
17121712
df_entropy["entropy(i)"] = df_entropy["p(i)"] * np.log2(df_entropy["p(i)"])
17131713

1714-
d_coverage = -df_entropy.agg(**{"entropy(i)": "sum"})[0]
1714+
d_coverage = -df_entropy.agg({"entropy(i)": "sum"})[0]
17151715

17161716
return d_coverage

recommenders/evaluation/spark_evaluation.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -761,7 +761,7 @@ def diversity(self):
761761
if self.avg_diversity is None:
762762
self.df_user_diversity = self.user_diversity()
763763
self.avg_diversity = self.df_user_diversity.agg(
764-
user_diversity="mean"
764+
{"user_diversity": "mean"}
765765
).first()[0]
766766
return self.avg_diversity
767767

@@ -904,7 +904,7 @@ def serendipity(self):
904904
if self.avg_serendipity is None:
905905
self.df_user_serendipity = self.user_serendipity()
906906
self.avg_serendipity = self.df_user_serendipity.agg(
907-
user_serendipity="mean"
907+
{"user_serendipity": "mean"}
908908
).first()[0]
909909
return self.avg_serendipity
910910

0 commit comments

Comments
 (0)