Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
"3.9",
"3.10",
"3.11",
# "3.12", # Numba not available for Python 3.12: https://github.com/numba/numba/issues/9197
"3.12",
]
runs-on: ${{ matrix.os }}
steps:
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies = [
"pandas>=1.1.5",
"scikit-learn>=1.0",
"importlib-metadata >= 1.0; python_version < '3.8'",
"importlib-resources; python_version < '3.9'",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`importlib.resources.files` was added in Python 3.9; `importlib-resources` is the backport that provides the same API on older Python versions.

]

classifiers = [
Expand All @@ -31,6 +32,7 @@ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
Expand Down
22 changes: 14 additions & 8 deletions sklego/datasets.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import os
import sys

import numpy as np
import pandas as pd
from pkg_resources import resource_filename

if sys.version_info >= (3, 9):
import importlib.resources as importlib_resources # pragma: no cover
else:
import importlib_resources as importlib_resources # pragma: no cover


from sklearn.datasets import fetch_openml


Expand Down Expand Up @@ -92,7 +98,7 @@ def load_penguins(return_X_y=False, as_frame=False):

(Accessed 2020-06-08).
"""
filepath = resource_filename("sklego", os.path.join("data", "penguins.zip"))
filepath = importlib_resources.files("sklego") / "data" / "penguins.zip"
df = pd.read_csv(filepath)
if as_frame:
return df
Expand Down Expand Up @@ -151,7 +157,7 @@ def load_arrests(return_X_y=False, as_frame=False):

- Personal communication from Michael Friendly, York University.
"""
filepath = resource_filename("sklego", os.path.join("data", "arrests.zip"))
filepath = importlib_resources.files("sklego") / "data" / "arrests.zip"
df = pd.read_csv(filepath)
if as_frame:
return df
Expand Down Expand Up @@ -198,7 +204,7 @@ def load_chicken(return_X_y=False, as_frame=False):
- Crowder, M. and Hand, D. (1990), Analysis of Repeated Measures, Chapman and Hall (example 5.3)
- Hand, D. and Crowder, M. (1996), Practical Longitudinal Data Analysis, Chapman and Hall (table A.2)
"""
filepath = resource_filename("sklego", os.path.join("data", "chickweight.zip"))
filepath = importlib_resources.files("sklego") / "data" / "chickweight.zip"
df = pd.read_csv(filepath)
if as_frame:
return df
Expand Down Expand Up @@ -244,7 +250,7 @@ def load_abalone(return_X_y=False, as_frame=False):

Sea Fisheries Division, Technical Report No. 48 (ISSN 1034-3288)
"""
filepath = resource_filename("sklego", os.path.join("data", "abalone.zip"))
filepath = importlib_resources.files("sklego") / "data" / "abalone.zip"
df = pd.read_csv(filepath)
if as_frame:
return df
Expand Down Expand Up @@ -294,7 +300,7 @@ def load_heroes(return_X_y=False, as_frame=False):
# Index(['name', 'attack_type', 'role', 'health', 'attack', 'attack_spd'], dtype='object')
```
"""
filepath = resource_filename("sklego", os.path.join("data", "heroes.zip"))
filepath = importlib_resources.files("sklego") / "data" / "heroes.zip"
df = pd.read_csv(filepath)
if as_frame:
return df
Expand Down Expand Up @@ -351,7 +357,7 @@ def load_hearts(return_X_y=False, as_frame=False):
The documentation of the dataset can be viewed at:
https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/heart-disease.names
"""
filepath = resource_filename("sklego", os.path.join("data", "hearts.zip"))
filepath = importlib_resources.files("sklego") / "data" / "hearts.zip"
df = pd.read_csv(filepath)
if as_frame:
return df
Expand Down