Skip to content
13 changes: 13 additions & 0 deletions src/datasets/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import platform
from pathlib import Path
from typing import Optional

from packaging import version

Expand Down Expand Up @@ -31,7 +32,9 @@

# Accepted spellings for boolean-like environment variables.
# Only upper-case spellings are listed: the lookups visible in this module
# upper-case the raw value before testing membership in these sets.
ENV_VARS_TRUE_VALUES = {
    "1",
    "ON",
    "YES",
    "TRUE",
}
ENV_VARS_FALSE_VALUES = {
    "0",
    "OFF",
    "NO",
    "FALSE",
}
# Same sets extended with "AUTO", built as new sets so the base sets above
# are left unmodified.
ENV_VARS_TRUE_AND_AUTO_VALUES = {*ENV_VARS_TRUE_VALUES, "AUTO"}
ENV_VARS_FALSE_AND_AUTO_VALUES = {*ENV_VARS_FALSE_VALUES, "AUTO"}


# Imports
Expand Down Expand Up @@ -170,6 +173,16 @@
os.environ.get("HF_UPDATE_DOWNLOAD_COUNTS", "AUTO").upper() in ENV_VARS_TRUE_AND_AUTO_VALUES
)

# Remote dataset scripts support
# HF_TRUST_REMOTE_CODE is upper-cased (defaulting to "AUTO" when unset) and
# mapped to a tri-state flag:
#   True  -> value is in ENV_VARS_TRUE_AND_AUTO_VALUES (this includes the "AUTO" default)
#   False -> value is in ENV_VARS_FALSE_VALUES
#   None  -> any other value
_TRUST_REMOTE_CODE_ENV = os.environ.get("HF_TRUST_REMOTE_CODE", "AUTO").upper()
TRUST_REMOTE_CODE: Optional[bool]
if _TRUST_REMOTE_CODE_ENV in ENV_VARS_TRUE_AND_AUTO_VALUES:
    TRUST_REMOTE_CODE = True
elif _TRUST_REMOTE_CODE_ENV in ENV_VARS_FALSE_VALUES:
    TRUST_REMOTE_CODE = False
else:
    TRUST_REMOTE_CODE = None
# NOTE(review): unit is not visible in this file — presumably seconds; confirm against the caller.
TIME_OUT_REMOTE_CODE = 15

# Batch size constants. Background on the sizing recommendations:
# https://github.com/apache/arrow/blob/master/docs/source/cpp/arrays.rst#size-limitations-and-recommendations
DEFAULT_MAX_BATCH_SIZE = 1_000
Expand Down
Loading