-
Notifications
You must be signed in to change notification settings - Fork 61
A function to read file from Amazon S3, URLs, or local paths #162
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| """Defines a function to read files from Amazon S3, URLs, or local paths in text or binary mode.""" | ||
|
|
||
| import requests | ||
|
|
||
|
|
||
# Seconds to wait for the server to connect/send data before giving up.
# Without a timeout, requests will wait forever on an unresponsive host.
_HTTP_TIMEOUT_SECONDS = 30


def _read_from_s3(source: str, mode: str):
    """Fetch an ``s3://bucket/key`` object and return it as str ('r') or bytes ('rb')."""
    bucket_name, _, object_key = source[len("s3://"):].partition("/")
    if not bucket_name or not object_key:
        # Fail early with a clear message instead of an opaque unpacking error.
        raise ValueError(
            f"Invalid S3 path '{source}'. Expected the form s3://bucket_name/file_path."
        )
    try:
        import boto3  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as exc:
        raise ModuleNotFoundError(
            "Please install boto3. You can use `pip install boto3` to install it."
        ) from exc
    s3 = boto3.client("s3")
    obj = s3.get_object(Bucket=bucket_name, Key=object_key)
    data = obj["Body"].read()
    return data.decode() if mode == "r" else data


def _read_from_url(source: str, mode: str):
    """Download an http(s) URL and return its body as str ('r') or bytes ('rb')."""
    response = requests.get(source, timeout=_HTTP_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.text if mode == "r" else response.content


def _read_from_local(source: str, mode: str):
    """Read a local file, opened with the given mode, and return its content."""
    with open(source, mode) as file:
        return file.read()


def read_file(source: str, mode: str = "r"):
    """Read a file from an S3 bucket, a URL, or the local filesystem.

    The backend is selected from the prefix of ``source``: ``s3://`` is read
    with boto3, ``http://`` / ``https://`` is downloaded with requests, and
    anything else is opened as a local path.

    Args:
        source: The path to the file. This can be an S3 bucket path
            (s3://bucket_name/file_path), a URL (http:// or https://), or a
            local file path.
        mode: The mode in which to open the file. Use 'r' for text mode and
            'rb' for binary mode. Default is 'r'.

    Returns:
        The content of the file as a string if mode is 'r', or as bytes if
        mode is 'rb'. Returns ``None`` if the file could not be read; in that
        case the error (including a missing boto3 installation) is printed
        rather than raised.

    Raises:
        ValueError: If ``mode`` is neither 'r' nor 'rb'.

    Example usage:
        ```
        content = read_file("s3://my_bucket/my_file.txt")
        content = read_file("https://my_bucket.abc.com/my_file.txt")
        content = read_file("./my_file.txt")
        ```

    Note:
        - When reading from an S3 bucket, make sure you have the necessary
          credentials and permissions.
        - When reading from a URL, ensure that the URL is accessible and the
          file exists.
        - When reading from the local filesystem, provide the correct file
          path.
    """
    if mode not in ("r", "rb"):
        raise ValueError(
            "Unsupported mode. Use 'r' for text mode or 'rb' for binary mode."
        )

    try:
        if source.startswith("s3://"):
            return _read_from_s3(source, mode)
        elif source.startswith(("http://", "https://")):
            return _read_from_url(source, mode)
        else:
            return _read_from_local(source, mode)
    except OSError as err:
        # Network request errors and file I/O errors both land here:
        # requests.RequestException derives from IOError (an alias of OSError).
        print(f"Error accessing {source}: {err}")
    except Exception as err:  # pylint: disable=broad-except
        # Best-effort API: report any other failure instead of raising.
        print(f"An unexpected error occurred: {err}")
    return None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Use `Args` because we are using the Google docstring format.