python labelme dataset class #1511
Replies: 2 comments
-
|
Certainly! Working with the labelme dataset can be streamlined by using a dataset class. Here's a basic implementation of a LabelMeDataset class.

LabelMeDataset Class Implementation

import os
import json
import numpy as np
import cv2
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
class LabelMeDataset:
    """Dataset helper for a folder of LabelMe-annotated images.

    Scans ``data_dir`` for LabelMe ``.json`` annotation files, loads the
    referenced images (resized and normalized to [0, 1]), records the class
    distribution, and can split the data into TensorFlow train/test datasets.
    """

    def __init__(self, data_dir, image_size=(256, 256), test_size=0.2, random_state=42):
        """
        Args:
            data_dir: Directory containing the images and LabelMe .json files.
            image_size: (width, height) every image is resized to.
            test_size: Fraction of the data reserved for the test split.
            random_state: Seed for the train/test split, for reproducibility.
        """
        self.data_dir = data_dir
        self.image_size = image_size
        self.test_size = test_size
        self.random_state = random_state
        self.images, self.annotations, self.class_distribution = self.load_data()

    def load_data(self):
        """Load images and annotations from ``data_dir``.

        Returns:
            Tuple ``(images, annotations, class_distribution)`` where
            ``images`` is a numpy array of normalized RGB images,
            ``annotations`` is a list of per-image LabelMe shape lists, and
            ``class_distribution`` maps each label to its occurrence count.
        """
        images = []
        annotations = []
        class_distribution = {}
        for filename in os.listdir(self.data_dir):
            if not filename.endswith('.json'):
                continue
            annotation_path = os.path.join(self.data_dir, filename)
            with open(annotation_path, 'r') as f:
                annotation = json.load(f)
            image_path = os.path.join(self.data_dir, annotation['imagePath'])
            # Skip annotations whose image file is missing.
            if not os.path.exists(image_path):
                continue
            # Force 3 channels: mixing grayscale/RGBA images would otherwise
            # break the np.array() stacking below. Resize, normalize to [0, 1].
            image = Image.open(image_path).convert('RGB')
            image = image.resize(self.image_size)
            images.append(np.array(image) / 255.0)
            # Count label occurrences across all shapes of this image.
            shapes = annotation['shapes']
            for shape in shapes:
                label = shape['label']
                class_distribution[label] = class_distribution.get(label, 0) + 1
            annotations.append(shapes)
        # Images all share one shape, so they stack into a single array.
        # BUG FIX: annotations are ragged lists of dicts; np.array() on them
        # raises ValueError on modern NumPy, so keep them as a plain list.
        return np.array(images), annotations, class_distribution

    def show_class_distribution(self):
        """Print how many shapes carry each label."""
        print("Class Distribution:")
        for label, count in self.class_distribution.items():
            print(f"{label}: {count}")

    def get_datasets(self):
        """Split into train/test sets and wrap each in a tf.data.Dataset.

        NOTE(review): from_tensor_slices requires a rectangular structure;
        the raw per-image shape lists vary in length, so they need converting
        (e.g. to masks or padded tensors) before this call can succeed.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.images, self.annotations, test_size=self.test_size, random_state=self.random_state
        )
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
        return train_dataset, test_dataset
# Example usage
if __name__ == "__main__":
    # BUG FIX: __init__'s first parameter is data_dir, not path —
    # LabelMeDataset(path=...) raises TypeError.
    dataset = LabelMeDataset(data_dir='path/to/your/labelme/dataset')
    dataset.show_class_distribution()
    train_dataset, test_dataset = dataset.get_datasets()
    # Now you can use train_dataset and test_dataset with TensorFlow for training.
    # BUG FIX: shuffle before batching, so individual examples (not whole
    # batches) are shuffled.
    train_dataset = train_dataset.shuffle(buffer_size=1000).batch(32)
    test_dataset = test_dataset.batch(32)
# Your training loop here

Explanation
Usage
This implementation should help you get started with using the labelme dataset.
Beta Was this translation helpful? Give feedback.
-
|
Bah voilà ! 👌
Le ven. 29 nov. 2024, 16:41, huanxi0319 ***@***.***> a écrit :
… Certainly! Working with the labelme dataset can indeed be streamlined by
using a pre-built dataset class. While there might not be an official
LabelMeDataset class, you can create or use a community-contributed one
that simplifies the process of loading images and annotations, showing
class distribution, and integrating with TensorFlow for training.
Here's a basic implementation of a LabelMeDataset class that you can use.
This class will handle loading images, parsing annotations, and providing a
convenient interface for TensorFlow.
LabelMeDataset Class Implementation
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
class LabelMeDataset:
    """Dataset helper for a folder of LabelMe-annotated images.

    Scans ``data_dir`` for LabelMe ``.json`` annotation files, loads the
    referenced images (resized and normalized to [0, 1]), records the class
    distribution, and can split the data into TensorFlow train/test datasets.
    """

    def __init__(self, data_dir, image_size=(256, 256), test_size=0.2, random_state=42):
        """
        Args:
            data_dir: Directory containing the images and LabelMe .json files.
            image_size: (width, height) every image is resized to.
            test_size: Fraction of the data reserved for the test split.
            random_state: Seed for the train/test split, for reproducibility.
        """
        self.data_dir = data_dir
        self.image_size = image_size
        self.test_size = test_size
        self.random_state = random_state
        self.images, self.annotations, self.class_distribution = self.load_data()

    def load_data(self):
        """Load images and annotations from ``data_dir``.

        Returns:
            Tuple ``(images, annotations, class_distribution)`` where
            ``images`` is a numpy array of normalized RGB images,
            ``annotations`` is a list of per-image LabelMe shape lists, and
            ``class_distribution`` maps each label to its occurrence count.
        """
        images = []
        annotations = []
        class_distribution = {}
        for filename in os.listdir(self.data_dir):
            if not filename.endswith('.json'):
                continue
            annotation_path = os.path.join(self.data_dir, filename)
            with open(annotation_path, 'r') as f:
                annotation = json.load(f)
            image_path = os.path.join(self.data_dir, annotation['imagePath'])
            # Skip annotations whose image file is missing.
            if not os.path.exists(image_path):
                continue
            # Force 3 channels: mixing grayscale/RGBA images would otherwise
            # break the np.array() stacking below. Resize, normalize to [0, 1].
            image = Image.open(image_path).convert('RGB')
            image = image.resize(self.image_size)
            images.append(np.array(image) / 255.0)
            # Count label occurrences across all shapes of this image.
            shapes = annotation['shapes']
            for shape in shapes:
                label = shape['label']
                class_distribution[label] = class_distribution.get(label, 0) + 1
            annotations.append(shapes)
        # Images all share one shape, so they stack into a single array.
        # BUG FIX: annotations are ragged lists of dicts; np.array() on them
        # raises ValueError on modern NumPy, so keep them as a plain list.
        return np.array(images), annotations, class_distribution

    def show_class_distribution(self):
        """Print how many shapes carry each label."""
        print("Class Distribution:")
        for label, count in self.class_distribution.items():
            print(f"{label}: {count}")

    def get_datasets(self):
        """Split into train/test sets and wrap each in a tf.data.Dataset.

        NOTE(review): from_tensor_slices requires a rectangular structure;
        the raw per-image shape lists vary in length, so they need converting
        (e.g. to masks or padded tensors) before this call can succeed.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.images, self.annotations, test_size=self.test_size, random_state=self.random_state
        )
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
        return train_dataset, test_dataset
# Example usage
if __name__ == "__main__":
    # BUG FIX: __init__'s first parameter is data_dir, not path —
    # LabelMeDataset(path=...) raises TypeError.
    dataset = LabelMeDataset(data_dir='path/to/your/labelme/dataset')
    dataset.show_class_distribution()
    train_dataset, test_dataset = dataset.get_datasets()
    # Now you can use train_dataset and test_dataset with TensorFlow for training.
    # BUG FIX: shuffle before batching, so individual examples (not whole
    # batches) are shuffled.
    train_dataset = train_dataset.shuffle(buffer_size=1000).batch(32)
    test_dataset = test_dataset.batch(32)
    # Your training loop here
Explanation
1.
*Initialization*:
- The LabelMeDataset class is initialized with the path to the dataset
directory, the desired image size, and the test size for splitting the
dataset.
2.
*Loading Data*:
- The load_data method loads images and annotations from the dataset
directory. It also calculates the class distribution.
- Images are resized and normalized to the range [0, 1].
- Annotations are parsed, and the class distribution is updated.
3.
*Showing Class Distribution*:
- The show_class_distribution method prints the class distribution.
4.
*Getting Datasets*:
- The get_datasets method splits the data into training and testing
sets and converts them into TensorFlow datasets.
Usage
- Instantiate the LabelMeDataset class with the path to your dataset.
- Call show_class_distribution to see the class distribution.
- Use get_datasets to get the training and testing datasets, which can
be used directly with TensorFlow for training.
This implementation should help you get started with using the labelme
dataset in a more straightforward manner. If you need additional features
or have specific requirements, you can extend this class accordingly.
—
Reply to this email directly, view it on GitHub
<#1511 (comment)>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AAA57OYY5DTUEPROYSUMPCT2DCDJFAVCNFSM6AAAAABRT62XAWVHI2DSMVQWIX3LMV43URDJONRXK43TNFXW4Q3PNVWWK3TUHMYTCNBRG4YTMNQ>
.
You are receiving this because you authored the thread.
Message ID:
***@***.***>
|
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
Hi,
To use labelme images, it looks like everybody is writing their own Python dataset class, to:
load images
load annotations
show class distribution
....
This is really not straightforward.
Is there somewhere a labelmedataset class I can directly use, to do it ?
I will only have to instantiate it using mydataset = LabelMeDataset(path), and use it directly with TF for training.
Beta Was this translation helpful? Give feedback.
All reactions