|
| 1 | +import json |
| 2 | +from dataclasses import asdict |
| 3 | +from typing import Any, Dict, List, Union |
| 4 | + |
| 5 | +from aif_gen.task import AlignmentTask |
| 6 | + |
| 7 | +from .alignment_sample import AlignmentDatasetSample |
| 8 | + |
| 9 | + |
class AlignmentDataset:
    r"""Container object for an Alignment Dataset.

    Pairs a single AlignmentTask with the list of samples generated for it, and
    provides list-like access (``len``, indexing, slicing, ``append``,
    ``extend``) together with dictionary / JSON (de)serialization.

    Note: The ``samples`` list passed to the constructor is stored by reference
    (not copied), so ``append`` and ``extend`` mutate the caller's list.

    Args:
        task (AlignmentTask): The AlignmentTask associated with the dataset.
        samples (List[AlignmentDatasetSample]): The samples in this AlignmentDataset.
    """

    # Keys that must be present in the dictionary given to from_dict().
    _REQUIRED_KEYS = ('task', 'samples')

    def __init__(
        self, task: AlignmentTask, samples: List[AlignmentDatasetSample]
    ) -> None:
        self._task = task
        self._samples = samples

    @property
    def task(self) -> AlignmentTask:
        """AlignmentTask: The task associated with the AlignmentDataset."""
        return self._task

    @property
    def samples(self) -> List[AlignmentDatasetSample]:
        """List[AlignmentDatasetSample]: The list of samples associated with the AlignmentDataset."""
        return self._samples

    @property
    def num_samples(self) -> int:
        """int: The number of samples associated with the AlignmentDataset."""
        return len(self.samples)

    def __len__(self) -> int:
        return self.num_samples

    def __getitem__(
        self, key: Union[slice, int]
    ) -> Union[AlignmentDatasetSample, List[AlignmentDatasetSample]]:
        r"""Index or slice directly into the underlying list of samples.

        Args:
            key (Union[slice, int]): An integer index or a slice.

        Returns:
            Union[AlignmentDatasetSample, List[AlignmentDatasetSample]]: A single
            sample for an integer key, or a new list of samples for a slice.
        """
        return self.samples[key]

    @staticmethod
    def _check_sample(sample: Any) -> None:
        r"""Raise TypeError unless ``sample`` is an AlignmentDatasetSample."""
        if not isinstance(sample, AlignmentDatasetSample):
            raise TypeError(
                f'Sample: {sample} must be of type AlignmentDatasetSample but got {sample.__class__.__name__}'
            )

    def append(self, sample: AlignmentDatasetSample) -> None:
        r"""Append a single AlignmentDatasetSample to the Alignment Dataset.

        Args:
            sample (AlignmentDatasetSample): The new sample to add.

        Raises:
            TypeError: if the sample is not of type AlignmentDatasetSample.
        """
        self._check_sample(sample)
        self.samples.append(sample)

    def extend(self, samples: List[AlignmentDatasetSample]) -> None:
        r"""Add multiple AlignmentDatasetSample's to the Alignment Dataset.

        All samples are validated before any of them is added, so the dataset
        is left unchanged if a TypeError is raised.

        Args:
            samples (List[AlignmentDatasetSample]): The new samples to add.

        Raises:
            TypeError: if any sample is not of type AlignmentDatasetSample.
        """
        # Snapshot the input first: iterating `samples` while appending would
        # never terminate when the caller passes this dataset's own list
        # (e.g. ``ds.extend(ds.samples)``).
        new_samples = list(samples)
        for sample in new_samples:
            self._check_sample(sample)
        self.samples.extend(new_samples)

    def to_json(self, file_path: str) -> None:
        r"""Save the AlignmentDataset to a json file.

        Note: Uses to_dict() under the hood to get a dictionary representation.

        Args:
            file_path (str): The os.pathlike object to write to.
        """
        dataset_dict = self.to_dict()
        # Explicit encoding so the file does not depend on the platform locale.
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(dataset_dict, f)

    def to_dict(self) -> Dict[str, Any]:
        r"""Convert the AlignmentDataset to dictionary representation.

        Note: This method is the functional inverse of AlignmentDataset.from_dict().

        Returns:
            Dict[str, Any]: The dictionary representation of the AlignmentDataset,
            with a 'task' entry (from ``task.to_dict()``) and a 'samples' entry
            (one ``dataclasses.asdict`` dictionary per sample).
        """
        return {
            'task': self.task.to_dict(),
            'samples': [asdict(sample) for sample in self.samples],
        }

    @classmethod
    def from_json(cls, file_path: str) -> 'AlignmentDataset':
        r"""Load the AlignmentDataset from a json file.

        Note: Uses AlignmentDataset.from_dict() under the hood to parse the representation.

        Args:
            file_path (str): The os.pathlike object to read from.

        Returns:
            AlignmentDataset: The newly constructed AlignmentDataset.

        Raises:
            ValueError: If the decoded dictionary is missing any required keys.
        """
        # Explicit encoding so UTF-8 JSON decodes regardless of platform locale.
        with open(file_path, 'r', encoding='utf-8') as f:
            dataset_dict = json.load(f)

        return cls.from_dict(dataset_dict)

    @classmethod
    def from_dict(cls, dataset_dict: Dict[str, Any]) -> 'AlignmentDataset':
        r"""Construct an AlignmentDataset from dictionary representation.

        Note:
            Expects 'task', and 'samples' keys to be present in the dictionary.
            The 'task' value should be parsable by AlignmentTask.from_dict().
            The 'samples' value should be a list of dictionaries, each of which
            are parsable by AlignmentDatasetSample.

        Args:
            dataset_dict (Dict[str, Any]): The dictionary that encodes the AlignmentDataset.

        Returns:
            AlignmentDataset: The newly constructed AlignmentDataset.

        Raises:
            ValueError: If the input dictionary is missing any required keys.
        """
        # Check up front so a missing key surfaces as the documented ValueError
        # rather than a bare KeyError from the lookups below.
        missing_keys = [key for key in cls._REQUIRED_KEYS if key not in dataset_dict]
        if missing_keys:
            raise ValueError(
                f'Dataset dictionary is missing required keys: {missing_keys}'
            )

        task = AlignmentTask.from_dict(dataset_dict['task'])
        samples = [
            AlignmentDatasetSample(**sample_dict)
            for sample_dict in dataset_dict['samples']
        ]
        return cls(task, samples)
0 commit comments