Dataset(配合 DataLoader 使用)在 `__getitem__` 中加载某个 sample 出错时,随机换一个 sample 重新加载的做法
在dataset中:
def __getitem__(self, idx):
    """Load and process one sample, substituting a random one on failure.

    When ``self.is_train`` is true, any exception raised while preparing or
    processing the sample is logged and a randomly chosen index from
    ``self.sample_path_list`` is tried instead. The retry is an iterative,
    bounded loop rather than recursion, so a dataset with many broken
    samples cannot exhaust the interpreter stack.

    When ``self.is_train`` is false, no retry happens and the exception
    propagates to the caller.

    Args:
        idx: Index of the requested sample.

    Returns:
        Whatever ``self.pipeline`` returns for the prepared sample.

    Raises:
        RuntimeError: In training mode, if every attempt failed. The last
            underlying error is attached as ``__cause__``.
    """
    if not self.is_train:
        result = self.prepare_sample(idx)
        return self.pipeline(result)

    # Upper bound on attempts; keeps a fully broken dataset from looping
    # (or, as before, recursing) without end.
    max_attempts = 100
    last_error = None
    for _ in range(max_attempts):
        try:
            result = self.prepare_sample(idx)
            return self.pipeline(result)
        except Exception as e:
            last_error = e
            print('#'*5 + f"\tSample {self.sample_path_list[idx]} load error!!! Error Msg: {e}")
            print(traceback.format_exc())
            # Pick a replacement sample id at random from the whole dataset.
            idx = random.randrange(0, len(self.sample_path_list))

    raise RuntimeError(
        f"Failed to load a valid sample after {max_attempts} attempts"
    ) from last_error
附:在 runner.py 中的一个做法(一个 epoch 迭代完后自动重建 DataLoader 迭代器,并在 DataLoader 抛出 RuntimeError(如超时)时重试一次):
class IterLoaderwithResume:
    """Endless iterator over a dataloader that restarts it after each epoch.

    Each ``next()`` call yields the next batch. When the underlying iterator
    is exhausted, the epoch counter is incremented, the sampler's
    ``set_epoch`` is called if it has one, and a fresh iterator is created.
    A ``RuntimeError`` raised while fetching a batch is logged and the fetch
    is retried once; a second failure propagates.

    Args:
        dataloader: Iterable to wrap. ``iter(dataloader)`` is called once
            here and again at the start of every new epoch.
        epoch_transition_delay: Seconds to sleep before re-creating the
            iterator at an epoch boundary. Defaults to 2.
    """

    def __init__(self, dataloader, epoch_transition_delay=2):
        self._dataloader = dataloader
        self._epoch_transition_delay = epoch_transition_delay
        self.iter_loader = iter(self._dataloader)
        self._epoch = 0

    @property
    def epoch(self) -> int:
        """Number of completed passes over the dataloader."""
        return self._epoch

    def __iter__(self):
        """Return ``self`` so the object can be used directly in ``for`` loops."""
        return self

    def _start_next_epoch(self):
        """Advance the epoch counter and rebuild the underlying iterator."""
        self._epoch += 1
        # Not every iterable has a ``sampler``; skip reseeding when absent.
        sampler = getattr(self._dataloader, 'sampler', None)
        if hasattr(sampler, 'set_epoch'):
            sampler.set_epoch(self._epoch)
        # Prevent possible deadlock during epoch transition
        time.sleep(self._epoch_transition_delay)
        self.iter_loader = iter(self._dataloader)

    def _fetch(self):
        """Fetch one batch, retrying once if the first try raises RuntimeError."""
        try:
            return next(self.iter_loader)
        except RuntimeError as e:
            print(f"DataLoader timeout, retrying: {e}")
            return next(self.iter_loader)

    def __next__(self):
        """Return the next batch, rolling over to a new epoch when needed.

        ``StopIteration`` escapes only if a freshly created iterator is
        already exhausted, i.e. the dataloader is empty.
        """
        try:
            return self._fetch()
        except StopIteration:
            # Also reached when the retry inside ``_fetch`` hits the end of
            # the epoch, so a retry can no longer leak StopIteration.
            self._start_next_epoch()
            return self._fetch()

浙公网安备 33010602011771号