Dataset(DataLoader)在 `__getitem__` 取样时,如果某个 sample 有问题(加载或预处理出错),随机换一个 sample 的做法

在dataset中:

    def __getitem__(self, idx):
        """Return the pipeline output for the sample at ``idx``.

        In training mode (``self.is_train`` is truthy) a sample whose
        ``prepare_sample`` / ``pipeline`` call raises is logged and replaced
        by a sample drawn uniformly at random from ``self.sample_path_list``.
        The retry is iterative and bounded, so a long run of broken samples
        cannot exhaust the interpreter's recursion limit.

        Outside training mode nothing is substituted: any exception raised
        while loading the sample propagates to the caller.

        The number of replacement draws defaults to 100 and can be overridden
        by setting an optional ``max_retries`` attribute on the instance.

        Args:
            idx: Index of the requested sample.

        Returns:
            Whatever ``self.pipeline`` returns for the loaded sample.

        Raises:
            RuntimeError: In training mode, when the requested sample and all
                replacement samples failed to load. The last underlying
                error is attached as ``__cause__``.
        """
        if not self.is_train:
            # No fallback here: errors surface unchanged to the caller.
            return self.pipeline(self.prepare_sample(idx))

        max_retries = max(0, getattr(self, "max_retries", 100))
        current_idx = idx
        last_error = None
        for attempt in range(max_retries + 1):
            try:
                return self.pipeline(self.prepare_sample(current_idx))
            except Exception as e:
                # Keep a reference: ``e`` is unbound once the handler exits.
                last_error = e
                print('#'*5 + f"\tSample {self.sample_path_list[current_idx]} load error!!! Error Msg: {e}")
                print(traceback.format_exc())
                if attempt < max_retries:
                    # Draw a random sample index from the whole dataset.
                    current_idx = random.randrange(0, len(self.sample_path_list))

        raise RuntimeError(
            f"Failed to load a valid sample after {max_retries + 1} attempts "
            f"(initially requested index: {idx})"
        ) from last_error

  

附,在runner.py中的一个做法:

class IterLoaderwithResume:
    """Endless iterator over a dataloader that restarts it at each epoch end.

    ``next()`` on this object yields batches from the wrapped dataloader.
    When the underlying iterator is exhausted, the epoch counter is
    incremented, ``set_epoch`` is called on the dataloader's sampler (if it
    has one), and a fresh iterator is created. A ``RuntimeError`` raised
    while fetching a batch is logged and the fetch is retried once.

    Args:
        dataloader: Iterable to wrap. If it exposes a ``sampler`` attribute
            with a ``set_epoch`` method, that method is called with the new
            epoch number on every epoch rollover.
        epoch_transition_sleep: Seconds to sleep before re-creating the
            iterator at an epoch boundary. Defaults to 2; values <= 0
            disable the pause.
    """

    def __init__(self, dataloader, epoch_transition_sleep=2):
        self._dataloader = dataloader
        self._epoch_transition_sleep = epoch_transition_sleep
        self.iter_loader = iter(self._dataloader)
        self._epoch = 0

    @property
    def epoch(self) -> int:
        """Number of epoch rollovers performed so far."""
        return self._epoch

    def __iter__(self):
        """Return ``self`` so the object satisfies the iterator protocol.

        Note that iteration only ends if the wrapped dataloader is empty.
        """
        return self

    def __next__(self):
        """Return the next batch, rolling over to a new epoch when needed.

        Raises:
            StopIteration: If a freshly created iterator is already
                exhausted (i.e. the dataloader yields nothing).
            RuntimeError: If fetching fails again on the single retry.
        """
        try:
            return self._fetch()
        except RuntimeError as e:
            print(f"DataLoader timeout, retrying: {e}")
            # Retry through the same path as a normal fetch so that an
            # exhausted iterator still triggers the epoch rollover instead
            # of leaking StopIteration to the caller.
            return self._fetch()

    def _fetch(self):
        """Fetch one batch, restarting the dataloader if it is exhausted."""
        try:
            return next(self.iter_loader)
        except StopIteration:
            self._start_next_epoch()
            return next(self.iter_loader)

    def _start_next_epoch(self):
        """Advance the epoch counter and create a fresh iterator."""
        self._epoch += 1
        sampler = getattr(self._dataloader, "sampler", None)
        if hasattr(sampler, 'set_epoch'):
            sampler.set_epoch(self._epoch)
        if self._epoch_transition_sleep > 0:
            # Prevent possible deadlock during epoch transition
            time.sleep(self._epoch_transition_sleep)
        self.iter_loader = iter(self._dataloader)

  

 

posted @ 2025-08-30 11:59  Picassooo  阅读(2)  评论(0)    收藏  举报