# Dataset preparation, part 1: read the images in a folder and generate a CSV file of their names

"""
This script shows how to generate a file name list in CSV format
"""
import csv
import os


class ListGenerator:
    """Collect files with given extensions under a directory and save them as CSV."""

    def __init__(self):
        """Create a generator with an empty file list."""
        # Paths collected so far. Every generate_list() call appends to this
        # list; it is never cleared automatically.
        self.file_list = []
        # Both are filled in by the most recent generate_list() call.
        self.target_dir = None
        self.format_list = None

    def generate_list(self, target_dir, format_list=None):
        """Append every file under target_dir whose extension is in format_list.

        Args:
            target_dir: the directory to walk (recursively, symlinked
                directories are not followed).
            format_list: an iterable of file extension names without the
                leading dot. Matching is case-sensitive. Defaults to
                ['mp4', 'jpg'] when omitted or None.

        Returns:
            self.file_list, i.e. all paths collected so far by this instance
            (including those from earlier calls).

        """
        # Build a fresh list on every call: a mutable default argument would
        # be shared between calls and instances.
        if format_list is None:
            format_list = ['mp4', 'jpg']
        self.target_dir = target_dir
        self.format_list = list(format_list)

        # Walk through directories and list all files.
        for file_path, _, current_files in os.walk(self.target_dir, followlinks=False):
            for filename in current_files:
                # Only the text after the last dot counts as the extension.
                # A name without any dot (e.g. a file literally called "jpg")
                # has no extension and is skipped.
                _, dot, extension = filename.rpartition('.')
                if dot and extension in self.format_list:
                    # The path is stored as-is (no tab prefix is added).
                    self.file_list.append(os.path.join(file_path, filename))

        return self.file_list

    def save_list(self, list_name='list.csv'):
        """Save the collected file paths in csv format.

        Writes a header row 'file_url' followed by one row per entry of
        self.file_list.

        Args:
            list_name: the file name to be written.

        """
        with open(list_name, 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=['file_url'])

            # Write the header.
            writer.writeheader()

            # Write all the rows.
            for each_record in self.file_list:
                writer.writerow({'file_url': each_record})

    def save_basename_list(self, list_name='basename.csv'):
        """Save the base names of the collected files in csv format.

        Each entry of self.file_list is reduced to its base name (directory
        part removed). A trailing '.jpg' is dropped; any other extension is
        kept unchanged. Writes a header row 'file_basename' followed by one
        row per file.

        Args:
            list_name: the file name to be written.

        """
        jpg_suffix = '.jpg'
        basename_list = []
        for file_url in self.file_list:
            basename = os.path.basename(file_url)
            # Remove the exact suffix. str.rstrip('.jpg') would instead strip
            # any trailing run of the characters '.', 'j', 'p', 'g' and could
            # eat part of the name itself (e.g. 'pg.jpg' -> '').
            if basename.endswith(jpg_suffix):
                basename = basename[:-len(jpg_suffix)]
            basename_list.append(basename)

        with open(list_name, 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=['file_basename'])

            # Write the header.
            writer.writeheader()

            # Write all the rows.
            for each_record in basename_list:
                writer.writerow({'file_basename': each_record})


def main():
    """Build the file list for VOC2007/JPEGImages and write it to list.csv."""
    generator = ListGenerator()
    # Collect the matching files first, then dump them using the default
    # output name of save_list().
    generator.generate_list(target_dir='VOC2007/JPEGImages')
    generator.save_list()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

 

# Posted 2020-10-10 15:53 by 黑无常 (blog page footer: 462 views, 0 comments)