salesforce python 下载附件

文件一:download.ini

[salesforce]
username =
password =
security_token =
connect_to_sandbox = False
consumer_key =
consumer_secret =
output_dir = C:\pythoncode\attachments12\
download_attachments = True
batch_size = 100
loglevel = INFO
parent_query = SELECT id from order where CreatedDate >2017-10-01T01:36:47.000+0000 and CreatedDate < 2017-11-01T01:36:47.000+0000
parent_type_craeteddate = where CreatedDate >2017-10-01T01:36:47.000+0000 and CreatedDate < 2017-11-01T01:36:47.000+0000 and Parent.type = 'order'

 

文件二:download.py

import concurrent.futures
from simple_salesforce import Salesforce
import requests
import os.path
import csv
import logging


def split_into_batches(items, batch_size):
    """Yield consecutive slices of *items*, each at most *batch_size* long.

    Args:
        items: Any iterable; it is copied into a list before slicing, so
            sets and generators are accepted.
        batch_size: Step used to walk the list; the final batch may be
            shorter than this.

    Yields:
        Lists holding up to ``batch_size`` elements, in list order.
    """
    sequence = list(items)
    starts = range(0, len(sequence), batch_size)
    yield from (sequence[start:start + batch_size] for start in starts)


def create_filename(title, record_id, output_directory):
    """Build the local file path for a record.

    The result is ``output_directory`` followed directly by
    ``"<record_id>-<sanitized title>"``. No path separator is inserted, so
    ``output_directory`` must already end with one.

    Args:
        title: Human-readable name; unsafe characters are stripped from it.
        record_id: Prefix placed before the title; used as given.
        output_directory: Directory prefix, including its trailing separator.

    Returns:
        The concatenated path as a string.
    """
    # Characters reserved in Windows file names (< > : " / \ | ? *) plus
    # space, comma, semicolon and exclamation mark. The double quote and the
    # pipe were previously let through and made open() fail on Windows.
    bad_chars = frozenset(';:!*/\\ ,?><"|')
    clean_title = ''.join(char for char in title if char not in bad_chars)
    return "{0}{1}-{2}".format(output_directory, record_id, clean_title)


def get_record_ids(sf, output_directory, query):
    """Run *query*, write a CSV index of the results and return their Ids.

    The index is written to ``output_directory + 'files.csv'`` (the directory
    string must therefore end with a path separator).

    Args:
        sf: Salesforce client exposing ``query_all``.
        output_directory: Directory for ``files.csv``; created if missing.
        query: SOQL query whose records provide the fields read below
            (Id, Name, ParentId, Parent.Name, OwnerId, CreatedDate,
            CreatedById, LastModifiedDate).

    Returns:
        A set with the ``Id`` of every record returned by the query.
    """
    # makedirs also creates missing parent directories, and exist_ok avoids
    # the check-then-create race of isdir() followed by mkdir().
    os.makedirs(output_directory, exist_ok=True)

    results_path = output_directory + 'files.csv'
    record_ids = set()
    records = sf.query_all(query)

    # Save results file with file mapping and return ids
    with open(results_path, 'w', encoding='UTF-8', newline='') as results_csv:
        file_writer = csv.writer(results_csv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        file_writer.writerow(
            ['ParentId', 'Parent.Name', 'Id', 'VersionData', 'PathOnClient', 'Title', 'OwnerId',
                'CreatedDate', 'CreatedById', 'LastModifiedDate'])

        for content_document in records["records"]:
            record_ids.add(content_document["Id"])
            # NOTE(review): this path is "<dir><Id>-<Name>", while
            # download_attachment saves to "<dir><Parent.Name>-<Id>-<Name>";
            # confirm which naming the CSV consumers expect.
            filename = create_filename(content_document["Name"],
                                       content_document["Id"],
                                       output_directory)

            parent_name = content_document["Parent"]
            logging.info(parent_name["Name"])
            # The same local path fills both the VersionData and the
            # PathOnClient columns.
            file_writer.writerow(
                [content_document["ParentId"], parent_name["Name"], content_document["Id"], filename, filename,
                    content_document["Name"], content_document["OwnerId"], content_document['CreatedDate'],
                    content_document['CreatedById'], content_document['LastModifiedDate']])

    return record_ids


def download_attachment(args):
    """Download the body of one Attachment record and save it to disk.

    Args:
        args: A ``(record, output_directory, sf)`` tuple. It is packed into a
            single argument so the function can be handed to
            ``executor.map``. ``record`` must contain ``Id``, ``Name`` and
            ``Parent`` (with a ``Name``); ``sf`` must expose ``sf_instance``
            and ``session_id``.

    Returns:
        A status message: where the file was saved, or which URL could not
        be downloaded.
    """
    record, output_directory, sf = args
    # Create filename
    # NOTE(review): the parent name is used as the filename prefix and is not
    # sanitized by create_filename; confirm parent names are path-safe.
    parent_name = record["Parent"]
    filename = create_filename(record["Id"] + '-' + record["Name"], parent_name["Name"], output_directory)
    url = "https://%s%s%s/body" % (sf.sf_instance, '/services/data/v47.0/sobjects/Attachment/', record["Id"])
    logging.debug("Downloading from " + url)

    # (connect, read) timeouts in seconds: without them a stalled connection
    # would block this worker forever.
    timeout = (10, 300)
    try:
        response = requests.get(url, headers={"Authorization": "OAuth " + sf.session_id,
                                              "Content-Type": "application/octet-stream"},
                                timeout=timeout)
    except requests.RequestException as error:
        # Report the failure for this file instead of letting one network
        # error abort every other download in the batch.
        return "Couldn't download %s (%s)" % (url, error)

    if not response.ok:
        return "Couldn't download %s" % url

    # Save File
    with open(filename, "wb") as output_file:
        output_file.write(response.content)
    return "Saved file to %s" % filename

def fetch_files(sf, query_string, output_directory, valid_record_ids=None, batch_size=100):
    """Download every attachment in *valid_record_ids*, batch by batch.

    For each batch of Ids the records are re-queried with
    ``query_string + " WHERE Id in (...)"`` and each record is passed to
    ``download_attachment``.

    Args:
        sf: Salesforce client used for the queries and passed on to
            ``download_attachment``.
        query_string: SOQL ``SELECT ... FROM ...`` without a WHERE clause.
        output_directory: Directory prefix the files are written under.
        valid_record_ids: Iterable of record Id strings; ``None`` is treated
            as "nothing to download".
        batch_size: Maximum number of Ids per query.
    """
    record_ids = [] if valid_record_ids is None else valid_record_ids
    batches = list(split_into_batches(record_ids, batch_size))

    for batch_number, batch in enumerate(batches, start=1):
        logging.info("Processing batch {0}/{1}".format(batch_number, len(batches)))
        quoted_ids = ",".join("'" + item + "'" for item in batch)
        batch_query = query_string + ' WHERE Id in (' + quoted_ids + ')'
        # query_all follows result pagination, so large batch sizes are not
        # silently truncated to the first page of results.
        query_response = sf.query_all(batch_query)
        records = query_response["records"]
        logging.debug("Attachment Query found {0} results".format(len(records)))

        # Downloads are I/O-bound, so threads overlap the network waits
        # without spawning processes or pickling the client for every record.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            args = ((record, output_directory, sf) for record in records)
            for result in executor.map(download_attachment, args):
                logging.debug(result)

        logging.info('All files in batch {0} downloaded'.format(batch_number))
    logging.info('All batches complete')


def main():
    """Read ``download.ini``, log in to Salesforce and export attachments.

    Settings come from the ``[salesforce]`` section of ``download.ini`` in
    the current working directory. When ``download_attachments`` is true, the
    Attachments whose parent is selected by ``parent_query`` are indexed in
    ``files.csv`` and then downloaded into ``output_dir``.
    """
    import configparser

    # Get settings from config file
    section = 'salesforce'
    config = configparser.ConfigParser()
    config.read('download.ini')

    username = config[section]['username']
    password = config[section]['password']
    consumer_key = config[section]['consumer_key']
    consumer_secret = config[section]['consumer_secret']
    # getboolean accepts True/true/yes/on/1 and rejects unparseable values,
    # instead of silently treating anything but the exact text 'True' as
    # false.
    is_sandbox = config.getboolean(section, 'connect_to_sandbox')
    download_attachments = config.getboolean(section, 'download_attachments')
    parent_query = config[section]['parent_query']
    batch_size = int(config[section]['batch_size'])
    loglevel = logging.getLevelName(config[section]['loglevel'])
    output = config[section]['output_dir']

    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=loglevel)

    # First query: selects the attachments (with the metadata for files.csv)
    # whose parent record is returned by the configured parent_query.
    attachment_query = 'SELECT Id, ContentType, Description, Name, OwnerId, ParentId, CreatedById, CreatedDate, Parent.Name, ' \
                       'LastModifiedDate FROM Attachment WHERE ParentId IN ({0})'.format(parent_query)

    # Second query: fetch_files appends a "WHERE Id in (...)" clause per batch.
    attachment_query_string = "SELECT Id, ContentType, Description, Name, OwnerId, ParentId, Parent.Name FROM Attachment"

    # Honour connect_to_sandbox; the login call previously hard-coded the
    # sandbox domain and ignored this setting.
    domain = 'test' if is_sandbox else None

    # Output
    logging.info('Export Attachments from Salesforce')
    logging.info('Username: ' + username)
    logging.info('Output directory: ' + output)

    # Connect. The security_token setting is not passed to this login call.
    sf = Salesforce(username=username, password=password, consumer_key=consumer_key,
                    consumer_secret=consumer_secret, domain=domain)

    logging.debug("Connected successfully to {0}".format(sf.sf_instance))

    if download_attachments:
        logging.info("Querying to get Attachment Ids...")
        valid_record_ids = get_record_ids(sf=sf, output_directory=output, query=attachment_query)
        logging.info("Found {0} total attachments".format(len(valid_record_ids)))
        fetch_files(sf=sf, query_string=attachment_query_string, valid_record_ids=valid_record_ids,
                    output_directory=output, batch_size=batch_size)


if __name__ == "__main__":
    main()

 

posted @ 2025-05-19 10:16  赫赫332  阅读(9)  评论(0)    收藏  举报