salesforce python 下载附件

文件一：download.ini

[salesforce]
username =
password =
security_token =
connect_to_sandbox = False
consumer_key =
consumer_secret =
output_dir = C:\pythoncode\attachments12\
download_attachments = True
batch_size = 100
loglevel = INFO
parent_query = SELECT id from order where CreatedDate >2017-10-01T01:36:47.000+0000 and CreatedDate < 2017-11-01T01:36:47.000+0000
parent_type_craeteddate = where CreatedDate >2017-10-01T01:36:47.000+0000 and CreatedDate < 2017-11-01T01:36:47.000+0000 and Parent.type = 'order'

文件二：download.py

import concurrent.futures

from simple_salesforce import Salesforce

import requests

import os.path

import csv

import logging

def split_into_batches(items, batch_size):
    """Yield consecutive lists of at most ``batch_size`` elements from ``items``.

    Args:
        items: Any iterable. It is copied into a list first, so sets and
            generators are accepted. ``None`` is treated as an empty input.
        batch_size: Maximum number of elements per batch; must be >= 1.

    Yields:
        Lists holding up to ``batch_size`` elements, in list order. The last
        batch may be shorter.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if batch_size < 1:
        # A negative step would silently produce no batches at all.
        raise ValueError("batch_size must be a positive integer")

    full_list = [] if items is None else list(items)
    for start in range(0, len(full_list), batch_size):
        yield full_list[start:start + batch_size]

def create_filename(title, record_id, output_directory):
    """Build the local path under which a downloaded file is stored.

    The file name has the form ``<record_id>-<title>``, with characters that
    are unsafe in file names removed from both parts, and is joined onto
    ``output_directory``.

    Args:
        title: Human-readable name of the file (e.g. the attachment name).
        record_id: Prefix placed before the title. Callers pass either a
            record Id or a free-text name, so it is sanitized as well.
        output_directory: Directory the file will live in, with or without a
            trailing path separator.

    Returns:
        The path as a string.
    """
    # Path separators, shell-hostile characters and characters Windows
    # rejects in file names.
    bad_chars = set(';:!*/\\ ,?><"|')

    def _clean(text):
        # str() keeps the previous "None" rendering for a missing prefix.
        return ''.join(ch for ch in str(text) if ch not in bad_chars)

    base_name = "{0}-{1}".format(_clean(record_id), _clean(title))
    # os.path.join adds the separator only when output_directory lacks one.
    return os.path.join(output_directory, base_name)

def get_record_ids(sf, output_directory, query):
    """Run ``query``, write a ``files.csv`` mapping and return the record Ids.

    Args:
        sf: Salesforce client; only ``sf.query_all(query)`` is used.
        output_directory: Directory that receives ``files.csv``. It is
            created (including missing parents) when it does not exist.
        query: SOQL query. Each returned record must expose ``Id``, ``Name``,
            ``ParentId``, ``Parent`` (with a ``Name`` entry), ``OwnerId``,
            ``CreatedDate``, ``CreatedById`` and ``LastModifiedDate``.

    Returns:
        A set with the ``Id`` of every record returned by the query.
    """
    # makedirs also handles nested paths and an already existing directory.
    os.makedirs(output_directory, exist_ok=True)
    results_path = os.path.join(output_directory, 'files.csv')

    record_ids = set()
    records = sf.query_all(query)

    # Save results file with file mapping and return ids
    with open(results_path, 'w', encoding='UTF-8', newline='') as results_csv:
        file_writer = csv.writer(results_csv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        file_writer.writerow(
            ['ParentId', 'Parent.Name', 'Id', 'VersionData', 'PathOnClient', 'Title', 'OwnerId',
             'CreatedDate', 'CreatedById', 'LastModifiedDate'])

        for attachment in records["records"]:
            record_ids.add(attachment["Id"])

            parent_name = attachment["Parent"]["Name"]
            # Use the same naming scheme as download_attachment so the paths
            # recorded here point at the files that are actually written.
            filename = create_filename(attachment["Id"] + '-' + attachment["Name"],
                                       parent_name,
                                       output_directory)
            logging.info(parent_name)

            # The local path is written to both VersionData and PathOnClient.
            file_writer.writerow(
                [attachment["ParentId"], parent_name, attachment["Id"], filename, filename,
                 attachment["Name"], attachment["OwnerId"], attachment['CreatedDate'],
                 attachment['CreatedById'], attachment['LastModifiedDate']])

    return record_ids

def download_attachment(args):
    """Download the body of one Attachment record and save it to disk.

    Takes a single tuple so it can be used directly with ``executor.map``.

    Args:
        args: ``(record, output_directory, sf)`` where ``record`` is a query
            result exposing ``Id``, ``Name`` and ``Parent`` (with a ``Name``
            entry), ``output_directory`` is the target directory and ``sf``
            provides ``sf_instance`` and ``session_id``.

    Returns:
        A status message: ``"Saved file to <path>"`` on success, otherwise a
        ``"Couldn't download <url>"`` message. Network errors and HTTP
        failures are reported through the message (and a warning log entry)
        instead of being raised, so one bad record does not stop the others.
    """
    record, output_directory, sf = args

    # File name layout: <parent name>-<attachment id>-<attachment name>
    parent_name = record["Parent"]
    filename = create_filename(record["Id"] + '-' + record["Name"], parent_name["Name"], output_directory)

    url = "https://%s%s%s/body" % (sf.sf_instance, '/services/data/v47.0/sobjects/Attachment/', record["Id"])
    logging.debug("Downloading from " + url)

    try:
        # (connect, read) timeouts: without them a stalled connection would
        # block the worker forever.
        response = requests.get(url,
                                headers={"Authorization": "OAuth " + sf.session_id,
                                         "Content-Type": "application/octet-stream"},
                                timeout=(10, 120))
    except requests.RequestException as exc:
        logging.warning("Download of %s failed: %s", url, exc)
        return "Couldn't download %s (%s)" % (url, exc)

    if not response.ok:
        logging.warning("Download of %s failed with HTTP status %s", url, response.status_code)
        return "Couldn't download %s" % url

    # Save File
    with open(filename, "wb") as output_file:
        output_file.write(response.content)
    return "Saved file to %s" % filename

def fetch_files(sf, query_string, output_directory, valid_record_ids=None, batch_size=100):
    """Download every attachment in ``valid_record_ids``, batch by batch.

    For each batch the Ids are appended to ``query_string`` as a
    ``WHERE Id in (...)`` clause, the records are queried and each one is
    handed to ``download_attachment`` on a worker pool.

    Args:
        sf: Salesforce client; ``query_all`` is called here and the object is
            passed on to ``download_attachment``.
        query_string: SOQL ``SELECT ... FROM ...`` statement without a WHERE
            clause.
        output_directory: Directory the files are written to.
        valid_record_ids: Iterable of record Ids to fetch. ``None`` or an
            empty iterable means there is nothing to download.
        batch_size: Number of Ids placed in a single query.
    """
    # Divide the full list of files into batches of batch_size ids
    batches = list(split_into_batches(valid_record_ids or [], batch_size))

    # Downloads are network-bound, so threads are enough and avoid having to
    # pickle the Salesforce client for worker processes.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i, batch in enumerate(batches, start=1):
            logging.info("Processing batch {0}/{1}".format(i, len(batches)))

            batch_query = query_string + ' WHERE Id in (' + ",".join("'" + item + "'" for item in batch) + ')'
            # query_all follows result pagination, so large batches are not
            # truncated to the first page.
            query_response = sf.query_all(batch_query)
            records = query_response["records"]
            logging.debug("Attachment Query found {0} results".format(len(records)))

            args = ((record, output_directory, sf) for record in records)
            for result in executor.map(download_attachment, args):
                logging.debug(result)

            logging.info('All files in batch {0} downloaded'.format(i))

    logging.info('All batches complete')

def main():
    """Read ``download.ini`` and export the matching Salesforce attachments.

    Settings come from the ``[salesforce]`` section: credentials
    (``username``, ``password`` and either ``consumer_key`` +
    ``consumer_secret`` or ``security_token``), ``connect_to_sandbox``,
    ``output_dir``, ``download_attachments``, ``batch_size``, ``loglevel``
    and ``parent_query`` (a SOQL sub-query selecting the parent record Ids).

    Raises:
        FileNotFoundError: If ``download.ini`` cannot be read from the
            current working directory.
    """
    import configparser

    # Get settings from config file. Interpolation is disabled so that a '%'
    # in a password or in a SOQL LIKE pattern is taken literally.
    config = configparser.ConfigParser(interpolation=None)
    if not config.read('download.ini'):
        raise FileNotFoundError("Configuration file 'download.ini' was not found")
    settings = config['salesforce']

    username = settings['username']
    password = settings['password']
    token = settings['security_token']
    consumer_key = settings['consumer_key']
    consumer_secret = settings['consumer_secret']
    is_sandbox = settings.getboolean('connect_to_sandbox')
    download_attachments = settings.getboolean('download_attachments')
    parent_query = settings['parent_query']
    batch_size = settings.getint('batch_size')
    output = settings['output_dir']

    loglevel = logging.getLevelName(settings['loglevel'])
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=loglevel)

    # Query used to list the attachments (and to build files.csv).
    attachment_query = 'SELECT Id, ContentType, Description, Name, OwnerId, ParentId, CreatedById, CreatedDate, Parent.Name, ' \
                       'LastModifiedDate FROM Attachment WHERE ParentId IN ({0})'.format(parent_query)
    # Base query used per batch; fetch_files appends the WHERE clause.
    attachment_query_string = "SELECT Id, ContentType, Description, Name, OwnerId, ParentId, Parent.Name FROM Attachment"

    # 'test' selects the sandbox login host; None keeps the library default.
    domain = 'test' if is_sandbox else None

    # Output
    logging.info('Export Attachments from Salesforce')
    logging.info('Username: ' + username)
    logging.info('Output directory: ' + output)

    # Connect. Credentials come only from download.ini; never hard-code them
    # in this file.
    if consumer_key and consumer_secret:
        sf = Salesforce(username=username, password=password, consumer_key=consumer_key,
                        consumer_secret=consumer_secret, domain=domain)
    else:
        sf = Salesforce(username=username, password=password, security_token=token, domain=domain)
    logging.debug("Connected successfully to {0}".format(sf.sf_instance))

    if not download_attachments:
        return
    if not parent_query.strip():
        # An empty sub-query would produce the invalid SOQL "IN ()".
        logging.warning("parent_query is empty; nothing to download")
        return

    logging.info("Querying to get Attachment Ids...")
    valid_record_ids = get_record_ids(sf=sf, output_directory=output, query=attachment_query)
    logging.info("Found {0} total attachments".format(len(valid_record_ids)))

    fetch_files(sf=sf, query_string=attachment_query_string, valid_record_ids=valid_record_ids,
                output_directory=output, batch_size=batch_size)

# Run the export only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

posted @ 2025-05-19 10:16 赫赫332 阅读(9) 评论(0) 收藏举报

刷新页面返回顶部

salesforce python 下载附件

公告