salesforce python 下载附件
文件一:download.ini
[salesforce]
username =
password =
security_token =
connect_to_sandbox = False
consumer_key =
consumer_secret =
output_dir = C:\pythoncode\attachments12\
download_attachments = True
batch_size = 100
loglevel = INFO
parent_query = SELECT id from order where CreatedDate >2017-10-01T01:36:47.000+0000 and CreatedDate < 2017-11-01T01:36:47.000+0000
parent_type_craeteddate = where CreatedDate >2017-10-01T01:36:47.000+0000 and CreatedDate < 2017-11-01T01:36:47.000+0000 and Parent.type = 'order'
文件二:download.py
import concurrent.futures
from simple_salesforce import Salesforce
import requests
import os.path
import csv
import logging
def split_into_batches(items, batch_size):
    """Yield successive lists of at most ``batch_size`` elements from ``items``.

    Args:
        items: Any iterable. It is materialised into a list first so that
            unordered or one-shot iterables (sets, generators) can be sliced.
        batch_size: Maximum number of elements per batch; must be positive.

    Yields:
        Lists holding consecutive slices of the materialised input. The last
        batch may be shorter. Nothing is yielded for an empty input.

    Raises:
        ValueError: If ``batch_size`` is zero or negative. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject it explicitly instead of returning no batches at all.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    full_list = list(items)
    for start in range(0, len(full_list), batch_size):
        yield full_list[start:start + batch_size]
def create_filename(title, record_id, output_directory):
    """Build the local path under which an attachment is stored.

    The result is ``<output_directory>/<record_id>-<title>`` with characters
    that are unsafe in file names removed from both ``record_id`` and
    ``title``.

    Args:
        title: Human-readable part of the name (e.g. the attachment name).
        record_id: Prefix placed before the title. Callers in this file pass
            either a Salesforce Id or a parent record name, so it is
            sanitised as well.
        output_directory: Directory the file will live in; a trailing path
            separator is optional.

    Returns:
        The joined path as a string.
    """
    # Single pass removal of characters that are either invalid in Windows
    # file names (< > : " / \ | ? *) or awkward in shells and CSV (; ! , space).
    strip_table = str.maketrans("", "", ';:!*/\\ ,?><"|')
    clean_title = str(title).translate(strip_table)
    clean_id = str(record_id).translate(strip_table)
    # os.path.join inserts a separator only when output_directory lacks one,
    # so both "out" and "out/" put the file inside the directory.
    return os.path.join(output_directory, "{0}-{1}".format(clean_id, clean_title))
def get_record_ids(sf, output_directory, query):
    """Run the attachment query, write a ``files.csv`` manifest, return the ids.

    Args:
        sf: Connected client exposing ``query_all(soql)``; the result is read
            as a mapping with a ``"records"`` list.
        output_directory: Directory for the manifest; created if missing.
        query: SOQL text. Each returned record is expected to carry the keys
            read below (Id, Name, ParentId, Parent, OwnerId, CreatedDate,
            CreatedById, LastModifiedDate).

    Returns:
        A set with the ``Id`` of every record returned by the query.
    """
    # makedirs (rather than mkdir) so nested output paths can be created and
    # an already-existing directory is not an error.
    os.makedirs(output_directory, exist_ok=True)
    results_path = os.path.join(output_directory, 'files.csv')
    record_ids = set()
    records = sf.query_all(query)

    # Save results file with file mapping and return ids
    with open(results_path, 'w', encoding='UTF-8', newline='') as results_csv:
        file_writer = csv.writer(results_csv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        file_writer.writerow(
            ['ParentId', 'Parent.Name', 'Id', 'VersionData', 'PathOnClient', 'Title', 'OwnerId',
             'CreatedDate', 'CreatedById', 'LastModifiedDate'])
        for attachment in records["records"]:
            record_ids.add(attachment["Id"])
            # NOTE(review): this path is built from (Name, Id) while
            # download_attachment builds its path from different components,
            # so these two columns may not match the file actually saved.
            # Confirm which naming scheme is intended.
            filename = create_filename(attachment["Name"],
                                       attachment["Id"],
                                       output_directory)
            # The Parent relationship may come back as null; fall back to an
            # empty mapping instead of failing on the subscript.
            parent = attachment["Parent"] or {}
            parent_name = parent.get("Name")
            logging.info(parent_name)
            file_writer.writerow(
                [attachment["ParentId"], parent_name, attachment["Id"], filename, filename,
                 attachment["Name"], attachment["OwnerId"], attachment['CreatedDate'],
                 attachment['CreatedById'], attachment['LastModifiedDate']])
    return record_ids
def download_attachment(args):
    """Download the body of one Attachment record and save it to disk.

    Args:
        args: A ``(record, output_directory, sf)`` tuple, packed this way so
            the function can be used with ``executor.map``. ``record`` must
            provide ``Id``, ``Name`` and ``Parent``; ``sf`` must expose
            ``sf_instance`` and ``session_id``.

    Returns:
        A status message: ``"Saved file to <path>"`` on success, otherwise a
        message starting with ``"Couldn't download"``.
    """
    record, output_directory, sf = args

    # The Parent relationship may be null; avoid failing on the subscript.
    parent = record["Parent"] or {}
    filename = create_filename(record["Id"] + '-' + record["Name"], parent.get("Name"), output_directory)
    url = "https://%s%s%s/body" % (sf.sf_instance, '/services/data/v47.0/sobjects/Attachment/', record["Id"])
    logging.debug("Downloading from " + url)

    try:
        # Without a timeout a stalled connection would block this worker
        # forever: 10 s to connect, 300 s between received bytes.
        response = requests.get(url,
                                headers={"Authorization": "OAuth " + sf.session_id,
                                         "Content-Type": "application/octet-stream"},
                                timeout=(10, 300))
    except requests.RequestException as exc:
        # Report the failure instead of raising, so one bad download does not
        # abort the remaining downloads driven by the caller's map().
        return "Couldn't download %s (%s)" % (url, exc)

    if not response.ok:
        return "Couldn't download %s" % url

    # Save File
    with open(filename, "wb") as output_file:
        output_file.write(response.content)
    return "Saved file to %s" % filename
def fetch_files(sf, query_string, output_directory, valid_record_ids=None, batch_size=100):
    """Download every attachment in ``valid_record_ids``, batch by batch.

    For each batch of ids a ``<query_string> WHERE Id in (...)`` query is
    issued and each returned record is handed to ``download_attachment``.

    Args:
        sf: Connected client exposing ``query_all(soql)``.
        query_string: SOQL ``SELECT ... FROM ...`` text without a WHERE
            clause; the id filter is appended here.
        output_directory: Directory the files are written to.
        valid_record_ids: Iterable of record ids; ``None`` or empty means
            there is nothing to download.
        batch_size: Maximum number of ids per query.
    """
    # Treat the default None like an empty collection instead of crashing.
    batches = list(split_into_batches(valid_record_ids or (), batch_size))

    # Downloads are I/O-bound, so threads are sufficient and avoid shipping
    # the client object to worker processes. One pool serves all batches.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for batch_number, batch in enumerate(batches, start=1):
            logging.info("Processing batch {0}/{1}".format(batch_number, len(batches)))
            id_list = ",".join("'" + item + "'" for item in batch)
            batch_query = query_string + ' WHERE Id in (' + id_list + ')'
            # query_all follows result pagination, so a large batch_size
            # cannot silently truncate the record list.
            records = sf.query_all(batch_query)["records"]
            logging.debug("Attachment Query found {0} results".format(len(records)))

            args = ((record, output_directory, sf) for record in records)
            for result in executor.map(download_attachment, args):
                # Make failures visible at the default INFO log level.
                if result.startswith("Saved"):
                    logging.debug(result)
                else:
                    logging.warning(result)
            logging.info('All files in batch {0} downloaded'.format(batch_number))
    logging.info('All batches complete')
def main():
    """Read ``download.ini``, connect to Salesforce and export attachments.

    Settings are taken from the ``[salesforce]`` section: ``username``,
    ``password``, ``consumer_key``, ``consumer_secret``,
    ``connect_to_sandbox``, ``download_attachments``, ``parent_query``,
    ``batch_size``, ``loglevel`` and ``output_dir``. The login below uses the
    consumer key/secret together with username and password;
    ``security_token`` is not passed to it.
    """
    import configparser

    # Interpolation is disabled so that a literal '%' in a password or in a
    # SOQL LIKE pattern is read verbatim instead of raising a parse error.
    config = configparser.ConfigParser(interpolation=None)
    config.read('download.ini')

    settings = config['salesforce']
    username = settings['username']
    password = settings['password']
    consumer_key = settings['consumer_key']
    consumer_secret = settings['consumer_secret']
    is_sandbox = config.getboolean('salesforce', 'connect_to_sandbox')
    download_attachments = config.getboolean('salesforce', 'download_attachments')
    parent_query = settings['parent_query']
    batch_size = config.getint('salesforce', 'batch_size')
    output = settings['output_dir']

    # basicConfig accepts a level name directly.
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=settings['loglevel'].upper())

    # Query used to collect ids and manifest data for every attachment whose
    # parent is selected by parent_query.
    attachment_query = 'SELECT Id, ContentType, Description, Name, OwnerId, ParentId, CreatedById, CreatedDate, Parent.Name, ' \
                       'LastModifiedDate FROM Attachment WHERE ParentId IN ({0})'.format(parent_query)
    # Base query for the per-batch lookups; the id filter is appended later.
    attachment_query_string = "SELECT Id, ContentType, Description, Name, OwnerId, ParentId, Parent.Name FROM Attachment"

    # Honour connect_to_sandbox: 'test' targets a sandbox, 'login' production.
    domain = 'test' if is_sandbox else 'login'

    # Output
    logging.info('Export Attachments from Salesforce')
    logging.info('Username: ' + username)
    logging.info('Output directory: ' + output)

    # Connect (credentials come only from the config file; never hard-code them)
    sf = Salesforce(username=username, password=password, consumer_key=consumer_key,
                    consumer_secret=consumer_secret, domain=domain)
    logging.debug("Connected successfully to {0}".format(sf.sf_instance))

    if download_attachments:
        logging.info("Querying to get Attachment Ids...")
        valid_record_ids = get_record_ids(sf=sf, output_directory=output, query=attachment_query)
        logging.info("Found {0} total attachments".format(len(valid_record_ids)))
        fetch_files(sf=sf, query_string=attachment_query_string, valid_record_ids=valid_record_ids,
                    output_directory=output, batch_size=batch_size)
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
浙公网安备 33010602011771号