from io import BytesIO
import requests
import pdfplumber
import re
import boto3
from boto3.dynamodb.conditions import Key, Attr
import json
def lambda_handler(event, context):
    """Download a fixed PDF, extract its text and store it in S3 as JSON.

    The PDF at the hard-coded URL is fetched, the text of every page is
    concatenated, all newline and space characters are removed, and the
    result is written to the ``fenci`` bucket under the key ``json.txt``
    as the JSON array ``[{"id": 1, "name": <text>}]``.

    Args:
        event: Lambda invocation payload; not used.
        context: Lambda runtime context; not used.

    Returns:
        dict: ``{'statusCode': "hello"}`` once the object has been written.

    Raises:
        requests.RequestException: if the download fails, times out, or
            the server answers with an HTTP error status.
    """
    url = "http://static.cninfo.com.cn/finalpage/2022-02-08/1212324031.PDF"

    # Without a timeout a stalled server would block until Lambda itself
    # kills the invocation; fail fast instead.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors here rather than feeding an error page to the
    # PDF parser.
    response.raise_for_status()

    with pdfplumber.open(BytesIO(response.content)) as pdf:
        # "or ''" guards against extract_text() yielding None for a page
        # with no extractable text, which would break the join.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join once, then drop every newline and space character.
    text = "".join(page_texts).replace("\n", "").replace(" ", "")

    records = [{"id": 1, "name": text}]
    # ensure_ascii=False keeps non-ASCII characters readable in the stored
    # file instead of \uXXXX escapes.
    payload = json.dumps(records, ensure_ascii=False)

    s3 = boto3.resource('s3')
    bucket_name = 'fenci'
    object_key = 'json.txt'
    # Encode explicitly so the stored bytes are UTF-8 by construction.
    s3.Object(bucket_name, object_key).put(Body=payload.encode("utf-8"))

    return {
        'statusCode': "hello",
    }
import json
import boto3
def lambda_handler(event, context):
    """Return the contents of the ``json.txt`` object in the ``fenci`` bucket.

    Args:
        event: Lambda invocation payload; not used.
        context: Lambda runtime context; not used.

    Returns:
        list[dict]: a single-element list ``[{'id': 0, 'name': <text>}]``
        where ``<text>`` is the object's body decoded to ``str``.

    Raises:
        UnicodeDecodeError: if the stored object is not valid UTF-8.
    """
    s3 = boto3.resource('s3')
    bucket_name = 'fenci'
    object_key = 'json.txt'

    stored_object = s3.Object(bucket_name, object_key)
    raw_body = stored_object.get()['Body'].read()

    # read() yields bytes, which cannot be JSON-serialized when the handler's
    # return value is marshalled; decode to str first.
    # NOTE(review): assumes the object was written as UTF-8 text - confirm
    # against whatever produces json.txt.
    body_text = raw_body.decode('utf-8')

    return [{
        'id': 0,
        'name': body_text
    }]