CSV-Datei aus S3 zeilenweise mit Python auf AWS Lambda verarbeiten
Ich versuche, eine .csv-Datei (30 MB), die in einem S3-Bucket liegt, mit AWS Lambda (Python) zu verarbeiten. Ich habe meinen Python-Code zur Verarbeitung der Datei lokal geschrieben und versuche nun, ihn mithilfe von Lambda auszuführen. Es fällt mir schwer, die Datei zeilenweise einzulesen.
Bitte lassen Sie mich wissen, wie ich die Datei zeilenweise mit boto3- oder S3-Methoden durchlaufen kann. Bitte helfen Sie mir dabei so bald wie möglich. Danke.
In Lambda:
# Fetch the CSV object that triggered this Lambda invocation and decode it
# to text. `event` is the Lambda handler's event argument and `boto3` must
# already be imported by the surrounding handler module.
from urllib.parse import unquote_plus

s3 = boto3.client("s3")

# Only the first record of the notification is processed.
file_obj = event["Records"][0]

# Object keys in S3 event notifications are URL-encoded (a space arrives as
# '+', special characters as %XX), so decode before passing the key back to
# S3; otherwise get_object fails with NoSuchKey for such names.
filename = unquote_plus(str(file_obj['s3']['object']['key']))

# NOTE(review): the bucket name is hard-coded; the triggering bucket is also
# available as file_obj['s3']['bucket']['name'] -- confirm which is intended.
fileObj = s3.get_object(Bucket='sarapuri-weather-rawdata', Key=filename)

# Reads the whole object into memory as one UTF-8 string. To walk it row by
# row, wrap it for the csv module, e.g. csv.reader(io.StringIO(file_content)).
file_content = fileObj["Body"].read().decode('utf-8')
Mein ursprünglicher Code:
import csv
import pandas as pd
import datetime
#from datetime import datetime,timedelta
import numpy as np
# Column order of the generated CSV file.
HEADER = ['Time', 'Latitude', 'Longitude', 'Org_Units', 'Org_Unit_Type',
          'Conv_Units', 'Conv_Unit_Type', 'Variable_Name']

# Expected layout of the Time column, e.g. "01-31-2020, 13:45".
TIME_FORMAT = '%m-%d-%Y, %H:%M'


def convert_units(value, unit_type):
    """Convert one measurement to its target unit.

    Args:
        value: The original measurement as read from the CSV (numeric text).
        unit_type: The original unit label ('m s-1', 'm' or 'Pa').

    Returns:
        A ``(converted_value, converted_unit_label)`` tuple. The converted
        value is rounded to two decimals. For an unrecognized unit type the
        tuple is ``('', '')`` so the output row gets empty cells instead of
        inheriting the previous row's conversion.

    Raises:
        ValueError: If the unit type is recognized but *value* is not numeric.
    """
    if unit_type == 'm s-1':
        # NOTE(review): 1.151 looks like the knots-to-mph factor; m/s to mph
        # would be about 2.237 -- confirm the intended conversion.
        return round(float(value) * 1.151, 2), 'miles'
    if unit_type == 'm':
        return round(float(value) / 1609.344, 2), 'miles'
    if unit_type == 'Pa':
        return round(float(value) / 6894.757, 2), 'Psi'
    return '', ''


def main(in_path='sample.csv', out_path=None):
    """Read the raw CSV, append converted units and write a new CSV.

    Args:
        in_path: Path of the input CSV. Its first row is treated as a header
            and skipped; the first six columns of every other row are read as
            time, latitude, longitude, value, unit type and variable name.
        out_path: Path of the output CSV. Defaults to
            ``Write<YYYYmmdd-HHMMSS>.csv`` in the current directory.

    Returns:
        The path of the file that was written.

    Raises:
        IndexError: If a non-empty data row has fewer than six columns.
        ValueError: If a Time value does not match ``TIME_FORMAT`` or a value
            with a recognized unit type is not numeric.
    """
    if out_path is None:
        # need to rename based on the org file name
        stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        out_path = 'Write' + stamp + '.csv'

    # newline='' lets the csv module control line endings itself, which
    # avoids blank lines between rows on Windows.
    with open(in_path, 'r', newline='') as src, \
            open(out_path, 'w', newline='') as dst:
        reader = csv.reader(src, delimiter=',')
        writer = csv.writer(dst, delimiter=',')
        writer.writerow(HEADER)

        next(reader, None)  # skip the input header row
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue

            time_str, org_lat, org_long = row[0], row[1], row[2]
            org_units, org_unit_type, variable_name = row[3], row[4], row[5]

            conv_units, conv_unit_type = convert_units(org_units, org_unit_type)

            # The parsed timestamp is not written out; parsing is kept so a
            # malformed Time value still aborts the run.
            datetime.datetime.strptime(time_str, TIME_FORMAT)

            # Write the latitude/longitude read from this row.
            writer.writerow([time_str, org_lat, org_long, org_units,
                             org_unit_type, conv_units, conv_unit_type,
                             variable_name])

    print("done")
    return out_path


if __name__ == "__main__":
    main()