mercredi 7 juillet 2021

How does a JSON Schema, in AWS Lambda, using Python, format check a string for a specific datetime pattern?

I currently have a lambda function which takes a .csv file as an input (uploaded onto S3 which triggers the lambda), converts it into an array of json objects and injects each into DynamoDB.

This lambda function includes a Json schema for validation purposes, so for example, a field name is required/mandatory. I'm currently trying to implement a validation for the field activeFrom so it is of format %Y-%m-%dT%H:%M:%S.00-00:00. However, the following error appears:

[ERROR] KeyError: 'activeFrom' Traceback (most recent call last): File "/var/task/untitled1.py", line 62, in lambda_handler elem['activeFrom'] = elem.pop('activeFrom') [ERROR] KeyError: 'activeFrom' Traceback (most recent call last): File "/var/task/untitled1.py", line 62, in lambda_handler elem['activeFrom'] = elem.pop('activeFrom').

The input(.csv file) is below and takes 3 headings and its content. The rest is manually injected into DynamoDB by the lambda:

Prefix,Provider, activeFrom
SMBTT,001 Covid Test, 2021-07-25T11:32:56.00-00:00
RIGHT,0044 COVID Test, 2021-07-25T11:32:56.00-00:00

The lambda code: FYI the lambda function uses an external json schema library as a layer so the import validate works.

import json
import boto3
import ast
import csv
import os
import datetime as dt
from datetime import datetime
import jsonschema 
from jsonschema import validate

s3 = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
providerCodesSchema = {
    "type": "array",
    "items": {
      "type": "object", 
      "properties": {
          "providerCode": {"type": "string", "maxLength": 5},
          "providerName": {"type": "string"},
          "activeFrom": {"type": "string", "format": "date", "pattern": "(((19|20)([2468][048]|[13579][26]|0[48])|2000)[/-]02[/-]29|((19|20)[0-9]{2}[/-](0[469]|11)[/-](0[1-9]|[12][0-9]|30)|(19|20)[0-9]{2}[/-](0[13578]|1[02])[/-](0[1-9]|[12][0-9]|3[01])|(19|20)[0-9]{2}[/-]02[/-](0[1-9]|1[0-9]|2[0-8])))"},
          "activeTo": {"type": "string", "format": "date", "pattern": "(((19|20)([2468][048]|[13579][26]|0[48])|2000)[/-]02[/-]29|((19|20)[0-9]{2}[/-](0[469]|11)[/-](0[1-9]|[12][0-9]|30)|(19|20)[0-9]{2}[/-](0[13578]|1[02])[/-](0[1-9]|[12][0-9]|3[01])|(19|20)[0-9]{2}[/-]02[/-](0[1-9]|1[0-9]|2[0-8])))"},
          "apiActiveFrom": {"type": "string"},
          "apiActiveTo": {"type": "string"},
          "countThreshold": {"type": "string"}
      },
    "required": ["providerName"]
    }
}

def lambda_handler(event, context):

    datestamp = dt.datetime.now().strftime("%Y/%m/%d")
    timestamp = dt.datetime.now().strftime("%s")
    updateTime = dt.datetime.now().strftime("%Y/%m/%d/%H:%M:%S")
    nowdatetime = dt.datetime.now()
    yesterday = nowdatetime - dt.timedelta(days=1)
    nintydaysfromnow = nowdatetime + dt.timedelta(days=90)
    filename_json = "/tmp/file_{ts}.json".format(ts=timestamp)
    filename_csv = "/tmp/file_{ts}.csv".format(ts=timestamp)
    keyname_s3 = "/output/{ds}/{ts}.json".format(ds=datestamp, ts=timestamp)
    json_data = []

    for record in event['Records']:
        bucket_name = record['s3']['bucket']['name']
        key_name = record['s3']['object']['key']

    s3_object = s3.get_object(Bucket=bucket_name, Key=key_name)
    data = s3_object['Body'].read()
    contents = data.decode('latin')

    with open(filename_csv, 'a', encoding='utf-8') as csv_data:
        csv_data.write(contents)

    with open(filename_csv, encoding='utf-8-sig') as csv_data:
        csv_reader = csv.DictReader(csv_data)

        for csv_row in csv_reader:
            json_data.append(csv_row)
        for elem in json_data:
            elem['providerCode'] = elem.pop('Prefix')
            elem['providerName'] = elem.pop('Provider')
            elem['activeFrom'] = elem.pop('activeFrom')
        for element in json_data:
            element['activeTo'] = nintydaysfromnow.strftime("%Y-%m-%dT%H:%M:%S.00-00:00")
            element['apiActiveFrom'] = " "
            element['apiActiveTo'] = " "
            element['countThreshold'] = "3"
            element['updateDate'] = updateTime

# Validate will raise exception if given json is not
# what is described in schema.
        try:
               validate(instance=json_data, schema=providerCodesSchema)
        except jsonschema.exceptions.ValidationError as err:
            print(err)
            err = "Given JSON data is InValid"
            return None
    
    with open(filename_json, 'w', encoding='utf-8-sig') as json_file:
        json_file.write(json.dumps(json_data, default=str))

    with open(filename_json, 'r', encoding='utf-8-sig') as json_file_contents:
        response = s3.put_object(Bucket=bucket_name, Key=keyname_s3, Body=json_file_contents.read())

    for jsonElement in json_data:
        table = dynamodb.Table('-loader-')
        table.put_item(Item=jsonElement)
   #     print("finished enriching JSON")

    os.remove(filename_csv)
    os.remove(filename_json)

    return None

Any help on why the error persists is much appreciated.

Aucun commentaire:

Enregistrer un commentaire