-
Notifications
You must be signed in to change notification settings - Fork 330
Expand file tree
/
Copy pathlambda_comprehend_batch_avg.py
More file actions
75 lines (58 loc) · 2.17 KB
/
lambda_comprehend_batch_avg.py
File metadata and controls
75 lines (58 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
__author__ = "Chirag Rathod (Srce Cde)"
__license__ = "MIT"
__email__ = "chiragr83@gmail.com"
__maintainer__ = "Chirag Rathod (Srce Cde)"
import json
import urllib.parse

import boto3
# Module-level AWS clients: created once when this module is imported and
# shared by every call to lambda_handler.
# S3 client: used to fetch the transcript object and to store the result.
s3 = boto3.client("s3")
# Comprehend client: used for batch sentiment detection.
comprehend = boto3.client("comprehend")
def lambda_handler(event, context):
    """Score the sentiment of a transcript stored in S3 and save the result.

    The handler reads the JSON object referenced by the first record of the
    S3 event, extracts the text at ``results.transcripts[0].transcript``,
    sends it to Comprehend in chunks, averages the per-chunk sentiment scores
    and writes the resulting JSON to the destination bucket under the same
    object key.

    Args:
        event: S3 event notification payload. A falsy event is ignored.
        context: Lambda context object (unused).

    Returns:
        None. The averaged sentiment is written to S3 as a side effect.

    Raises:
        ValueError: If the transcript text is empty, so there is nothing to
            send to Comprehend.
    """
    if not event:
        return None

    s3_object = event["Records"][0]["s3"]
    bucket_name = s3_object["bucket"]["name"]
    # Object keys arrive URL-encoded in S3 event notifications (spaces become
    # "+", other characters "%XX"); decode before using the key with S3.
    file_name = urllib.parse.unquote_plus(s3_object["object"]["key"])

    file_obj = s3.get_object(Bucket=bucket_name, Key=file_name)
    transcript_result = json.loads(file_obj["Body"].read())
    paragraph = transcript_result["results"]["transcripts"][0]["transcript"]

    chunks = data_chunk(paragraph)
    if not chunks:
        raise ValueError(
            "Transcript %r in bucket %r is empty; nothing to analyse"
            % (file_name, bucket_name)
        )

    # BatchDetectSentiment accepts a limited number of documents per request
    # (25), so long transcripts are submitted in several calls and the
    # per-document results are merged before averaging.
    max_documents_per_call = 25
    results = []
    for start in range(0, len(chunks), max_documents_per_call):
        batch_response = comprehend.batch_detect_sentiment(
            TextList=chunks[start:start + max_documents_per_call],
            LanguageCode="en",
        )
        results.extend(batch_response["ResultList"])

    final_response = average_sentiment({"ResultList": results})

    # NOTE(review): the destination bucket is hard-coded; presumably a
    # placeholder to be replaced per deployment -- confirm.
    s3.put_object(Bucket="bucket-name", Key=file_name, Body=final_response)
    return None
def data_chunk(paragraph, chunk_size=5000):
    """Split *paragraph* into consecutive pieces of at most *chunk_size* items.

    The text is chunked because of the Comprehend input-size limitation.
    NOTE(review): slicing counts characters, not UTF-8 bytes -- confirm this
    is sufficient for transcripts containing non-ASCII text.

    Args:
        paragraph: Text to split. An empty or ``None`` value yields ``[]``.
        chunk_size: Maximum length of each piece; must be positive.

    Returns:
        A list of strings that, concatenated in order, reproduce the input.

    Raises:
        ValueError: If *chunk_size* is not positive while there is text to
            split (a non-positive size could never consume the input).
    """
    if not paragraph:
        return []
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Stepped slicing avoids re-copying the remaining text on every pass.
    return [
        str(paragraph[start:start + chunk_size])
        for start in range(0, len(paragraph), chunk_size)
    ]
def average_sentiment(response):
    """Average per-document sentiment scores into a single JSON result.

    Args:
        response: Mapping with a ``"ResultList"`` entry. Each item must hold
            a ``"SentimentScore"`` mapping with numeric ``"Positive"``,
            ``"Negative"``, ``"Neutral"`` and ``"Mixed"`` values.

    Returns:
        A JSON string encoding a one-element list. The element carries the
        upper-cased label with the highest mean score under ``"Sentiment"``
        and the four mean scores under ``"SentimentScore"``.

    Raises:
        ValueError: If ``"ResultList"`` is empty, because no mean can be
            computed from zero documents.
    """
    results = response["ResultList"]
    if not results:
        raise ValueError("ResultList is empty; cannot average sentiment scores")

    labels = ("Positive", "Negative", "Neutral", "Mixed")
    totals = dict.fromkeys(labels, 0)
    for result in results:
        scores = result["SentimentScore"]
        for label in labels:
            totals[label] += scores[label]

    total_record = len(results)
    averages = {label: totals[label] / total_record for label in labels}

    # max() returns the first maximal key, so ties resolve in the order
    # Positive, Negative, Neutral, Mixed.
    dominant = max(averages, key=averages.get)

    return json.dumps(
        [
            {
                "Sentiment": dominant.upper(),
                "SentimentScore": averages,
            }
        ]
    )