This repository has been archived by the owner on Mar 5, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathdatastore.py
executable file
·81 lines (68 loc) · 2.44 KB
/
datastore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
# full imports
import argparse
import boto3
import logging
import os
import shutil
# cherry-pick imports
from dotenv import load_dotenv
# imports from my biz
from GitHub_V3 import GitHub_v3 as ghv3_api
from GitHub_V4 import GitHub_v4 as ghv4_api
def _build_parser():
    """Create the command-line parser used by this script.

    Two optional flags are registered: ``--token``/``-t`` and
    ``--logging``/``-l`` (restricted to a fixed set of level names,
    defaulting to ``INFO``).
    """
    cli = argparse.ArgumentParser(
        description="Triggers gathering data from GitHub"
    )
    # No default is given, so the parsed value is None when the flag is omitted.
    cli.add_argument(
        "--token",
        "-t",
        help="GitHub developer token to use instead of one in config",
    )
    cli.add_argument(
        "--logging",
        "-l",
        default="INFO",
        choices=["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"],
        help="Set the log level (default: INFO)",
    )
    return cli


parser = _build_parser()
def upload_files_to_s3(s3, upload_path="output"):
    """
    Uploads files to S3 while preserving directory structure

    Every file found under ``upload_path`` is stored in the bucket named by
    the ``S3_ROOT_BUCKET`` environment variable. The object key is the file's
    path relative to ``upload_path``, always written with ``/`` separators.
    Each file is deleted locally once its upload call returns, and the whole
    ``upload_path`` tree is removed at the end.

    Args:
        s3: Object exposing ``Bucket(name)`` whose result exposes
            ``put_object(Key=..., Body=...)`` (e.g. ``boto3.resource("s3")``).
        upload_path: Local directory to upload and then delete.

    Raises:
        ValueError: If ``S3_ROOT_BUCKET`` is unset or empty.
    """
    bucket_name = os.getenv("S3_ROOT_BUCKET")
    if not bucket_name:
        # Fail with a clear message before touching any local files.
        raise ValueError("S3_ROOT_BUCKET environment variable is not set")

    if not os.path.isdir(upload_path):
        # Nothing was written; avoid FileNotFoundError from rmtree below.
        logging.warning("Nothing to upload: %r is not a directory", upload_path)
        return

    bucket = s3.Bucket(bucket_name)
    for subdir, _dirs, files in os.walk(upload_path):
        for file_name in files:
            full_path = os.path.join(subdir, file_name)
            # Keys must use "/" regardless of the local path separator.
            key = os.path.relpath(full_path, upload_path).replace(os.sep, "/")
            with open(full_path, "rb") as data:
                bucket.put_object(Key=key, Body=data)
            # delete file after upload (only reached if put_object did not raise)
            os.remove(full_path)

    # delete output folder and remaining directories
    shutil.rmtree(upload_path)
if __name__ == "__main__":
    # Script entry point: parse flags, load .env, gather data for each
    # organization listed in GITHUB_ORGS, then upload the results to S3.
    args = parser.parse_args()
    # override=True: values from the .env file replace existing environment values
    load_dotenv(override=True)

    # set logging level
    logging.basicConfig(format="%(levelname)s:%(message)s", level=args.logging)

    # A token given on the command line wins over GITHUB_TOKEN.
    token = args.token if args.token is not None else os.getenv("GITHUB_TOKEN")
    ghv4 = ghv4_api(token)
    ghv3 = ghv3_api(token)

    # GITHUB_ORGS is a comma-separated list; tolerate it being unset and
    # ignore blank entries / surrounding whitespace.
    raw_orgs = os.getenv("GITHUB_ORGS") or ""
    org_names = [name.strip() for name in raw_orgs.split(",") if name.strip()]
    if not org_names:
        raise SystemExit("GITHUB_ORGS environment variable is not set or empty")

    for org_name in org_names:
        try:
            ghv4.write_data_for_org_disk(org_name)
        except Exception:
            # NOTE(review): the original caught GitHubV4Error, which is never
            # imported in this file, so any failure surfaced as a NameError.
            # Catching Exception here keeps the log-and-continue intent;
            # narrow it once the exception class's module is confirmed.
            logging.exception("Failed to write GitHub v4 data for %s", org_name)
        ghv3.write_org_traffic(org_name)

    # now upload the json to S3
    s3 = boto3.resource("s3")
    upload_files_to_s3(s3)