-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathworkflow_metrics.py
108 lines (84 loc) · 4.18 KB
/
workflow_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
workflow_metrics.py - Retrieve and evaluate GitHub Actions workflow runs for a repository.
The script uses the GitHub API to retrieve workflow runs for the specified repository or repositories inside an org, and
calculates metrics such as the average duration, median duration, success rate, and total number of runs for
each workflow.
The following environment variables must be set:
- GH_TOKEN: The GitHub token passed to the GitHub CLI for authentication.
- OWNER_NAME: The name of the repository owner (e.g. "myorg").
- START_DATE: The start date of the date range in ISO format (e.g. "2022-01-01").
- END_DATE: The end date of the date range in ISO format (e.g. "2022-01-31").
- REPO_NAME: Optional - The name of the repository (e.g. "myrepo").
- DELAY_BETWEEN_QUERY: Optional - The number of seconds to wait between queries to the GitHub API.
The script uses the following external tools:
- `gh` (GitHub CLI): Used to authenticate with GitHub and retrieve workflow runs.
- `jq`: Used to extract workflow names from the workflow runs JSON.
The script outputs the following files:
- `runs.json`: Workflow runs in JSON, or `org-runs.json`: Workflow runs in JSON for every repo in the org.
- `workflow-stats.csv`: Workflow statistics in CSV, or `org-workflow-stats.csv`: Workflow statistics in CSV for every repo in the org.
Usage: python workflow_metrics.py
"""
import os
import subprocess
import time
import json
# Get environment variables
gh_token = os.getenv("GH_TOKEN")
if not gh_token:
    # The variable read above is GH_TOKEN, so that is the name to report.
    raise ValueError("GH_TOKEN environment variable not set")

owner_name = os.getenv("OWNER_NAME")
if not owner_name:
    raise ValueError("OWNER_NAME environment variable not set")

start_date = os.getenv("START_DATE")
if not start_date:
    raise ValueError("START_DATE environment variable not set")

end_date = os.getenv("END_DATE")
if not end_date:
    raise ValueError("END_DATE environment variable not set")

# Optional settings: a single repository to analyse, and a delay (seconds)
# between repositories when walking a whole organisation.
repo_name = os.getenv("REPO_NAME")
sleep_time = os.getenv("DELAY_BETWEEN_QUERY")

# Authenticate with GitHub CLI by piping the token on stdin.
# NOTE(review): the exit status is intentionally left unchecked, as before;
# gh may refuse `auth login` while GH_TOKEN is exported - confirm before
# adding check=True here.
subprocess.run(['gh', 'auth', 'login', '--with-token'], input=gh_token.encode())

if not repo_name:
    # No repository given: process every repository of the organisation.
    # An argument list (no shell) keeps OWNER_NAME from being interpreted by
    # a shell, and --paginate asks gh to follow every result page instead of
    # stopping after the first one.
    query_output = subprocess.check_output(
        ['gh', 'api', '--paginate', f'orgs/{owner_name}/repos', '--jq', '.[] | .name'],
        text=True,
    )
    # Drop blank lines so an empty listing yields no repositories at all
    # rather than a single empty name.
    repo_names = [name.strip() for name in query_output.splitlines() if name.strip()]

    with open('org-workflow-stats.csv', 'w') as f:
        f.write('repository_name,workflow_name,average_duration,median_duration,success_rate,total_runs\n')

    # Start the aggregated JSON array; records are appended per repository
    # and the array is closed exactly once after the loop.
    with open('org-runs.json', 'w') as f:
        f.write('[\n')
    wrote_record = False

    # Get workflow runs for each repository
    for repo in repo_names:
        # Get workflow runs (the helper script writes runs.json)
        subprocess.run(['python', '/get_workflow_runs.py', owner_name, repo, start_date, end_date])

        # Tag every record from runs.json with its repository and append it
        # to org-runs.json, comma-separating records across repositories.
        with open('runs.json', 'r') as f1:
            data = json.load(f1)
        with open('org-runs.json', 'a') as f2:
            for record in data:
                record['repository_name'] = str(repo)
                if wrote_record:
                    f2.write(',\n')
                json.dump(record, f2)
                wrote_record = True

        # Evaluate workflow runs statistics (writes workflow-stats.csv)
        subprocess.run(['python', '/evaluate_workflow_runs.py'])

        # Copy every data row of workflow-stats.csv (header skipped) into
        # org-workflow-stats.csv, prefixed with the repository name.
        with open('workflow-stats.csv', 'r') as f:
            lines = f.readlines()
        with open('org-workflow-stats.csv', 'a') as f2:
            for line in lines[1:]:
                row = line.rstrip('\r\n')
                if row:
                    # Always terminate the row so the next repository's
                    # first row cannot be glued onto this one.
                    f2.write(f'{repo},{row}\n')

        if sleep_time:
            print(f'  Sleeping for {sleep_time} seconds to prevent rate limiting...')
            time.sleep(int(sleep_time))

    # Close the JSON array once, after all repositories were processed.
    with open('org-runs.json', 'a') as f:
        f.write('\n]')
else:
    # Get workflow runs
    subprocess.run(['python', '/get_workflow_runs.py', owner_name, repo_name, start_date, end_date])

    # Evaluate workflow runs statistics
    subprocess.run(['python', '/evaluate_workflow_runs.py'])