1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
import sys
import os
import time
from concurrent import futures
import requests
import progressbar
from .lib.util import parse_date, parse_argv
# Endpoint queried by main() for the list of available log archives.
ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json'

# API token taken from the environment; None when the variable is unset.
PAPERTRAIL_API_TOKEN = os.environ.get('PAPERTRAIL_API_TOKEN')
# Header set sent with every request made by this module.
HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN}

# Batch size used by main() before any rate-limit header has been read,
# and again after it has waited for a rate-limit reset.
DEFAULT_REMAIN_SIZE = 25
# Pause, in seconds, that main() inserts between download submissions.
MIN_INTERVAL_SEC = 0.1
def get_ppheader(response):
    """Extract the rate-limit headers from a response or its redirect history.

    The final response is inspected first, then each entry of
    ``response.history`` in order. The first one whose headers contain
    ``X-Rate-Limit-Remaining`` wins.

    Args:
        response: Object exposing ``headers`` (a mapping) and ``history``
            (a list of objects with the same shape).

    Returns:
        ``(limit, remaining, reset)`` as ints, read from the
        ``X-Rate-Limit-Limit``, ``X-Rate-Limit-Remaining`` and
        ``X-Rate-Limit-Reset`` headers, or ``None`` when no response in the
        chain carries ``X-Rate-Limit-Remaining``.

    Raises:
        KeyError: If the matching response lacks one of the other two headers.
        ValueError: If a header value is not an integer literal.
    """
    header_names = (
        'X-Rate-Limit-Limit',
        'X-Rate-Limit-Remaining',
        'X-Rate-Limit-Reset',
    )
    candidates = [response] + response.history
    for candidate in candidates:
        headers = candidate.headers
        if 'X-Rate-Limit-Remaining' in headers:
            return tuple(int(headers[name]) for name in header_names)
    return None
def do_download(url, filename, index):
    """Download one archive to ``filename``, retrying on connection failures.

    The request is repeated, with a one-second pause, for as long as it fails
    with a connection-level error. Once a response arrives its rate-limit
    headers are returned whatever the status code is; the body is written to
    disk only for a 2xx status.

    NOTE(review): the original indentation of this function was lost, so this
    assumes the ``return`` ran for every response, not only successful ones.
    Confirm against the upstream file.

    Args:
        url: Download URL of the archive.
        filename: Path the response body is written to, in binary mode.
        index: Value handed back unchanged as the last element of the result.

    Returns:
        ``(limit, rem, reset, finishtime, index)``. The first three come from
        ``get_ppheader``; ``finishtime`` is ``time.time_ns()`` taken as soon
        as the response headers were received.

    Raises:
        TypeError: If ``get_ppheader`` returns ``None`` because no response in
            the redirect chain carried rate-limit headers.
    """
    chunk_size = 64 * 1024
    while True:
        try:
            with requests.Session() as s:
                # stream=True keeps the body on the socket so iter_content
                # really streams it instead of slicing an in-memory copy of
                # the whole archive.
                res = s.get(url, headers=HEADERS, stream=True)
                finishtime = time.time_ns()
                (limit, rem, reset) = get_ppheader(res)
                if 200 <= res.status_code < 300:
                    # "wb" truncates, so a retry after a mid-body failure
                    # starts the file again from scratch.
                    with open(filename, "wb") as f:
                        for chunk in res.iter_content(chunk_size=chunk_size):
                            f.write(chunk)
                else:
                    # Report the skipped archive instead of dropping it
                    # silently; the rate-limit values are still returned.
                    print(
                        f"download failed with HTTP {res.status_code}: "
                        f"{filename}",
                        file=sys.stderr,
                    )
            return (limit, rem, reset, finishtime, index)
        except (requests.ConnectionError,
                requests.exceptions.ChunkedEncodingError):
            # ChunkedEncodingError is what a connection dropped mid-body
            # surfaces as; treat it like any other connection failure.
            time.sleep(1)
def _collect_batch(batch, bar, remain, until_reset_sec, lasttime):
    """Wait for each future in ``batch`` and fold its rate-limit data into the state.

    Returns the updated ``(remain, until_reset_sec, lasttime)`` triple. Only
    the result with the newest ``finishtime`` seen so far overrides the
    rate-limit values.
    """
    for future in batch:
        # result() re-raises whatever the worker raised, so a failed download
        # surfaces here instead of being lost.
        (_, rem, reset, finishtime, index) = future.result()
        if finishtime > lasttime:
            remain = rem
            until_reset_sec = reset
            lasttime = finishtime
        # index is 0-based; the bar counts completed archives.
        bar.update(index + 1)
    return remain, until_reset_sec, lasttime


def main():
    """Download the Papertrail log archives that fall inside the requested range.

    The date range comes from ``parse_argv(sys.argv)``; a bound that is falsy
    disables that side of the filter. Downloads run on a 10-worker thread
    pool and are submitted in batches sized by the remaining rate-limit quota
    reported by the most recent response.

    Exits with status 1 when ``PAPERTRAIL_API_TOKEN`` is not set. Any error
    raised while listing the archives (``raise_for_status``) or by a download
    worker propagates to the caller.
    """
    if not PAPERTRAIL_API_TOKEN:
        print('Not set the environment variable `PAPERTRAIL_API_TOKEN`',
              file=sys.stderr)
        sys.exit(1)

    date_from, date_to = parse_argv(sys.argv)

    print("fetching log archives information ...", end="\r", file=sys.stderr)
    r = requests.get(ARCHIVES_URL, headers=HEADERS)
    r.raise_for_status()

    archives = [
        ar for ar in r.json()
        if (
            # If `date_from` is None,
            # then it gets archives without `date_from` limitation
            ((not date_from) or date_from <= parse_date(ar["start"]))
            # ... and `date_to` is as well.
            and ((not date_to) or parse_date(ar["end"]) < date_to)
        )
    ]

    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_list = []
        remain = DEFAULT_REMAIN_SIZE
        until_reset_sec = 0
        lasttime = time.time_ns()
        with progressbar.ProgressBar(max_value=len(archives)) as bar:
            for i, ar in enumerate(archives):
                future_list.append(
                    executor.submit(
                        do_download,
                        ar['_links']['download']['href'],
                        ar['filename'],
                        i
                    )
                )

                # Keep submitting until the batch is as large as the quota
                # we believe is left.
                if len(future_list) < remain:
                    time.sleep(MIN_INTERVAL_SEC)
                    continue

                remain, until_reset_sec, lasttime = _collect_batch(
                    future_list, bar, remain, until_reset_sec, lasttime
                )
                future_list = []

                if remain <= 0:
                    # Quota exhausted: wait for the reset, then assume the
                    # default quota again.
                    time.sleep(until_reset_sec)
                    remain = DEFAULT_REMAIN_SIZE
                    continue

                time.sleep(MIN_INTERVAL_SEC)

            # The last, partially filled batch never reaches the drain inside
            # the loop. Collect it here so its errors surface and the bar
            # only completes once every download has finished.
            _collect_batch(
                future_list, bar, remain, until_reset_sec, lasttime
            )
|