diff options
author | Nao Ueda <nao.uedder@gmail.com> | 2020-03-11 09:29:36 +0900 |
---|---|---|
committer | Nao Ueda <nao.uedder@gmail.com> | 2020-03-11 09:29:36 +0900 |
commit | ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63 (patch) | |
tree | 276a36f373efaaad733ea95ba0997e4ecd547c45 /ppad.py | |
download | ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.tar.gz ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.tar.bz2 ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.zip |
initial commit.
Diffstat (limited to 'ppad.py')
-rwxr-xr-x | ppad.py | 134 |
1 files changed, 134 insertions, 0 deletions
@@ -0,0 +1,134 @@ +#!/usr/bin/env python3 + +import sys +import os +import time +import datetime +from concurrent import futures +import pytz +from dateutil import parser +import requests +import progressbar + +PAPERTRAIL_API_TOKEN = os.environ.get('PAPERTRAIL_API_TOKEN', None) +ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json' +DEFAULT_REMAIN_SIZE = 25 +HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN} + + +def get_ppheader(response): + hist = [response] + response.history + for h in hist: + if 'X-Rate-Limit-Remaining' not in h.headers: + continue + + return ( + int(h.headers['X-Rate-Limit-Limit']), + int(h.headers['X-Rate-Limit-Remaining']), + int(h.headers['X-Rate-Limit-Reset']) + ) + + +def do_download(url, filename, index): + while True: + try: + with requests.Session() as s: + res = s.get(url, headers=HEADERS) + finishtime = time.time_ns() + (limit, rem, reset) = get_ppheader(res) + + if 200 <= res.status_code < 300: + with open(filename, "wb") as f: + for chunk in res.iter_content(chunk_size=128): + f.write(chunk) + return (limit, rem, reset, finishtime, index) + except requests.ConnectionError: + time.sleep(1) + + +def parse_span(): + if len(sys.argv) == 1: + return None, None + + f = t = None + fromstr = tostr = '' + span = sys.argv[1].split('~') + if len(span) == 1: + fromstr = tostr = span[0] + else: + [fromstr, tostr, *_] = span + + if fromstr: + f = parser.isoparse(fromstr) + if not f.tzname(): + utc = pytz.timezone('UTC') + f = utc.localize(f) + + if tostr: + t = parser.isoparse(tostr) + if not t.tzname(): + utc = pytz.timezone('UTC') + t = utc.localize(t) + + if tostr and fromstr == tostr: + t = t + datetime.timedelta(days=1) + + return f, t + + +def main(): + if not PAPERTRAIL_API_TOKEN: + print('Not set the environment variable `PAPERTRAIL_API_TOKEN`', + file=sys.stderr) + sys.exit(1) + + _from, to = parse_span() + + r = requests.get(ARCHIVES_URL, headers=HEADERS) + r.raise_for_status() + + archives = [ar for ar in r.json() + if (not _from or _from <= parser.isoparse(ar["start"])) + and (not to or parser.isoparse(ar["end"]) < to) + ] + + with futures.ThreadPoolExecutor(max_workers=10) as executor: + future_list = [] + remain = DEFAULT_REMAIN_SIZE + until_reset_sec = 0 + lasttime = time.time_ns() + with progressbar.ProgressBar(max_value=len(archives)) as bar: + for i, ar in enumerate(archives): + future_list.append( + executor.submit( + do_download, + ar['_links']['download']['href'], + ar['filename'], + i + ) + ) + + if len(future_list) < remain: + time.sleep(0.2) + continue + + for future in future_list: + (_, rem, reset, finishtime, index) = future.result() + if finishtime > lasttime: + remain = rem + until_reset_sec = reset + lasttime = finishtime + bar.update(index) + + future_list = [] + + if remain <= 0: + time.sleep(until_reset_sec) + remain = DEFAULT_REMAIN_SIZE + continue + + time.sleep(0.2) + + +if __name__ == "__main__": + main() |