From ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63 Mon Sep 17 00:00:00 2001
From: Nao Ueda
Date: Wed, 11 Mar 2020 09:29:36 +0900
Subject: initial commit.

---
 README.md |  16 ++++++++
 ppad.py   | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+)
 create mode 100644 README.md
 create mode 100755 ppad.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b096a61
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+# Papertrail log archives downloader
+
+## Usage
+
+Set your API token in the `PAPERTRAIL_API_TOKEN` environment variable before running the script.
+
+```bash
+$ PAPERTRAIL_API_TOKEN=YOUR_TOKEN ./ppad.py # Download all the log archives
+$ PAPERTRAIL_API_TOKEN=YOUR_TOKEN ./ppad.py 2020-01-01~2020-02-01 # Download the archives logged in January 2020
+$ PAPERTRAIL_API_TOKEN=YOUR_TOKEN ./ppad.py 2020-01-01~ # Specify the start date (the start date's archive is included)
+$ PAPERTRAIL_API_TOKEN=YOUR_TOKEN ./ppad.py ~2020-02-01 # Specify the end date (the end date's archive is NOT included)
+```
+
+Dates must be given in ISO 8601 format.
+
+(The script uses [dateutil.isoparse](https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.isoparse))
diff --git a/ppad.py b/ppad.py
new file mode 100755
index 0000000..a4b50eb
--- /dev/null
+++ b/ppad.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+"""Download Papertrail log archives, optionally limited to a date span.
+
+Usage::
+
+    PAPERTRAIL_API_TOKEN=YOUR_TOKEN ./ppad.py [FROM~TO | FROM~ | ~TO | DATE]
+
+Dates are parsed with ``dateutil.parser.isoparse``; a value without a UTC
+offset is interpreted as UTC.
+"""
+
+import sys
+import os
+import time
+import datetime
+from concurrent import futures
+import pytz
+from dateutil import parser
+import requests
+import progressbar
+
+PAPERTRAIL_API_TOKEN = os.environ.get('PAPERTRAIL_API_TOKEN', None)
+ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json'
+DEFAULT_REMAIN_SIZE = 25
+HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN}
+# Seconds without network activity before a request attempt is abandoned.
+REQUEST_TIMEOUT = 60
+# Seconds to pause before retrying a failed download attempt.
+RETRY_DELAY_SEC = 1
+# Bytes written per chunk while streaming an archive to disk.
+CHUNK_SIZE = 64 * 1024
+
+
+def get_ppheader(response):
+    """Return the rate-limit headers found on *response* or its redirects.
+
+    The final response is checked first, then each entry of
+    ``response.history``; the first one carrying
+    ``X-Rate-Limit-Remaining`` is used.
+
+    Returns:
+        A ``(limit, remaining, reset)`` tuple of ints, or ``None`` when no
+        response in the chain carries the rate-limit headers.
+    """
+    for res in [response] + response.history:
+        if 'X-Rate-Limit-Remaining' not in res.headers:
+            continue
+
+        return (
+            int(res.headers['X-Rate-Limit-Limit']),
+            int(res.headers['X-Rate-Limit-Remaining']),
+            int(res.headers['X-Rate-Limit-Reset']),
+        )
+    return None
+
+
+def do_download(url, filename, index):
+    """Download *url* into *filename*, retrying on transient failures.
+
+    Connection errors, timeouts, HTTP 429 and HTTP 5xx responses are retried
+    after a pause. Any other non-2xx status raises ``requests.HTTPError``
+    instead of being retried forever.
+
+    Returns:
+        ``(limit, remaining, reset, finishtime, index)``. The first three
+        values come from the rate-limit headers (with fallbacks when they
+        are absent), ``finishtime`` is ``time.time_ns()`` taken when the
+        response headers arrived, and ``index`` is passed through as-is.
+    """
+    retryable = (
+        requests.ConnectionError,
+        requests.Timeout,
+        requests.exceptions.ChunkedEncodingError,
+    )
+    with requests.Session() as s:
+        while True:
+            try:
+                with s.get(url, headers=HEADERS, stream=True,
+                           timeout=REQUEST_TIMEOUT) as res:
+                    finishtime = time.time_ns()
+                    # Fall back to neutral values so the caller can always
+                    # unpack three ints, even without rate-limit headers.
+                    limit, rem, reset = get_ppheader(res) or (
+                        DEFAULT_REMAIN_SIZE, DEFAULT_REMAIN_SIZE, 0)
+                    status = res.status_code
+
+                    if 200 <= status < 300:
+                        with open(filename, "wb") as f:
+                            for chunk in res.iter_content(CHUNK_SIZE):
+                                f.write(chunk)
+                        return (limit, rem, reset, finishtime, index)
+
+                    if status == 429:
+                        # Rate limited: wait for the window to reset.
+                        delay = max(reset, RETRY_DELAY_SEC)
+                    elif status >= 500:
+                        delay = RETRY_DELAY_SEC
+                    else:
+                        raise requests.HTTPError(
+                            f'{status} response for {url}', response=res)
+            except retryable:
+                delay = RETRY_DELAY_SEC
+            time.sleep(delay)
+
+
+def _parse_utc(text):
+    """Parse an ISO-8601 string; a value without an offset becomes UTC."""
+    dt = parser.isoparse(text)
+    # Test tzinfo, not tzname(): an aware datetime can have no zone name,
+    # and pytz's localize() rejects datetimes that are already aware.
+    if dt.tzinfo is None:
+        dt = pytz.utc.localize(dt)
+    return dt
+
+
+def parse_span():
+    """Parse the optional ``FROM~TO`` span given as the first CLI argument.
+
+    Either side may be empty (``FROM~`` or ``~TO``). A single date without
+    ``~`` selects from that date up to one day later.
+
+    Returns:
+        ``(from, to)`` as timezone-aware datetimes; a missing bound is
+        ``None``.
+    """
+    if len(sys.argv) == 1:
+        return None, None
+
+    span = sys.argv[1].split('~')
+    if len(span) == 1:
+        fromstr = tostr = span[0]
+    else:
+        fromstr, tostr, *_ = span
+
+    f = _parse_utc(fromstr) if fromstr else None
+    t = _parse_utc(tostr) if tostr else None
+
+    # Identical bounds mean "that one day": push the end bound a day out.
+    if tostr and fromstr == tostr:
+        t += datetime.timedelta(days=1)
+
+    return f, t
+
+
+def main():
+    """Download every archive inside the requested span."""
+    if not PAPERTRAIL_API_TOKEN:
+        print('Not set the environment variable `PAPERTRAIL_API_TOKEN`',
+              file=sys.stderr)
+        sys.exit(1)
+
+    _from, to = parse_span()
+
+    r = requests.get(ARCHIVES_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
+    r.raise_for_status()
+
+    archives = [ar for ar in r.json()
+                if (not _from or _from <= parser.isoparse(ar["start"]))
+                and (not to or parser.isoparse(ar["end"]) < to)
+                ]
+    if not archives:
+        print('No archives match the given span', file=sys.stderr)
+        return
+
+    with futures.ThreadPoolExecutor(max_workers=10) as executor, \
+            progressbar.ProgressBar(max_value=len(archives)) as bar:
+        future_list = []
+        done = 0
+        remain = DEFAULT_REMAIN_SIZE
+        until_reset_sec = 0
+        lasttime = time.time_ns()
+
+        def wait_batch():
+            """Wait for the submitted batch and refresh the quota state."""
+            nonlocal done, remain, until_reset_sec, lasttime
+            for future in future_list:
+                (_, rem, reset, finishtime, _) = future.result()
+                # Trust the rate-limit values of the newest response only.
+                if finishtime > lasttime:
+                    remain = rem
+                    until_reset_sec = reset
+                    lasttime = finishtime
+                # Report completed files (1-based) so the bar can reach
+                # max_value.
+                done += 1
+                bar.update(done)
+            future_list.clear()
+
+        for i, ar in enumerate(archives):
+            future_list.append(
+                executor.submit(
+                    do_download,
+                    ar['_links']['download']['href'],
+                    ar['filename'],
+                    i
+                )
+            )
+
+            # Keep submitting until the remaining quota is used up.
+            if len(future_list) < remain:
+                time.sleep(0.2)
+                continue
+
+            wait_batch()
+
+            if remain <= 0:
+                time.sleep(until_reset_sec)
+                remain = DEFAULT_REMAIN_SIZE
+                continue
+
+            time.sleep(0.2)
+
+        # Collect the final partial batch so its errors are not lost.
+        wait_batch()
+
+
+if __name__ == "__main__":
+    main()
-- 
cgit v1.2.3-54-g00ecf