initial commit.

author: Nao Ueda <nao.uedder@gmail.com> 2020-03-11 09:29:36 +0900
committer: Nao Ueda <nao.uedder@gmail.com> 2020-03-11 09:29:36 +0900
commit: ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63 (patch)
tree: 276a36f373efaaad733ea95ba0997e4ecd547c45 /ppad.py
download: ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.tar.gz
ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.tar.bz2
ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.zip
1 files changed, 134 insertions, 0 deletions
diff --git a/ppad.py b/ppad.py
new file mode 100755
index 0000000..a4b50eb
--- /dev/null
+++ b/ppad.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import time
+import datetime
+from concurrent import futures
+import pytz
+from dateutil import parser
+import requests
+import progressbar
+
+PAPERTRAIL_API_TOKEN = os.environ.get('PAPERTRAIL_API_TOKEN', None)
+ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json'  # archive list
+DEFAULT_REMAIN_SIZE = 25  # request budget assumed until responses report one
+HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN}  # sent on every request
+
+
+def get_ppheader(response):
+    """Return (limit, remaining, reset) from the first response carrying them.
+
+    Checks `response` first, then each entry of `response.history`; returns
+    None implicitly when no response has 'X-Rate-Limit-Remaining'.
+    """
+    keys = ('X-Rate-Limit-Limit', 'X-Rate-Limit-Remaining',
+            'X-Rate-Limit-Reset')
+    for candidate in [response, *response.history]:
+        if keys[1] in candidate.headers:
+            return tuple(int(candidate.headers[k]) for k in keys)
+
+
+def do_download(url, filename, index):
+    """Save url to filename, retrying (after a pause) until a 2xx response.
+    Returns (limit, remaining, reset, finish_ns, index); 0, 1, 0 if unknown."""
+    while True:
+        try:
+            res = requests.get(url, headers=HEADERS)
+            finishtime = time.time_ns()
+            limit, rem, reset = get_ppheader(res) or (0, 1, 0)
+            if 200 <= res.status_code < 300:
+                with open(filename, "wb") as f:
+                    f.write(res.content)
+                return (limit, rem, reset, finishtime, index)
+            time.sleep(max(reset, 1) if rem <= 0 else 1)  # never busy-loop
+        except requests.ConnectionError:
+            time.sleep(1)
+
+
+def parse_span():
+    """Parse the optional `FROM~TO` command-line argument into datetimes.
+
+    Reads sys.argv[1]; either side of the `~` may be empty. A value without
+    `~` is used for both ends, and when both ends are the same non-empty
+    string the end is moved one day later. Datetimes without a timezone
+    name are localized to UTC.
+
+    Returns a (from, to) tuple whose items are datetimes or None.
+    """
+    if len(sys.argv) == 1:
+        return None, None
+
+    def to_aware(text):
+        # An empty side of the span means "unbounded" on that side.
+        if not text:
+            return None
+        moment = parser.isoparse(text)
+        if moment.tzname():
+            return moment
+        return pytz.timezone('UTC').localize(moment)
+
+    pieces = sys.argv[1].split('~')
+    fromstr, tostr = (pieces[0], pieces[0]) if len(pieces) == 1 else pieces[:2]
+    f, t = to_aware(fromstr), to_aware(tostr)
+    if tostr and fromstr == tostr:
+        t = t + datetime.timedelta(days=1)
+    return f, t
+
+
+def main():
+    """Download every Papertrail archive inside the requested time span.
+
+    Lists the archives, keeps those that start at or after the span's
+    beginning and end before its end, and downloads them on a thread pool
+    while pacing submissions by the rate-limit values the downloads report.
+    """
+    if not PAPERTRAIL_API_TOKEN:
+        print('Not set the environment variable `PAPERTRAIL_API_TOKEN`',
+              file=sys.stderr)
+        sys.exit(1)
+
+    _from, to = parse_span()
+
+    r = requests.get(ARCHIVES_URL, headers=HEADERS)
+    r.raise_for_status()
+
+    archives = [ar for ar in r.json()
+                if (not _from or _from <= parser.isoparse(ar["start"]))
+                and (not to or parser.isoparse(ar["end"]) < to)]
+
+    with futures.ThreadPoolExecutor(max_workers=10) as executor, \
+            progressbar.ProgressBar(max_value=len(archives)) as bar:
+        pending = []
+        remain = DEFAULT_REMAIN_SIZE
+        until_reset_sec = 0
+        lasttime = time.time_ns()
+
+        def drain():
+            # Collect finished downloads; the most recently finished response
+            # carries the freshest rate-limit state.
+            nonlocal remain, until_reset_sec, lasttime
+            for future in pending:
+                (_, rem, reset, finishtime, index) = future.result()
+                if finishtime > lasttime:
+                    remain, until_reset_sec, lasttime = rem, reset, finishtime
+                bar.update(index + 1)  # index is 0-based; show the count done
+            pending.clear()
+
+        for i, ar in enumerate(archives):
+            pending.append(executor.submit(
+                do_download, ar['_links']['download']['href'],
+                ar['filename'], i))
+            if len(pending) >= remain:
+                drain()
+                if remain <= 0:
+                    time.sleep(until_reset_sec)
+                    remain = DEFAULT_REMAIN_SIZE
+                    continue
+            time.sleep(0.2)
+        # Collect the final, partial batch too so its errors surface.
+        drain()
+
+
+if __name__ == "__main__":  # run only when executed as a script
+    main()
author	Nao Ueda <nao.uedder@gmail.com>	2020-03-11 09:29:36 +0900
committer	Nao Ueda <nao.uedder@gmail.com>	2020-03-11 09:29:36 +0900
commit	ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63 (patch)
tree	276a36f373efaaad733ea95ba0997e4ecd547c45 /ppad.py
download	ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.tar.gz ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.tar.bz2 ppad-ca6a7f96a59d46ce0583fa62eeb49d803f3f5a63.zip