aboutsummaryrefslogtreecommitdiff
path: root/ppad/__init__.py
diff options
context:
space:
mode:
authorNao Ueda <nao.uedder@gmail.com>2020-11-03 19:24:22 +0900
committerNao Ueda <nao.uedder@gmail.com>2020-11-03 19:24:22 +0900
commit193407686a0cb4858f4e403d8f54eba0404454a4 (patch)
tree885c6c3978ca55feb3fb9331f24e0df76c46015f /ppad/__init__.py
parentca6a7f96a59d46ce0583fa62eeb49d803f3f5a63 (diff)
parentcae02018b4ecd61fb8f2ee594775474c3f8f062f (diff)
downloadppad-193407686a0cb4858f4e403d8f54eba0404454a4.tar.gz
ppad-193407686a0cb4858f4e403d8f54eba0404454a4.tar.bz2
ppad-193407686a0cb4858f4e403d8f54eba0404454a4.zip
Merge branch 'pkg' into main (tag: 0.0.7)
the first release to pip
Diffstat (limited to 'ppad/__init__.py')
-rwxr-xr-xppad/__init__.py129
1 files changed, 129 insertions, 0 deletions
diff --git a/ppad/__init__.py b/ppad/__init__.py
new file mode 100755
index 0000000..39be363
--- /dev/null
+++ b/ppad/__init__.py
@@ -0,0 +1,129 @@
+import sys
+import os
+import time
+import datetime
+from concurrent import futures
+import pytz
+from dateutil import parser
+import requests
+import progressbar
+
# API token, read once at import time; None when the variable is unset.
PAPERTRAIL_API_TOKEN = os.environ.get('PAPERTRAIL_API_TOKEN')

# Endpoint queried by main() for the JSON list of archives.
ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json'

# Batch size main() starts with, and falls back to after waiting for a
# rate-limit reset.
DEFAULT_REMAIN_SIZE = 25

# Headers attached to every request issued by this module.
HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN}

# Pause, in seconds, that main() inserts between scheduling steps.
MIN_INTERVAL_SEC = 0.1
+
+
def get_ppheader(response):
    """Extract the ``X-Rate-Limit-*`` headers from a response chain.

    The final response is inspected first, then each redirect response in
    ``response.history``. The first one that carries all three headers with
    integer values wins; responses with a partial or non-numeric set are
    skipped instead of raising.

    Args:
        response: An object exposing ``headers`` (a mapping) and ``history``
            (an iterable of earlier responses), e.g. ``requests.Response``.

    Returns:
        A ``(limit, remaining, reset)`` tuple of ints, or ``None`` when no
        response in the chain carries a complete header set.
    """
    names = (
        'X-Rate-Limit-Limit',
        'X-Rate-Limit-Remaining',
        'X-Rate-Limit-Reset',
    )
    for candidate in [response, *response.history]:
        headers = candidate.headers
        try:
            return tuple(int(headers[name]) for name in names)
        except (KeyError, ValueError):
            # Incomplete or malformed on this hop; try the next response.
            continue
    return None
+
+
def do_download(url, filename, index):
    """Download one archive to *filename*, retrying transient failures.

    Connection errors, HTTP 429 and HTTP 5xx responses are retried after a
    pause (for 429 the pause is the advertised reset time). Any other
    non-2xx status cannot succeed on retry and is raised instead of being
    retried forever.

    Args:
        url: Download URL of the archive.
        filename: Local path the archive body is written to.
        index: Caller-supplied position of this archive; returned unchanged.

    Returns:
        ``(limit, remaining, reset, finishtime, index)`` where the first
        three values come from the rate-limit headers and ``finishtime`` is
        the ``time.time_ns()`` reading taken when the response arrived.

    Raises:
        requests.HTTPError: The server answered with a non-retryable status.
    """
    retry_delay_sec = 1
    while True:
        try:
            with requests.Session() as s:
                # stream=True keeps the archive body out of memory; it is
                # copied to disk chunk by chunk below.
                with s.get(url, headers=HEADERS, stream=True) as res:
                    finishtime = time.time_ns()
                    ppheader = get_ppheader(res)
                    if ppheader is None:
                        # No rate-limit information in the chain: report a
                        # full quota so the caller keeps its default pace.
                        ppheader = (
                            DEFAULT_REMAIN_SIZE, DEFAULT_REMAIN_SIZE, 0)
                    (limit, rem, reset) = ppheader

                    if 200 <= res.status_code < 300:
                        with open(filename, "wb") as f:
                            for chunk in res.iter_content(
                                    chunk_size=64 * 1024):
                                f.write(chunk)
                        return (limit, rem, reset, finishtime, index)

                    if res.status_code == 429:
                        # Rate limited: wait for the advertised reset.
                        wait = max(reset, retry_delay_sec)
                    elif res.status_code >= 500:
                        wait = retry_delay_sec
                    else:
                        # Raises for 4xx; anything left is unexpected.
                        res.raise_for_status()
                        raise requests.HTTPError(
                            f'unexpected status {res.status_code} '
                            f'for {url}',
                            response=res,
                        )
        except requests.ConnectionError:
            wait = retry_delay_sec

        time.sleep(wait)
+
+
def _parse_as_aware(text):
    """Parse an ISO-8601 string, treating a missing UTC offset as UTC."""
    moment = parser.isoparse(text)
    # Test the offset, not tzname(): a fixed-offset zone may have no name
    # even though the datetime is already timezone-aware.
    if moment.utcoffset() is None:
        moment = moment.replace(tzinfo=datetime.timezone.utc)
    return moment


def parse_span(argv=None):
    """Parse the optional ``FROM~TO`` span argument into aware datetimes.

    The span is read from ``argv[1]``. Either side of ``~`` may be empty to
    leave that end open. A value without ``~`` is used for both ends, and
    when both ends are the same non-empty string the upper bound is moved
    one day later. Values without a UTC offset are interpreted as UTC.

    Args:
        argv: Argument vector to read; defaults to ``sys.argv``.

    Returns:
        A ``(from, to)`` tuple of timezone-aware ``datetime`` objects, with
        ``None`` for an end that was not given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        return None, None

    span = argv[1].split('~')
    if len(span) == 1:
        fromstr = tostr = span[0]
    else:
        fromstr, tostr, *_ = span

    f = _parse_as_aware(fromstr) if fromstr else None
    t = _parse_as_aware(tostr) if tostr else None

    if t is not None and fromstr == tostr:
        t = t + datetime.timedelta(days=1)

    return f, t
+
+
def _collect_batch(batch, bar, remain, until_reset_sec, lasttime):
    """Wait for every future in *batch* and fold in its rate-limit report.

    Returns the updated ``(remain, until_reset_sec, lasttime)``; the values
    are taken from the response with the latest ``finishtime``.
    """
    for future in batch:
        # result() re-raises any exception from the worker thread.
        (_, rem, reset, finishtime, index) = future.result()
        if finishtime > lasttime:
            remain = rem
            until_reset_sec = reset
            lasttime = finishtime
        # index is 0-based; report the number of archives handled so far.
        bar.update(index + 1)
    return remain, until_reset_sec, lasttime


def main():
    """Download every archive that falls inside the requested span.

    Exits with status 1 when ``PAPERTRAIL_API_TOKEN`` is not set. Otherwise
    the archive list is fetched, filtered by the span from ``parse_span()``,
    and downloaded on a thread pool in batches sized by the most recently
    reported remaining request quota.

    Raises:
        requests.HTTPError: The archive list request failed.
    """
    if not PAPERTRAIL_API_TOKEN:
        print('Not set the environment variable `PAPERTRAIL_API_TOKEN`',
              file=sys.stderr)
        sys.exit(1)

    _from, to = parse_span()

    r = requests.get(ARCHIVES_URL, headers=HEADERS)
    r.raise_for_status()

    archives = [
        ar for ar in r.json()
        if (not _from or _from <= parser.isoparse(ar["start"]))
        and (not to or parser.isoparse(ar["end"]) < to)
    ]

    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_list = []
        remain = DEFAULT_REMAIN_SIZE
        until_reset_sec = 0
        lasttime = time.time_ns()
        with progressbar.ProgressBar(max_value=len(archives)) as bar:
            for i, ar in enumerate(archives):
                future_list.append(
                    executor.submit(
                        do_download,
                        ar['_links']['download']['href'],
                        ar['filename'],
                        i,
                    )
                )

                # Keep submitting until the batch uses up the known quota.
                if len(future_list) < remain:
                    time.sleep(MIN_INTERVAL_SEC)
                    continue

                remain, until_reset_sec, lasttime = _collect_batch(
                    future_list, bar, remain, until_reset_sec, lasttime)
                future_list = []

                if remain <= 0:
                    # Quota exhausted: wait for the reset before continuing.
                    time.sleep(until_reset_sec)
                    remain = DEFAULT_REMAIN_SIZE
                    continue

                time.sleep(MIN_INTERVAL_SEC)

            # The last batch is usually smaller than the quota and would
            # otherwise never be waited on, hiding worker exceptions.
            _collect_batch(
                future_list, bar, remain, until_reset_sec, lasttime)