about | summary | refs | log | tree | commit | diff
path: root/ppad.py
diff options
context:
space:
mode:
author Nao Ueda <nao.uedder@gmail.com> 2020-11-03 19:21:03 +0900
committer Nao Ueda <nao.uedder@gmail.com> 2020-11-03 19:21:03 +0900
commit d813bcb5b038e07db230813b084d24b6f1edc5ca (patch)
tree 7b66b029ec4261c8cdebdd79592f65cbd495ec32 /ppad.py
parent fc4fa1f514caa16c9baafbdbadd0d1fe855db636 (diff)
download ppad-d813bcb5b038e07db230813b084d24b6f1edc5ca.tar.gz
ppad-d813bcb5b038e07db230813b084d24b6f1edc5ca.tar.bz2
ppad-d813bcb5b038e07db230813b084d24b6f1edc5ca.zip
change structure to distribute a package
Diffstat (limited to 'ppad.py')
-rwxr-xr-x ppad.py 135
1 files changed, 0 insertions, 135 deletions
diff --git a/ppad.py b/ppad.py
deleted file mode 100755
index 338b37c..0000000
--- a/ppad.py
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import os
-import time
-import datetime
-from concurrent import futures
-import pytz
-from dateutil import parser
-import requests
-import progressbar
-
# Request budget assumed until a rate-limit header has been read.
DEFAULT_REMAIN_SIZE = 25
# Pause, in seconds, between consecutive download submissions.
MIN_INTERVAL_SEC = 0.1
# Endpoint whose JSON response lists the available archives.
ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json'
# API token read from the environment; None when the variable is unset.
PAPERTRAIL_API_TOKEN = os.getenv('PAPERTRAIL_API_TOKEN')
# Header set sent with every Papertrail request.
HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN}
-
-
def get_ppheader(response):
    """Extract the rate-limit headers from a response and its history.

    The response itself is inspected first, then each entry of
    ``response.history`` in order. The first one carrying all three
    ``X-Rate-Limit-*`` headers supplies the result.

    Args:
        response: Object exposing ``headers`` (a mapping) and ``history``
            (a list of earlier responses with the same shape).

    Returns:
        A ``(limit, remaining, reset)`` tuple of ints, or ``None`` when no
        response in the chain carries the complete header set.

    Raises:
        ValueError: If a header value cannot be converted to ``int``.
    """
    wanted = (
        'X-Rate-Limit-Limit',
        'X-Rate-Limit-Remaining',
        'X-Rate-Limit-Reset',
    )
    for candidate in [response] + response.history:
        headers = candidate.headers
        # Require the whole set: a response with only some of the headers is
        # skipped instead of raising KeyError while the tuple is being built.
        if all(name in headers for name in wanted):
            return tuple(int(headers[name]) for name in wanted)

    return None
-
-
def do_download(url, filename, index):
    """Download one archive to ``filename``, retrying until it succeeds.

    Connection errors and non-2xx responses are retried indefinitely, with a
    pause before each new attempt.

    Args:
        url: Download URL of the archive.
        filename: Path of the local file to write (opened in binary mode).
        index: Caller-supplied position of the archive; returned unchanged.

    Returns:
        ``(limit, remaining, reset, finishtime, index)``. The first three
        values come from ``get_ppheader``; ``finishtime`` is the
        ``time.time_ns()`` reading taken right after the request returned.
    """
    while True:
        try:
            with requests.Session() as s:
                res = s.get(url, headers=HEADERS)
        except requests.ConnectionError:
            time.sleep(1)
            continue

        finishtime = time.time_ns()
        ppheader = get_ppheader(res)

        if 200 <= res.status_code < 300:
            # The request is not streamed, so the body is already in memory.
            with open(filename, "wb") as f:
                f.write(res.content)

            if ppheader is None:
                # No rate-limit headers anywhere in the response chain:
                # report the default budget rather than failing the unpack
                # after the file has already been written.
                ppheader = (DEFAULT_REMAIN_SIZE, DEFAULT_REMAIN_SIZE, 0)
            (limit, rem, reset) = ppheader
            return (limit, rem, reset, finishtime, index)

        # Unsuccessful response: pause before retrying so the API is not
        # hammered in a tight loop. When the budget is exhausted, wait for
        # the advertised reset (treated as seconds, as main() does).
        wait_sec = 1
        if ppheader is not None and ppheader[1] <= 0:
            wait_sec = max(ppheader[2], wait_sec)
        time.sleep(wait_sec)
-
-
def _parse_utc_default(text):
    """Parse an ISO-8601 string, treating a result without tzinfo as UTC."""
    moment = parser.isoparse(text)
    # Test tzinfo directly. tzname() may be None for an aware datetime
    # (e.g. a fixed offset without a name), and localize() raises ValueError
    # when handed a datetime that already has tzinfo.
    if moment.tzinfo is None:
        moment = pytz.timezone('UTC').localize(moment)
    return moment


def parse_span():
    """Read the optional time span from the first command-line argument.

    The argument has the form ``FROM~TO``, where either side may be empty,
    or a single value with no ``~``. A single value (or identical FROM and
    TO text) selects the 24 hours starting at that instant. Values are
    ISO-8601 strings; those without a timezone are taken as UTC.

    Returns:
        A ``(from, to)`` tuple of timezone-aware datetimes. A side that was
        not given is ``None``; both are ``None`` when there is no argument.

    Raises:
        ValueError: If a given value is not a valid ISO-8601 string.
    """
    if len(sys.argv) == 1:
        return None, None

    span = sys.argv[1].split('~')
    if len(span) == 1:
        fromstr = tostr = span[0]
    else:
        # Anything after a second '~' is ignored.
        fromstr, tostr, *_ = span

    f = _parse_utc_default(fromstr) if fromstr else None
    t = _parse_utc_default(tostr) if tostr else None

    # Identical bounds mean "that one day": extend the end by 24 hours.
    if tostr and fromstr == tostr:
        t = t + datetime.timedelta(days=1)

    return f, t
-
-
def main():
    """Download every Papertrail archive inside the requested time span.

    Exits with status 1 when ``PAPERTRAIL_API_TOKEN`` is not set. Otherwise
    the archive list is fetched, filtered by the span from ``parse_span``
    (start inclusive, end exclusive), and each archive is downloaded on a
    thread pool into the file named by its ``filename`` field.

    Downloads are submitted in batches no larger than the remaining request
    budget. After each batch the budget is refreshed from the most recently
    finished download; when it is used up, submission pauses for the
    reported reset interval.

    Raises:
        requests.HTTPError: If the archive list request fails.
    """
    if not PAPERTRAIL_API_TOKEN:
        print('Not set the environment variable `PAPERTRAIL_API_TOKEN`',
              file=sys.stderr)
        sys.exit(1)

    _from, to = parse_span()

    r = requests.get(ARCHIVES_URL, headers=HEADERS)
    r.raise_for_status()

    archives = [
        ar for ar in r.json()
        if (not _from or _from <= parser.isoparse(ar["start"]))
        and (not to or parser.isoparse(ar["end"]) < to)
    ]

    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_list = []
        remain = DEFAULT_REMAIN_SIZE
        until_reset_sec = 0
        lasttime = time.time_ns()
        with progressbar.ProgressBar(max_value=len(archives)) as bar:
            for i, ar in enumerate(archives):
                future_list.append(
                    executor.submit(
                        do_download,
                        ar['_links']['download']['href'],
                        ar['filename'],
                        i,
                    )
                )

                # Keep submitting until the batch reaches the budget.
                if len(future_list) < remain:
                    time.sleep(MIN_INTERVAL_SEC)
                    continue

                for future in future_list:
                    (_, rem, reset, finishtime, index) = future.result()
                    # Trust only the header from the latest finished request.
                    if finishtime > lasttime:
                        remain = rem
                        until_reset_sec = reset
                        lasttime = finishtime
                    # index is zero-based; the bar counts finished archives.
                    bar.update(index + 1)

                future_list = []

                if remain <= 0:
                    time.sleep(until_reset_sec)
                    remain = DEFAULT_REMAIN_SIZE
                    continue

                time.sleep(MIN_INTERVAL_SEC)

            # Collect the final partial batch as well, so its failures are
            # raised here instead of being dropped and the bar is completed.
            for future in future_list:
                index = future.result()[-1]
                bar.update(index + 1)


# Run the downloader only when executed as a script.
if __name__ == "__main__":
    main()