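"""ppad: download Papertrail log archives in parallel.

Fetches the archive list from the Papertrail API, filters it by an
optional date range taken from the command line, and downloads each
archive concurrently while pacing requests against the API's
rate-limit headers.
"""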
import sys
import os
import time
from concurrent import futures
import requests
import progressbar

from .lib.util import parse_date, parse_argv


PAPERTRAIL_API_TOKEN = os.environ.get('PAPERTRAIL_API_TOKEN', None)
ARCHIVES_URL = 'https://papertrailapp.com/api/v1/archives.json'
# Conservative guess at the remaining request quota, used until the
# first response reports the real value in its rate-limit headers.
DEFAULT_REMAIN_SIZE = 25
HEADERS = {'X-Papertrail-Token': PAPERTRAIL_API_TOKEN}
# Minimum pause between successive request submissions.
MIN_INTERVAL_SEC = 0.1


def get_ppheader(response):
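    """Return (limit, remaining, reset) from Papertrail's rate-limit
    headers, checking the response and then its redirect history.

    Returns None if no response in the chain carries the headers.
    """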
    hist = [response] + response.history
    for h in hist:
        if 'X-Rate-Limit-Remaining' not in h.headers:
            continue

        return (
                int(h.headers['X-Rate-Limit-Limit']),
                int(h.headers['X-Rate-Limit-Remaining']),
                int(h.headers['X-Rate-Limit-Reset'])
                )


def do_download(url, filename, index):
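    """Download one archive to `filename`, retrying on connection errors.

    Returns (limit, remaining, reset, finish_time_ns, index) so the
    caller can pace itself against the rate limit.
    """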
    while True:
        try:
            with requests.Session() as s:
                # Stream the body so large archives are not buffered
                # whole in memory before being written to disk.
                res = s.get(url, headers=HEADERS, stream=True)
                finishtime = time.time_ns()
                (limit, rem, reset) = get_ppheader(res)

                if 200 <= res.status_code < 300:
                    with open(filename, "wb") as f:
                        for chunk in res.iter_content(chunk_size=128):
                            f.write(chunk)
                    return (limit, rem, reset, finishtime, index)

                # Non-2xx (e.g. 429 when the quota is exhausted): back
                # off until the reset window before retrying, instead of
                # busy-looping on the same request.
                time.sleep(max(reset, 1))
        except requests.ConnectionError:
            time.sleep(1)


def main():
    if not PAPERTRAIL_API_TOKEN:
        print('The environment variable `PAPERTRAIL_API_TOKEN` is not set',
              file=sys.stderr)
        sys.exit(1)

    date_from, date_to = parse_argv(sys.argv)

    print("fetching log archives information ...", end="\r", file=sys.stderr)
    r = requests.get(ARCHIVES_URL, headers=HEADERS)
    r.raise_for_status()

    archives = [
        ar for ar in r.json()
        if (
            # A missing `date_from` places no lower bound on the
            # archives selected ...
            ((not date_from) or date_from <= parse_date(ar["start"]))
            # ... and a missing `date_to` places no upper bound.
            and ((not date_to) or parse_date(ar["end"]) < date_to)
        )
    ]

    with futures.ThreadPoolExecutor(max_workers=10) as executor:
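        # Rate-limit pacing: submit up to `remain` downloads, then drain
        # the finished futures and refresh the quota from the response
        # that finished most recently before submitting the rest.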
        future_list = []
        remain = DEFAULT_REMAIN_SIZE
        until_reset_sec = 0
        lasttime = time.time_ns()
        with progressbar.ProgressBar(max_value=len(archives)) as bar:
            for i, ar in enumerate(archives):
                future_list.append(
                    executor.submit(
                        do_download,
                        ar['_links']['download']['href'],
                        ar['filename'],
                        i
                    )
                )

                if len(future_list) < remain:
                    time.sleep(MIN_INTERVAL_SEC)
                    continue

                for future in future_list:
                    (_, rem, reset, finishtime, index) = future.result()
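                    # Only the most recently finished response carries
                    # the current quota; earlier ones are stale.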
                    if finishtime > lasttime:
                        remain = rem
                        until_reset_sec = reset
                        lasttime = finishtime
                    bar.update(index)

                future_list = []

                # Quota exhausted: wait out the reset window, then fall
                # back to the conservative default until fresh headers
                # arrive.
                if remain <= 0:
                    time.sleep(until_reset_sec)
                    remain = DEFAULT_REMAIN_SIZE
                    continue

                time.sleep(MIN_INTERVAL_SEC)