0%

【Python】并发场景下的令牌桶限流

在并发场景下实现基于令牌桶算法的 QPS 限流，适用于需要精确控制爬虫速率的场景。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from functools import wraps
from threading import RLock
from time import time
from gevent.pool import Pool
from gevent import monkey
monkey.patch_all()


class TokenBucket:
    """Token-bucket rate limiter that can be applied as a function decorator.

    The bucket starts full with ``cap`` tokens and is refilled continuously at
    ``rate`` tokens per second, never holding more than ``cap``. Every call to
    a function wrapped with :meth:`limit` spends one token and waits for a
    refill when none is available.
    """

    # Wait used when the bucket can never refill (rate <= 0): the caller idles
    # in small steps instead of dividing by zero or spinning.
    _IDLE_POLL_SECONDS = 0.05

    def __init__(self, cap: float, rate: float) -> None:
        """Create a full bucket.

        Args:
            cap: Maximum number of tokens the bucket can hold (burst size).
            rate: Tokens added per second.
        """
        self._cap = float(cap)
        self._tokens = self._cap
        self._rate = float(rate)
        self._last_time = time()
        self._lock = RLock()

    def __get_cur_tokens(self):
        """Refill the bucket for the time elapsed and return the token count."""
        with self._lock:
            now = time()
            # Clamp at zero so a wall clock stepping backwards cannot take
            # tokens away.
            elapsed = max(0.0, now - self._last_time)
            self._tokens = min(self._cap, self._tokens + self._rate * elapsed)
            # Always advance the timestamp, even while the bucket is full.
            # Otherwise idle time is credited again on the next refill, which
            # hands back the token that was just spent.
            self._last_time = now
            return self._tokens

    def __consume(self, tokens: int):
        """Take ``tokens`` from the bucket; return True on success, else False."""
        with self._lock:
            if tokens <= self.__get_cur_tokens():
                self._tokens -= tokens
                return True
            return False

    def __seconds_until(self, tokens: int):
        """Return the estimated wait in seconds until ``tokens`` are available."""
        with self._lock:
            missing = tokens - self.__get_cur_tokens()
            if missing <= 0:
                return 0.0
            if self._rate <= 0:
                return self._IDLE_POLL_SECONDS
            return missing / self._rate

    def limit(self, func):
        """Decorate ``func`` so that each call first acquires one token.

        The wrapper blocks until a token is available, then returns whatever
        ``func`` returns.
        """
        # Imported here instead of at module level so the lookup happens when
        # the decorator is applied; if gevent's monkey patching has already
        # run by then, this resolves to the cooperative sleep.
        from time import sleep

        @wraps(func)
        def with_limit(*args, **kwargs):
            # Sleep for the computed refill time rather than busy-spinning,
            # so waiting costs no CPU and other workers get to run.
            while not self.__consume(tokens=1):
                sleep(self.__seconds_until(tokens=1))
            return func(*args, **kwargs)
        return with_limit


# Shared limiter for the demo: bucket capacity 5, refill rate 5.
token_bucket = TokenBucket(rate=5, cap=5)


@token_bucket.limit
def task(url):
    """Print the current timestamp followed by ``url``."""
    stamp = time()
    print(stamp, url)


if __name__ == '__main__':
    # Demo: map `task` over 50 copies of the same URL using a pool of size 8.
    url_list = ["http://www.baidu.com" for _ in range(50)]
    workers = Pool(size=8)
    workers.map(task, url_list)

参考: https://github.com/titan-web/rate-limit/blob/master/token_bucket/__init__.py