在并发场景下实现基于令牌桶算法的 QPS 限流，适用于需要精确控制爬虫速率的场景。
# gevent has to patch the standard library before ``threading`` and ``time``
# are imported; names bound earlier would still be the unpatched, blocking
# versions.
from gevent import monkey

monkey.patch_all()

from functools import wraps  # noqa: E402
from threading import RLock  # noqa: E402
from time import monotonic, sleep, time  # noqa: E402

from gevent.pool import Pool  # noqa: E402


class TokenBucket:
    """Token-bucket rate limiter usable as a function decorator.

    The bucket starts full with ``cap`` tokens and is refilled continuously
    at ``rate`` tokens per second, never holding more than ``cap``.
    """

    def __init__(self, cap: float, rate: float) -> None:
        """Create a full bucket.

        Args:
            cap: Maximum number of tokens the bucket holds (burst size).
            rate: Tokens added per second.

        Raises:
            ValueError: If ``cap`` is below 1 or ``rate`` is not positive;
                either would make a limited call wait forever.
        """
        if cap < 1:
            raise ValueError("cap must be at least 1")
        if rate <= 0:
            raise ValueError("rate must be positive")
        self._cap = float(cap)
        self._tokens = self._cap
        self._rate = float(rate)
        # Monotonic clock: wall-clock adjustments must not add or remove tokens.
        self._last_time = monotonic()
        # Re-entrant because __consume calls __get_cur_tokens while holding it.
        self._lock = RLock()

    def __get_cur_tokens(self) -> float:
        """Refill the bucket for the elapsed time and return the token count."""
        with self._lock:
            now = monotonic()
            elapsed = now - self._last_time
            # Always advance the timestamp, even when the bucket is full.
            # Otherwise idle time spent at capacity is credited again on the
            # next refill, allowing a burst larger than ``cap``.
            self._last_time = now
            self._tokens = min(self._cap, self._tokens + self._rate * elapsed)
            return self._tokens

    def __consume(self, tokens: int) -> bool:
        """Take ``tokens`` from the bucket; return False if too few are left."""
        with self._lock:
            if tokens > self.__get_cur_tokens():
                return False
            self._tokens -= tokens
            return True

    def __wait_time(self, tokens: int) -> float:
        """Return the seconds until ``tokens`` tokens should be available."""
        with self._lock:
            deficit = tokens - self.__get_cur_tokens()
        return max(deficit, 0.0) / self._rate

    def limit(self, func):
        """Decorate ``func`` so every call first waits for one token.

        Returns:
            A wrapper that blocks until a token is available and then returns
            ``func(*args, **kwargs)``.
        """

        @wraps(func)
        def with_limit(*args, **kwargs):
            while not self.__consume(tokens=1):
                # Sleep instead of spinning: saves CPU and, once gevent has
                # patched ``time.sleep``, lets other greenlets run.
                sleep(self.__wait_time(tokens=1))
            return func(*args, **kwargs)

        return with_limit


token_bucket = TokenBucket(cap=5, rate=5)


@token_bucket.limit
def task(url):
    """Print the current timestamp and the URL (stand-in for a real request)."""
    print(time(), url)


if __name__ == '__main__':
    pool = Pool(size=8)
    url_list = ["http://www.baidu.com"] * 50
    pool.map(task, url_list)
参考: https://github.com/titan-web/rate-limit/blob/master/token_bucket/__init__.py