Skip to content

Commit 02ae1ea

Browse files
authored
Merge pull request #169 from qshine/pipeline
Use a pipeline when reading from the Redis list queue
2 parents 9671d50 + 9a383ab commit 02ae1ea

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

src/scrapy_redis/spiders.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,22 @@ def setup_redis(self, crawler=None):
7272
# that's when we will schedule new requests from redis queue
7373
crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
7474

75+
def lpop_multi(self, redis_key, batch_size):
    """Pop up to ``batch_size`` items from the head of a Redis list.

    Queues an LRANGE (read the first ``batch_size`` elements) followed by
    an LTRIM (discard exactly those elements) on a single pipeline, so
    both commands are sent to the server together (atomic under redis-py's
    default transactional pipeline). Returns the list of popped raw
    values; empty when the queue is empty.
    """
    with self.server.pipeline() as pipe:
        # First result is the LRANGE payload; the LTRIM ack is discarded.
        pipe.lrange(redis_key, 0, batch_size - 1)
        pipe.ltrim(redis_key, batch_size, -1)
        popped, _trim_ack = pipe.execute()
    return popped
81+
7582
def next_requests(self):
7683
"""Returns a request to be scheduled or none."""
7784
use_set = self.settings.getbool('REDIS_START_URLS_AS_SET', defaults.START_URLS_AS_SET)
78-
fetch_one = self.server.spop if use_set else self.server.lpop
85+
fetch_data = self.server.spop if use_set else self.lpop_multi
7986
# XXX: Do we need to use a timeout here?
8087
found = 0
81-
# TODO: Use redis pipeline execution.
82-
while found < self.redis_batch_size:
83-
data = fetch_one(self.redis_key)
84-
if not data:
85-
# Queue empty.
86-
break
88+
89+
datas = fetch_data(self.redis_key, self.redis_batch_size)
90+
for data in datas:
8791
req = self.make_request_from_data(data)
8892
if req:
8993
yield req

0 commit comments

Comments
 (0)