我们从Python开源项目中,提取了以下5个代码示例,用于说明如何使用 scrapy.shell 模块(主要是其中的 inspect_response 调试函数)。
def parse(self, response):
    """Schedule one crawl request per listing page.

    Appends the page number (1..self.page) to the URL of the current
    request and hands each resulting page off to ``parse_ip``.
    """
    # Debug aid (uncomment to drop into an interactive scrapy shell):
    # from scrapy.shell import inspect_response
    # inspect_response(response, self)
    base_url = response.request.url
    for page_no in range(1, self.page + 1):
        # dont_filter: the paginated URLs must not be deduplicated away.
        yield scrapy.Request(
            '%s%s' % (base_url, page_no),
            self.parse_ip,
            dont_filter=True,
        )
def _extract_item(self, response):
    """Load a MyspiderItem from a movie detail page and return it as a dict.

    Returning a plain ``dict`` instead of the scrapy Item keeps the data
    directly JSON-serializable (e.g. for a scrapy-redis pipeline).
    """
    # Debug helpers (uncomment while developing):
    # inspect_response(response, self)   # drop into a scrapy shell
    # open_in_browser(response)          # open this response in a browser
    loader = ItemLoader(response=response, item=MyspiderItem(), type='html')
    loader.add_xpath('movie_name', '//h1/span[@property="v:itemreviewed"]/text()')
    loader.add_xpath('movie_year', '//span[@property="v:initialReleaseDate"]/text()')
    loader.add_xpath('movie_type', '//span[@property="v:genre"]/text()')
    loader.add_xpath('movie_rate', '//strong[@class="ll rating_num"]/text()')
    loader.add_value('url', response.url)
    return dict(loader.load_item())
def parse(self, response):
    """Yield one item per movie entry, then follow the next-page link."""
    # Debug aid (uncomment to drop into a scrapy shell):
    # from scrapy.shell import inspect_response
    # inspect_response(response,self)
    # shell >> view(response)
    for entry in response.xpath('//ol[@class="grid_view"]/li'):
        item = Dou1801Item()
        item['ranks'] = entry.xpath('div/div[1]/em/text()').extract()[0]
        item['titles'] = entry.xpath('div/div[2]/div[1]/a/span[1]/text()').extract()[0]
        item['score'] = entry.xpath('div/div[2]/div[2]/div/span[2]/text()').extract()[0]
        item['nums'] = entry.xpath('div/div[2]/div[2]/div/span[4]/text()').extract()[0]
        # The one-line description is missing for some entries; only set
        # the field when the xpath matched something.
        quotes = entry.xpath('div/div[2]/div[2]/p[2]/span/text()').extract()
        if quotes:
            item['des'] = quotes[0]
        item['links'] = entry.xpath('div/div[2]/div[1]/a/@href').extract()[0]
        yield item
    # Relative "next" link; absent on the last page.
    next_page = response.xpath('//*[@id="content"]/div/div[1]/div[2]/span[3]/a/@href').extract()
    if next_page:
        follow_url = 'https://movie.douban.com/top250' + next_page[0]
        yield scrapy.Request(url=follow_url, headers=self.headers)
def parse(self, response):
    """Schedule the listing page(s) for IP extraction.

    Appends ``/<page>`` to the URL of the current request; the range is
    currently fixed to page 1 only.
    """
    # Debug aid (uncomment to drop into a scrapy shell):
    # from scrapy.shell import inspect_response
    # inspect_response(response, self)
    base_url = response.request.url
    for page_no in range(1, 2):
        yield scrapy.Request('%s/%s' % (base_url, page_no), callback=self.parse_ip)
def parse_page(self, response):
    """Parse one proxy-list table page into ProxyfetcherItem objects.

    The first two table rows are headers and are skipped.  The IP is
    emitted by the site via a ``document.write()`` call and the port is
    obfuscated as a hexadecimal string, so both are recovered with
    regexes before the item is assembled and yielded.
    """
    # Debug aid (uncomment to drop into a scrapy shell):
    # from scrapy.shell import inspect_response
    # inspect_response(response, self)
    for row in response.xpath("//table/tr")[2:]:
        # Item creation and deployment
        item = ProxyfetcherItem()
        # Raw strings so the regex escape \( is not parsed as an
        # (invalid) string escape sequence — SyntaxWarning on 3.12+.
        item["ip"] = row.xpath("td")[1].re(r"document.write\('(.+?)'")[0].strip()
        # The port is "encoded" as hexadecimal, e.g. '1F90' -> 8080.
        item["port"] = str(int(row.xpath("td")[2].re(r"gp.dep\('(.+?)'")[0], 16))
        item["country"] = row.xpath("td[5]/text()").extract()[0]
        item["con_type"] = 'http'
        item["full_address"] = "{}:{}".format(item["ip"], item["port"])
        yield item.status_check(item)