Python scrapy.log 模块,DEBUG 实例源码

我们从Python开源项目中,提取了以下30个代码示例,用于说明如何使用scrapy.log.DEBUG

项目:crepriceSpider    作者:zhousenbiao    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate every field of *item* and persist it to MongoDB.

        Raises DropItem when a field value is empty; otherwise inserts the
        item and logs at DEBUG level.
        """
        # Iterating a scrapy Item yields its field *names* (always truthy);
        # the original tested the name itself, so validation never fired.
        # Check the field's value instead.  The dead `valid` flag is gone:
        # raise exits immediately, so it was always True afterwards.
        for data in item:
            if not item[data]:
                raise DropItem('Missing{0}!'.format(data))
        self.collection.insert(dict(item))
        log.msg('??????!', level=log.DEBUG, spider=spider)

        return item

    # def testdb(self):
    #     # Connect to MongoHQ (original Chinese comment garbled in extraction)
    #     con = pymongo.Connection("paulo.mongohq.com",10042)
    #     db = con.mytest
    #     db.authenticate("root", "sa123")
    #     db.urllist.drop()
项目:MonkeyKing_crawler_recommender    作者:BitTigerInst    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate each field of *item* and store it in MongoDB.

        Raises DropItem on an empty field value; otherwise inserts the item
        and logs at DEBUG level.
        """
        # Iterating a scrapy Item yields field *names* (always truthy); the
        # original tested the name itself, so the Missing guard never fired.
        # Check the field value instead; the `valid` flag was dead code.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))
        self.collection.insert(dict(item))
        log.msg("new app added to MongoDB database!",
                level=log.DEBUG, spider=spider)

        return item
项目:stockSpider    作者:mizhdi    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Persist a stock or post item depending on the originating spider.

        Stock items are upserted by (num, source); xueqiu posts are saved
        to the 'post' collection.  The item is always passed through.
        """
        doc = dict(item)
        if spider.name == 'baiduTopStockSpider':
            collection = self.db[settings['stock']]
            # Look for an existing record with the same stock number/source.
            matches = list(collection.find({'num': doc["num"], 'source': doc["source"]}))
            if matches:
                collection.update({'_id': matches[0]['_id']}, doc)
            else:
                collection.insert(doc)
            log.msg("stock added to MongoDB database!", level=log.DEBUG, spider=spider)
        elif spider.name == 'xueqiuPostSpider':
            self.db['post'].save(doc)
            log.msg("post added to MongoDB database!", level=log.DEBUG, spider=spider)

        return item
项目:scrapy-cluster    作者:WalnutATiie    | 项目源码 | 文件源码
def _retry(self, request, reason, spider):
        """Return a retry copy of *request*, or None once max_retry_times
        is exhausted.

        Retried requests bypass the dupe filter and are demoted in priority.
        """
        attempt = request.meta.get('retry_times', 0) + 1
        # Guard clause: give up once the retry budget is spent.
        if attempt > self.max_retry_times:
            log.msg(format="Gave up retrying %(request)s (failed %(retries)d times): %(reason)s",
                    level=log.DEBUG, spider=spider, request=request,
                    retries=attempt, reason=reason)
            return None
        log.msg(format="Retrying %(request)s (failed %(retries)d times): %(reason)s",
                level=log.DEBUG, spider=spider, request=request,
                retries=attempt, reason=reason)
        retry_request = request.copy()
        retry_request.meta['retry_times'] = attempt
        retry_request.dont_filter = True
        # our priority setup is different from super: demote by 10 per retry
        retry_request.meta['priority'] = retry_request.meta['priority'] - 10
        return retry_request
项目:findtrip    作者:fankcoder    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Clean Qua flight items, then store Qua/Ctrip items in MongoDB.

        Qua items have their text fields washed and validated before
        insertion; Ctrip items are inserted as-is (original behaviour).
        """
        if item['site'] == 'Qua':
            # Normalise the scraped text fields in place (replaces five
            # copy-pasted if-blocks from the original).
            for field in ('company', 'flight_time', 'airports', 'passtime', 'price'):
                if item[field]:
                    item[field] = wash(item[field])
            # Iterating an Item yields field *names* (always truthy); the
            # original tested the name, so this guard never fired.  Test
            # the field values instead.
            for data in item:
                if not item[data]:
                    raise DropItem("Missing data!")
            self.collection.insert(dict(item))
            log.msg("Question added to MongoDB database!",
                    level=log.DEBUG, spider=spider)
        elif item['site'] == 'Ctrip':
            self.collection.insert(dict(item))
            log.msg("Question added to MongoDB database!",
                    level=log.DEBUG, spider=spider)

        return item
项目:spider    作者:qcl643062    | 项目源码 | 文件源码
def parse(self, response):
        """Scrape a JD product page: title via XPath, price and review
        counters via a Splinter-driven browser (those parts of the page are
        JS-rendered).  Returns the loaded item.
        """
        el = JDspiderLoader(response=response)
        el.add_xpath('title', '//*[@id="name"]/h1/text()')
        with Browser() as browser:
            url = response.url
            browser.visit(url)
            # Price is injected by JavaScript, so read it from the live DOM.
            price = browser.find_by_id('jd-price')
            if price == []:
                # Fallback selector for the alternative page layout.
                price = browser.find_by_xpath('//*[@id="price"]/strong')
            # self.log(price[0].value, level=log.DEBUG)
            el.add_value('price', price[0].value[1:])  # [1:] strips the currency symbol
        with Browser() as browser:
            # The review page URL is derived from the product id embedded in
            # the product URL's last path segment.
            number = response.url.split('/')[-1].split('.')[0]
            url = 'http://club.jd.com/review/' + number + '-2-1.html'
            browser.visit(url)
            # Review counters; field names suggest: shaitu=with photos,
            # haoping=positive, zhongping=neutral, chaping=negative —
            # TODO(review) confirm against the live page.  [1:-1] strips
            # the surrounding bracket characters from the counter text.
            shaitu = browser.find_by_xpath('//*[@id="comments-list"]/div[1]/ul/li[5]/a/em')
            el.add_value('shaitu', shaitu[0].value[1:-1])
            haoping = browser.find_by_xpath('//*[@id="comments-list"]/div[1]/ul/li[2]/a/em')
            el.add_value('haoping', haoping[0].value[1:-1])
            zhongping = browser.find_by_xpath('//*[@id="comments-list"]/div[1]/ul/li[3]/a/em')
            el.add_value('zhongping', zhongping[0].value[1:-1])
            chaping = browser.find_by_xpath('//*[@id="comments-list"]/div[1]/ul/li[4]/a/em')
            el.add_value('chaping', chaping[0].value[1:-1])
        return el.load_item()
项目:NewsScrapy    作者:yinzishao    | 项目源码 | 文件源码
def dropped(self, item, exception, response, spider):
        """Build the log-event dict for a dropped item.

        Follows scrapy's LogFormatter contract: a DEBUG-level event whose
        'msg' format string (logformatter.DROPPEDMSG) is filled from 'args'.
        """
        return {
            'level': log.DEBUG,
            'msg': logformatter.DROPPEDMSG,
            'args': {
                'exception': exception,
                'item': item,
            }
        }
项目:rental    作者:meihuanyu    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate field values and insert the item into MongoDB (best-effort).

        Raises DropItem on an empty field value; insert failures are
        swallowed so a bad row does not abort the crawl.
        """
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so validation never fired.  Check the
        # field values; the dead `valid` flag and the stray debug prints
        # ('--'*40 / 'ggggg'*40) are removed.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))
        try:
            self.collection.insert(dict(item))
            log.msg("Question added to MongoDB database!",
                    level=log.DEBUG, spider=spider)
        except Exception:
            # Deliberate best-effort insert (was a bare except); narrowed so
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        return item
项目:Jobs-search    作者:Hopetree    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate field values and insert the item into MongoDB."""
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so validation never fired.  Check the
        # values instead; also fixes the 'Missming' typo in the message.
        for data in item:
            if not item[data]:
                raise DropItem('Missing {}!'.format(data))
        self.coll.insert(dict(item))
        log.msg('item added to mongodb database !',level=log.DEBUG,spider=spider)

        return item
项目:FreeFoodCalendar    作者:Yuliang-Zou    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate field values, then insert the event item into MongoDB."""
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard was dead code.
        # Check the field values; the `valid` flag was likewise dead.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))
        self.collection.insert(dict(item))
        log.msg("Event added to MongoDB database!",
                level=log.DEBUG, spider=spider)
        return item
项目:FreeFoodCalendar    作者:Yuliang-Zou    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate field values, then insert the event item into MongoDB."""
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard was dead code.
        # Check the field values; the `valid` flag was likewise dead.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))
        self.collection.insert(dict(item))
        log.msg("Event added to MongoDB database!",
                level=log.DEBUG, spider=spider)
        return item
项目:CourseWebCrawler    作者:BitTigerInst    | 项目源码 | 文件源码
def process_request(self, request, spider):
        """Attach a randomly chosen User-Agent header to the outgoing request.

        setdefault means an already-present User-Agent is kept untouched.
        """
        user_agent = random.choice(self.user_agent_list)
        if not user_agent:
            return
        request.headers.setdefault('User-Agent', user_agent)
        spider.log(
                u'User-Agent: {} {}'.format(request.headers.get('User-Agent'), request),
                level=log.DEBUG
            )
项目:scrappy    作者:DormyMo    | 项目源码 | 文件源码
def start_listening(self):
        """Bind the web service to the first free port in self.portrange
        and log the bound host/port at DEBUG level."""
        self.port = listen_tcp(self.portrange, self.host, self)
        bound = self.port.getHost()
        log.msg(format='Web service listening on %(host)s:%(port)d',
                level=log.DEBUG, host=bound.host, port=bound.port)
项目:taobao    作者:laogewen    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate field values and insert the item into MongoDB."""
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard never fired.
        # Check the field values; the `valid` flag was dead code.
        for data in item:
            if not item[data]:
                raise DropItem('Missing{0}!'.format(data))
        self.collection.insert(dict(item))
        log.msg('question added to mongodb database!',
                level=log.DEBUG,spider=spider)
        return item
项目:crawlBugs    作者:gnahznib    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Validate field values, insert into MongoDB, and pass the item on."""
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard never fired.
        for data in item:
            if not item[data]:
                raise DropItem("Missing data!")
        #self.collection.update({'url': item['url']}, dict(item), upsert=True)
        self.collection.insert(dict(item))
        log.msg("Question added to MongoDB database!",
                level=log.DEBUG, spider=spider)
        # Pipelines must return the item; the original returned None, which
        # would feed None into every later pipeline stage.
        return item
项目:malspider    作者:ciscocsirt    | 项目源码 | 文件源码
def _download_request(self, request, spider):
        """Download a request URL using webdriver.

        Navigates the shared webdriver instance to the URL, optionally saves
        a screenshot, and wraps the rendered page in a WebdriverResponse.
        """
        log.msg('Downloading %s with webdriver' % request.url, level=log.DEBUG)
        request.manager.webdriver.get(request.url)
        #time.sleep(5)
        # Screenshot support is opt-in via settings.
        take_screenshot = getattr(settings, 'TAKE_SCREENSHOT', None)
        screenshot_loc = getattr(settings, 'SCREENSHOT_LOCATION', None)
        if take_screenshot and screenshot_loc:
          # NOTE(review): random filenames may collide; presumably acceptable here.
          screenshot_location = screenshot_loc + str(randint(10000,10000000)) + '.png'
          request.manager.webdriver.save_screenshot(screenshot_location)
          request.meta['screenshot'] = screenshot_location

        # Propagate request headers into meta so downstream code can read them.
        request.meta['User-Agent'] = request.headers.get('User-Agent')
        request.meta['Referer'] = request.headers.get('Referer')
        return WebdriverResponse(request.url, request.manager.webdriver)
项目:malspider    作者:ciscocsirt    | 项目源码 | 文件源码
def _do_action_request(self, request, spider):
        """Perform an action on a previously webdriver-loaded page.

        Replays the actions queued on the request against the live webdriver
        session, then wraps the resulting page state in a WebdriverResponse.
        """
        log.msg('Running webdriver actions %s' % request.url, level=log.DEBUG)
        request.actions.perform()
        return WebdriverResponse(request.url, request.manager.webdriver)
项目:dytt8project    作者:WiseWolfs    | 项目源码 | 文件源码
def process_item(self,item,spider):
        """Validate field values and upsert the item into MongoDB by url."""
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard never fired.
        for data in item:
            if not item[data]:
                raise DropItem("Missing data!")
        self.collection.update({'url':item['url']},dict(item),upsert=True)
        log.msg("Question added to MongoDB !",level=log.DEBUG,spider=spider)
        return item
项目:Spider    作者:shineyr    | 项目源码 | 文件源码
def debug(msg):
    """Log *msg* (coerced to str) at scrapy's DEBUG level."""
    log.msg(str(msg), level=log.DEBUG)

#(original Chinese comment lost to encoding corruption)
项目:spiders    作者:poodarchu    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Insert *item* into MongoDB, or upsert it keyed on the configured
        unique field when one is defined."""
        # Resolve the unique-key name once instead of three times.
        uniq_key = self.__get_uniq_key()
        if uniq_key is None:
            self.collection.insert(dict(item))
        else:
            self.collection.update(
                            {uniq_key: item[uniq_key]},
                            dict(item),
                            upsert=True)
        log.msg("Item wrote to MongoDB database %s/%s" %
                    (settings['MONGODB_DB'], settings['MONGODB_COLLECTION']),
                    level=log.DEBUG, spider=spider)
        return item
项目:spiders    作者:poodarchu    | 项目源码 | 文件源码
def debug(msg):
    """Log *msg* (coerced to str) at scrapy's DEBUG level."""
    log.msg(str(msg), level=log.DEBUG)
项目:taobaobao    作者:1dot75cm    | 项目源码 | 文件源码
def open_spider(self, spider):
        """Connect to MongoDB and preload the set of already-stored nids
        so process_item can cheaply detect duplicates."""
        self.connection = pymongo.MongoClient(
            settings['MONGODB_SERVER'],
            settings['MONGODB_PORT']
        )
        self.db = self.connection[settings['MONGODB_DB']]
        self.collection = self.db[settings['MONGODB_COLLECTION']]

        log.msg('Load nid from MongoDB database!',
                level=log.DEBUG, spider=spider)
        # One pass over the collection builds the duplicate cache.
        self.itemlist = set(doc['nid'] for doc in self.collection.find())
项目:taobaobao    作者:1dot75cm    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Drop duplicate goods (by nid) and insert new ones into MongoDB."""
        nid = item['nid']
        if nid in self.itemlist:
            raise DropItem('Duplication data!')
        #self.collection.update({'nid': item['nid']}, dict(item), upsert=True)
        self.collection.insert(dict(item))
        # Record the nid so duplicates scraped later in the SAME run are also
        # dropped (the original only preloaded nids once at open_spider).
        self.itemlist.add(nid)
        log.msg('Goods added to MongoDB database!',
                level=log.DEBUG, spider=spider)
        return item
项目:bittiger-scrapy-stackoverflow    作者:yueran    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Store StackOverflow question items in MongoDB; pass others through."""
        if not isinstance(item,StackOverflowItem):
            return item
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard never fired.
        # Check the field values; the `valid` flag was dead code.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))
        self.collection.insert(dict(item))
        log.msg("Question added to MongoDB database!",level=log.DEBUG, spider=spider)
        return item
项目:bittiger-scrapy-stackoverflow    作者:yueran    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Store StackOverflow job items in MongoDB; pass others through."""
        if not isinstance(item,StackOverflowItemJobs):
            return item
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so the Missing guard never fired.
        # Check the field values; the `valid` flag was dead code.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))
        self.collection.insert(dict(item))
        log.msg("Jobs added to MongoDB database!",level=log.DEBUG, spider=spider)
        return item
项目:mafengwo_spider    作者:DWJWendy    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Route each item type to its MongoDB collection (best-effort insert).

        Replaces four copy-pasted isinstance/try blocks with a single
        dispatch table; behaviour (first matching type wins, insert failures
        are ignored) is unchanged.
        """
        routes = (
            (TravelCrawlItem, self.spot_review),
            (TravelnoteItem, self.note),
            (TravelfoodItem, self.food_review),
            (TravelhotelItem, self.hotel_review),
        )
        for item_cls, collection in routes:
            if isinstance(item, item_cls):
                try:
                    collection.insert(dict(item))
                    log.msg("News added to MongoDB database!", level=log.DEBUG, spider=spider)
                except Exception:
                    # Deliberate best-effort: a failed insert must not stop
                    # the pipeline (original swallowed Exception the same way).
                    pass
                break
        return item
项目:spider    作者:qcl643062    | 项目源码 | 文件源码
def parse(self, response):
        """Parse a 58.com listing page: queue the next listing page and every
        qualifying item-detail URL into Redis, and return the loaded item.
        """
        el = Pbdnof58Loader(response=response)
        PageUrl = response.xpath('//a[contains(@class, "next")]/@href').extract()
        self.log(PageUrl, level=log.DEBUG)
        r = Redis()
        if PageUrl != []:
            # Queue the next listing page for this spider and record it on
            # the item.
            r.lpush('myspider:58_urls', self.url + PageUrl[0])
            sleep(1)  # crude rate limiting
            el.add_value('UrlofPage', self.url + PageUrl[0])
        urls = response.xpath('//table[contains(@class, "tbimg")]/tr')
        for url in urls:
            url = url.xpath('td[contains(@class, "t")]/a/@href').extract()
            # Only single-link rows; skip 'zhuan' URLs (presumably reseller
            # listings — TODO confirm against the site).
            if len(url) == 1 and 'zhuan' not in url[0]:
                r.lpush('myspider:start_urls', url[0])
        return el.load_item()
项目:spider    作者:qcl643062    | 项目源码 | 文件源码
def __do__insert(self, conn, item, spider):
        """Insert one 58pbdn record through the supplied MySQL cursor.

        Python 2 only (comma-style except clause).  Errors are logged rather
        than raised, so a bad row does not abort the crawl.
        """
        try:
            # MySQL-specific INSERT ... SET syntax; values are parameterized
            # by the driver, so no manual escaping is needed.
            conn.execute("""
                insert into 58pbdndb set title = %s, area = %s, price = %s, quality = %s, time = %s
            """, (item['title'], item['area'], item['price'], item['quality'], item['time']))

        except MySQLdb.Error, e:
            spider.log("Mysql Error %d: %s" % (e.args[0], e.args[1]), level=log.DEBUG)
项目:qzonePictureSpider    作者:samrayleung    | 项目源码 | 文件源码
def file_path(self, request, response=None, info=None):
        """Derive the image storage path: <account>/<album_name>/<guid>.jpg.

        The third-from-last URL path segment serves as the image id.
        """
        item = request.meta['item']
        image_guid = request.url.split('/')[-3]
        log.msg(image_guid, level=log.DEBUG)
        return u'{0[account]}/{0[album_name]}/{1}.jpg'.format(item, image_guid)
项目:smth_coupons_crawler    作者:moyawong    | 项目源码 | 文件源码
def process_item(self, item, spider):
        """Filter, deduplicate and store coupon posts, mailing each match.

        Drops the item when: title/content is empty, any field value is
        empty, the title contains an EXCLUDE keyword, neither a KEYS keyword
        nor a whitelisted AUTHOR matches, or an item with the same title is
        already stored.
        """
        # Specific checks first so their original drop messages are preserved.
        if item['title'] == '':
            raise DropItem("title is empty")
        if item['content'] == '':
            raise DropItem("content is empty")
        # Iterating an Item yields field *names* (always truthy); the
        # original tested the name, so this guard never fired.  Check values.
        for data in item:
            if not item[data]:
                raise DropItem("Missing {0}!".format(data))

        for keyword in settings['EXCLUDE']:
            if keyword in item['title']:
                # The original constructed this DropItem without raising it,
                # so excluded items were skipped yet still returned; raise so
                # the exclusion list actually drops the item.
                raise DropItem("title have invalid keywords")

        # Keep the item only if a keyword or a whitelisted author matches.
        iskey = any(key in item['title'] for key in settings['KEYS'])
        if not iskey:
            iskey = any(author == item['author'] for author in settings['AUTHOR'])
        if not iskey:
            raise DropItem("item do not have keywords")

        # Duplicate check against titles already stored in MongoDB.
        for info in self.db.items.find({}, {"title": 1}):
            if info["title"].encode("utf-8") == item["title"]:
                raise DropItem("item exist!")

        self.collection.insert(dict(item))
        send_mail(item['title'], item['content'], item['href'])
        return item