Scrapy, spider name not found, what could be causing this error?

A

Andrey Kovalchuk, 2017-02-08 07:20:09

Django

Andrey Kovalchuk, 2017-02-08 07:20:09

Good day.
Subject: the spider does not start. It crashes like this.

KeyError: 'Spider not found: news_spider'

As far as I understand, this means that something is wrong with the spider code, but I can't figure out what.
Below is the code for the spider.
news_scrap

import locale

import scrapy

from main.models import News
from ..sup import replace_date
from ..items import NewsItem


class QuotesSpider1(scrapy.Spider):
    """Spider that collects news articles from vladivostok3000.ru.

    Scrapy looks spiders up by their ``name`` attribute, so this spider is
    started with ``scrapy crawl news_scrap``.
    """

    name = "news_scrap"

    # Listing section whose articles are actually scraped.
    news_url = 'http://vladivostok3000.ru/news/'
    # Highest listing-page number from which a "next" link is still followed.
    max_page = 5

    def start_requests(self):
        """Yield one request per section listing page."""
        # Every entry needs its own trailing comma: adjacent string literals
        # are silently concatenated into a single (invalid) URL otherwise.
        urls = [
            'http://vladivostok3000.ru/news/',
            'http://vladivostok3000.ru/events/',
            'http://vladivostok3000.ru/city/',
            'http://vladivostok3000.ru/culture/',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Parse a listing page and schedule requests for its articles.

        Only the news section is handled; responses for the events, city and
        culture sections are currently ignored. Pagination is followed until
        the page counter carried in ``response.meta`` exceeds ``max_page``.
        """
        # Prefix match (not equality) so that paginated news pages, whose URL
        # differs from the first page, are parsed as well.
        if not response.url.startswith(self.news_url):
            return

        # An empty News table must not abort the very first crawl.
        try:
            last_source = News.objects.latest('date').site_source
        except News.DoesNotExist:
            last_source = None

        links = response.css('.news_title_page a::attr(href)').extract()
        for link in links:
            if link == last_source:
                self.logger.info('Base actual')
            else:
                yield scrapy.Request(response.urljoin(link), callback=self.parse_news)

        # The counter travels with the request; a local variable would be
        # reset on every call and the limit would never apply.
        page_count = response.meta.get('page_count', 1)
        next_page = response.css('.next::attr(href)').extract_first()
        if next_page is not None and page_count <= self.max_page:
            yield scrapy.Request(
                response.urljoin(next_page),
                callback=self.parse,
                meta={'page_count': page_count + 1},
            )

    def parse_news(self, response):
        """Extract a single article page into a ``NewsItem``.

        Raises ``IndexError`` when one of the expected elements is missing
        from the page, because the first match is taken unconditionally.
        """
        self.logger.info('Hi, this is parse_news! %s', response.url)
        item = NewsItem()
        item['title'] = response.xpath('//div[@class="news_title"]/text()').extract()[0]
        self.logger.info('Title: success')
        item['image'] = response.xpath('//div[@class="news_pic"]/img/@src').extract()[0]
        self.logger.info('Image_link: success')
        paragraphs = response.xpath('//div[@class="post_content"]/p/text()').extract()
        # Replace non-breaking spaces with ordinary ones.
        item['content'] = ' '.join(paragraphs).replace('\xa0', ' ')
        self.logger.info('Content: success')
        # NOTE(review): assumes og:url is published without a scheme -- confirm.
        item['site_source'] = 'http://' + response.xpath('//meta[@property="og:url"]/@content').extract()[0]
        self.logger.info('Site: success')
        # NOTE(review): presumably replace_date relies on the Russian locale
        # to parse month names -- confirm against its implementation.
        locale.setlocale(locale.LC_ALL, 'ru_RU.UTF-8')
        item['date'] = replace_date(response.xpath('//div[@class="post_section"]/text()').extract()[0])
        self.logger.info('Date: success')

        yield item

Reply

Answer the question

In order to leave comments, you need to log in

2 answer(s)

S

screen_sailor, 2017-02-08
@mrkovalchuk

The spider's name is news_scrap, isn't it? That's why Scrapy reports that it could not find news_spider.

E

Evgen, 2017-05-17
@Verz1Lka

name = "news_scrap"
This is your spider name