Answer the question
In order to leave comments, you need to log in
Why doesn't Scrapy Xpath parse some expressions?
Good afternoon!
I'm trying to learn Scrapy by adapting a working example I found to my needs.
#! coding: utf-8
__author__ = 'acman'
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy.item import Item, Field
from scrapy.contrib.loader import XPathItemLoader
from scrapy.contrib.loader.processor import TakeFirst
from scrapy.selector import HtmlXPathSelector
#
#
class ScrapyTestItem(Item):
    """Container for the data scraped from a single page.

    Every attribute is declared as a plain ``Field()`` with no metadata.
    """

    # Declared fields; the spider's parse_item fills each of them.
    title = Field()
    imagelink = Field()
    url = Field()
    price = Field()
    field_11 = Field()
class Test03Loader(XPathItemLoader):
    """Item loader that applies ``TakeFirst`` to every field by default."""

    # Used as the output processor for any field that does not define its own.
    default_output_processor = TakeFirst()
class ScrapyTestSpider(CrawlSpider):
    """Crawl pastelmebel.ru and build a ``ScrapyTestItem`` per matched page.

    Links whose URL matches ``/bedroom-august/`` are followed and their
    responses are handed to :meth:`parse_item`.
    """

    name = "test03"
    allowed_domains = ["pastelmebel.ru"]
    start_urls = ["http://pastelmebel.ru/shop/bedroom-furniture/bedroom-august/the-cabinet-wall-s-83-sfw1w-august-wenge/"]
    rules = (
        Rule(
            # Explicit one-element tuple: ('...') without the trailing comma
            # is just a parenthesized string.
            LinkExtractor(allow=('/bedroom-august/',)),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Extract the item fields from ``response`` via XPath.

        Args:
            response: the downloaded page passed in by the crawl rule.

        Returns:
            The item produced by ``Test03Loader.load_item()``.
        """
        hxs = HtmlXPathSelector(response)
        loader = Test03Loader(ScrapyTestItem(), hxs)
        loader.add_xpath('title', "//h1[last()]/text()")
        loader.add_xpath('imagelink', "//img[1]/@src")
        loader.add_xpath('price', "//*[@class='itemOtherPricePrice number']/text()")
        # Browser dev tools show a <tbody> that is frequently absent from the
        # raw HTML Scrapy downloads, so a hard-coded /tbody/ step matches
        # nothing. "//tr[1]" works whether or not <tbody> is present.
        loader.add_xpath(
            'field_11',
            ".//*[@id='tab-sub-about-features']/table//tr[1]/td[2]/span",
        )
        loader.add_value('url', response.url)
        return loader.load_item()
Answer the question
In order to leave comments, you need to log in
and you yourself, excuse me, understand what is written there? )
.// this one how?
also *
Try to learn how to write XPath expressions yourself (hint: well-written ones don't rely on positional indexes such as [1] or [2]), and working with XPath will become easy and understandable.
delete /tbody/
You can test expressions in the Scrapy shell,
for example: scrapy shell http://targetsite.com
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question