Writing items to a MySQL database in Scrapy
I am new to Scrapy, and I have the following spider code:
class Example_spider(BaseSpider):
    name = "example"
    allowed_domains = ["www.example.com"]

    def start_requests(self):
        yield self.make_requests_from_url("http://www.example.com/bookstore/new")

    def parse(self, response):
        hxs = HtmlXPathSelector(response)
        urls = hxs.select('//div[@class="bookListingBookTitle"]/a/@href').extract()
        for i in urls:
            yield Request(urljoin("http://www.example.com/", i[1:]), callback=self.parse_url)

    def parse_url(self, response):
        hxs = HtmlXPathSelector(response)
        main = hxs.select('//div[@id="bookshelf-bg"]')
        items = []
        for i in main:
            item = Exampleitem()
            item['book_name'] = i.select('div[@class="slickwrap full"]/div[@id="bookstore_detail"]/div[@class="book_listing clearfix"]/div[@class="bookstore_right"]/div[@class="title_and_byline"]/p[@class="book_title"]/text()')[0].extract()
            item['price'] = i.select('div[@id="book-sidebar-modules"]/div[@class="add_to_cart_wrapper slickshadow"]/div[@class="panes"]/div[@class="pane clearfix"]/div[@class="inner"]/div[@class="add_to_cart 0"]/form/div[@class="line-item"]/div[@class="line-item-price"]/text()').extract()
            items.append(item)
        return items
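For reference, the spider above instantiates Exampleitem. A minimal sketch of how such an item class is typically defined in the project's items.py follows; the field names are inferred from the spider code, and the file layout is assumed to be the standard Scrapy one.

# items.py (sketch): the item class the spider assumes.
# Field names are inferred from item['book_name'] and item['price'] above.
from scrapy.item import Item, Field

class Exampleitem(Item):
    book_name = Field()
    price = Field()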
And the pipeline code is:
class examplePipeline(object):

    def __init__(self):
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
                                            db='blurb',
                                            user='root',
                                            passwd='redhat',
                                            cursorclass=MySQLdb.cursors.DictCursor,
                                            charset='utf8',
                                            use_unicode=True)

    def process_item(self, spider, item):
        # run db query in thread pool
        assert isinstance(item, Exampleitem)
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        print "db connected-=========>"
        # create record if doesn't exist.
        tx.execute("select * from example_book_store where book_name = %s", (item['book_name']))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute("""INSERT INTO example_book_store (book_name, price)
                          VALUES (%s, %s)""",
                       (item['book_name'], item['price']))
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
When I run it, I get the following error:
exceptions.NameError: global name 'Exampleitem' is not defined
I get the above error when I add the following line to the process_item method:
assert isinstance(item, Exampleitem)
and without that line I get:
exceptions.TypeError: 'Example_spider' object is not subscriptable
Can anyone get this code to run and make sure all the items are saved to the database?
Accepted answer
Try the following code in your pipeline:
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request

class MySQLStorePipeline(object):

    def __init__(self):
        self.conn = MySQLdb.connect('host', 'user', 'passwd',
                                    'dbname', charset="utf8",
                                    use_unicode=True)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        try:
            self.cursor.execute("""INSERT INTO example_book_store (book_name, price)
                                   VALUES (%s, %s)""",
                                (item['book_name'].encode('utf-8'),
                                 item['price'].encode('utf-8')))
            self.conn.commit()
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        return item
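For the pipeline to be used at all, it also has to be enabled in the project's settings.py. A minimal sketch follows, assuming the project package is named example; adjust the dotted path to match your own project.

# settings.py (sketch): register the pipeline.
# "example.pipelines.MySQLStorePipeline" is an assumed path; use your project's module.
# Newer Scrapy versions expect a dict mapping the pipeline path to an order number;
# very old versions used a plain list of paths instead.
ITEM_PIPELINES = {
    'example.pipelines.MySQLStorePipeline': 300,
}

As for the two errors in the question: the NameError usually just means Exampleitem was never imported in the pipeline module (for example, from example.items import Exampleitem, with the path adjusted to your project), and the TypeError comes from the swapped arguments in the original process_item. Scrapy calls it as process_item(item, spider), so with the parameters declared in the reverse order the item variable actually holds the spider object and item['book_name'] fails.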