在Django中使用scrapy的代码
在Django中使用Scrapy，包括Django和Scrapy两边各自需要修改的代码。
创建一个django项目
django-admin.py startproject djangoapp
修改Scrapy项目的settings.py文件（让Scrapy在启动时加载Django的配置）
import os

# Item pipelines enabled for this Scrapy project.
ITEM_PIPELINES = ['myapp.pipelines.DjangoPipeline']


def setup_django_env(path):
    """Load the Django ``settings`` module found in *path* and activate it.

    The module is located with ``imp.find_module`` (searching only *path*),
    loaded under the name ``settings`` and handed to Django's
    ``setup_environ``.

    Args:
        path: Directory that contains the Django project's ``settings.py``.

    Raises:
        ImportError: If no ``settings`` module can be found in *path*.
    """
    # Imported lazily so the names are only required when the function runs.
    import imp
    from django.core.management import setup_environ

    f, filename, desc = imp.find_module('settings', [path])
    try:
        project = imp.load_module('settings', f, filename, desc)
    finally:
        # imp.load_module does not close the file; the caller must do it,
        # even when loading raises.
        if f is not None:
            f.close()
    setup_environ(project)


# Parent of the directory that contains this settings file.
current_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
# The Django project is expected one level above that, in ``djangoapp/``.
setup_django_env(os.path.join(current_dir, '../djangoapp/'))
pipelines.py文件
python
import datetime

from django.db.utils import IntegrityError
from scrapy.exceptions import DropItem

from djangoapp.websites.models import Website
class DjangoPipeline(object):
    """Scrapy item pipeline that stores each scraped link as a ``Website`` row."""

    def process_item(self, item, spider):
        """Save the item's first link to the database and pass the item on.

        Args:
            item: Scraped item; ``item['link']`` must be a non-empty sequence
                whose first element is the link to store.
            spider: The spider that produced the item (unused).

        Returns:
            The unchanged *item*, so later pipelines can keep processing it.

        Raises:
            DropItem: If saving raises ``IntegrityError`` (for example when
                the link violates a uniqueness constraint).
        """
        # Read the link once; it is needed for both the row and the message.
        link = item['link'][0]
        website = Website(
            link=link,
            created=datetime.datetime.now(),
        )
        try:
            website.save()
        except IntegrityError:
            raise DropItem("Contains duplicate domain: %s" % link)
        return item
djangoapp/websites/models.py文件（Django模型）
from django.db import models
class Website(models.Model):
    """A stored link together with the time it was recorded."""

    # The link text; ``unique=True`` rejects a second row with the same value.
    link = models.CharField(max_length=200, unique=True)
    # Timestamp supplied by the caller; 'date created' is the verbose name.
    created = models.DateTimeField('date created')

    def __unicode__(self):
        """Return the link as the object's unicode representation."""
        return u"%s" % self.link
相关文章