当前位置：移动技术网 > 科技 > 操作系统 > Windows > Scrapy 案例：爬取数据保存到 Excel

Scrapy 案例：爬取数据保存到 Excel

2020年11月28日  | 移动技术网科技  | 我要评论
# -*- coding: utf-8 -*- import scrapy class Mkw1Item(scrapy.Item): # define the fields for your item here like: img = scrapy.Field() title = scrapy.Field() type = scrapy.Field() pic = scrapy.Field() # -*- coding: utf-8 -*- import s……
# -*- coding: utf-8 -*-

import scrapy


class Mkw1Item(scrapy.Item):
    """Container for one course entry scraped by ``MukeSpider``.

    The four fields below are the ones ``MukeSpider.parse`` fills in and the
    ones ``Mkw1Pipeline`` uses as its spreadsheet header row.
    """

    # Image URL extracted (by regex) from the inline ``style`` attribute of
    # the course anchor's ``div``.
    img = scrapy.Field()
    # Text of the first ``<p>`` inside the course anchor.
    title = scrapy.Field()
    # Text of the second ``<p>`` inside the course anchor.
    type = scrapy.Field()
    # Text of the first ``<span>`` in the third ``<p>`` of the course anchor.
    # NOTE(review): the name suggests a picture, but the spider stores span
    # text here -- confirm what the page actually shows in that span.
    pic = scrapy.Field()

# -*- coding: utf-8 -*-
import scrapy
from .. import items
import re


class MukeSpider(scrapy.Spider):
    """Spider that scrapes the imooc.com new-course listing page.

    For every course anchor found under the listing container, ``parse``
    yields one freshly created ``Mkw1Item`` with the fields ``img``,
    ``title``, ``type`` and ``pic``.
    """

    name = 'muke'
    allowed_domains = ['imooc.com']
    start_urls = ['https://www.imooc.com/new/course/list']

    # XPath of the course anchors inside the listing container.
    _COURSE_XPATH = '//*[@id="main"]/div[5]/div[1]/a'

    # Extracts the image URL from the inline ``style`` attribute: everything
    # from the first "//" up to the last "g" in the string (the pattern is
    # greedy).  Compiled once, and written as a raw string so that ``\.`` is
    # not treated as an (invalid) string escape.
    _IMG_URL_RE = re.compile(r'//.*\.*g')

    def parse(self, response):
        """Yield one ``Mkw1Item`` per course anchor on the listing page.

        Args:
            response: the Scrapy response for the listing page.

        Yields:
            Mkw1Item: a new item per course.  A field whose node is missing
            from the page is set to ``''`` instead of raising ``IndexError``,
            so one malformed entry does not abort the rest of the page.
        """
        for course in response.xpath(self._COURSE_XPATH):
            # A new item per course: re-yielding one shared, mutated instance
            # makes every downstream consumer see the same object.
            item = items.Mkw1Item()

            # All lookups are relative to the current anchor (``./``); an
            # absolute ``//a[n]/...`` would match anchors anywhere in the
            # document, not only the courses in the listing.
            style = course.xpath('./div/@style').extract_first(default='')
            url_match = self._IMG_URL_RE.search(style)
            item['img'] = url_match.group(0) if url_match else ''

            item['title'] = course.xpath(
                './p[1]/text()').extract_first(default='')
            item['type'] = course.xpath(
                './p[2]/text()').extract_first(default='')
            item['pic'] = course.xpath(
                './p[3]/span[1]/text()').extract_first(default='')
            yield item

# -*- coding: utf-8 -*-

import xlwt


class Mkw1Pipeline(object):
    """Item pipeline that collects scraped items into an xlwt workbook.

    A header row is written when the pipeline is created, each processed item
    becomes one further row, and the workbook is saved to ``output_path``
    when the spider closes.
    """

    # Destination of the saved workbook.
    # NOTE(review): xlwt produces the legacy .xls (Excel 95-2003) format, so
    # the ".xlsx" extension is likely misleading and Excel may refuse to open
    # the file.  The path is left unchanged to avoid moving the output for
    # existing users -- consider switching to ".xls".
    output_path = '../mkw.xlsx'

    def __init__(self):
        # Index of the next free data row; row 0 holds the header.
        self.num = 1
        self.wb = xlwt.Workbook()
        self.sheet = self.wb.add_sheet('慕课网')
        # Column order of the sheet; also used as the header labels.
        self.list = ['img', 'title', 'type', 'pic']
        for column, header in enumerate(self.list):
            self.sheet.write(0, column, header)

    def process_item(self, item, spider):
        """Append ``item`` as the next row of the sheet and pass it on.

        Args:
            item: mapping-like scraped item (anything with ``.get``).
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        # Write by header order, not by the item's own key order, so every
        # value lands under its matching column even if the spider fills the
        # fields in a different order or leaves one out.
        for column, field in enumerate(self.list):
            self.sheet.write(self.num, column, item.get(field, ''))
        self.num += 1
        return item

    def close_spider(self, spider):
        """Save the workbook to ``output_path`` once the spider finishes."""
        self.wb.save(self.output_path)


本文地址:https://blog.csdn.net/Hoo_ligan/article/details/110261766

如您对本文有疑问或者有任何想说的，请点击进行留言回复，万千网友为您解惑！

相关文章:

验证码:
移动技术网