1.创建scrapy项目
首先创建 Python 项目，然后在项目目录的命令行中依次执行以下命令：
#安装依赖
pip3 install scrapy
#创建scrapy项目
scrapy startproject scrapy_guazi_demo
cd scrapy_guazi_demo
scrapy genspider guazi guazi.com
2.items.py
声明字段
class ScrapyGuaziDemoItem(scrapy.Item):
    """Fields collected for one Guazi car listing.

    Every attribute must be a ``scrapy.Field()`` *instance*: Scrapy only
    registers Field instances as item fields, so assigning the bare
    ``scrapy.Field`` class would leave the key undeclared and make
    ``item["<key>"] = ...`` raise ``KeyError``.
    """

    # Listing (vehicle source) number; named ``car_id`` to match the key the
    # spider assigns in ``parse_detail``.
    car_id = scrapy.Field()
    # Car name
    car_name = scrapy.Field()
    # Engine displacement
    pailiang = scrapy.Field()
    # Gearbox / transmission
    bianshuxiang = scrapy.Field()
    # Price
    price = scrapy.Field()
3.spiders/guazi.py
from typing import Iterable
import scrapy
from scrapy import Request
import json
from ..items import ScrapyGuaziDemoItem
class GuaziSpider(scrapy.Spider):
    """Spider that crawls Guazi car listings and yields one item per car.

    Flow: ``start_requests`` builds list-API requests from the filter values
    stored in ``minor.txt``; ``parse`` turns list entries into detail-page
    requests; ``parse_detail`` extracts item fields from the detail page.
    """

    name = "guazi"
    allowed_domains = ["guazi.com"]
    # start_requests() is overridden below, so the initial requests come from
    # there rather than from this list.
    start_urls = ["https://guazi.com"]

    # List API URL; the single ``{}`` placeholder receives the ``minor`` filter value.
    LIST_URL_TEMPLATE = "https://mapi.guazi.com/car-source/carList/wapList?versionId=0.0.0.0&sourceFrom=wap&deviceId=76f7395f-04e0-4d6c-8aa8-53d8347052c5&guid=76f7395f-04e0-4d6c-8aa8-53d8347052c5&userId=&orgUserId=&p_key=mguazicom_list&unit=&guazi_city=103&location_city=103&selectedCity=103&osv=IOS16.6&city_filter=103&page=1&incident_id=454034440138702918&pageSize=10&order=0&minor={}&tag=&license_date=0,-1&auto_type=&driving_type=&gearbox=&road_haul=0,-1&air_displacement=0,-1&emission=&car_color=&guobie=&seat=&fuel_type=&key_word=&priceRange=0,-1&tag_types=&finance_types=&diff_city=&initialPriceRange=0,-1&monthlyPriceRange=0,-1&transfer_num=&car_year=&carid_qigangshu=&carid_jinqixingshi=&cheliangjibie=&horsepower=0,-1&voyage=0,-1&platfromSource=wap"

    # Detail page URL; ``{}`` receives the listing's clue id. The original
    # string had no placeholder, so ``.format(clue_id)`` was a no-op and every
    # request went to the same hard-coded clueId.
    DETAIL_URL_TEMPLATE = "https://m.guazi.com/detail?incident_id=1709734006486&clueId={}&hideTitlebar=1&h5Ready=1&cpc_ad=-1&ad_location=zero&rank=1&qpres=454043988656259095&storeId=2046695&carListRecommendId=c3865b2f-a61c-4d10-9385-f625533c4672&tk_p_mti=5.2.guazi_mall.list.feed-car.0"

    def start_requests(self):
        """Yield list-page requests built from the filter values in ``minor.txt``.

        ``minor.txt`` is opened relative to the current working directory and
        is parsed as JSON. The values read are
        ``data[1]["filterValue"]["common"]``: a mapping whose values are lists
        of dicts, each carrying a ``"value"`` entry that is substituted into
        the ``minor`` query parameter.
        """
        with open("minor.txt", "r", encoding="utf-8") as f:
            minor = json.load(f)
        groups = minor.get("data")[1].get("filterValue").get("common")
        for entries in groups.values():
            for entry in entries:
                url = self.LIST_URL_TEMPLATE.format(entry.get("value"))
                yield scrapy.Request(url=url, callback=self.parse)
                # NOTE(review): the source's indentation was lost; this break
                # is assumed to sit right after the yield, limiting the crawl
                # to the first entry of each group. Confirm the intended
                # nesting, or remove it to crawl every entry.
                break

    def parse(self, response):
        """Handle the first list page and request a car's detail page.

        :param response: JSON response of the list API; listings are read
            from ``data.postList``.
        :return: generator of detail-page requests handled by ``parse_detail``.
        """
        # Tolerate a missing or null ``data`` / ``postList`` instead of crashing.
        data = response.json().get("data") or {}
        for post in data.get("postList") or []:
            clue_id = post.get("clue_id")
            if clue_id is None:
                # Without an id the detail URL would be meaningless.
                continue
            yield scrapy.Request(
                url=self.DETAIL_URL_TEMPLATE.format(clue_id),
                callback=self.parse_detail,
            )
            # NOTE(review): as in the original, only one listing is followed;
            # remove this break to follow every listing on the page.
            break

    @staticmethod
    def _first_text(response, xpath):
        """Return the stripped first text match of *xpath*, or "" if none."""
        return response.xpath(xpath).extract_first(default="").strip()

    def parse_detail(self, response):
        """Extract the listing number and car name from a detail page.

        A field whose XPath matches nothing is stored as an empty string
        rather than raising ``AttributeError`` on ``None.strip()``.
        """
        guazi_info = ScrapyGuaziDemoItem()
        # Listing (vehicle source) number
        guazi_info["car_id"] = self._first_text(
            response,
            "//div[@class='base-info__main__items'][2]/div[@class='item-list']/div[@class='item-list__items'][4]/p[1]/text()",
        )
        # Car name
        guazi_info["car_name"] = self._first_text(
            response, "//div[@class='base-info__title']/text()"
        )
        yield guazi_info