根据关键词query获取google_img(api方式)

说明

根据关键词query获取google_img

USERNAME = "xxx"
PASSWORD = "xxx"

USERNAME 和 PASSWORD 是 Oxylabs API 的账号和密码,需要在 Oxylabs 官网申请。

代码

首先通过 API 获取图片链接并保存为 JSON 文件,然后再根据这些链接下载图片。

第一部分:链接保存为Json

import requests
from pprint import pprint
import pandas as pd
from tqdm import tqdm
import json
import time
import os

# Part 1: query the Oxylabs realtime endpoint for Google image-search result
# pages and store every raw JSON response under ./google_data/<keyword>/.
name_list = ["cat", "dog"]
USERNAME = "xxx"
PASSWORD = "xxx"

# Upper bound (seconds) for a single API call; without a timeout a stalled
# connection would block the whole crawl forever.
REQUEST_TIMEOUT = 180

for object_name in name_list:
    start_page = 1
    length = 1      # number of result pages requested per API call
    total_it = 70   # number of API calls issued per keyword
    print(object_name)

    # Create the output directory once per keyword instead of re-checking it
    # on every iteration.
    out_dir = f'./google_data/{object_name}'
    os.makedirs(out_dir, exist_ok=True)

    for _ in tqdm(range(total_it), desc=object_name):
        payload = {
            'source': 'google_search',
            'domain': 'nl',
            'query': object_name,
            'parse': True,
            'context': [
                # tbm=isch selects Google's image-search vertical.
                {'key': 'tbm', 'value': 'isch'},
            ],
            'start_page': start_page,
            'pages': length,
        }

        end_page = start_page + length

        try:
            response = requests.post(
                'https://realtime.oxylabs.io/v1/queries',
                auth=(USERNAME, PASSWORD),
                json=payload,
                timeout=REQUEST_TIMEOUT,
            )
            # requests raises a ValueError subclass when the body is not JSON.
            result = response.json()
        except (requests.RequestException, ValueError) as err:
            # One bad page must not abort the remaining pages and keywords.
            print(f'{object_name} page {start_page}: request failed: {err}')
        else:
            if not response.ok:
                # The body is still saved (as before) so the failure stays
                # visible to whatever processes these files later.
                print(f'{object_name} page {start_page}: HTTP {response.status_code}')
            # NOTE: the literal "object_name" prefix in the file name is kept
            # unchanged so files from earlier runs are overwritten, not duplicated.
            out_path = f'{out_dir}/object_name_page_{start_page}_to_{end_page}.json'
            with open(out_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, indent=4)

        start_page = end_page
        # Pause between calls to stay gentle on the API.
        time.sleep(3)

第二部分:链接转换为img

import json
from tqdm import tqdm
import requests
import os 
# Part 2: walk the JSON files saved under ./google_data/<keyword>/, download
# every image URL found in them, and record the keywords that had a failure.
folder = './google_data'
subname_list = os.listdir(folder)
down_folder = './google_image/img_download'
os.makedirs(down_folder, exist_ok=True)

# Upper bound (seconds) for one image download so a dead host cannot hang the run.
DOWNLOAD_TIMEOUT = 30

fail_list = []
for subname in subname_list:
    subfolder = os.path.join(folder, subname)
    if not os.path.isdir(subfolder):
        # Skip stray files (e.g. .DS_Store) sitting next to the keyword folders.
        continue
    path_lists = os.listdir(subfolder)
    cnt = 0                 # running index used in the saved image file names
    img_down_list = set()   # URLs already seen for this keyword (O(1) lookup)
    target_dir = os.path.join(down_folder, subname)

    for mypath in path_lists:
        json_path = os.path.join(subfolder, mypath)
        try:
            with open(json_path, encoding='utf-8') as file:
                data = json.load(file)
            # list() also rejects a non-iterable value (e.g. null) right here.
            img_urls = list(data['results'][0]['content']['results']['organic'])
        except (ValueError, KeyError, IndexError, TypeError):
            # Unreadable JSON or a response without the expected structure.
            print(subname)
            fail_list.append(subname)
            continue

        for img_url_item in tqdm(img_urls, desc=f'{subname}-{mypath[-7:-5]}'):
            try:
                img_url = img_url_item['image']
            except (KeyError, TypeError):
                print(subname)
                fail_list.append(subname)
                continue

            if img_url in img_down_list:
                continue
            img_down_list.add(img_url)

            # Only real web URLs can be fetched; skip missing or inline values.
            if not isinstance(img_url, str) or not img_url.startswith(('http://', 'https://')):
                continue

            try:
                response = requests.get(img_url, timeout=DOWNLOAD_TIMEOUT)
            except requests.RequestException as err:
                # A single unreachable image must not abort the rest of the page.
                print('图片下载失败:', err)
                fail_list.append(subname)
                continue

            if response.status_code == 200:
                os.makedirs(target_dir, exist_ok=True)
                img_path = os.path.join(target_dir, f'{subname}_{cnt}.jpg')
                with open(img_path, 'wb') as img_file:
                    img_file.write(response.content)
                cnt += 1
            else:
                print('图片下载失败,状态码:', response.status_code)

# Collapse repeated entries; sorting makes the report deterministic.
unique_set = set(fail_list)
unique_list = sorted(unique_set)
print(unique_list)
filename = 'my_list.txt'

# Write one failed keyword per line.
with open(filename, 'w', encoding='utf-8') as file:
    file.writelines(f'{item}\n' for item in unique_list)

相关推荐

  1. 根据关键词query获取google_img(api方式)

    2024-07-10 13:50:04       25 阅读
  2. golang 根据URL获取文件名

    2024-07-10 13:50:04       56 阅读
  3. linux下根据进程pid获取对应的window id的方法

    2024-07-10 13:50:04       35 阅读
  4. 【docker】根据docker inspect获取启动参数

    2024-07-10 13:50:04       64 阅读
  5. php根据用户地址获取经纬度

    2024-07-10 13:50:04       36 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-07-10 13:50:04       99 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-07-10 13:50:04       107 阅读
  3. 在Django里面运行非项目文件

    2024-07-10 13:50:04       90 阅读
  4. Python语言-面向对象

    2024-07-10 13:50:04       98 阅读

热门阅读

  1. redis中的事务和mysql中的事务有什么区别?

    2024-07-10 13:50:04       23 阅读
  2. C# 构造函数依赖注入 使用out向外传递参数

    2024-07-10 13:50:04       28 阅读
  3. 信息时代,呼唤新的哲学

    2024-07-10 13:50:04       25 阅读
  4. 【数据基础】— B树

    2024-07-10 13:50:04       30 阅读
  5. Vue 路由传参 query方法 bug 记录

    2024-07-10 13:50:04       26 阅读
  6. 翻页 上一页/下一页

    2024-07-10 13:50:04       26 阅读
  7. 前端导出pdf

    2024-07-10 13:50:04       28 阅读
  8. Knife4j的原理及应用详解(五)

    2024-07-10 13:50:04       27 阅读
  9. Day2--每日一练

    2024-07-10 13:50:04       28 阅读
  10. 东方博宜1626 - 暑假的旅游计划

    2024-07-10 13:50:04       28 阅读