文章目录
说明
根据关键词query获取google_img
USERNAME = "xxx"
PASSWORD = "xxx"
USERNAME 和 PASSWORD 需要在 Oxylabs 官网申请账号后获得。
代码
首先获取图片链接,保存为json之后下载。
第一部分:链接保存为Json
import requests
from pprint import pprint
import pandas as pd
from tqdm import tqdm
import json
import time
import os
# Search keywords; one Google Images crawl is run per keyword.
name_list = ["cat", "dog"]

# Oxylabs Realtime API credentials (placeholders -- replace with real values).
USERNAME = "xxx"
PASSWORD = "xxx"

# Upper bound (seconds) for a single API call, so that one stalled request
# cannot hang the whole crawl.
REQUEST_TIMEOUT = 180

for object_name in name_list:
    start_page = 1
    length = 1      # number of result pages requested per API call
    total_it = 70   # number of API calls issued per keyword
    print(object_name)

    # One output directory per keyword; created once instead of being
    # re-checked on every iteration.
    output_dir = f'./google_data/{object_name}'
    os.makedirs(output_dir, exist_ok=True)

    for _ in tqdm(range(total_it), desc=object_name):
        payload = {
            'source': 'google_search',
            'domain': 'nl',
            'query': object_name,
            'parse': True,
            'context': [
                # tbm=isch switches the Google search to image results.
                {'key': 'tbm', 'value': 'isch'},
            ],
            'start_page': start_page,
            'pages': length,
        }
        end_page = start_page + length

        try:
            response = requests.post(
                'https://realtime.oxylabs.io/v1/queries',
                auth=(USERNAME, PASSWORD),
                json=payload,
                timeout=REQUEST_TIMEOUT,
            )
            # Do not store error payloads (bad credentials, quota, ...) as if
            # they were search results.
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as err:
            # Skip this page but keep crawling: one failed request should not
            # discard the pages that were already fetched.
            print(f'{object_name}: pages {start_page}-{end_page} failed: {err}')
        else:
            # NOTE: the file name starts with the literal text "object_name"
            # (it is not interpolated); kept unchanged so that names match
            # data saved by earlier runs.
            with open(f'{output_dir}/object_name_page_{start_page}_to_{end_page}.json', 'w') as f:
                json.dump(result, f, indent=4)

        start_page = end_page
        # Pause between calls to avoid hammering the API.
        time.sleep(3)
第二部分:链接转换为img
import json
from tqdm import tqdm
import requests
import os
# Directory holding one sub-folder of saved JSON responses per keyword.
folder = './google_data'
subname_list = os.listdir(folder)

# Root directory for downloaded images (one sub-folder per keyword).
down_folder = './google_image/img_download'
os.makedirs(down_folder, exist_ok=True)

# Keywords for which at least one JSON file or image could not be processed.
fail_list = []

for subname in subname_list:
    subfolder = os.path.join(folder, subname)
    path_lists = os.listdir(subfolder)
    cnt = 0              # running index used to name this keyword's images
    seen_urls = set()    # URLs already handled for this keyword (O(1) lookup)

    for mypath in path_lists:
        json_path = os.path.join(subfolder, mypath)

        # Load the saved response and pull out the list of image results.
        try:
            with open(json_path) as json_file:
                data = json.load(json_file)
            img_urls = list(data['results'][0]['content']['results']['organic'])
        except (OSError, ValueError, KeyError, IndexError, TypeError):
            # Unreadable file or unexpected response layout: record the
            # keyword and move on to the next JSON file.
            print(subname)
            fail_list.append(subname)
            continue

        for img_url_item in tqdm(img_urls, desc=f'{subname}-{mypath[-7:-5]}'):
            img_url = img_url_item.get('image') if isinstance(img_url_item, dict) else None
            if not isinstance(img_url, str):
                # Entry without a usable 'image' URL: record it, but keep
                # processing the remaining entries of this file.
                print(subname)
                fail_list.append(subname)
                continue

            # Skip URLs that were already handled for this keyword.
            if img_url in seen_urls:
                continue
            seen_urls.add(img_url)

            if 'http' not in img_url:
                continue

            try:
                response = requests.get(img_url, timeout=30)
            except requests.RequestException:
                # A single unreachable host must not abort the other
                # downloads; record the keyword and continue.
                print(subname)
                fail_list.append(subname)
                continue

            if response.status_code != 200:
                print('图片下载失败,状态码:', response.status_code)
                continue

            target_dir = os.path.join(down_folder, subname)
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, f'{subname}_{cnt}.jpg'), 'wb') as img_file:
                img_file.write(response.content)
            cnt += 1
# Collapse repeated keywords and sort them so the report is deterministic
# from run to run.
unique_set = set(fail_list)
unique_list = sorted(unique_set)
print(unique_list)

filename = 'my_list.txt'
# Write one failed keyword per line. The encoding is explicit so that
# non-ASCII keywords are written identically on every platform.
with open(filename, 'w', encoding='utf-8') as out_file:
    out_file.writelines(f'{item}\n' for item in unique_list)