1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- # -*- coding: utf-8 -*-
- # @Time : 2024/1/17 17:43
- # @Author : Clown
- # @File : demo_窄门数据获取.py
- # @Software : PyCharm
- import requests
- import json
- import pandas as pd
- import time
- import random
- from dateutil.parser import parse
def _fetch_shops_page(url, headers, page, timeout):
    """Fetch one page of the shop listing and return the decoded JSON body.

    Retries indefinitely, pausing 10 seconds after each failed attempt.
    Only network errors and undecodable response bodies are retried, so
    Ctrl-C and genuine programming errors still propagate.
    """
    while True:
        try:
            return requests.get(url, headers=headers, timeout=timeout).json()
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decoding failures across requests versions.
            print(page, '失败')
            time.sleep(10)


def _format_open_date(value):
    """Convert a 'Wed, 01 Mar 2023 10:00:00 GMT' style string to 'YYYYMMDD'.

    Values that are missing or do not match that format are returned
    unchanged so that a single bad row does not abort the whole export.
    """
    try:
        parsed = time.strptime(value, "%a, %d %b %Y %H:%M:%S %Z")
    except (TypeError, ValueError):
        return value
    return time.strftime('%Y%m%d', parsed)


def downLoadShopsInfoByBrandId(brand_id, sessionId, brand_name, date_time,
                               output_dir='C:/Users/ClownHe/Desktop/导入/输出',
                               timeout=30):
    """Download every page of a brand's shop list and save it as an Excel file.

    Pages are requested one at a time from the fa.kaoputou.com brand API.
    The first response's ``shopStatus.open`` count determines how many pages
    are requested in total.

    Args:
        brand_id: Brand identifier used both in the URL path and as ``slug``.
        sessionId: Value sent in the ``sessionId`` request header.
        brand_name: Brand name, used only in the output file name.
        date_time: Timestamp string, used only in the output file name.
        output_dir: Directory the ``.xlsx`` file is written to. Defaults to
            the path the script originally hard-coded.
        timeout: Per-request timeout in seconds, so a stalled connection is
            retried instead of hanging forever.

    Returns:
        None. The result is written to
        ``{output_dir}/{brand_name}-{brand_id}全国门店分布{date_time}.xlsx``.
    """
    # NOTE(review): assumes the API returns 15 shops per page, as implied by
    # the original division of the open-shop count by 15 -- confirm.
    page_size = 15
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090819) XWEB/8531',
               'sessionId': sessionId}

    page = 0
    page_cnt = 1  # provisional; replaced once the first page has been read
    shops = []
    start_time = time.time()

    while page < page_cnt:
        page += 1
        url = f'https://fa.kaoputou.com/api/brand/{brand_id}/shops?slug={brand_id}&status=1&page={page}'
        resp = _fetch_shops_page(url, headers, page, timeout)

        if page == 1:
            try:
                open_count = int(resp['shopStatus']['open'])
            except (KeyError, TypeError, ValueError):
                # Count unavailable: fall back to a single page.
                open_count = page_size
            # Integer ceiling division, so the reported total is a whole number.
            page_cnt = (open_count + page_size - 1) // page_size
            print(f'总页数{page_cnt}')

        print(f'当前正在获取--第{page}页')
        shops.extend(resp['shops'])

        # Pause between requests; no need to wait after the final page.
        if page < page_cnt:
            time.sleep(random.uniform(3, 4))

    df_out = pd.DataFrame(shops)
    # An empty result has no columns at all, so guard the date conversion.
    if 'openDate' in df_out.columns:
        df_out['openDate'] = [_format_open_date(raw) for raw in df_out['openDate']]
    df_out.to_excel(f'{output_dir}/{brand_name}-{brand_id}全国门店分布{date_time}.xlsx')

    time_spend = round((time.time() - start_time) / 60)
    print(f'总计用时{time_spend}min')
if __name__ == '__main__':
    # Script entry point: export the shop list of one hard-coded brand,
    # stamping the output file name with the current local time.
    run_stamp = time.strftime('%Y%m%d%H%M%S')
    downLoadShopsInfoByBrandId(
        brand_id=21903627,
        sessionId='wx_ab1e429c8ef4b99ecda823313d13b810',
        brand_name='楼兰辛香',
        date_time=run_stamp,
    )
- # openDate = 'Wed, 01 Mar 2023 10:00:00 GMT'
- # openDate = time.strptime(openDate, "%a, %d %b %Y %H:%M:%S %Z")
- # openDate = time.strftime('%Y%m%d',openDate)
- # print(openDate)
|