demo_窄门数据获取.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # -*- coding: utf-8 -*-
  2. # @Time : 2024/1/17 17:43
  3. # @Author : Clown
  4. # @File : demo_窄门数据获取.py
  5. # @Software : PyCharm
  6. import requests
  7. import json
  8. import pandas as pd
  9. import time
  10. import random
  11. from dateutil.parser import parse
  12. def downLoadShopsInfoByBrandId(brand_id,sessionId,brand_name,date_time):
  13. page = 0
  14. page_cnt = 1
  15. df_out = []
  16. start_time = time.time()
  17. while page < page_cnt:
  18. page += 1
  19. url = f'https://fa.kaoputou.com/api/brand/{brand_id}/shops?slug={brand_id}&status=1&page={page}'
  20. headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090819) XWEB/8531',
  21. 'sessionId': sessionId}
  22. a = 1
  23. while a == 1:
  24. try:
  25. resp = requests.get(url,headers=headers).json()
  26. a = 0
  27. except:
  28. print(page,'失败')
  29. time.sleep(10)
  30. if page == 1:
  31. try:
  32. shopStatus = resp['shopStatus']['open']
  33. page_cnt = shopStatus / 15
  34. print(f'总页数{page_cnt}')
  35. except:
  36. shopStatus = 15
  37. page_cnt = shopStatus / 15
  38. print(f'总页数{page_cnt}')
  39. print(f'当前正在获取--第{page}页')
  40. df_out = df_out + resp['shops']
  41. time.sleep(random.uniform(3,4))
  42. # if page%10 == 0 :
  43. # time.sleep(20)
  44. df_out = pd.DataFrame(df_out)
  45. date_list = lambda df_in: [time.strftime('%Y%m%d',time.strptime(i, "%a, %d %b %Y %H:%M:%S %Z")) for i in df_in]
  46. df_out['openDate'] = date_list(df_out['openDate'])
  47. df_out.to_excel(f'C:/Users/ClownHe/Desktop/导入/输出/{brand_name}-{brand_id}全国门店分布{date_time}.xlsx')
  48. time_spend = round((time.time()-start_time)/60)
  49. print(f'总计用时{time_spend}min')
  50. if __name__ == '__main__':
  51. brand_id = 21903627
  52. brand_name = '楼兰辛香'
  53. sessionId = 'wx_ab1e429c8ef4b99ecda823313d13b810'
  54. date_time = time.strftime('%Y%m%d%H%M%S')
  55. downLoadShopsInfoByBrandId(brand_id,sessionId,brand_name,date_time)
  56. # openDate = 'Wed, 01 Mar 2023 10:00:00 GMT'
  57. # openDate = time.strptime(openDate, "%a, %d %b %Y %H:%M:%S %Z")
  58. # openDate = time.strftime('%Y%m%d',openDate)
  59. # print(openDate)