demo_400数据获取.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. # -*- coding: utf-8 -*-
  2. # @Time : 2023/11/17 10:56
  3. # @Author : Clown
  4. # @File : demo_400数据获取.py
  5. # @Software : PyCharm
  6. import json
  7. import requests
  8. import ddddocr
  9. import pandas as pd
  10. from bs4 import BeautifulSoup
  11. import execjs
  12. import datetime
  13. import time
  14. from urllib.parse import quote
  15. # 图像识别
  16. ocr = ddddocr.DdddOcr(show_ad = False)
  17. def dOcrImage(image_path):
  18. with open(image_path,'rb') as f:
  19. img_bytes = f.read()
  20. text = ocr.classification(img_bytes)
  21. # print(text)
  22. return text
  23. # 验证码验证之后获取对应的headers中的code码
  24. def getCodeAndCodeId(save_path,typeName,cookie):
  25. if typeName == 'xt4008':
  26. url = 'https://www.xt4008.com/api/xuntec/login/admin/code'
  27. headers = {
  28. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  29. 'content-type': 'application/x-www-form-urlencoded'}
  30. resp = requests.get(url, headers = headers)
  31. codeid = resp.headers.get('Codeid')
  32. data = resp.content
  33. with open(save_path+'code.png', mode = "wb") as f:
  34. f.write(data)
  35. # print(codeid)
  36. code = dOcrImage(save_path+'code.png')
  37. out_json = {'code':code,'codeId':codeid}
  38. elif typeName == '400pt':
  39. now = datetime.datetime.now()
  40. formatted_time = now.strftime("%a %b %d %Y %H:%M:%S GMT ") + "0800 (中国标准时间)"
  41. url = 'https://www.400pt.net/platform/pm/getVaildImg'
  42. data = f'now={formatted_time}'
  43. headers = {
  44. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  45. 'content-type': 'application/x-www-form-urlencoded',
  46. 'cookie':cookie}
  47. resp = requests.get(url, headers = headers,data=quote(data))
  48. data = resp.content
  49. with open(save_path+'code.png', mode = "wb") as f:
  50. f.write(data)
  51. code = dOcrImage(save_path+'code.png')
  52. out_json = {'code':code,'codeId':'codeid'}
  53. return out_json
  54. # step1网址'https://www.400pt.net/'获取待授权cookie_id信息
  55. def getCookie():
  56. url = 'https://www.400pt.net/'
  57. headers = {
  58. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  59. 'content-type': 'application/x-www-form-urlencoded'}
  60. resp = requests.get(url,headers=headers).headers.get('Set-Cookie')
  61. cookie_id = resp.split(';')[0]
  62. return cookie_id
  63. # step2网址'https://www.400pt.net/'授权cookie_id信息,使cookie_id信息变的有效
  64. def getInhtml(cookie_id):
  65. url = 'https://www.400pt.net/api/cust/auth/index'
  66. headers = {
  67. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  68. 'Referer': 'https://www.400pt.net/',
  69. 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  70. 'Cookie':cookie_id,
  71. 'Upgrade-Insecure-Requests':'1'}
  72. resp = requests.get(url,headers=headers)
  73. def getMd5(url,strIn):
  74. if url != '':
  75. url = url
  76. headers = {
  77. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
  78. resp = requests.get(url, headers = headers).text
  79. with open('md5.js', mode = 'w') as f:
  80. f.write(resp)
  81. else:
  82. with open("md5.js", "r") as file:
  83. resp = file.read()
  84. ctx = execjs.compile(resp)
  85. result = ctx.call("MD5",strIn)
  86. # print(result)
  87. return result
  88. def getToken(userName,pwd,typeName,img_path):
  89. cookie_id = ''
  90. if typeName == '400pt':
  91. cookie_id=getCookie()
  92. # print(cookie_id)
  93. code_json = getCodeAndCodeId(img_path, typeName,cookie_id)
  94. loginName = userName
  95. pwd = pwd
  96. md5 = getMd5('',pwd)
  97. # print(f'md5:{md5}')
  98. timestamp = int(time.time()*1000)
  99. # print(timestamp)
  100. password = getMd5('',md5+str(timestamp))
  101. # print(password)
  102. url = 'https://www.400pt.net/platform/pm/admin/login'
  103. params = f'loginName={loginName}&password={password}&pwd={md5}&authCode={code_json["code"]}&timestamp={int(timestamp)}&loginType=0'
  104. headers = {
  105. 'Accept':'application/json, text/javascript, */*; q=0.01',
  106. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  107. 'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
  108. 'Cookie':cookie_id,
  109. 'Sec-Ch-Ua':'"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
  110. 'Sec-Ch-Ua-Mobile':'?0',
  111. 'Sec-Ch-Ua-Platform':'"Windows"',
  112. 'Sec-Fetch-Dest':'empty',
  113. 'Sec-Fetch-Mode':'cors',
  114. 'Sec-Fetch-Site':'same-origin',
  115. 'X-Requested-With':'XMLHttpRequest'}
  116. resp = requests.post(url,headers=headers,params = params)
  117. getInhtml(cookie_id)
  118. elif typeName == 'xt4008':
  119. code_json = getCodeAndCodeId(img_path, typeName, cookie_id)
  120. url = 'https://www.xt4008.com/api/xuntec/login/admin/login'
  121. headers = {
  122. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  123. 'content-type': 'application/x-www-form-urlencoded'}
  124. params = {'userName': int(userName),
  125. 'passWord': pwd,
  126. 'code': code_json['code'],
  127. 'codeId': code_json['codeId']}
  128. resp = requests.post(url, headers = headers, params = params)
  129. # print(resp.text)
  130. cookie_id = resp.json()['data']['token']
  131. # print(resp.text)
  132. return cookie_id
  133. # 此函数未启用不要调用
  134. def getJsScript():
  135. url = 'https://www.400pt.net/api/cust/auth/index'
  136. headers = {
  137. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
  138. resp = requests.get(url, headers = headers).text
  139. soup = BeautifulSoup(resp, 'html.parser')
  140. tags = soup.find_all(src=True)
  141. tag_dict = {}
  142. ii = ['md5','login.js']
  143. for tag in tags:
  144. tag_text = tag['src']
  145. for i in ii:
  146. if i in tag_text:
  147. tag_dict[i]='https://www.400pt.net/'+tag_text
  148. getMd5(tag_dict['md5'],'')
  149. # 此函数未启用不要调用,已使用getMd5进行替代
  150. def getUUid(url,strIn):
  151. if url != '':
  152. url = url
  153. headers = {
  154. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
  155. resp = requests.get(url, headers = headers).text
  156. with open('md5.js', mode = 'w') as f:
  157. f.write(resp)
  158. else:
  159. with open("md5.js", "r") as file:
  160. resp = file.read()
  161. ctx = execjs.compile(resp)
  162. result = ctx.call("MD5", strIn)
  163. # print(result)
  164. return result
  165. # 此函数未启用不要调用
  166. def saveJsonToText(jsonIn,file_path):
  167. data = json.dumps(jsonIn,ensure_ascii = False)
  168. with open(file_path,mode = 'w', encoding = 'utf-8-sig') as f:
  169. f.write(data)
  170. if __name__ == '__main__':
  171. img_path = ''
  172. if 1 == 0:
  173. # 获取xt4008相关数据
  174. userName = '4009017757'
  175. pwd = '4N+0xuYCWiNoawOggredIQ=='
  176. typeName = 'xt4008'
  177. startTime = '2023-12-01 00:00:00'
  178. endTime = '2023-12-07 00:00:59'
  179. token = getToken(userName,pwd,typeName,img_path)
  180. print(token)
  181. url = f'https://www.xt4008.com/api/xuntec/callAcdReport/queryCallDetailPd?token={token}&page=1'
  182. headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
  183. params = {"msisdn":userName,"caller":"","called":"","callSts":"","callerCode":"","startTime":startTime,"endTime":endTime,"remarkStatus":"","end_code":""}
  184. resp = requests.post(url,headers=headers,json=params).text
  185. print(resp)
  186. resp = json.loads(resp)['data']['rows']
  187. if resp is None:
  188. print('1')
  189. else:
  190. print(resp)
  191. if 1 == 0:
  192. # 获取400pt相关数据
  193. userName = '4009018187'
  194. pwd = 'Abcd123456'
  195. typeName = '400pt'
  196. cookie = f'{getToken(userName, pwd, typeName,img_path)};'
  197. print(cookie)
  198. url = 'https://www.400pt.net/api/cust/report/customerReportCallListExport'
  199. headers = {
  200. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  201. 'content-type': 'application/x-www-form-urlencoded',
  202. 'Cookie': f'{cookie}uid=c_10974'}
  203. params = {'status': 1,
  204. 'startDate': '2023-11-01 00:00',
  205. 'endDate': '2023-12-07 23:59',
  206. 'rows': 80,
  207. 'isForm': 1,
  208. 'page': 1}
  209. resp = requests.post(url, headers = headers, params = params)
  210. print(resp.text)
  211. data = resp.content
  212. save_path = ''
  213. with open(save_path + '记录.csv', mode = "wb") as f:
  214. f.write(data)
  215. df = pd.read_csv('记录.csv', encoding = 'gbk',keep_default_na = '').to_dict ( 'records' )
  216. print(json.dumps({'data':df,'solutionCnt':len(df)},ensure_ascii = False))