# -*- coding: utf-8 -*-
# @Time : 2023/11/17 10:56
# @Author : Clown
# @File : demo_400数据获取.py
# @Software : PyCharm
"""Demo client for the www.xt4008.com and www.400pt.net web back ends.

The module logs in to either site (reading the image captcha with ddddocr),
and the ``__main__`` section shows how the resulting token / cookie is used
to request call records.
"""
import datetime
import json
import time
from urllib.parse import quote

import ddddocr
import execjs
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Header values shared by every request in this module.
_USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
               '(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36')
_FORM_CONTENT_TYPE = 'application/x-www-form-urlencoded'

# Captcha image recognition engine (created once at import time).
ocr = ddddocr.DdddOcr(show_ad = False)


def dOcrImage(image_path: str) -> str:
    """Return the text ddddocr recognises in the image file at *image_path*."""
    with open(image_path, 'rb') as f:
        img_bytes = f.read()
    return ocr.classification(img_bytes)


def _save_and_read_captcha(save_path: str, image_bytes: bytes) -> str:
    """Write the captcha to ``save_path + 'code.png'`` and return its OCR text."""
    image_path = save_path + 'code.png'
    with open(image_path, mode = 'wb') as f:
        f.write(image_bytes)
    return dOcrImage(image_path)


def getCodeAndCodeId(save_path: str, typeName: str, cookie: str) -> dict:
    """Download a login captcha, OCR it and return the values the login needs.

    Args:
        save_path: Prefix prepended to ``'code.png'`` when saving the image.
        typeName: ``'xt4008'`` or ``'400pt'``; selects the site to query.
        cookie: Cookie header value; only sent for ``'400pt'``.

    Returns:
        ``{'code': <recognised text>, 'codeId': <id>}``. For ``'xt4008'`` the
        id comes from the ``Codeid`` response header; for ``'400pt'`` it is
        the literal placeholder string ``'codeid'``.

    Raises:
        ValueError: If *typeName* is not one of the two supported values.
    """
    if typeName == 'xt4008':
        url = 'https://www.xt4008.com/api/xuntec/login/admin/code'
        headers = {
            'user-agent': _USER_AGENT,
            'content-type': _FORM_CONTENT_TYPE}
        resp = requests.get(url, headers = headers)
        codeid = resp.headers.get('Codeid')
        code = _save_and_read_captcha(save_path, resp.content)
        return {'code': code, 'codeId': codeid}

    if typeName == '400pt':
        now = datetime.datetime.now()
        formatted_time = now.strftime("%a %b %d %Y %H:%M:%S GMT ") + "0800 (中国标准时间)"
        url = 'https://www.400pt.net/platform/pm/getVaildImg'
        data = f'now={formatted_time}'
        headers = {
            'user-agent': _USER_AGENT,
            'content-type': _FORM_CONTENT_TYPE,
            'cookie': cookie}
        resp = requests.get(url, headers = headers, data = quote(data))
        code = _save_and_read_captcha(save_path, resp.content)
        return {'code': code, 'codeId': 'codeid'}

    # Previously an unknown type fell through to an UnboundLocalError.
    raise ValueError(f'unsupported typeName: {typeName!r}')


# Step 1: fetch 'https://www.400pt.net/' to obtain a not-yet-authorised cookie.
def getCookie() -> str:
    """Return the first ``name=value`` pair of the site's ``Set-Cookie`` header.

    Raises:
        RuntimeError: If the response carries no ``Set-Cookie`` header.
    """
    url = 'https://www.400pt.net/'
    headers = {
        'user-agent': _USER_AGENT,
        'content-type': _FORM_CONTENT_TYPE}
    set_cookie = requests.get(url, headers = headers).headers.get('Set-Cookie')
    if set_cookie is None:
        raise RuntimeError(f'no Set-Cookie header returned by {url}')
    return set_cookie.split(';')[0]


# Step 2: request the index page with the cookie so that it becomes valid.
def getInhtml(cookie_id: str) -> None:
    """Request the 400pt index page with *cookie_id*; the response is discarded."""
    url = 'https://www.400pt.net/api/cust/auth/index'
    headers = {
        'user-agent': _USER_AGENT,
        'Referer': 'https://www.400pt.net/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Cookie': cookie_id,
        'Upgrade-Insecure-Requests': '1'}
    requests.get(url, headers = headers)


def getMd5(url: str, strIn: str):
    """Evaluate the JavaScript ``MD5`` function on *strIn* via execjs.

    Args:
        url: Location of the MD5 script. When non-empty the script is
            downloaded and cached as ``md5.js`` in the working directory;
            when empty the cached ``md5.js`` is read instead (it must exist).
        strIn: Value passed to the script's ``MD5`` function.

    Returns:
        Whatever the script's ``MD5(strIn)`` call returns.
    """
    if url != '':
        headers = {'user-agent': _USER_AGENT}
        script = requests.get(url, headers = headers).text
        # Explicit encoding: the locale default may not be able to encode the script.
        with open('md5.js', mode = 'w', encoding = 'utf-8') as f:
            f.write(script)
    else:
        with open('md5.js', 'r', encoding = 'utf-8') as f:
            script = f.read()
    ctx = execjs.compile(script)
    return ctx.call("MD5", strIn)


def getToken(userName: str, pwd: str, typeName: str, img_path: str) -> str:
    """Log in to the selected site and return its session credential.

    Args:
        userName: Account name (converted with ``int()`` for ``'xt4008'``).
        pwd: Password; hashed through ``getMd5`` for ``'400pt'``, sent as
            given for ``'xt4008'``.
        typeName: ``'400pt'`` or ``'xt4008'``.
        img_path: Prefix under which the captcha image is saved.

    Returns:
        For ``'400pt'`` the cookie pair obtained from ``getCookie``; for
        ``'xt4008'`` the ``data.token`` field of the login response; an empty
        string for any other *typeName*.
    """
    cookie_id = ''
    if typeName == '400pt':
        cookie_id = getCookie()
        code_json = getCodeAndCodeId(img_path, typeName, cookie_id)
        # The empty URL makes getMd5 use the locally cached md5.js.
        md5 = getMd5('', pwd)
        timestamp = int(time.time() * 1000)
        password = getMd5('', md5 + str(timestamp))
        url = 'https://www.400pt.net/platform/pm/admin/login'
        # '&timestamp=' had been corrupted to '×tamp=' (HTML entity '&times'),
        # which merged the captcha code and the timestamp into one parameter.
        params = (f'loginName={userName}&password={password}&pwd={md5}'
                  f'&authCode={code_json["code"]}&timestamp={timestamp}&loginType=0')
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'user-agent': _USER_AGENT,
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Cookie': cookie_id,
            'Sec-Ch-Ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'X-Requested-With': 'XMLHttpRequest'}
        # The login response body is not inspected; the cookie is the credential.
        requests.post(url, headers = headers, params = params)
        getInhtml(cookie_id)
    elif typeName == 'xt4008':
        code_json = getCodeAndCodeId(img_path, typeName, cookie_id)
        url = 'https://www.xt4008.com/api/xuntec/login/admin/login'
        headers = {
            'user-agent': _USER_AGENT,
            'content-type': _FORM_CONTENT_TYPE}
        params = {
            'userName': int(userName),
            'passWord': pwd,
            'code': code_json['code'],
            'codeId': code_json['codeId']}
        resp = requests.post(url, headers = headers, params = params)
        cookie_id = resp.json()['data']['token']
    return cookie_id


# This function is not in use; do not call it.
def getJsScript():
    """Find the md5 / login.js script URLs on the 400pt index page.

    Every tag with a ``src`` attribute is scanned; the URL of the md5 script
    is then passed to ``getMd5``, which downloads and caches it as ``md5.js``.

    Raises:
        KeyError: If no ``src`` containing ``'md5'`` is found.
    """
    url = 'https://www.400pt.net/api/cust/auth/index'
    headers = {'user-agent': _USER_AGENT}
    html = requests.get(url, headers = headers).text
    soup = BeautifulSoup(html, 'html.parser')
    wanted = ['md5', 'login.js']
    tag_dict = {}
    for tag in soup.find_all(src = True):
        src = tag['src']
        for key in wanted:
            if key in src:
                tag_dict[key] = 'https://www.400pt.net/' + src
    getMd5(tag_dict['md5'], '')


# This function is not in use; do not call it. getMd5 replaces it.
def getUUid(url: str, strIn: str):
    """Deprecated alias of ``getMd5``; kept only for backward compatibility."""
    return getMd5(url, strIn)


# This function is not in use; do not call it.
def saveJsonToText(jsonIn, file_path: str) -> None:
    """Serialise *jsonIn* as JSON (non-ASCII kept) into *file_path* (utf-8-sig)."""
    data = json.dumps(jsonIn, ensure_ascii = False)
    with open(file_path, mode = 'w', encoding = 'utf-8-sig') as f:
        f.write(data)


def _demo_xt4008(img_path: str) -> None:
    """Fetch and print call-detail rows from xt4008."""
    # NOTE(review): credentials are hard-coded; move them to configuration.
    userName = '4009017757'
    pwd = '4N+0xuYCWiNoawOggredIQ=='
    typeName = 'xt4008'
    startTime = '2023-12-01 00:00:00'
    endTime = '2023-12-07 00:00:59'
    token = getToken(userName, pwd, typeName, img_path)
    print(token)
    url = f'https://www.xt4008.com/api/xuntec/callAcdReport/queryCallDetailPd?token={token}&page=1'
    headers = {'user-agent': _USER_AGENT}
    params = {"msisdn": userName, "caller": "", "called": "", "callSts": "",
              "callerCode": "", "startTime": startTime, "endTime": endTime,
              "remarkStatus": "", "end_code": ""}
    text = requests.post(url, headers = headers, json = params).text
    print(text)
    rows = json.loads(text)['data']['rows']
    if rows is None:
        print('1')
    else:
        print(rows)


def _demo_400pt(img_path: str) -> None:
    """Export the 400pt call list to CSV and print it as JSON."""
    # NOTE(review): credentials are hard-coded; move them to configuration.
    userName = '4009018187'
    pwd = 'Abcd123456'
    typeName = '400pt'
    cookie = f'{getToken(userName, pwd, typeName, img_path)};'
    print(cookie)
    url = 'https://www.400pt.net/api/cust/report/customerReportCallListExport'
    headers = {
        'user-agent': _USER_AGENT,
        'content-type': _FORM_CONTENT_TYPE,
        'Cookie': f'{cookie}uid=c_10974'}
    params = {'status': 1,
              'startDate': '2023-11-01 00:00',
              'endDate': '2023-12-07 23:59',
              'rows': 80,
              'isForm': 1,
              'page': 1}
    resp = requests.post(url, headers = headers, params = params)
    print(resp.text)
    save_path = ''
    csv_path = save_path + '记录.csv'
    with open(csv_path, mode = "wb") as f:
        f.write(resp.content)
    # Read back the file that was just written (the path used to ignore save_path).
    df = pd.read_csv(csv_path, encoding = 'gbk', keep_default_na = False).to_dict('records')
    print(json.dumps({'data': df, 'solutionCnt': len(df)}, ensure_ascii = False))


if __name__ == '__main__':
    img_path = ''
    # Both demos are switched off by default (the original used `if 1 == 0`).
    run_xt4008_demo = False
    run_400pt_demo = False
    if run_xt4008_demo:
        # Fetch xt4008 data.
        _demo_xt4008(img_path)
    if run_400pt_demo:
        # Fetch 400pt data.
        _demo_400pt(img_path)