# Author:Clown
# -*- coding: utf-8 -*-
# @Time :2022/3/16 14:17
# @File: 食亨菜单导出浆v2.py
import requests
import pandas as pd
import time
import xlrd
from datetime import date, timedelta, datetime
import os
import shutil
import pymysql
import sys
import traceback
import hashlib
import csv


def takeThetoken(username, password):
    """Log in to the Shiheng BI site and return the user token.

    Returns the token string, or '' when the request fails or the
    response lacks a 'result' field (callers treat '' as "no token").
    """
    try:
        url = 'https://bi.shihengtech.com/api/userlogin/login'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
            'Cookie': ''}
        params = {'username': username, 'password': password}
        resp = requests.post(url, headers=headers, json=params)
        token = resp.json()['result']
    except (requests.RequestException, ValueError, KeyError):
        # Narrowed from a bare except: network error, non-JSON body, or
        # missing key all mean "login failed" -> empty token.
        token = ''
    return token


def submit(startTime, endTime, headers, brandId):
    """Submit an order-detail export task for [startTime, endTime].

    Returns 1 when the API answers code==200, else 0 (same contract as v1).
    """
    url_submit = "https://bi.shihengtech.com/api/excel/order/detail/export/submit"
    params_submit = {
        "brandId": brandId,
        "endTime": endTime,
        "labelIds": [],
        "orderStatusEnum": "VALID",
        "startTime": startTime,
        "platforms": [0, 1],          # both delivery platforms
        "provinces": None,
        "cities": None,
        "shopIds": None,
        "taskBizTypeEnum": "ORDER_DETAIL_DETAIL"}
    resp_submit = requests.post(url_submit, json=params_submit, headers=headers)
    if resp_submit.json()['code'] == 200:
        print("成功提交请求")
        return 1
    print("请求失败")
    return 0


def queryTasks(headers):
    """Poll the export-task list; return (download_url, task_name) of the newest task.

    resultUrl is None while the server is still generating the file —
    the caller loops on that.
    """
    url_queryTasks = "https://bi.shihengtech.com/api/shtask/queryTasks"
    params_queryTasks = {"pageNo": 1, "pageSize": 20}
    resp_queryTasks = requests.post(url_queryTasks, json=params_queryTasks, headers=headers)
    newest = resp_queryTasks.json()['result']['result'][0]
    return newest["resultUrl"], newest["name"]


def excel2parts(excel_file, save_path_forms, save_path_goods):
    """Split every workbook under excel_file into two .xlsx files by sheet.

    Sheet '订单明细' goes to save_path_forms, every other sheet to
    save_path_goods.  Update the sheet-name test here if the export's
    sheet names ever change.
    """
    for _dirpath, _dirnames, files in os.walk(excel_file, topdown=False):
        for file in files:
            path = str(excel_file) + '/' + str(file)
            workbook = xlrd.open_workbook(path)
            print(file, '正在拆分')
            for worksheet_name in workbook.sheet_names():
                if worksheet_name == '订单明细':
                    target_dir = save_path_forms
                else:
                    target_dir = save_path_goods
                data_xls = pd.read_excel(path, worksheet_name, dtype=str)
                # NOTE: `encoding=` was removed from to_excel in pandas 2.0
                # (it was ignored for .xlsx anyway); index=False == index=0.
                data_xls.to_excel(
                    target_dir + '/' + str(file)[:-5] + worksheet_name + '.xlsx',
                    index=False)
                print(file, worksheet_name, '已拆出')


def excel2sheet(file_path):
    """Open the workbooks under file_path and return the 'Sheet1' xlrd sheet.

    NOTE(review): the loop overwrites sheet_data on every file, so only the
    LAST file's sheet is returned — in this pipeline each split directory
    holds exactly one file, which is why this works.  Returns '' on failure.
    """
    sheet_data = ''
    try:
        for _dirpath, _dirnames, files in os.walk(file_path, topdown=False):
            for file in files:
                path = str(file_path) + '/' + str(file)
                sheet_data = xlrd.open_workbook(path).sheet_by_name('Sheet1')
                print('\n%s 正在转csv文件' % str(file))
    except (OSError, xlrd.XLRDError):
        sheet_data = ''
        print('excel数据读取失败')
    return sheet_data


def linkTomySql(host, passwd, db_name):
    """Connect to MySQL and return the connection (v2: local_infile=1 enables LOAD DATA LOCAL).

    NOTE(review): the `host` parameter is currently IGNORED — the address is
    hard-coded to the server IP below (本地连接为:localhost 服务器连接为:
    124.222.188.59).  Kept as-is to avoid changing which database is hit;
    confirm before wiring `host` through.  On failure returns the string
    '连接失败' (callers that then call .cursor() will raise — unchanged v1
    behavior).
    """
    try:
        db = pymysql.connect(
            host='124.222.188.59',
            user="root",
            passwd=passwd,
            db=db_name,
            charset='utf8mb4',
            local_infile=1,
            port=63306)
        print('\nconnect to mysql server 成功')
        print('------------------------------------')
    except pymysql.MySQLError:
        print("\ncould not connect to mysql server")
        db = "连接失败"
    return db


def _write_progress(done, total):
    """Render the shared in-place progress line used by both CSV converters."""
    per = done / total * 100
    print('\r 已导入%s行,共计%s行,已完成%2d%%' % (str(done), str(total), per), end='')


def excel2csv_forms(sheet_data, file_path):
    """v2: dump the order-forms sheet to excel2csvForms.csv; return the csv name.

    Header is col0..col27; every data row is copied verbatim (28 cells).
    Row 0 of the sheet is the title row, so data starts at row 1.
    """
    csv_name = 'excel2csvForms.csv'
    # BUGFIX: the file was previously opened and never closed, so buffered
    # rows could still be unflushed when LOAD DATA INFILE read the CSV.
    with open(file_path + '/' + csv_name, mode='a',
              encoding='utf-8-sig', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(['col%d' % j for j in range(28)])
        row_num = sheet_data.nrows
        for i in range(1, row_num):
            csv_writer.writerow(sheet_data.row_values(i)[:28])
            _write_progress(i, row_num - 1)
    return csv_name


def excel2csv_goods(sheet_data, file_path):
    """v2: dump the order-goods sheet to excel2csvGoods.csv; return the csv name.

    Each output row is: a unique id (epoch-seconds + zero-padded row index),
    source columns 0-1, an MD5 of column 3 replacing column 2 (标准商品名称
    hashed), then columns 3-20 with columns 9 and 11 coerced float->int->str.
    """
    csv_name = 'excel2csvGoods.csv'
    # BUGFIX: use a context manager so the CSV is flushed/closed before
    # loadDataInfile() reads it (the old code leaked the file handle).
    with open(file_path + '/' + csv_name, mode='a',
              encoding='utf-8-sig', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(['col%d' % j for j in range(22)])
        row_num = sheet_data.nrows
        for i in range(1, row_num):
            time_str = str(int(time.time())) + str(i).zfill(10)  # synthetic PK
            row_data = sheet_data.row_values(i)
            md5 = hashlib.md5()
            md5.update(row_data[3].encode('utf-8'))
            value = [time_str, row_data[0], row_data[1], md5.hexdigest(),
                     row_data[3], row_data[4], row_data[5], row_data[6],
                     row_data[7], row_data[8],
                     str(int(float(row_data[9]))),     # quantity column: '3.0' -> '3'
                     row_data[10],
                     str(int(float(row_data[11]))),
                     row_data[12], row_data[13], row_data[14], row_data[15],
                     row_data[16], row_data[17], row_data[18], row_data[19],
                     row_data[20]]
            csv_writer.writerow(value)
            _write_progress(i, row_num - 1)
    return csv_name


def loadDataInfile(file_path, csv_name, db, table_name):
    """v2: bulk-load the CSV into table_name via LOAD DATA LOCAL INFILE.

    ENCLOSED BY '"' prevents commas inside quoted fields from splitting
    columns.  local_infile is switched on only for the duration of the load.
    NOTE(review): file_path/table_name are interpolated into SQL — both are
    script-internal constants here, but never pass user input through this.
    """
    cursor = db.cursor()
    cursor.execute("SET global local_infile = 1;")
    db.commit()
    cursor.execute(
        f'''LOAD DATA LOCAL INFILE '{file_path + '/' + csv_name}' INTO TABLE {table_name} FIELDS TERMINATED BY ',' ENCLOSED BY '"' IGNORE 1 LINES;''')
    db.commit()
    cursor.execute("SET global local_infile = 0;")
    db.commit()
    cursor.close()


def update_scriptrunninglogs(log_date, log_time, running_Time, operation_results, table_name, db):
    """Append one row to the scriptrunninglogs table (script execution log).

    Columns: date, time, running_Time, script_name, operation_results,
    table_name, id(=0, auto).  Failures are reported but not raised so the
    main script still exits normally.
    """
    try:
        sql = "INSERT INTO scriptrunninglogs VALUES (%s,%s,%s,%s,%s,%s,%s)"
        script_name = os.path.basename(sys.argv[0])
        value = (log_date, log_time, running_Time, script_name,
                 operation_results, table_name, 0)
        cursor = db.cursor()
        cursor.execute(sql, value)
        db.commit()
        cursor.close()
    except (pymysql.MySQLError, AttributeError):
        # AttributeError covers db being the '连接失败' string sentinel.
        print("数据库连接失败")


if __name__ == '__main__':
    log_date = (date.today()).strftime("%Y-%m-%d")            # log date
    log_time = datetime.now().strftime('%H:%M:%S')            # log time
    start_run = time.time()                                   # script start timestamp
    startTime = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")  # export range start (yesterday)
    endTime = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")    # export range end (yesterday)

    brandId = '102073'  # 浆小白
    username = 'JXBsjz888'
    pw = 'a330463a451f07658e0374042a1dea7b199cf73706d4e44cd06c5c3dc79cec45'
    user_token = takeThetoken(username, pw)  # API token
    # brandId = '105541'  # 粥小鲜
    # username = 'ZXXsjz888'
    # pw = '134fdc5ef944ff6ce1439d469ec719abdde30782eeb18a611449c66da81170d6'
    # user_token = takeThetoken(username, pw)

    save_path = r'C:/Users/ClownHe/Desktop/goods/home/orderForms_jxb'                 # excel download dir
    out_path_forms = r'C:/Users/ClownHe/Desktop/goods/home/orderFormsSplit_forms_jxb' # order-detail sheets
    out_path_goods = r'C:/Users/ClownHe/Desktop/goods/home/orderFormsSplit_goods_jxb' # order-goods sheets

    Cookie = "_ga=GA1.2.725502531.1618630729; " \
             "Hm_lvt_eaa57ca47dacb4ad4f5a257001a3457c=1625040492,1625123277,1625293351,1625448933; " \
             "_gid=GA1.2.296614433.1647229990; " \
             "acw_tc=76b20f6a16474114899837204e57f03f3766bf1b4d60819fca83da136ed7e6; " \
             "user-token=%s; " % str(user_token) + "_gat_gtag_UA_127759150_3=1"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
        "Content-Type": "application/json;charset=UTF-8",
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate, br",
        "user-token": user_token,
        "Cookie": Cookie}  # shared request headers

    # Reset the three working directories (ignore_errors so a missing dir
    # on first run doesn't abort the script).
    for folder in (save_path, out_path_forms, out_path_goods):
        shutil.rmtree(folder, ignore_errors=True)
        os.mkdir(folder)
    print('已重置文件夹')

    try:
        if submit(startTime, endTime, headers, brandId) == 1:  # export request accepted
            # Poll up to ~240 s for the server to finish building the file.
            s = 2
            df_link, df_name = queryTasks(headers)
            while df_link is None and s <= 240:
                time.sleep(2)
                print("\rloading %d" % s, end='')
                s += 2
                df_link, df_name = queryTasks(headers)
            df_data = requests.get(df_link).content
            with open(save_path + r'/%s.xlsx' % str(df_name), mode='wb') as f:
                f.write(df_data)
            print("\n", df_name, '下载成功')

            excel2parts(save_path, out_path_forms, out_path_goods)
            db = linkTomySql('localhost', '111...Clown', 'zuzu_data')

            forms_sheet_data = excel2sheet(out_path_forms)
            csv_name_forms = excel2csv_forms(forms_sheet_data, out_path_forms)
            loadDataInfile(out_path_forms, csv_name_forms, db, 'order_forms')

            goods_sheet_data = excel2sheet(out_path_goods)
            csv_name_goods = excel2csv_goods(goods_sheet_data, out_path_goods)
            loadDataInfile(out_path_goods, csv_name_goods, db, 'order_formsgoods')

            db.close()  # BUGFIX: connection was previously left open
            operation_results = '执行成功'
        else:
            operation_results = '无法访问网页'
            df_name = 'name null'
            print("有未知错误,请检查")
    except Exception:
        # Any failure in the pipeline is recorded in the run log below.
        print('excel文件异常,请检查')
        operation_results = traceback.format_exc()
        df_name = 'name null'

    end_run = time.time()                      # script end timestamp
    running_Time = end_run - start_run         # wall-clock duration
    db = linkTomySql('localhost', '111...Clown', 'zuzu_data')  # fresh connection for the log row
    update_scriptrunninglogs(log_date, log_time, running_Time,
                             operation_results, df_name, db)