0%

某商户8月短信发送情况分析

公司某商户要求统计其签名的发送情况,及包含特定内容的短信数量统计,使用request.post 获取该商户整个8月的短信发送情况,共50多万条。

获取数据

导入相关库

1
2
3
4
5
6
7
import requests
import time
import datetime
from bs4 import BeautifulSoup
import os
import pandas as pd
import json
1
2
3
4
5
6
7
8
9
10
11
12
13
cookie = 'ASP.NET_SessionId=5j05wibdhiwm5lur5tiiyq2z; .ASPXAUTH=8C8DA9675470DF317C657D7180EEDB3C1F3AA434507EBFEEF8E0D2311FE7F05DDC2A4BC165D4D08413C2E78F65632521139050F4C104C103C588EBEA5BA20180B0FD21574B5984EF6AE4587B71D530BFDF146C2C7826EFAD3E23B879F95532B710563B921A083560ACD33CC6FBC73983FECAF0AEA9815391CBA5C269887B3C442362D27CD9AA41E92CF282B78D1B8A5B2185DE39063EC0256D5B6C793E4B84C4C48EE28F728D25D4CEEB3C5D161E9F7C6BC2F0433DC520572500CDD84E26AE6703B4DACD1E5A764CC9DADDBEC949E05F200F894FBA3C1007C1659CB087341A8D26B2377D0D926470B523CE705650F39CEDF5EEC487FF0A4F0975275FFFA86CF435A25C7614811FE6623FACDC58B934428EC29A658ADD56690A830758CCD35FDCE5F9ED8B5C0ACA50677BCA5E5E4359FD77E23805211164C0629A178E3C9012016A6CF80A317A0080D108E296107178D1EE0F5FF67A23DB626B7000790427A56C2805EC1C15BDBC7D43C13AE9CB07ED5EDEF335C2FF03C1818DCDBCE06F996362E3B8FFDD44CA3CF22BEA2818351DB6E92B059AC1507A9605C165DD1CAF72F771F216A12521524FC239ED0FE271D4D0B79A3E0E201568D74E9516854E4CBF403CF55D7876EF276CCAFEA048D8611D64825A37232687890A2D0594DC03DBC071DEDFA449E50C0186AFBA00157C1BCE84910C367FC8F6271D5F98DE4EF548B26A7D82DC71C7AE06E936DC766EEBA32CF3482509E5A0A9A9F6519D04810EE3F6AFDED3ACD487101F5BEF458205A6A6C2DB4088B522FE0258C736DC48DA89'
payloaddata = {"limit": 8, "offset": 0,
"model": {"OrderId": "", "BatchNum": "", "MerchId": "200056", "MerchUserId": "-1", "MerOrderNo": "",
"MsgTemplateId": "", "PhoneNum": "", "OrderSource": "0", "Operators": "-1",
"ProductType": "0", "OrderStatus": "8", "NotifySuccess": "-1", "ProvinceId": "-1",
"CityId": "-1", "TimeType": "1", "TagId": "0", "StartTime": "2021-08-01 00:00:00",
"EndTime": "2021-08-07 23:59:59", "MsgSignature": ""}}
header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
'Content-Type': 'application/json',
'Cookie': cookie
}
posturl = '这里填你的url'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def write_fo_file(rows):
f = open('E:\lgww.csv', 'a')
for row in rows:
f.write(f'{row["PhoneNum"]}| {row["AddTime"]}| {row["MsgSignature"]}| {row["SendContent"]}| {row["Quantity"]}\n')
f.closed

if __name__ == "__main__":
start_time = datetime.datetime(2021, 8, 1, 0, 0, 0)
end_time = datetime.datetime(2021, 8, 31, 23, 59, 59)
next_time = start_time + datetime.timedelta(days=7) - datetime.timedelta(seconds=1)

while next_time <= end_time:
payloaddata["model"]["StartTime"] = start_time.strftime("%Y-%m-%d %H:%M:%S")
payloaddata["model"]["EndTime"] = next_time.strftime("%Y-%m-%d %H:%M:%S")

print("start: {}, end: {}".format(payloaddata["model"]["StartTime"], payloaddata["model"]["EndTime"]))

offset = 0
limit = 1000

while True:
payloaddata["limit"] = limit
payloaddata["offset"] = offset
r = requests.post(posturl, headers=header, data=json.dumps(payloaddata), verify=False)
data = r.json()
total = data["total"]
offset += len(data["rows"])
print("offset: {}, limit:{} total:{}".format(offset, limit, total))

write_fo_file(data["rows"])

time.sleep(1)
if offset >= total:
break

if next_time == end_time:
break
start_time = next_time + datetime.timedelta(seconds=1)
next_time = start_time + datetime.timedelta(days=7) - datetime.timedelta(seconds=1)
if next_time > end_time:
next_time = end_time

分析数据

数据处理:导入数据,加列名,提取天和小时,给手机号脱敏

1
2
3
4
5
6
7
8
9
10
11
12
13
import pandas as pd
from matplotlib import pyplot as plt
from wordcloud import WordCloud,STOPWORDS
import jieba
df=pd.read_csv('E:\\lgww.csv',sep='|',header=None)
#pd.set_option('display.max_columns',None)
df.columns=['num','date','id','content','quantity']
df['num']=df['num'].astype('string')
df['num'] = df['num'].str.slice_replace(8, 12, '*' * 4)
df['day']=df['date'].apply(lambda x:x[0:11])
df['date']=pd.to_datetime(df['date'])
df['hourr']=df['date'].dt.hour
print(df.head())

导入数据的前五行

设置中文字体

1
plt.rcParams['font.sans-serif']=['simhei']

24小时发送量

该商户每天上午10点是发送高峰期,其次是下午的6点和7点,晚上11点至凌晨8点几乎不发送,避免打扰到用户。

1
2
3
4
5
6
7
8
plt.style.use('ggplot')
dh=df.groupby(['hourr']).count()
dh.reset_index(inplace=True)
plt.bar(dh['hourr'],dh['id'],edgecolor='k',linewidth=1,label='发送时刻分布')
plt.legend()
for a,b in zip(dh['hourr'],dh['id']):
plt.text(a,b,b,ha='center',va='bottom',rotation=90)
plt.show()

8月每天的发送量

该商户8月每天的发送量基本在4万条以下,27号发送量激增,考虑是临近月底,政府加大了力度宣传疫苗接种。

1
2
3
4
5
6
7
8
9
10
11
12
dc=df.groupby(['day']).count()
dc.reset_index(inplace=True)
plt.figure(figsize=(14,9))
plt.title('8月短信发送量',fontsize=18)
plt.plot(dc['day'],dc['id'],color='#99ff99',label='日发送量')
plt.legend()
plt.ylabel('发送量')
plt.xticks(rotation=60)
for a,b in zip(dc['day'],dc['id']):
plt.text(a,b,b,ha='center',va='bottom',rotation=90)
plt.savefig('E:\\day.png')
plt.show()
1
print(dc.describe())

可以看到,8月的平均每天发送量是:17751,中位数是:12964,最小发送量是89,最大发送量是:141554;

发送量前五的签名

1
2
3
4
5
6
7
8
9
10
dc1=df.groupby(['id']).count().sort_values('content',ascending=False).head(5)
dc1.reset_index(inplace=True)
colors = ['#ff9999','#66b3ff','#99ff99','#ffcc99','#55B4B0']
plt.title('发送量前五的签名')
plt.pie(dc1['content'], colors = colors, labels=dc1['id'],autopct="%0.2f%%",startangle=30)
centre_circle = plt.Circle((0,0),0.80,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.savefig('E:\\id.png')
plt.show()

短信内容词云展示

1
2
3
4
5
6
7
8
9
wordlist=df['content'].tolist()
def gg(ml):
li=[''.join(jieba.cut(sentence))for sentence in ml]
new_text=''.join(li)
w=WordCloud(background_color='white',font_path=r'C:\Windows\Fonts\simsun.ttc').generate(new_text)
plt.imshow(w)
plt.axis('off')
plt.show()
gg(wordlist)

存储数据

将该商户8月的数据存储到mysql

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import pandas as pd
import pymysql
df=pd.read_csv('E:\\lgww.csv',sep='|',header=None)
row_num=df.shape[0]
con=pymysql.connect(host='localhost',user='root',password='******',charset='utf8',port=3306)
cursor=con.cursor()
try:
cursor.execute('use lj;')
cursor.execute('drop table if exists hdb;')
sql1 = "create table hdb(phonenum char(11),date datetime,msgid varchar(20),content varchar(200));"
cursor.execute(sql1)
for i in range(row_num):
sql='insert into hdb(phonenum,date,msgid,content)values (%s,%s,%s,%s)'
cursor.execute(sql,(df.iloc[i,:][0],df.iloc[i,:][1],df.iloc[i,:][2],df.iloc[i,:][3]))
con.commit()
finally:
con.close()