具体来说,我们希望回答以下问题:
在订单数据中,我们希望完成以下统计:
import glob

import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import animation
def barlist(n, csv_paths=None):
    """Return the mean ``PASS_MILE`` per hour of day for one order file.

    Args:
        n: Index of the file to load.
        csv_paths: Optional sequence of CSV paths to index into. When omitted,
            the module-level ``paths`` list is used, as before.

    Returns:
        A NumPy array with exactly 24 entries. Entry ``h`` is the mean
        ``PASS_MILE`` of the rows whose ``GETON_DATE`` falls in hour ``h``.
        Hours with no rows are reported as 0 so the result always lines up
        with the 24 bars drawn by the caller.
    """
    if csv_paths is None:
        csv_paths = paths

    # Only these two columns are used, so skip parsing the rest of the file.
    orders = pd.read_csv(
        csv_paths[n],
        usecols=['GETON_DATE', 'PASS_MILE'],
        dtype={'PASS_MILE': np.float16},
    )
    pickup_hour = pd.to_datetime(orders['GETON_DATE']).dt.hour

    # float16 is only the storage format; aggregate in float32 so the mean is
    # not accumulated in half precision.
    hourly_mean = (
        orders['PASS_MILE'].astype(np.float32).groupby(pickup_hour).mean()
    )

    # groupby() only emits hours that occur in the data; pad to all 24 hours.
    return hourly_mean.reindex(range(24), fill_value=0.0).values
# Figure that the animation below draws into.
fig = plt.figure()

# Input files for the animation, ordered by file name.
paths = sorted(glob.glob('../input/taxiOrder20190*.csv'))
n = len(paths)  # number of input files

# One bar per hour of the day, initialised from the first file.
x = range(24)
barcollection = plt.bar(x, barlist(0))
plt.ylim(0, 8)
def animate(i):
    """Redraw the bars and the title for the ``i``-th input file.

    Args:
        i: Frame number supplied by ``FuncAnimation``; used directly as the
            index into ``paths``.
    """
    print(i)  # progress indicator: every frame re-reads a whole CSV file
    heights = barlist(i)
    for idx, bar in enumerate(barcollection):
        bar.set_height(heights[idx])
    # Show the file name (last path component) as the frame title.
    plt.title(paths[i].split('/')[-1])


# Axis limits and labels do not change between frames, so set them once
# instead of on every callback.
plt.ylim(0, 8)
plt.ylabel('PASS_MILE / KM')
plt.xlabel('Hour')

# One frame per file. Previously the frames were mapped to paths[1:], so the
# first file was loaded for the initial bars but never appeared in the GIF.
anim = animation.FuncAnimation(fig, animate, repeat=False, blit=False,
                               frames=n, interval=500)
anim.save('order.gif', dpi=150)
def taxi_nopass_ratio(orders):
    """Return the mean of NOPASS_MILE / (NOPASS_MILE + PASS_MILE).

    Args:
        orders: DataFrame with ``NOPASS_MILE`` and ``PASS_MILE`` columns.

    Returns:
        The mean ratio as a float32 scalar. Rows where either column is
        missing are dropped first. ``orders`` itself is not modified.
    """
    miles = orders[['NOPASS_MILE', 'PASS_MILE']].dropna()
    nopass_mile = miles['NOPASS_MILE']
    # The division runs in the columns' own dtype; only the resulting ratio is
    # widened to float32 so the mean is not accumulated in half precision.
    ratio = nopass_mile / (nopass_mile + miles['PASS_MILE'])
    return ratio.astype(np.float32).mean()


def report_taxi_nopass_ratio(pattern):
    """Print ``path  mean-ratio`` for every CSV matching ``pattern``.

    Args:
        pattern: Glob pattern of the taxi order files; matches are processed
            in file-name order.
    """
    for path in sorted(glob.glob(pattern)):
        # Only the two mileage columns are needed for this statistic.
        orders = pd.read_csv(
            path,
            usecols=['NOPASS_MILE', 'PASS_MILE'],
            dtype={'NOPASS_MILE': np.float16, 'PASS_MILE': np.float16},
        )
        print(path, taxi_nopass_ratio(orders))


# 2019 data
report_taxi_nopass_ratio('../input/taxiOrder20190*.csv')
# ../input/taxiOrder20190531.csv 0.27126783
# ../input/taxiOrder20190601.csv 0.27297953
# ../input/taxiOrder20190602.csv 0.30302802
# ../input/taxiOrder20190603.csv 0.31049386
# ../input/taxiOrder20190604.csv 0.3039471
# ../input/taxiOrder20190605.csv 0.2933384
# ../input/taxiOrder20190606.csv 0.2547359
# ../input/taxiOrder20190607.csv 0.28453994
# ../input/taxiOrder20190608.csv 0.304996
# ../input/taxiOrder20190609.csv 0.3115026

# 2020 data
report_taxi_nopass_ratio('../input/taxiOrder20200*.csv')
# ../input/taxiOrder20200618.csv 0.34004667
# ../input/taxiOrder20200619.csv 0.31731918
# ../input/taxiOrder20200620.csv 0.33150223
# ../input/taxiOrder20200621.csv 0.3449821
# ../input/taxiOrder20200622.csv 0.33434668
# ../input/taxiOrder20200623.csv 0.3306154
# ../input/taxiOrder20200624.csv 0.29195258
# ../input/taxiOrder20200625.csv 0.342389
# ../input/taxiOrder20200626.csv 0.3628601
# ../input/taxiOrder20200627.csv 0.35649845
# Substrings that mark a DRIVE_MILE value as unusable in the 2020 files.
_BAD_DRIVE_MILE_MARKERS = ('-', '|', '路')


def wyc_nopass_ratio(orders, denominator_offset=0.0):
    """Return the mean of WAIT_MILE / (DRIVE_MILE + WAIT_MILE + offset).

    Args:
        orders: DataFrame with numeric ``WAIT_MILE`` and ``DRIVE_MILE``
            columns.
        denominator_offset: Constant added to the denominator of every row.

    Returns:
        The mean ratio. Rows whose ratio is NaN (for example because a value
        is missing) are skipped by ``mean()``. ``orders`` is not modified.
    """
    wait_mile = orders['WAIT_MILE']
    total_mile = orders['DRIVE_MILE'] + wait_mile + denominator_offset
    return (wait_mile / total_mile).mean()


def drop_malformed_drive_mile(orders):
    """Drop rows with a malformed ``DRIVE_MILE`` and cast mileage to float.

    A value is treated as malformed when its string form contains '-', '|'
    or '路'.

    Args:
        orders: DataFrame with ``WAIT_MILE`` and ``DRIVE_MILE`` columns.

    Returns:
        A new DataFrame holding only ``WAIT_MILE`` and ``DRIVE_MILE`` of the
        remaining rows, both as float.
    """
    def is_clean(value):
        text = str(value)
        return not any(marker in text for marker in _BAD_DRIVE_MILE_MARKERS)

    keep = orders['DRIVE_MILE'].apply(is_clean)
    # .loc + astype builds a fresh frame, so nothing is assigned into a slice.
    return orders.loc[keep, ['WAIT_MILE', 'DRIVE_MILE']].astype(float)


def report_wyc_nopass_ratio(pattern, sep=',', clean=False,
                            denominator_offset=0.0):
    """Print ``path  mean-ratio`` for every CSV matching ``pattern``.

    Args:
        pattern: Glob pattern of the order files; matches are processed in
            file-name order.
        sep: Field separator passed to ``pandas.read_csv``.
        clean: When true, run ``drop_malformed_drive_mile`` before computing
            the ratio.
        denominator_offset: Forwarded to ``wyc_nopass_ratio``.
    """
    for path in sorted(glob.glob(pattern)):
        # Only the two mileage columns feed the statistic; the CAR_NO rename
        # and DEP_TIME parsing done previously never affected the result.
        orders = pd.read_csv(path, sep=sep,
                             usecols=['WAIT_MILE', 'DRIVE_MILE'])
        if clean:
            orders = drop_malformed_drive_mile(orders)
        print(path, wyc_nopass_ratio(orders, denominator_offset))


# 2019
report_wyc_nopass_ratio('../input/wycOrder2019*.csv')
# ../input/wycOrder20190531.csv 0.04377351902383912
# ../input/wycOrder20190601.csv 0.05089443118832635
# ../input/wycOrder20190602.csv 0.05027405204548952
# ../input/wycOrder20190603.csv 0.04410937618481343
# ../input/wycOrder20190604.csv 0.04556210882875603
# ../input/wycOrder20190605.csv 0.044291802481437374
# ../input/wycOrder20190606.csv 0.050471234377955004
# ../input/wycOrder20190607.csv 0.06292328749500437
# ../input/wycOrder20190608.csv 0.05154802709775605
# ../input/wycOrder20190609.csv 0.05645411864784134

# 2020: these files are backslash-separated and need cleaning.
# NOTE(review): the 2020 ratio adds 0.1 to the denominator while the 2019 one
# does not, so the two years are not computed identically. The offset is kept
# so the figures recorded below stay reproducible - confirm whether intended.
report_wyc_nopass_ratio('../input/wycOrder2020*.csv', sep='\\', clean=True,
                        denominator_offset=0.1)
# ../input/wycOrder20200618.csv 0.0366375999557114
# ../input/wycOrder20200619.csv 0.038303237011164316
# ../input/wycOrder20200621.csv 0.049643219579652106
# ../input/wycOrder20200622.csv 0.03599123977888786
# ../input/wycOrder20200623.csv 0.035535909940606306
# ../input/wycOrder20200624.csv 0.04063421181237617
# ../input/wycOrder20200625.csv 0.051779033543772356
# ../input/wycOrder20200626.csv 0.04035187796069988
# ../input/wycOrder20200627.csv 0.047062785762294765
© 2019-2023 coggle.club 版权所有 京ICP备20022947 京公网安备 11030102010643号