-
Notifications
You must be signed in to change notification settings - Fork 0
/
churn.py
179 lines (142 loc) · 6.21 KB
/
churn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
## update 2019-08
from os import listdir
from os.path import join, exists, getsize
from os import mkdir
from shutil import copyfile
from math import floor, ceil
from random import choice
from query import *
def intervals(nbseconds):
    """Return int(DAY / nbseconds).

    DAY is provided by one of the star imports (presumably the number of
    seconds in a day -- confirm in `query`), so the result is the number of
    whole slots of `nbseconds` that fit in DAY.
    """
    return int(DAY / nbseconds)
import sys
sys.path.append("scripts/datasets")
try:
from paths import *
except:
sys.path.append("../datasets/")
from paths import *
### Dynamic Model ###
def compute_active_periods(trace, delta=5):
    """Group the records of a trace into periods of continuous activity.

    Args:
        trace: sequence of records whose first element (``record[0]``) is a
            timestamp; records are expected in increasing time order
            (assumption -- the code never sorts them).
        delta: largest gap between two consecutive timestamps that still
            counts as the same period.

    Returns:
        A list of ``(start_ts, last_ts)`` tuples, one per period. Periods
        made of a single instant (``last_ts == start_ts``) are not reported.
    """
    active_periods = []
    if not trace:  # some traces are empty
        return active_periods

    start_ts = last_ts = trace[0][0]
    for record in trace:
        t = record[0]
        if t <= last_ts + delta:
            # Still within the current period: just extend it.
            last_ts = t
            continue
        # Gap larger than delta: close the current period and open a new one.
        if last_ts > start_ts:
            active_periods.append((start_ts, last_ts))
        start_ts = last_ts = t

    # Flush the period that was still open when the trace ended; without
    # this the final period (or the only one) would be silently dropped.
    if last_ts > start_ts:
        active_periods.append((start_ts, last_ts))
    return active_periods
def compute_active_time_vehicles(folder,delta):
    """Compute the active periods of every non-empty file in `folder`.

    Files are visited in sorted name order; zero-byte files are skipped.
    Each remaining file is read with `load` and passed to
    `compute_active_periods` together with `delta`.

    Returns:
        A list of ``(file_name, active_periods)`` tuples.
    """
    per_vehicle = []
    for name in sorted(listdir(folder)):
        path = join(folder, name)
        if getsize(path) <= 0:
            continue
        per_vehicle.append((name, compute_active_periods(load(path), delta)))
    return per_vehicle
def write_active_time(folder=beijing_folder, filename="vehicles.dat", delta=15):
    """Write the active periods of every vehicle in `folder` to `filename`.

    One line is written per vehicle returned by
    `compute_active_time_vehicles`: the vehicle's file name followed by the
    start and end of each period, all separated by commas. A vehicle with no
    period yields just ``name,``.
    """
    with open(filename, "w") as out:
        for vehicle_name, periods in compute_active_time_vehicles(folder, delta):
            flattened = ','.join(f"{start},{end}" for start, end in periods)
            print(vehicle_name, flattened, sep=",", file=out)
# Churn and other statistics computed on the datasets.
# Can also compute the "churn-in", i.e. the arrival rate.
def compute_churn(folder, nbintervals=1440, churn=True):
    """Count, per time interval, the cars present and those leaving/arriving.

    Every file of `folder` is counted as one car. The first comma-separated
    field of each line is parsed as a float timestamp and mapped to an
    interval index by dividing it by ``DAY / nbintervals`` (timestamps are
    presumably seconds within one day -- confirm against the dataset).

    Args:
        folder: directory containing one trace file per car.
        nbintervals: number of intervals DAY is split into.
        churn: if true, count cars present in an interval but absent from
            the next one (departures); if false, count cars present in an
            interval but absent from the previous one (arrivals). The
            neighbour index wraps around modulo `nbintervals`.

    Returns:
        ``(cars, changes)``: two lists of length `nbintervals` holding the
        number of cars seen in each interval and the number of
        departures/arrivals in each interval.
    """
    # Pick the direction BEFORE creating the counters: the original code
    # reused the name `churn` for the counter list, which made the flag
    # always truthy and the arrival-rate mode unreachable.
    offset = 1 if churn else -1
    cars = [0] * nbintervals
    changes = [0] * nbintervals
    for filename in sorted(listdir(folder)):
        with open(join(folder, filename), "r") as trace:
            # Set of interval indices in which this car appears at least once.
            seen = {
                int(float(line.split(",")[0]) / (DAY / nbintervals))
                for line in trace
            }
        for interval in seen:
            cars[interval] += 1
            if (interval + offset) % nbintervals not in seen:
                changes[interval] += 1
    return cars, changes
def write_nb_cars(folder, outfilename, nbintervals=1440):
    """Write the number of cars seen in each interval to `outfilename`.

    For every file in `folder` (sorted by name) the first comma-separated
    field of each line is turned into an interval index; a car is counted in
    an interval only the first time its index grows past the last one seen,
    so lines are expected in increasing time order. The output has one
    ``index count`` line per interval.
    """
    counts = [0 for _ in range(nbintervals)]
    for name in sorted(listdir(folder)):
        with open(join(folder, name), "r") as trace:
            latest = -1
            for record in trace:
                slot = int(float(record.split(",")[0]) / (DAY / nbintervals))
                if slot <= latest:
                    continue
                counts[slot] += 1
                latest = slot
    with open(outfilename, "w") as outf:
        for slot, count in enumerate(counts):
            print(slot, count, file=outf)
def write_churn(folder, outfilename, nbintervals=1440, churn=True):
    """Write the per-interval counts returned by `compute_churn`.

    `compute_churn(folder, nbintervals, churn)` is called first; then one
    ``index count`` line is written to `outfilename` for each interval.
    """
    cars, counts = compute_churn(folder, nbintervals, churn)
    with open(outfilename, "w") as outf:
        outf.writelines(f"{idx} {counts[idx]}\n" for idx in range(len(cars)))
def write_arrival_rate(folder, outfilename, nbintervals=1440):
    """Delegate to `write_churn` with the `churn` flag set to False."""
    write_churn(folder, outfilename, nbintervals=nbintervals, churn=False)
def print_churn(folder, nbintervals=1440):
    """Print the churn percentage of every interval that has cars.

    Each printed line holds the interval start (index times
    ``DAY // nbintervals``), the number of cars in the interval and
    ``100 * churn / cars``. Intervals without cars are skipped.
    """
    cars, churn = compute_churn(folder,nbintervals)
    for slot, (present, lost) in enumerate(zip(cars, churn)):
        if not present:
            continue
        print(slot * (DAY // nbintervals), present, 100 * lost / present)
def write_average_churn(folder, outfile, length=60, nbbins=100):
    """Write binned averages of churn ratio and car count to `outfile`.

    Churn is computed over ``DAY // length`` intervals. DAY is then cut into
    bins of ``DAY // nbbins`` time units; for each bin the churn ratio and
    the car count are averaged over every time unit of the bin and rounded
    to 3 decimals. `outfile` is an already-open writable file object.

    NOTE(review): only bins ``0 .. nbbins-2`` are written (the loop runs over
    ``range(nbbins-1)``) -- confirm whether skipping the last bin is intended.
    """
    cars, churn = compute_churn(folder, DAY // length)
    bins = DAY // nbbins

    def churn_ratio(timesec):
        # Ratio of churned cars in the interval containing `timesec`.
        slot = timesec // length
        if not cars[slot]:
            return 0
        return churn[slot] / cars[slot]

    def car_count(timesec):
        # Number of cars in the interval containing `timesec`.
        return cars[timesec // length]

    def bin_mean(series, index):
        # Mean of `series` over every time unit of bin `index`.
        seconds = range(index * bins, (index + 1) * bins)
        return round(sum(series(sec) for sec in seconds) / bins, 3)

    for index in range(nbbins - 1):
        print(index, bin_mean(churn_ratio, index), bin_mean(car_count, index), file=outfile)
def write_churns(folder, outfileprefix):
    """Write one averaged-churn file per interval length.

    For each length in 10, 30, 60, 120, 300 and 900 the file
    ``<outfileprefix>_<length>.dat`` is created (its name is echoed to
    stdout) and filled by `write_average_churn` with 100 bins.
    """
    for length in (10, 30, 60, 120, 300, 900):
        outfilename = outfileprefix + f"_{length}.dat"
        print(outfilename)
        with open(outfilename, 'w') as outfile:
            write_average_churn(folder, outfile, length, 100)
def max_churn(folder, nbintervals=1440):
    """Return the largest churn/cars ratio over the intervals that have cars.

    Raises ValueError (from `max`) when no interval contains any car.
    """
    cars, churn = compute_churn(folder,nbintervals)
    ratios = [lost / present for present, lost in zip(cars, churn) if present]
    return max(ratios)
def average_churn(folder, nbintervals=1440):
    """Return the churn/cars ratio averaged over all intervals.

    Intervals without cars add nothing to the sum but are still counted in
    the divisor, which is the total number of intervals.
    """
    cars, churn = compute_churn(folder,nbintervals)
    total = 0
    for present, lost in zip(cars, churn):
        if present:
            total += lost / present
    return total / len(churn)
def max_cars(folder, nbintervals=1440):
    """Return the highest number of cars found in a single interval."""
    cars, _ = compute_churn(folder,nbintervals)
    return max(cars)
## inflate data for long queries
def active_vehicles(folder, start=64800, end=65100):
    """Copy every trace with a record inside [start, end] to a new folder.

    The destination is ``<datafolder>/active-<start>-<end>`` and is created
    when missing. A file of `folder` is copied as soon as one of its lines
    has a first comma-separated field (parsed as float) between `start` and
    `end`, both inclusive.
    """
    activefolder = join(datafolder, f"active-{start}-{end}")
    if not exists(activefolder):
        mkdir(activefolder)
    for filename in sorted(listdir(folder)):
        source = join(folder, filename)
        with open(source, "r") as trace:
            in_window = any(
                start <= float(record.split(",")[0]) <= end for record in trace
            )
            if in_window:
                copyfile(source, join(activefolder, filename))
def inflate_data(basefolder, activefolder, nbdays=30):
    """Create an "inflated" folder listing random vehicles for each active one.

    The output folder is named after `activefolder` with ``_<nbdays>days``
    appended and is created when missing. For every file of `activefolder`
    a file of the same name is written there, containing `nbdays` lines,
    each the name of a file picked at random from `basefolder`.

    Args:
        basefolder: directory whose file names are sampled.
        activefolder: directory whose file names drive the output; a single
            trailing path separator is accepted but not required.
        nbdays: number of names written per output file.

    Raises:
        IndexError: from `random.choice` when `basefolder` is empty and
            there is at least one name to draw.
    """
    from os import sep, altsep  # local import: only needed here

    # Drop one trailing separator if there is one. The original always
    # removed the last character, which truncated the folder name whenever
    # the path was given without a trailing separator.
    separators = tuple(s for s in (sep, altsep) if s)
    stem = activefolder[:-1] if activefolder.endswith(separators) else activefolder
    inflatedfolder = stem + f"_{nbdays}days"
    if not exists(inflatedfolder):
        mkdir(inflatedfolder)
    vehicles = sorted(listdir(basefolder))
    for filename in sorted(listdir(activefolder)):
        with open(join(inflatedfolder, filename), "w") as f:
            for _ in range(nbdays):
                print(choice(vehicles), file=f)
## sizes functions
def nb_records(folder):
    """Return the total length of what `load` returns for each file of `folder`."""
    total = 0
    for car in sorted(listdir(folder)):
        total += len(load(join(folder, car)))
    return total
def write_sizes(folder, outfilename):
    """Write the size in bytes of every file of `folder`, one per line.

    Files are listed in sorted name order. `outfilename` is opened before
    the folder is listed, so it appears in the listing (with whatever size
    it has at that moment) if it lives inside `folder`.
    """
    with open(outfilename, 'w') as outf:
        for car in sorted(listdir(folder)):
            # Use the `getsize` already imported from os.path: the module
            # never imports `os` itself, so `os.path.getsize` depended on a
            # name that is not visibly in scope.
            print(getsize(join(folder, car)), file=outf)
def write_sizesv(folder, outfilename):
    """Write, per 'gpx' file of `folder`, the combined size of its sibling files.

    For every file name containing ``'gpx'`` the last 7 characters (the
    length of ``'gpx.txt'``) are removed from its path to get a common
    prefix; the sizes of ``<prefix>gpx.txt``, ``<prefix>EngineSpeed.txt``
    and ``<prefix>EradAout_N_Actl.txt`` are summed over those that exist and
    the total is written on its own line. Other files are ignored.
    """
    suffixes = ('gpx.txt', 'EngineSpeed.txt', 'EradAout_N_Actl.txt')
    with open(outfilename, 'w') as outf:
        for car in sorted(listdir(folder)):
            if 'gpx' not in car:
                continue
            basename = join(folder, car)[:-len('gpx.txt')]
            # Use the `getsize` already imported from os.path: the module
            # never imports `os` itself, so `os.path.getsize` depended on a
            # name that is not visibly in scope.
            total = sum(
                getsize(basename + ext)
                for ext in suffixes
                if exists(basename + ext)
            )
            print(total, file=outf)