Mercurial > hg > nsaunier > traffic-intelligence
comparison scripts/process.py @ 1059:a87b3072bd26
working version
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Wed, 11 Jul 2018 01:48:42 -0400 |
| parents | 16575ca4537d |
| children | 671426ce0f3e |
comparison
equal
deleted
inserted
replaced
| 1058:16575ca4537d | 1059:a87b3072bd26 |
|---|---|
| 6 | 6 |
| 7 #import matplotlib | 7 #import matplotlib |
| 8 #atplotlib.use('Agg') | 8 #atplotlib.use('Agg') |
| 9 import matplotlib.pyplot as plt | 9 import matplotlib.pyplot as plt |
| 10 import numpy as np | 10 import numpy as np |
| 11 from pandas import DataFrame | 11 import pandas as pd |
| 12 | 12 |
| 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml | 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml |
| 14 from trafficintelligence.metadata import * | 14 from trafficintelligence.metadata import * |
| 15 | 15 |
| 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') | 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') |
| 58 | 58 |
| 59 | 59 |
| 60 # analysis options | 60 # analysis options |
| 61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) | 61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) |
| 62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) | 62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) |
| 63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) | 63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15) |
| 64 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) | 64 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) |
| 65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) | 65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) |
| 66 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') | 66 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') |
| 67 dpi = 150 | 67 dpi = 150 |
| 68 # unit of analysis: site - camera-view | 68 # unit of analysis: site - camera-view |
| 211 ################################# | 211 ################################# |
| 212 if args.analyze == 'object': | 212 if args.analyze == 'object': |
| 213 # user speeds, accelerations | 213 # user speeds, accelerations |
| 214 # aggregation per site | 214 # aggregation per site |
| 215 data = [] # list of observation per site-user with time | 215 data = [] # list of observation per site-user with time |
| 216 headers = ['sites', 'date', 'time', 'user_type'] | 216 headers = ['site', 'date', 'time', 'user_type'] |
| 217 aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles) | 217 aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles) |
| 218 headers.extend(tmpheaders) | 218 headers.extend(tmpheaders) |
| 219 for vs in videoSequences: | 219 for vs in videoSequences: |
| 220 d = vs.startTime.date() | 220 d = vs.startTime.date() |
| 221 t1 = vs.startTime.time() | 221 t1 = vs.startTime.time() |
| 231 if method == 'centile': | 231 if method == 'centile': |
| 232 row += aggSpeeds.tolist() | 232 row += aggSpeeds.tolist() |
| 233 else: | 233 else: |
| 234 row.append(aggSpeeds) | 234 row.append(aggSpeeds) |
| 235 data.append(row) | 235 data.append(row) |
| 236 data = DataFrame(data, columns = headers) | 236 data = pd.DataFrame(data, columns = headers) |
| 237 if args.output == 'figure': | 237 if args.output == 'figure': |
| 238 for name in headers[4:]: | 238 for name in headers[4:]: |
| 239 plt.ioff() | 239 plt.ioff() |
| 240 plt.figure() | 240 plt.figure() |
| 241 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) | 241 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) |
| 274 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) | 274 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) |
| 275 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') | 275 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') |
| 276 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) | 276 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) |
| 277 plt.close() | 277 plt.close() |
| 278 | 278 |
| 279 if args.analyze == 'event': # aggregate event data by 15 min interval (arg.intervalDuration) | 279 if args.analyze == 'event': # aggregate event data by 15 min interval (arg.intervalDuration), count events with thresholds |
| 280 data = pd.read_csv(arg.eventFilename) | 280 data = pd.read_csv(args.eventFilename, parse_dates = [2]) |
| | 281 #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000) |
| 281 # create time for end of each 15 min, then group by, using the agg method for each data column | 282 # create time for end of each 15 min, then group by, using the agg method for each data column |
| 282 headers = ['sites', 'date', 'intervalend15'] | 283 headers = ['sites', 'date', 'intervalend15', 'duration', 'count'] |
| 283 # add n road users (by type?) | 284 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) |
| 284 aggFunctions, tmpheaders = utils.aggregationMethods(arg.aggMethods, args.aggCentiles) | 285 dataColumns = list(data.columns[4:]) |
| 285 headers.extend(tmpheaders) | 286 for h in dataColumns: |
| | 287 for h2 in tmpheaders: |
| | 288 headers.append(h+'-'+h2) |
| | 289 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) |
| | 290 outputData = [] |
| | 291 for name, group in data.groupby(['site', 'date', 'intervalend15']): |
| | 292 # get duration as intervalend15-min(time), apply agg methods to each centile |
| | 293 row = [] |
| | 294 row.extend(name) |
| | 295 row.append((name[2].minute-group.time.min().minute) % 60) |
| | 296 row.append(len(group)) |
| | 297 for h in dataColumns: |
| | 298 for method,func in aggFunctions.items(): |
| | 299 aggregated = func(group[h]) |
| | 300 if method == 'centile': |
| | 301 row.extend(aggregated) |
| | 302 else: |
| | 303 row.append(aggregated) |
| | 304 outputData.append(row) |
| | 305 pd.DataFrame(outputData, columns = headers).to_csv('aggregated-speeds.csv', index = False) |
