Mercurial > hg > nsaunier > traffic-intelligence
comparison scripts/process.py @ 1083:5b597b021aed
added function to aggregate interactions
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 23 Jul 2018 20:17:27 -0400 |
| parents | 706034a4c6cd |
| children | 1a7e0b2c858b |
comparison
equal
deleted
inserted
replaced
| 1082:706034a4c6cd | 1083:5b597b021aed |
|---|---|
| 22 # main function | 22 # main function |
| 23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) | 23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) |
| 24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction']) | 24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction']) |
| 25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) | 25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) |
| 26 parser.add_argument('--progress', dest = 'progress', help = 'information about the progress of processing', action = 'store_true') | 26 parser.add_argument('--progress', dest = 'progress', help = 'information about the progress of processing', action = 'store_true') |
| 27 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction', 'event']) | 27 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction', 'event-speed', 'event-interaction']) |
| 28 | 28 |
| 29 # common options | 29 # common options |
| 30 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') | 30 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') |
| 31 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) | 31 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) |
| 32 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature') | 32 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature') |
| 333 # plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) | 333 # plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) |
| 334 # plt.close() | 334 # plt.close() |
| 335 elif args.output == 'event': | 335 elif args.output == 'event': |
| 336 data.to_csv(args.eventFilename, index = False) | 336 data.to_csv(args.eventFilename, index = False) |
| 337 | 337 |
| 338 if args.analyze == 'event': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds | 338 if args.analyze == 'event-speed': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds |
| 339 data = pd.read_csv(args.eventFilename, parse_dates = [2]) | 339 data = pd.read_csv(args.eventFilename, parse_dates = [2], nrows = 10000) |
| 340 #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000) | 340 #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000) |
| 341 # create time for end of each 15 min, then group by, using the agg method for each data column | 341 # create time for end of each 15 min, then group by, using the agg method for each data column |
| 342 headers = ['site', 'date', 'intervalend15', 'duration', 'count'] | 342 headers = ['site', 'date', 'intervalend15', 'duration', 'count'] |
| 343 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) | 343 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) |
| 344 dataColumns = list(data.columns[4:]) | 344 dataColumns = list(data.columns[4:]) |
| 345 for h in dataColumns: | 345 for h in dataColumns: |
| 346 for h2 in tmpheaders: | 346 for h2 in tmpheaders: |
| 347 headers.append(h+'-'+h2) | 347 headers.append(h+'-'+h2) |
| 348 for h in dataColumns: | 348 if args.eventThresholds is not None: |
| 349 for t in args.eventThresholds: | 349 for h in dataColumns: |
| 350 headers.append('n-{}-{}'.format(h, t)) | 350 for t in args.eventThresholds: |
| 351 headers.append('n-{}-{}'.format(h, t)) | |
| 351 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) | 352 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) |
| 352 outputData = [] | 353 outputData = [] |
| 353 for name, group in data.groupby(['site', 'date', 'intervalend15']): | 354 for name, group in data.groupby(['site', 'date', 'intervalend15']): |
| 354 row = [] | 355 row = [] |
| 355 row.extend(name) | 356 row.extend(name) |
| 362 aggregated = func(group[h]) | 363 aggregated = func(group[h]) |
| 363 if method == 'centile': | 364 if method == 'centile': |
| 364 row.extend(aggregated) | 365 row.extend(aggregated) |
| 365 else: | 366 else: |
| 366 row.append(aggregated) | 367 row.append(aggregated) |
| 367 for h in dataColumns: | 368 if args.eventThresholds is not None: |
| 368 for t in args.eventThresholds: | 369 for h in dataColumns: |
| 369 row.append((group[h] > t).sum()) | 370 for t in args.eventThresholds: |
| 371 row.append((group[h] > t).sum()) | |
| 370 outputData.append(row) | 372 outputData.append(row) |
| 371 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False) | 373 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False) |
| 374 | |
| 375 elif args.analyze == 'event-interaction': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds | |
| 376 data = pd.read_csv(args.eventFilename, parse_dates = [2], nrows = 20000) | |
| 377 headers = ['site', 'date', 'intervalend15', 'duration', 'count'] | |
| 378 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) | |
| 379 dataColumns = list(data.columns[3:]) | |
| 380 for h in dataColumns: | |
| 381 if not 'speed' in h.lower(): # proximity indicators are reversed, taking 85th centile of this column will yield the 15th centile (which we have to take the opposite again) | |
| 382 data[h] = -data[h] | |
| 383 for h in dataColumns: | |
| 384 for h2 in tmpheaders: | |
| 385 headers.append(h+'-'+h2) | |
| 386 for h,t in zip(dataColumns, args.eventThresholds): # each threshold in this case applies to one indicator | |
| 387 headers.append('n-{}-{}'.format(h, t)) | |
| 388 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) | |
| 389 outputData = [] | |
| 390 for name, group in data.groupby(['site', 'date', 'intervalend15']): | |
| 391 row = [] | |
| 392 row.extend(name) | |
| 393 groupStartTime = group.time.min() | |
| 394 groupEndTime = group.time.max() | |
| 395 row.append((groupEndTime.minute+1-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600) | |
| 396 row.append(len(group)) | |
| 397 for h in dataColumns: | |
| 398 for method,func in aggFunctions.items(): | |
| 399 tmp = group.loc[~group[h].isna(), h] | |
| 400 if len(tmp)>0: | |
| 401 aggregated = func(tmp) # todo invert if the resulting stat is negative | |
| 402 if method == 'centile': | |
| 403 row.extend(np.abs(aggregated)) | |
| 404 else: | |
| 405 row.append(np.abs(aggregated)) | |
| 406 else: | |
| 407 row.extend([None]*len(aggFunctions)) | |
| 408 for h,t in zip(dataColumns, args.eventThresholds): # each threshold in this case applies to one indicator | |
| 409 if 'speed' in h.lower(): | |
| 410 row.append((group[h] > t).sum()) | |
| 411 else: | |
| 412 row.append((group[h] > -t).sum()) # take larger than negative threshold for proximity indicators | |
| 413 outputData.append(row) | |
| 414 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False) |
