Mercurial > hg > nsaunier > traffic-intelligence
comparison scripts/process.py @ 1077:3939ae415be0
Merging
| author | Wendlasida |
|---|---|
| date | Fri, 20 Jul 2018 14:03:34 -0400 |
| parents | 0154133e77df |
| children | 58994b08be42 |
comparison
equal
deleted
inserted
replaced
| 1076:108c5dc4e34a | 1077:3939ae415be0 |
|---|---|
| 6 | 6 |
| 7 #import matplotlib | 7 #import matplotlib |
| 8 #matplotlib.use('Agg') | 8 #matplotlib.use('Agg') |
| 9 import matplotlib.pyplot as plt | 9 import matplotlib.pyplot as plt |
| 10 import numpy as np | 10 import numpy as np |
| 11 from pandas import DataFrame | 11 import pandas as pd |
| 12 | 12 |
| 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml | 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml |
| 14 from trafficintelligence.metadata import * | 14 from trafficintelligence.metadata import * |
| 15 | 15 |
| 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') | 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') |
| 17 # input | 17 # input |
| 18 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True) | 18 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True) |
| 19 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*', type = int) | 19 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*') |
| 20 parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*') | 20 parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*') |
| 21 | 21 |
| 22 # main function | 22 # main function |
| 23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) | 23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) |
| 24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction']) | 24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction']) |
| 25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) | 25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) |
| 26 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction']) | 26 parser.add_argument('--progress', dest = 'progress', help = 'information about the progress of processing', action = 'store_true') |
| 27 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction', 'event']) | |
| 27 | 28 |
| 28 # common options | 29 # common options |
| 29 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') | 30 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') |
| 30 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) | 31 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) |
| 31 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature') | 32 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature') |
| 58 | 59 |
| 59 | 60 |
| 60 # analysis options | 61 # analysis options |
| 61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) | 62 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) |
| 62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) | 63 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) |
| 63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) | 64 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15) |
| 64 parser.add_argument('--aggregation', dest = 'aggMethod', help = 'aggregation method per user/event and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) | 65 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) |
| 65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) | 66 parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) |
| 67 parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float) | |
| 68 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data') | |
| 66 dpi = 150 | 69 dpi = 150 |
| 67 # unit of analysis: site or video sequence? | 70 # unit of analysis: site - camera-view |
| 68 | 71 |
| 69 # need way of selecting sites as similar as possible to sql alchemy syntax | 72 # need way of selecting sites as similar as possible to sql alchemy syntax |
| 70 # override tracking.cfg from db | 73 # override tracking.cfg from db |
| 71 # manage cfg files, overwrite them (or a subset of parameters) | 74 # manage cfg files, overwrite them (or a subset of parameters) |
| 72 # delete sqlite files | 75 # delete sqlite files |
| 80 session = connectDatabase(args.metadataFilename) | 83 session = connectDatabase(args.metadataFilename) |
| 81 parentPath = Path(args.metadataFilename).parent # files are relative to metadata location | 84 parentPath = Path(args.metadataFilename).parent # files are relative to metadata location |
| 82 videoSequences = [] | 85 videoSequences = [] |
| 83 sites = [] | 86 sites = [] |
| 84 if args.videoIds is not None: | 87 if args.videoIds is not None: |
| 85 videoSequences = [session.query(VideoSequence).get(videoId) for videoId in args.videoIds] | 88 for videoId in args.videoIds: |
| 86 siteIds = set([vs.cameraView.siteIdx for vs in videoSequences]) | 89 if '-' in videoId: |
| 90 videoSequences.extend([session.query(VideoSequence).get(i) for i in moving.TimeInterval.parse(videoId)]) | |
| 91 else: | |
| 92 videoSequences.append(session.query(VideoSequence).get(int(videoId))) | |
| 93 videoSequences = [vs for vs in videoSequences if vs is not None] | |
| 94 sites = set([vs.cameraView.site for vs in videoSequences]) | |
| 87 elif args.siteIds is not None: | 95 elif args.siteIds is not None: |
| 88 siteIds = set(args.siteIds) | 96 for siteId in args.siteIds: |
| 89 for siteId in siteIds: | 97 if '-' in siteId: |
| 90 tmpsites = getSite(session, siteId) | 98 sites.extend([session.query(Site).get(i) for i in moving.TimeInterval.parse(siteId)]) |
| 91 sites.extend(tmpsites) | 99 else: |
| 92 for site in tmpsites: | 100 sites.append(session.query(Site).get(int(siteId))) |
| 93 for cv in site.cameraViews: | 101 sites = [s for s in sites if s is not None] |
| 94 videoSequences.extend(cv.videoSequences) | 102 for site in sites: |
| 103 videoSequences.extend(getSiteVideoSequences(site)) | |
| 95 else: | 104 else: |
| 96 print('No video/site to process') | 105 print('No video/site to process') |
| 97 | 106 |
| 98 if args.nProcesses > 1: | 107 if args.nProcesses > 1: |
| 99 pool = Pool(args.nProcesses) | 108 pool = Pool(args.nProcesses) |
| 109 | |
| 110 ################################# | |
| 111 # Report progress in the processing | |
| 112 ################################# | |
| 113 if args.progress: | |
| 114 print('Providing information on data progress') | |
| 115 headers = ['site', 'vs', 'features', 'objects', 'interactions'] # todo add prototypes and object classification | |
| 116 data = [] | |
| 117 for site in sites: | |
| 118 unprocessedVideoSequences = [] | |
| 119 for vs in getSiteVideoSequences(site): | |
| 120 if (parentPath/vs.getDatabaseFilename()).is_file(): # TODO check time of file? | |
| 121 tableNames = storage.tableNames(str(parentPath.absolute()/vs.getDatabaseFilename())) | |
| 122 data.append([site.name, vs.idx, 'positions' in tableNames, 'objects' in tableNames, 'interactions' in tableNames]) | |
| 123 else: | |
| 124 unprocessedVideoSequences.append(vs) | |
| 125 data.append([site.name, vs.idx, False, False, False]) | |
| 126 #if len(unprocessedVideoSequences): | |
| 127 # print('Site {} ({}) has {} completely unprocessed video sequences'.format (site.name, site.idx, len(unprocessedVideoSequences))) | |
| 128 data = pd.DataFrame(data, columns = headers) | |
| 129 print('-'*80) | |
| 130 print('\t'+' '.join(headers[2:])) | |
| 131 print('-'*80) | |
| 132 for name, group in data.groupby(['site']): #.agg({'vs': 'count'})) | |
| 133 n = group.vs.count() | |
| 134 print('{}: {} % / {} % / {} % ({})'.format(name, 100*group.features.sum()/float(n), 100*group.objects.sum()/float(n), 100*group.interactions.sum()/float(n), n)) | |
| 135 print('-'*80) | |
| 136 print(data) | |
| 100 | 137 |
| 101 ################################# | 138 ################################# |
| 102 # Delete | 139 # Delete |
| 103 ################################# | 140 ################################# |
| 104 if args.delete is not None: | 141 if args.delete is not None: |
| 117 # Process | 154 # Process |
| 118 ################################# | 155 ################################# |
| 119 if args.process in ['feature', 'object']: # tracking | 156 if args.process in ['feature', 'object']: # tracking |
| 120 if args.nProcesses == 1: | 157 if args.nProcesses == 1: |
| 121 for vs in videoSequences: | 158 for vs in videoSequences: |
| 122 if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object': | 159 if not (parentPath/vs.getDatabaseFilename()).is_file() or args.process == 'object': |
| 123 if args.configFilename is None: | 160 if args.configFilename is None: |
| 124 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) | 161 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) |
| 125 else: | 162 else: |
| 126 configFilename = args.configFilename | 163 configFilename = args.configFilename |
| 127 if vs.cameraView.cameraType is None: | 164 if vs.cameraView.cameraType is None: |
| 128 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), False, None, None, args.dryRun) | 165 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), False, None, None, args.dryRun) |
| 129 else: | 166 else: # caution: cameraType can be not None yet still lack the parameters needed for undistortion |
| 130 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), True, vs.cameraView.cameraType.intrinsicCameraMatrix, vs.cameraView.cameraType.distortionCoefficients, args.dryRun) | 167 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), True, vs.cameraView.cameraType.intrinsicCameraMatrix, vs.cameraView.cameraType.distortionCoefficients, args.dryRun) |
| 131 else: | 168 else: |
| 132 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) | 169 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) |
| 133 else: | 170 else: |
| 134 for vs in videoSequences: | 171 for vs in videoSequences: |
| 135 if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object': | 172 if not (parentPath/vs.getDatabaseFilename()).is_file() or args.process == 'object': |
| 136 if args.configFilename is None: | 173 if args.configFilename is None: |
| 137 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) | 174 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) |
| 138 else: | 175 else: |
| 139 configFilename = args.configFilename | 176 configFilename = args.configFilename |
| 140 if vs.cameraView.cameraType is None: | 177 if vs.cameraView.cameraType is None: |
| 145 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) | 182 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) |
| 146 pool.close() | 183 pool.close() |
| 147 pool.join() | 184 pool.join() |
| 148 | 185 |
| 149 elif args.process == 'prototype': # motion pattern learning | 186 elif args.process == 'prototype': # motion pattern learning |
| 150 # learn by site by default -> group videos by site (or by camera view? TODO add cameraviews) | 187 # learn by site by default -> group videos by camera view TODO |
| 151 # by default, load all objects, learn and then assign (BUT not save the assignments) | 188 # by default, load all objects, learn and then assign (BUT not save the assignments) |
| 152 for site in sites: | 189 for site in sites: |
| 153 print('Learning motion patterns for site {} ({})'.format(site.idx, site.name)) | 190 print('Learning motion patterns for site {} ({})'.format(site.idx, site.name)) |
| 154 objects = {} | 191 objects = {} |
| 155 object2VideoSequences = {} | 192 object2VideoSequences = {} |
| 175 prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, trainingObjects, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, []) | 212 prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, trainingObjects, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, []) |
| 176 if args.outputPrototypeDatabaseFilename is None: | 213 if args.outputPrototypeDatabaseFilename is None: |
| 177 outputPrototypeDatabaseFilename = args.databaseFilename | 214 outputPrototypeDatabaseFilename = args.databaseFilename |
| 178 else: | 215 else: |
| 179 outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename | 216 outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename |
| 180 # TODO maintain mapping from object prototype to db filename + compute nmatchings before | |
| 181 clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1) | 217 clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1) |
| 182 storage.savePrototypesToSqlite(str(parentPath/site.getPath()/outputPrototypeDatabaseFilename), [moving.Prototype(object2VideoSequences[trainingObjects[i]].getDatabaseFilename(False), trainingObjects[i].getNum(), prototypeType, clusterSizes[i]) for i in prototypeIndices]) | 218 storage.savePrototypesToSqlite(str(parentPath/site.getPath()/outputPrototypeDatabaseFilename), [moving.Prototype(object2VideoSequences[trainingObjects[i]].getDatabaseFilename(False), trainingObjects[i].getNum(), prototypeType, clusterSizes[i]) for i in prototypeIndices]) |
| 183 | 219 |
| 184 | 220 |
| 185 elif args.process == 'interaction': | 221 elif args.process == 'interaction': |
| 211 # Analyze | 247 # Analyze |
| 212 ################################# | 248 ################################# |
| 213 if args.analyze == 'object': | 249 if args.analyze == 'object': |
| 214 # user speeds, accelerations | 250 # user speeds, accelerations |
| 215 # aggregation per site | 251 # aggregation per site |
| 252 if args.eventFilename is None: | |
| 253 print('Missing output filename (event-filename). Exiting') | |
| 254 sys.exit(0) | |
| 216 data = [] # list of observation per site-user with time | 255 data = [] # list of observation per site-user with time |
| 217 headers = ['sites', 'date', 'time', 'user_type'] | 256 headers = ['site', 'date', 'time', 'user_type'] |
| 218 aggFunctions = {} | 257 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) |
| 219 for method in args.aggMethod: | 258 headers.extend(tmpheaders) |
| 220 if method == 'centile': | 259 if args.nProcesses == 1: |
| 221 aggFunctions[method] = utils.aggregationFunction(method, args.aggCentiles) | 260 for vs in videoSequences: |
| 222 for c in args.aggCentiles: | 261 data.extend(processing.extractVideoSequenceSpeeds(str(parentPath/vs.getDatabaseFilename()), vs.cameraView.site.name, args.nObjects, vs.startTime, vs.cameraView.cameraType.frameRate, args.minUserDuration, args.aggMethods, args.aggCentiles)) |
| 223 headers.append('{}{}'.format(method,c)) | 262 else: |
| 224 else: | 263 jobs = [pool.apply_async(processing.extractVideoSequenceSpeeds, args = (str(parentPath/vs.getDatabaseFilename()), vs.cameraView.site.name, args.nObjects, vs.startTime, vs.cameraView.cameraType.frameRate, args.minUserDuration, args.aggMethods, args.aggCentiles)) for vs in videoSequences] |
| 225 aggFunctions[method] = utils.aggregationFunction(method) | 264 for job in jobs: |
| 226 headers.append(method) | 265 data.extend(job.get()) |
| 227 for vs in videoSequences: | 266 pool.close() |
| 228 d = vs.startTime.date() | 267 data = pd.DataFrame(data, columns = headers) |
| 229 t1 = vs.startTime.time() | |
| 230 minUserDuration = args.minUserDuration*vs.cameraView.cameraType.frameRate | |
| 231 print('Extracting speed from '+vs.getDatabaseFilename()) | |
| 232 objects = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), 'object', args.nObjects) | |
| 233 for o in objects: | |
| 234 if o.length() > minUserDuration: | |
| 235 row = [vs.cameraView.siteIdx, d, utils.framesToTime(o.getFirstInstant(), vs.cameraView.cameraType.frameRate, t1), o.getUserType()] | |
| 236 tmp = o.getSpeeds() | |
| 237 for method,func in aggFunctions.items(): | |
| 238 aggSpeeds = vs.cameraView.cameraType.frameRate*3.6*func(tmp) | |
| 239 if method == 'centile': | |
| 240 row += aggSpeeds.tolist() | |
| 241 else: | |
| 242 row.append(aggSpeeds) | |
| 243 data.append(row) | |
| 244 data = DataFrame(data, columns = headers) | |
| 245 if args.output == 'figure': | 268 if args.output == 'figure': |
| 246 for name in headers[4:]: | 269 for name in headers[4:]: |
| 247 plt.ioff() | 270 plt.ioff() |
| 248 plt.figure() | 271 plt.figure() |
| 249 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) | 272 plt.boxplot([data.loc[data['site']==site.name, name] for site in sites], labels = [site.name for site in sites]) |
| 250 plt.ylabel(name+' Speeds (km/h)') | 273 plt.ylabel(name+' Speeds (km/h)') |
| 251 plt.savefig(name.lower()+'-speeds.png', dpi=dpi) | 274 plt.savefig(name.lower()+'-speeds.png', dpi=dpi) |
| 252 plt.close() | 275 plt.close() |
| 253 elif args.output == 'event': | 276 elif args.output == 'event': |
| 254 data.to_csv('speeds.csv', index = False) | 277 data.to_csv(args.eventFilename, index = False) |
| 255 if args.analyze == 'interaction': | 278 |
| 279 if args.analyze == 'interaction': # redo as for object, export in dataframe all interaction data | |
| 256 indicatorIds = [2,5,7,10] | 280 indicatorIds = [2,5,7,10] |
| 257 conversionFactors = {2: 1., 5: 30.*3.6, 7:1./30, 10:1./30} | 281 conversionFactors = {2: 1., 5: 30.*3.6, 7:1./30, 10:1./30} |
| 258 maxIndicatorValue = {2: float('inf'), 5: float('inf'), 7:10., 10:10.} | 282 maxIndicatorValue = {2: float('inf'), 5: float('inf'), 7:10., 10:10.} |
| 259 indicators = {} | 283 indicators = {} |
| 260 interactions = {} | 284 interactions = {} |
| 280 plt.figure() | 304 plt.figure() |
| 281 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) | 305 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) |
| 282 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') | 306 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') |
| 283 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) | 307 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) |
| 284 plt.close() | 308 plt.close() |
| 309 | |
| 310 if args.analyze == 'event': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds | |
| 311 data = pd.read_csv(args.eventFilename, parse_dates = [2]) | |
| 312 #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000) | |
| 313 # create time for end of each 15 min, then group by, using the agg method for each data column | |
| 314 headers = ['site', 'date', 'intervalend15', 'duration', 'count'] | |
| 315 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles) | |
| 316 dataColumns = list(data.columns[4:]) | |
| 317 for h in dataColumns: | |
| 318 for h2 in tmpheaders: | |
| 319 headers.append(h+'-'+h2) | |
| 320 for h in dataColumns: | |
| 321 for t in args.eventThresholds: | |
| 322 headers.append('n-{}-{}'.format(h, t)) | |
| 323 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time()) | |
| 324 outputData = [] | |
| 325 for name, group in data.groupby(['site', 'date', 'intervalend15']): | |
| 326 row = [] | |
| 327 row.extend(name) | |
| 328 groupStartTime = group.time.min() | |
| 329 groupEndTime = group.time.max() | |
| 330 row.append((groupEndTime.minute+1-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600) | |
| 331 row.append(len(group)) | |
| 332 for h in dataColumns: | |
| 333 for method,func in aggFunctions.items(): | |
| 334 aggregated = func(group[h]) | |
| 335 if method == 'centile': | |
| 336 row.extend(aggregated) | |
| 337 else: | |
| 338 row.append(aggregated) | |
| 339 for h in dataColumns: | |
| 340 for t in args.eventThresholds: | |
| 341 row.append((group[h] > t).sum()) | |
| 342 outputData.append(row) | |
| 343 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False) |
