comparison scripts/process.py @ 1077:3939ae415be0

Merging
author Wendlasida
date Fri, 20 Jul 2018 14:03:34 -0400
parents 0154133e77df
children 58994b08be42
comparison
equal deleted inserted replaced
1076:108c5dc4e34a 1077:3939ae415be0
6 6
7 #import matplotlib 7 #import matplotlib
8 #matplotlib.use('Agg') 8 #matplotlib.use('Agg')
9 import matplotlib.pyplot as plt 9 import matplotlib.pyplot as plt
10 import numpy as np 10 import numpy as np
11 from pandas import DataFrame 11 import pandas as pd
12 12
13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml 13 from trafficintelligence import storage, events, prediction, cvutils, utils, moving, processing, ml
14 from trafficintelligence.metadata import * 14 from trafficintelligence.metadata import *
15 15
16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.') 16 parser = argparse.ArgumentParser(description='This program manages the processing of several files based on a description of the sites and video data in an SQLite database following the metadata module.')
17 # input 17 # input
18 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True) 18 parser.add_argument('--db', dest = 'metadataFilename', help = 'name of the metadata file', required = True)
19 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*', type = int) 19 parser.add_argument('--videos', dest = 'videoIds', help = 'indices of the video sequences', nargs = '*')
20 parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*') 20 parser.add_argument('--sites', dest = 'siteIds', help = 'indices of the video sequences', nargs = '*')
21 21
22 # main function 22 # main function
23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction']) 23 parser.add_argument('--delete', dest = 'delete', help = 'data to delete', choices = ['feature', 'object', 'classification', 'interaction'])
24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction']) 24 parser.add_argument('--process', dest = 'process', help = 'data to process', choices = ['feature', 'object', 'classification', 'prototype', 'interaction'])
25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction']) 25 parser.add_argument('--display', dest = 'display', help = 'data to display (replay over video)', choices = ['feature', 'object', 'classification', 'interaction'])
26 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction']) 26 parser.add_argument('--progress', dest = 'progress', help = 'information about the progress of processing', action = 'store_true')
27 parser.add_argument('--analyze', dest = 'analyze', help = 'data to analyze (results)', choices = ['feature', 'object', 'classification', 'interaction', 'event'])
27 28
28 # common options 29 # common options
29 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') 30 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
30 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int) 31 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int)
31 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature') 32 parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature')
58 59
59 60
60 # analysis options 61 # analysis options
61 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event']) 62 parser.add_argument('--output', dest = 'output', help = 'kind of output to produce (interval means)', choices = ['figure', 'interval', 'event'])
62 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1) 63 parser.add_argument('--min-user-duration', dest = 'minUserDuration', help = 'mininum duration we have to see the user to take into account in the analysis (s)', type = float, default = 0.1)
63 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = float, default = 15.) 64 parser.add_argument('--interval-duration', dest = 'intervalDuration', help = 'length of time interval to aggregate data (min)', type = int, default = 15)
64 parser.add_argument('--aggregation', dest = 'aggMethod', help = 'aggregation method per user/event and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median']) 65 parser.add_argument('--aggregation', dest = 'aggMethods', help = 'aggregation method per user/interaction and per interval', choices = ['mean', 'median', 'centile'], nargs = '*', default = ['median'])
65 parser.add_argument('--aggregation-centile', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int) 66 parser.add_argument('--aggregation-centiles', dest = 'aggCentiles', help = 'centile(s) to compute from the observations', nargs = '*', type = int)
67 parser.add_argument('--event-thresholds', dest = 'eventThresholds', help = 'threshold to count severe situations', nargs = '*', type = float)
68 parser.add_argument('--event-filename', dest = 'eventFilename', help = 'filename of the event data')
66 dpi = 150 69 dpi = 150
67 # unit of analysis: site or video sequence? 70 # unit of analysis: site - camera-view
68 71
69 # need way of selecting sites as similar as possible to sql alchemy syntax 72 # need way of selecting sites as similar as possible to sql alchemy syntax
70 # override tracking.cfg from db 73 # override tracking.cfg from db
71 # manage cfg files, overwrite them (or a subset of parameters) 74 # manage cfg files, overwrite them (or a subset of parameters)
72 # delete sqlite files 75 # delete sqlite files
80 session = connectDatabase(args.metadataFilename) 83 session = connectDatabase(args.metadataFilename)
81 parentPath = Path(args.metadataFilename).parent # files are relative to metadata location 84 parentPath = Path(args.metadataFilename).parent # files are relative to metadata location
82 videoSequences = [] 85 videoSequences = []
83 sites = [] 86 sites = []
84 if args.videoIds is not None: 87 if args.videoIds is not None:
85 videoSequences = [session.query(VideoSequence).get(videoId) for videoId in args.videoIds] 88 for videoId in args.videoIds:
86 siteIds = set([vs.cameraView.siteIdx for vs in videoSequences]) 89 if '-' in videoId:
90 videoSequences.extend([session.query(VideoSequence).get(i) for i in moving.TimeInterval.parse(videoId)])
91 else:
92 videoSequences.append(session.query(VideoSequence).get(int(videoId)))
93 videoSequences = [vs for vs in videoSequences if vs is not None]
94 sites = set([vs.cameraView.site for vs in videoSequences])
87 elif args.siteIds is not None: 95 elif args.siteIds is not None:
88 siteIds = set(args.siteIds) 96 for siteId in args.siteIds:
89 for siteId in siteIds: 97 if '-' in siteId:
90 tmpsites = getSite(session, siteId) 98 sites.extend([session.query(Site).get(i) for i in moving.TimeInterval.parse(siteId)])
91 sites.extend(tmpsites) 99 else:
92 for site in tmpsites: 100 sites.append(session.query(Site).get(int(siteId)))
93 for cv in site.cameraViews: 101 sites = [s for s in sites if s is not None]
94 videoSequences.extend(cv.videoSequences) 102 for site in sites:
103 videoSequences.extend(getSiteVideoSequences(site))
95 else: 104 else:
96 print('No video/site to process') 105 print('No video/site to process')
97 106
98 if args.nProcesses > 1: 107 if args.nProcesses > 1:
99 pool = Pool(args.nProcesses) 108 pool = Pool(args.nProcesses)
109
110 #################################
111 # Report progress in the processing
112 #################################
113 if args.progress:
114 print('Providing information on data progress')
115 headers = ['site', 'vs', 'features', 'objects', 'interactions'] # todo add prototypes and object classification
116 data = []
117 for site in sites:
118 unprocessedVideoSequences = []
119 for vs in getSiteVideoSequences(site):
120 if (parentPath/vs.getDatabaseFilename()).is_file(): # TODO check time of file?
121 tableNames = storage.tableNames(str(parentPath.absolute()/vs.getDatabaseFilename()))
122 data.append([site.name, vs.idx, 'positions' in tableNames, 'objects' in tableNames, 'interactions' in tableNames])
123 else:
124 unprocessedVideoSequences.append(vs)
125 data.append([site.name, vs.idx, False, False, False])
126 #if len(unprocessedVideoSequences):
127 # print('Site {} ({}) has {} completely unprocessed video sequences'.format (site.name, site.idx, len(unprocessedVideoSequences)))
128 data = pd.DataFrame(data, columns = headers)
129 print('-'*80)
130 print('\t'+' '.join(headers[2:]))
131 print('-'*80)
132 for name, group in data.groupby(['site']): #.agg({'vs': 'count'}))
133 n = group.vs.count()
134 print('{}: {} % / {} % / {} % ({})'.format(name, 100*group.features.sum()/float(n), 100*group.objects.sum()/float(n), 100*group.interactions.sum()/float(n), n))
135 print('-'*80)
136 print(data)
100 137
101 ################################# 138 #################################
102 # Delete 139 # Delete
103 ################################# 140 #################################
104 if args.delete is not None: 141 if args.delete is not None:
117 # Process 154 # Process
118 ################################# 155 #################################
119 if args.process in ['feature', 'object']: # tracking 156 if args.process in ['feature', 'object']: # tracking
120 if args.nProcesses == 1: 157 if args.nProcesses == 1:
121 for vs in videoSequences: 158 for vs in videoSequences:
122 if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object': 159 if not (parentPath/vs.getDatabaseFilename()).is_file() or args.process == 'object':
123 if args.configFilename is None: 160 if args.configFilename is None:
124 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) 161 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename())
125 else: 162 else:
126 configFilename = args.configFilename 163 configFilename = args.configFilename
127 if vs.cameraView.cameraType is None: 164 if vs.cameraView.cameraType is None:
128 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), False, None, None, args.dryRun) 165 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), False, None, None, args.dryRun)
129 else: 166 else: # caution: cameraType may be set (not None) yet still lack the parameters needed for undistortion
130 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), True, vs.cameraView.cameraType.intrinsicCameraMatrix, vs.cameraView.cameraType.distortionCoefficients, args.dryRun) 167 cvutils.tracking(configFilename, args.process == 'object', str(parentPath.absolute()/vs.getVideoSequenceFilename()), str(parentPath.absolute()/vs.getDatabaseFilename()), str(parentPath.absolute()/vs.cameraView.getHomographyFilename()), str(parentPath.absolute()/vs.cameraView.getMaskFilename()), True, vs.cameraView.cameraType.intrinsicCameraMatrix, vs.cameraView.cameraType.distortionCoefficients, args.dryRun)
131 else: 168 else:
132 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) 169 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename()))
133 else: 170 else:
134 for vs in videoSequences: 171 for vs in videoSequences:
135 if not (parentPath/vs.getDatabaseFilename()).exists() or args.process == 'object': 172 if not (parentPath/vs.getDatabaseFilename()).is_file() or args.process == 'object':
136 if args.configFilename is None: 173 if args.configFilename is None:
137 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename()) 174 configFilename = str(parentPath/vs.cameraView.getTrackingConfigurationFilename())
138 else: 175 else:
139 configFilename = args.configFilename 176 configFilename = args.configFilename
140 if vs.cameraView.cameraType is None: 177 if vs.cameraView.cameraType is None:
145 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename())) 182 print('SQLite already exists: {}'.format(parentPath/vs.getDatabaseFilename()))
146 pool.close() 183 pool.close()
147 pool.join() 184 pool.join()
148 185
149 elif args.process == 'prototype': # motion pattern learning 186 elif args.process == 'prototype': # motion pattern learning
150 # learn by site by default -> group videos by site (or by camera view? TODO add cameraviews) 187 # learn by site by default -> group videos by camera view TODO
151 # by default, load all objects, learn and then assign (BUT not save the assignments) 188 # by default, load all objects, learn and then assign (BUT not save the assignments)
152 for site in sites: 189 for site in sites:
153 print('Learning motion patterns for site {} ({})'.format(site.idx, site.name)) 190 print('Learning motion patterns for site {} ({})'.format(site.idx, site.name))
154 objects = {} 191 objects = {}
155 object2VideoSequences = {} 192 object2VideoSequences = {}
175 prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, trainingObjects, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, []) 212 prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, trainingObjects, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, [])
176 if args.outputPrototypeDatabaseFilename is None: 213 if args.outputPrototypeDatabaseFilename is None:
177 outputPrototypeDatabaseFilename = args.databaseFilename 214 outputPrototypeDatabaseFilename = args.databaseFilename
178 else: 215 else:
179 outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename 216 outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename
180 # TODO maintain mapping from object prototype to db filename + compute nmatchings before
181 clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1) 217 clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
182 storage.savePrototypesToSqlite(str(parentPath/site.getPath()/outputPrototypeDatabaseFilename), [moving.Prototype(object2VideoSequences[trainingObjects[i]].getDatabaseFilename(False), trainingObjects[i].getNum(), prototypeType, clusterSizes[i]) for i in prototypeIndices]) 218 storage.savePrototypesToSqlite(str(parentPath/site.getPath()/outputPrototypeDatabaseFilename), [moving.Prototype(object2VideoSequences[trainingObjects[i]].getDatabaseFilename(False), trainingObjects[i].getNum(), prototypeType, clusterSizes[i]) for i in prototypeIndices])
183 219
184 220
185 elif args.process == 'interaction': 221 elif args.process == 'interaction':
211 # Analyze 247 # Analyze
212 ################################# 248 #################################
213 if args.analyze == 'object': 249 if args.analyze == 'object':
214 # user speeds, accelerations 250 # user speeds, accelerations
215 # aggregation per site 251 # aggregation per site
252 if args.eventFilename is None:
253 print('Missing output filename (event-filename). Exiting')
254 sys.exit(0)
216 data = [] # list of observation per site-user with time 255 data = [] # list of observation per site-user with time
217 headers = ['sites', 'date', 'time', 'user_type'] 256 headers = ['site', 'date', 'time', 'user_type']
218 aggFunctions = {} 257 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
219 for method in args.aggMethod: 258 headers.extend(tmpheaders)
220 if method == 'centile': 259 if args.nProcesses == 1:
221 aggFunctions[method] = utils.aggregationFunction(method, args.aggCentiles) 260 for vs in videoSequences:
222 for c in args.aggCentiles: 261 data.extend(processing.extractVideoSequenceSpeeds(str(parentPath/vs.getDatabaseFilename()), vs.cameraView.site.name, args.nObjects, vs.startTime, vs.cameraView.cameraType.frameRate, args.minUserDuration, args.aggMethods, args.aggCentiles))
223 headers.append('{}{}'.format(method,c)) 262 else:
224 else: 263 jobs = [pool.apply_async(processing.extractVideoSequenceSpeeds, args = (str(parentPath/vs.getDatabaseFilename()), vs.cameraView.site.name, args.nObjects, vs.startTime, vs.cameraView.cameraType.frameRate, args.minUserDuration, args.aggMethods, args.aggCentiles)) for vs in videoSequences]
225 aggFunctions[method] = utils.aggregationFunction(method) 264 for job in jobs:
226 headers.append(method) 265 data.extend(job.get())
227 for vs in videoSequences: 266 pool.close()
228 d = vs.startTime.date() 267 data = pd.DataFrame(data, columns = headers)
229 t1 = vs.startTime.time()
230 minUserDuration = args.minUserDuration*vs.cameraView.cameraType.frameRate
231 print('Extracting speed from '+vs.getDatabaseFilename())
232 objects = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), 'object', args.nObjects)
233 for o in objects:
234 if o.length() > minUserDuration:
235 row = [vs.cameraView.siteIdx, d, utils.framesToTime(o.getFirstInstant(), vs.cameraView.cameraType.frameRate, t1), o.getUserType()]
236 tmp = o.getSpeeds()
237 for method,func in aggFunctions.items():
238 aggSpeeds = vs.cameraView.cameraType.frameRate*3.6*func(tmp)
239 if method == 'centile':
240 row += aggSpeeds.tolist()
241 else:
242 row.append(aggSpeeds)
243 data.append(row)
244 data = DataFrame(data, columns = headers)
245 if args.output == 'figure': 268 if args.output == 'figure':
246 for name in headers[4:]: 269 for name in headers[4:]:
247 plt.ioff() 270 plt.ioff()
248 plt.figure() 271 plt.figure()
249 plt.boxplot([data.loc[data['sites']==siteId, name] for siteId in siteIds], labels = [session.query(Site).get(siteId).name for siteId in siteIds]) 272 plt.boxplot([data.loc[data['site']==site.name, name] for site in sites], labels = [site.name for site in sites])
250 plt.ylabel(name+' Speeds (km/h)') 273 plt.ylabel(name+' Speeds (km/h)')
251 plt.savefig(name.lower()+'-speeds.png', dpi=dpi) 274 plt.savefig(name.lower()+'-speeds.png', dpi=dpi)
252 plt.close() 275 plt.close()
253 elif args.output == 'event': 276 elif args.output == 'event':
254 data.to_csv('speeds.csv', index = False) 277 data.to_csv(args.eventFilename, index = False)
255 if args.analyze == 'interaction': 278
279 if args.analyze == 'interaction': # redo as for object, export in dataframe all interaction data
256 indicatorIds = [2,5,7,10] 280 indicatorIds = [2,5,7,10]
257 conversionFactors = {2: 1., 5: 30.*3.6, 7:1./30, 10:1./30} 281 conversionFactors = {2: 1., 5: 30.*3.6, 7:1./30, 10:1./30}
258 maxIndicatorValue = {2: float('inf'), 5: float('inf'), 7:10., 10:10.} 282 maxIndicatorValue = {2: float('inf'), 5: float('inf'), 7:10., 10:10.}
259 indicators = {} 283 indicators = {}
260 interactions = {} 284 interactions = {}
280 plt.figure() 304 plt.figure()
281 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators]) 305 plt.boxplot(tmp, labels = [session.query(Site).get(siteId).name for siteId in indicators])
282 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')') 306 plt.ylabel(events.Interaction.indicatorNames[i]+' ('+events.Interaction.indicatorUnits[i]+')')
283 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150) 307 plt.savefig(events.Interaction.indicatorNames[i]+'.png', dpi=150)
284 plt.close() 308 plt.close()
309
310 if args.analyze == 'event': # aggregate event data by 15 min interval (args.intervalDuration), count events with thresholds
311 data = pd.read_csv(args.eventFilename, parse_dates = [2])
312 #data = pd.read_csv('./speeds.csv', converters = {'time': lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()}, nrows = 5000)
313 # create time for end of each 15 min, then group by, using the agg method for each data column
314 headers = ['site', 'date', 'intervalend15', 'duration', 'count']
315 aggFunctions, tmpheaders = utils.aggregationMethods(args.aggMethods, args.aggCentiles)
316 dataColumns = list(data.columns[4:])
317 for h in dataColumns:
318 for h2 in tmpheaders:
319 headers.append(h+'-'+h2)
320 for h in dataColumns:
321 for t in args.eventThresholds:
322 headers.append('n-{}-{}'.format(h, t))
323 data['intervalend15'] = data.time.apply(lambda t: (pd.Timestamp(year = t.year, month = t.month, day = t.day,hour = t.hour, minute = (t.minute // args.intervalDuration)*args.intervalDuration)+pd.Timedelta(minutes = 15)).time())
324 outputData = []
325 for name, group in data.groupby(['site', 'date', 'intervalend15']):
326 row = []
327 row.extend(name)
328 groupStartTime = group.time.min()
329 groupEndTime = group.time.max()
330 row.append((groupEndTime.minute+1-groupStartTime.minute) % 60)#(name[2].minute*60+name[2].second-groupStartTime.minute*60+groupStartTime.second) % 3600)
331 row.append(len(group))
332 for h in dataColumns:
333 for method,func in aggFunctions.items():
334 aggregated = func(group[h])
335 if method == 'centile':
336 row.extend(aggregated)
337 else:
338 row.append(aggregated)
339 for h in dataColumns:
340 for t in args.eventThresholds:
341 row.append((group[h] > t).sum())
342 outputData.append(row)
343 pd.DataFrame(outputData, columns = headers).to_csv(utils.removeExtension(args.eventFilename)+'-aggregated.csv', index = False)