# HG changeset patch # User Wendlasida # Date 1530916978 14400 # Node ID d13f9bfbf3ff8674f9bf6e894fbd8a7209682747 # Parent 60cc87e824c4bc553fe57b5c4d41312a1d88b3ba# Parent c9c03c97ed9f51f036e047ab09418422ed41a40c Retry diff -r 60cc87e824c4 -r d13f9bfbf3ff scripts/learn-motion-patterns.py --- a/scripts/learn-motion-patterns.py Thu Jul 05 22:24:31 2018 -0400 +++ b/scripts/learn-motion-patterns.py Fri Jul 06 18:42:58 2018 -0400 @@ -5,25 +5,26 @@ import numpy as np import matplotlib.pyplot as plt -from trafficintelligence import ml, utils, storage, moving +from trafficintelligence import ml, utils, storage, moving, processing -parser = argparse.ArgumentParser(description='''The program clusters trajectories, each cluster being represented by a trajectory. It can either work on the same dataset (database) or different ones, but only does learning or assignment at a time to avoid issues (the minimum cluster size argument is not used for now as it may change prototypes when assigning other trajectories)''') #, epilog = '' +parser = argparse.ArgumentParser(description='''The program clusters trajectories, each cluster being represented by a trajectory. It can either work on the same dataset (database) or different ones, but only does learning or assignment at a time to avoid issues''') #, epilog = '' #parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file', required = True) parser.add_argument('-o', dest = 'outputPrototypeDatabaseFilename', help = 'name of the Sqlite database file to save prototypes') parser.add_argument('-i', dest = 'inputPrototypeDatabaseFilename', help = 'name of the Sqlite database file for prototypes to start the algorithm with') -parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to learn from', choices = ['objectfeature', 'feature', 'object'], default = 'objectfeatures') -parser.add_argument('--max-nobjectfeatures', dest = 'maxNObjectFeatures', help = 'maximum number of features per object to load', type = int, default = 1) -parser.add_argument('-n', dest = 'nTrajectories', help = 'number of the object or feature trajectories to load', type = int, default = None) +parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to process', choices = ['feature', 'object'], default = 'feature') +parser.add_argument('--nfeatures-per-object', dest = 'nLongestFeaturesPerObject', help = 'maximum number of features per object to load', type = int) +parser.add_argument('-n', dest = 'nObjects', help = 'number of the object or feature trajectories to load', type = int, default = None) parser.add_argument('-e', dest = 'epsilon', help = 'distance for the similarity of trajectory points', type = float, required = True) parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True) -parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0) +#parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0) parser.add_argument('--learn', dest = 'learn', help = 'learn', action = 'store_true') parser.add_argument('--optimize', dest = 'optimizeCentroid', help = 'recompute centroid at each assignment', action = 'store_true') parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true') parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int) parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true') +parser.add_argument('--similarities-filename', dest = 'similaritiesFilename', help = 'filename of the similarities') parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true') parser.add_argument('--save-assignments', dest = 'saveAssignments', help = 'saves the assignments of the objects to the prototypes', action = 'store_true') parser.add_argument('--assign', dest = 'assign', help = 'assigns the objects to the prototypes and saves the assignments', action = 'store_true') @@ -39,62 +40,41 @@ # TODO add possibility to cluster with velocities # TODO add possibility to load all trajectories and use minclustersize -# load trajectories to cluster or assign -if args.trajectoryType == 'objectfeature': - trajectoryType = 'feature' - objectFeatureNumbers = storage.loadObjectFeatureFrameNumbers(args.databaseFilename, objectNumbers = args.nTrajectories) - featureNumbers = [] - for numbers in objectFeatureNumbers.values(): - featureNumbers += numbers[:min(len(numbers), args.maxNObjectFeatures)] - objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, 'feature', objectNumbers = featureNumbers, timeStep = args.positionSubsamplingRate) +if args.learn and args.assign: + print('Cannot learn and assign simultaneously') + sys.exit(0) + +objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, args.trajectoryType, args.nObjects, timeStep = args.positionSubsamplingRate, nLongestFeaturesPerObject = args.nLongestFeaturesPerObject) +if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None: + objectsWithFeatures = objects + objects = [f for o in objectsWithFeatures for f in o.getFeatures()] + prototypeType = 'feature' else: - trajectoryType = args.trajectoryType - objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, trajectoryType, objectNumbers = args.nTrajectories, timeStep = args.positionSubsamplingRate) - -trajectories = [o.getPositions().asArray().T for o in objects] + prototypeType = args.trajectoryType # load initial prototypes, if any if args.inputPrototypeDatabaseFilename is not None: initialPrototypes = storage.loadPrototypesFromSqlite(args.inputPrototypeDatabaseFilename, True) - trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]+trajectories - if len(initialPrototypes) > 0: - initialPrototypeIndices = list(range(len(initialPrototypes))) - else: - initialPrototypeIndices = None else: initialPrototypes = [] - initialPrototypeIndices = None lcss = utils.LCSS(metric = args.metric, epsilon = args.epsilon) -nTrajectories = len(trajectories) - -similarities = -np.ones((nTrajectories, nTrajectories)) similarityFunc = lambda x,y : lcss.computeNormalized(x, y) -# the next line can be called again without reinitializing similarities -if args.learn: - prototypeIndices = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, similarityFunc, args.optimizeCentroid, args.randomInitialization, initialPrototypeIndices) -else: - prototypeIndices = initialPrototypeIndices +nTrajectories = len(initialPrototypes)+len(objects) +if args.similaritiesFilename is not None: + similarities = np.loadtxt(args.similaritiesFilename) +if args.similaritiesFilename is None or similarities.shape[0] != nTrajectories or similarities.shape[1] != nTrajectories: + similarities = -np.ones((nTrajectories, nTrajectories)) -if args.assign: # TODO don't touch initial prototypes if not from same db as trajectories - #if not args.learn and args.minClusterSize >= 1: # allow only - # print('Warning: you did not learn the prototypes and you are using minimum cluster size of {}, which may lead to removing prototypes and assigning them to others'.format(args.minClusterSize)) - # if args.minClusterSize >= 1: - # if initialPrototypeIndices is None: - # prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc, args.minClusterSize) - # else: - # print('Not assigning with non-zero minimum cluster size and initial prototypes (would remove initial prototypes based on other trajectories') - # else: - # prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc) - assignedPrototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc) +prototypeIndices, labels = processing.learnAssignMotionPatterns(args.learn, args.assign, objects, similarities, args.minSimilarity, similarityFunc, 0, args.optimizeCentroid, args.randomInitialization, False, initialPrototypes) -if args.learn and not args.assign: +if args.learn:# and not args.assign: prototypes = [] for i in prototypeIndices: if i group videos by site (or by camera view? TODO add cameraviews) + # by default, load all objects, learn and then assign (BUT not save the assignments) + for site in sites: + print('Learning motion patterns for site {} ({})'.format(site.idx, site.name)) + objects = {} + object2VideoSequences = {} + for cv in site.cameraViews: + for vs in cv.videoSequences: + print('Loading '+vs.getDatabaseFilename()) + objects[vs.idx] = storage.loadTrajectoriesFromSqlite(str(parentPath/vs.getDatabaseFilename()), args.trajectoryType, args.nObjects, timeStep = args.positionSubsamplingRate, nLongestFeaturesPerObject = args.nLongestFeaturesPerObject) + if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None: + objectsWithFeatures = objects[vs.idx] + objects[vs.idx] = [f for o in objectsWithFeatures for f in o.getFeatures()] + prototypeType = 'feature' + else: + prototypeType = args.trajectoryType + for obj in objects[vs.idx]: + object2VideoSequences[obj] = vs + lcss = utils.LCSS(metric = args.metric, epsilon = args.epsilon) + similarityFunc = lambda x,y : lcss.computeNormalized(x, y) + trainingObjects = [o for tmpobjects in objects.values() for o in tmpobjects] + if args.nMPObjects is not None and args.nMPObjects < len(trainingObjects): + m = int(np.floor(float(len(trainingObjects))/args.nMPObjects)) + trainingObjects = trainingObjects[::m] + similarities = -np.ones((len(trainingObjects), len(trainingObjects))) + prototypeIndices, labels = processing.learnAssignMotionPatterns(True, True, trainingObjects, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, []) + if args.outputPrototypeDatabaseFilename is None: + outputPrototypeDatabaseFilename = args.databaseFilename + else: + outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename + # TODO maintain mapping from object prototype to db filename + compute nmatchings before + clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1) + storage.savePrototypesToSqlite(str(parentPath/site.getPath()/outputPrototypeDatabaseFilename), [moving.Prototype(object2VideoSequences[trainingObjects[i]].getDatabaseFilename(False), trainingObjects[i].getNum(), prototypeType, clusterSizes[i]) for i in prototypeIndices]) + elif args.process == 'interaction': # safety analysis TODO make function in safety analysis script @@ -183,10 +242,6 @@ row.append(aggSpeeds) data.append(row) data = DataFrame(data, columns = headers) - if args.siteIds is None: - siteIds = set([vs.cameraView.siteIdx for vs in videoSequences]) - else: - siteIds = set(args.siteIds) if args.output == 'figure': for name in headers[4:]: plt.ioff() diff -r 60cc87e824c4 -r d13f9bfbf3ff trafficintelligence/ml.py --- a/trafficintelligence/ml.py Thu Jul 05 22:24:31 2018 -0400 +++ b/trafficintelligence/ml.py Fri Jul 06 18:42:58 2018 -0400 @@ -150,16 +150,13 @@ code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) return code,sigma -def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0): +def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0): '''Assigns instances to prototypes if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters and reassigning all elements in the cluster until no cluster is smaller than minClusterSize labels are indices in the prototypeIndices''' - if similarityFunc is None: - print('similarityFunc is None') - return None - + prototypeIndices = copy(initialPrototypeIndices) indices = [i for i in range(len(instances)) if i not in prototypeIndices] labels = [-1]*len(instances) assign = True @@ -184,7 +181,7 @@ indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] return prototypeIndices, labels -def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): +def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): '''Finds exemplar (prototype) instance that represent each cluster Returns the prototype indices (in the instances list) @@ -205,22 +202,12 @@ if len(instances) == 0: print('no instances to cluster (empty list)') return None - if similarityFunc is None: - print('similarityFunc is None') - return None # sort instances based on length indices = list(range(len(instances))) if randomInitialization or optimizeCentroid: indices = np.random.permutation(indices).tolist() else: - def compare(i, j): - if len(instances[i]) > len(instances[j]): - return -1 - elif len(instances[i]) == len(instances[j]): - return 0 - else: - return 1 indices.sort(key=lambda i: len(instances[i])) # initialize clusters clusters = [] diff -r 60cc87e824c4 -r d13f9bfbf3ff trafficintelligence/moving.py --- a/trafficintelligence/moving.py Thu Jul 05 22:24:31 2018 -0400 +++ b/trafficintelligence/moving.py Fri Jul 06 18:42:58 2018 -0400 @@ -1368,7 +1368,7 @@ tmp = utils.sortByLength(self.getFeatures(), reverse = True) return tmp[:min(len(tmp), nFeatures)] - def getFeatureNumbers(self): + def getFeatureNumbersOverTime(self): '''Returns the number of features at each instant dict instant -> number of features''' if self.hasFeatures(): diff -r 60cc87e824c4 -r d13f9bfbf3ff trafficintelligence/processing.py --- a/trafficintelligence/processing.py Thu Jul 05 22:24:31 2018 -0400 +++ b/trafficintelligence/processing.py Fri Jul 06 18:42:58 2018 -0400 @@ -3,7 +3,7 @@ import numpy as np -from trafficintelligence import moving +from trafficintelligence import ml def extractSpeeds(objects, zone): speeds = {} @@ -17,3 +17,31 @@ else: objectsNotInZone.append(o) return speeds, objectsNotInZone + +def learnAssignMotionPatterns(learn, assign, objects, similarities, minSimilarity, similarityFunc, minClusterSize = 0, optimizeCentroid = False, randomInitialization = False, removePrototypesAfterAssignment = False, initialPrototypes = []): + '''Learns motion patterns + + During assignments, if using minClusterSize > 0, prototypes can change (be removed) + The argument removePrototypesAfterAssignment indicates whether the prototypes are removed or not''' + if len(initialPrototypes) > 0: + initialPrototypeIndices = list(range(len(initialPrototypes))) + trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes] + else: + initialPrototypeIndices = None + trajectories = [] + trajectories.extend([o.getPositions().asArray().T for o in objects]) + + if learn: + prototypeIndices = ml.prototypeCluster(trajectories, similarities, minSimilarity, similarityFunc, optimizeCentroid, randomInitialization, initialPrototypeIndices) + else: + prototypeIndices = initialPrototypeIndices + + if assign: + assignedPrototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize) + if minClusterSize > 0 and removePrototypesAfterAssignment: # use prototypeIndices anyway + prototypeIndices = assignedPrototypeIndices + else: + labels = None + + return prototypeIndices, labels + diff -r 60cc87e824c4 -r d13f9bfbf3ff trafficintelligence/run-tests.sh --- a/trafficintelligence/run-tests.sh Thu Jul 05 22:24:31 2018 -0400 +++ b/trafficintelligence/run-tests.sh Fri Jul 06 18:42:58 2018 -0400 @@ -4,7 +4,3 @@ do python3 $f done -for f in ./tests/*.py -do - python3 $f -done diff -r 60cc87e824c4 -r d13f9bfbf3ff trafficintelligence/storage.py --- a/trafficintelligence/storage.py Thu Jul 05 22:24:31 2018 -0400 +++ b/trafficintelligence/storage.py Fri Jul 06 18:42:58 2018 -0400 @@ -7,7 +7,7 @@ from copy import copy import sqlite3, logging -from numpy import log, min as npmin, max as npmax, round as npround, array, sum as npsum, loadtxt, floor as npfloor, ceil as npceil, linalg +from numpy import log, min as npmin, max as npmax, round as npround, array, sum as npsum, loadtxt, floor as npfloor, ceil as npceil, linalg, int32, int64 from pandas import read_csv, merge from trafficintelligence import utils, moving, events, indicators @@ -22,6 +22,9 @@ 'object': 'objects', 'objectfeatures': 'positions'} +sqlite3.register_adapter(int64, lambda val: int(val)) +sqlite3.register_adapter(int32, lambda val: int(val)) + ######################### # Sqlite ######################### @@ -247,24 +250,18 @@ attributes[row[0]] = row[1] return attributes -def loadTrajectoriesFromSqlite(filename, trajectoryType, objectNumbers = None, withFeatures = False, timeStep = None, maxNObjectFeatures = 1): +def loadTrajectoriesFromSqlite(filename, trajectoryType, objectNumbers = None, withFeatures = False, timeStep = None, nLongestFeaturesPerObject = None): '''Loads the trajectories (in the general sense, - either features, objects (feature groups), longest features per object, or bounding box series) + either features, objects (feature groups), longest features per object, or bounding box series) + types are only feature or object + if object, features can be loaded with withFeatures or nLongestObjectFeatures used to select the n longest features The number loaded is either the first objectNumbers objects, or the indices in objectNumbers from the database''' objects = [] with sqlite3.connect(filename) as connection: - if trajectoryType == 'objectfeature': - objectFeatureNumbers = loadObjectFeatureFrameNumbers(filename, objectNumbers) - featureNumbers = [] - for numbers in objectFeatureNumbers.values(): - featureNumbers += numbers[:min(len(numbers), maxNObjectFeatures)] - objects = loadTrajectoriesFromTable(connection, 'positions', 'feature', featureNumbers, timeStep) - objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', 'feature', featureNumbers, timeStep) - else: - objects = loadTrajectoriesFromTable(connection, 'positions', trajectoryType, objectNumbers, timeStep) - objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', trajectoryType, objectNumbers, timeStep) + objects = loadTrajectoriesFromTable(connection, 'positions', trajectoryType, objectNumbers, timeStep) + objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', trajectoryType, objectNumbers, timeStep) if len(objectVelocities) > 0: for o,v in zip(objects, objectVelocities): @@ -283,7 +280,7 @@ queryStatement += ' WHERE object_id '+getObjectCriteria(objectNumbers) queryStatement += ' ORDER BY object_id' # order is important to group all features per object logging.debug(queryStatement) - cursor.execute(queryStatement) + cursor.execute(queryStatement) featureNumbers = {} for row in cursor: @@ -303,13 +300,16 @@ obj.setUserType(userType) obj.setNObjects(nObjects) + # add features if withFeatures: - nFeatures = 0 for obj in objects: - nFeatures = max(nFeatures, max(obj.featureNumbers)) - features = loadTrajectoriesFromSqlite(filename, 'feature', nFeatures+1, timeStep = timeStep) + obj.features = loadTrajectoriesFromSqlite(filename, 'feature', obj.featureNumbers, timeStep = timeStep) + elif nLongestFeaturesPerObject is not None: for obj in objects: - obj.setFeatures(features) + queryStatement = 'SELECT trajectory_id, max(frame_number)-min(frame_number) AS length FROM positions WHERE trajectory_id '+getObjectCriteria(obj.featureNumbers)+' GROUP BY trajectory_id ORDER BY length DESC' + logging.debug(queryStatement) + cursor.execute(queryStatement) + obj.features = loadTrajectoriesFromSqlite(filename, 'feature', [row[0] for i,row in enumerate(cursor) if i