# HG changeset patch
# User Nicolas Saunier <nicolas.saunier@polymtl.ca>
# Date 1530824800 14400
# Node ID 75a6ad604cc5a5372cad5b849bf14c61f27eea77
# Parent  b735895c8815adfa0d58e30c55c8af059595dec6
work on motion patterns

diff -r b735895c8815 -r 75a6ad604cc5 scripts/learn-motion-patterns.py
--- a/scripts/learn-motion-patterns.py	Wed Jul 04 17:39:39 2018 -0400
+++ b/scripts/learn-motion-patterns.py	Thu Jul 05 17:06:40 2018 -0400
@@ -5,20 +5,20 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from trafficintelligence import ml, utils, storage, moving
+from trafficintelligence import ml, utils, storage, moving, processing
 
-parser = argparse.ArgumentParser(description='''The program clusters trajectories, each cluster being represented by a trajectory. It can either work on the same dataset (database) or different ones, but only does learning or assignment at a time to avoid issues (the minimum cluster size argument is not used for now as it may change prototypes when assigning other trajectories)''') #, epilog = ''
+parser = argparse.ArgumentParser(description='''The program clusters trajectories, each cluster being represented by a trajectory. It can either work on the same dataset (database) or different ones, but only does learning or assignment at a time to avoid issues''') #, epilog = ''
 #parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file', required = True)
 parser.add_argument('-o', dest = 'outputPrototypeDatabaseFilename', help = 'name of the Sqlite database file to save prototypes')
 parser.add_argument('-i', dest = 'inputPrototypeDatabaseFilename', help = 'name of the Sqlite database file for prototypes to start the algorithm with')
-parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to learn from', choices = ['objectfeature', 'feature', 'object'], default = 'objectfeature')
-parser.add_argument('--max-nobjectfeatures', dest = 'maxNObjectFeatures', help = 'maximum number of features per object to load', type = int, default = 1)
+parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to process', choices = ['feature', 'object'], default = 'feature')
+parser.add_argument('--nfeatures-per-object', dest = 'nLongestFeaturesPerObject', help = 'maximum number of features per object to load', type = int)
 parser.add_argument('-n', dest = 'nTrajectories', help = 'number of the object or feature trajectories to load', type = int, default = None)
 parser.add_argument('-e', dest = 'epsilon', help = 'distance for the similarity of trajectory points', type = float, required = True)
 parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance
 parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True)
-parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0)
+#parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = 0)
 parser.add_argument('--learn', dest = 'learn', help = 'learn', action = 'store_true')
 parser.add_argument('--optimize', dest = 'optimizeCentroid', help = 'recompute centroid at each assignment', action = 'store_true')
 parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
@@ -40,57 +40,41 @@
 # TODO add possibility to cluster with velocities
 # TODO add possibility to load all trajectories and use minclustersize
 
-# load trajectories to cluster or assign
-objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, args.trajectoryType, args.nTrajectories, timeStep = args.positionSubsamplingRate)
-trajectories = [o.getPositions().asArray().T for o in objects]
+if args.learn and args.assign: # the two modes are mutually exclusive in this script
+    print('Cannot learn and assign simultaneously')
+    sys.exit(1) # non-zero exit status so that calling scripts can detect the usage error
+
+objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, args.trajectoryType, args.nTrajectories, timeStep = args.positionSubsamplingRate, nLongestFeaturesPerObject = args.nLongestFeaturesPerObject)
+if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None:
+    objectsWithFeatures = objects
+    objects = [f for o in objectsWithFeatures for f in o.getFeatures()]
+    prototypeType = 'feature'
+else:
+    prototypeType = args.trajectoryType
 
 # load initial prototypes, if any    
 if args.inputPrototypeDatabaseFilename is not None:
     initialPrototypes = storage.loadPrototypesFromSqlite(args.inputPrototypeDatabaseFilename, True)
-    trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]+trajectories
-    if len(initialPrototypes) > 0:
-        initialPrototypeIndices = list(range(len(initialPrototypes)))
-    else:
-        initialPrototypeIndices = None
 else:
     initialPrototypes = []
-    initialPrototypeIndices = None
 
 lcss = utils.LCSS(metric = args.metric, epsilon = args.epsilon)
+similarityFunc = lambda x,y : lcss.computeNormalized(x, y)
+nTrajectories = len(initialPrototypes)+len(objects)
 if args.similaritiesFilename is not None:
     similarities = np.loadtxt(args.similaritiesFilename)
-if args.similaritiesFilename is None or similarities.shape[0] != len(trajectories) or similarities.shape[1] != len(trajectories):
-    similarities = -np.ones((len(trajectories), len(trajectories)))
-similarityFunc = lambda x,y : lcss.computeNormalized(x, y)
-# the next line can be called again without reinitializing similarities
-if args.learn:
-    prototypeIndices = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, similarityFunc, args.optimizeCentroid, args.randomInitialization, initialPrototypeIndices)
-else:
-    prototypeIndices = initialPrototypeIndices
+if args.similaritiesFilename is None or similarities.shape[0] != nTrajectories or similarities.shape[1] != nTrajectories:
+    similarities = -np.ones((nTrajectories, nTrajectories))
 
-if args.assign: # TODO don't touch initial prototypes if not from same db as trajectories
-    #if not args.learn and args.minClusterSize >= 1: # allow only 
-    #   print('Warning: you did not learn the prototypes and you are using minimum cluster size of {}, which may lead to removing prototypes and assigning them to others'.format(args.minClusterSize))
-    # if args.minClusterSize >= 1:
-    #     if initialPrototypeIndices is None:
-    #         prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc, args.minClusterSize)
-    #     else:
-    #         print('Not assigning with non-zero minimum cluster size and initial prototypes (would remove initial prototypes based on other trajectories')
-    # else:
-    #     prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc)
-    assignedPrototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc)
+prototypeIndices, labels = processing.learnAssignMotionPatterns(args.learn, args.assign, objects, similarities, args.minSimilarity, similarityFunc, 0, args.optimizeCentroid, args.randomInitialization, False, initialPrototypes)
 
-if args.learn and not args.assign:
+if args.learn:# and not args.assign:
     prototypes = []
-    if args.trajectoryType == 'objectfeature':
-        trajectoryType = 'feature'
-    else:
-        trajectoryType = args.trajectoryType
     for i in prototypeIndices:
         if i<len(initialPrototypes):
             prototypes.append(initialPrototypes[i])
         else:
-            prototypes.append(moving.Prototype(args.databaseFilename, objects[i-len(initialPrototypes)].getNum(), trajectoryType))
+            prototypes.append(moving.Prototype(args.databaseFilename, objects[i-len(initialPrototypes)].getNum(), prototypeType))
 
     if args.outputPrototypeDatabaseFilename is None:
         outputPrototypeDatabaseFilename = args.databaseFilename
@@ -106,10 +90,10 @@
         plt.axis('equal')
         plt.show()
 
-if not args.learn and args.assign: # no modification to prototypes, can work with initialPrototypes
+if args.assign: # not args.learn and  no modification to prototypes, can work with initialPrototypes
     clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
     for i in prototypeIndices:
-        nMatchings = clusterSizes[i]-1
+        nMatchings = clusterSizes[i]-1 # external prototypes
         if initialPrototypes[i].nMatchings is None:
             initialPrototypes[i].nMatchings = nMatchings
         else:
@@ -120,16 +104,22 @@
         outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename
     storage.setPrototypeMatchingsInSqlite(outputPrototypeDatabaseFilename, initialPrototypes)
     if args.saveAssignments:
-        if args.trajectoryType == 'objectfeature': # consider that the object is assigned through its longest features
+        if args.trajectoryType == 'object' and args.nLongestFeaturesPerObject is not None:
+            # consider that the object is assigned through its longest features
+            # issues are inconsistencies in the number of matchings per prototype and display (will display features, not objects)
             objectNumbers = []
             objectLabels = []
-            for objNum, objFeatureNumbers in objectFeatureNumbers.items():
+            i = 0
+            for obj in objectsWithFeatures:
                 objLabels = []
-                for i, o in enumerate(objects):
-                    if o.getNum() in objFeatureNumbers:
+                for f in obj.getFeatures():
+                    if f == objects[i]:
                         objLabels.append(labels[i+len(initialPrototypes)])
+                        i += 1
+                    else:
+                        print('Issue with obj {} and feature {} (trajectory {})'.format(obj.getNum(), f.getNum(), i))
                 objectLabels.append(utils.mostCommon(objLabels))
-                objectNumbers.append(objNum)
+                objectNumbers.append(obj.getNum())
             storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, objectNumbers, 'object', objectLabels, initialPrototypes)
         else:
             storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, [obj.getNum() for obj in objects], args.trajectoryType, labels[len(initialPrototypes):], initialPrototypes)
diff -r b735895c8815 -r 75a6ad604cc5 scripts/process.py
--- a/scripts/process.py	Wed Jul 04 17:39:39 2018 -0400
+++ b/scripts/process.py	Thu Jul 05 17:06:40 2018 -0400
@@ -28,7 +28,7 @@
 # common options
 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file')
 parser.add_argument('-n', dest = 'nObjects', help = 'number of objects/interactions to process', type = int)
-parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['objectfeature', 'feature', 'object'], default = 'objectfeature')
+parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories', choices = ['feature', 'object'], default = 'feature')
 parser.add_argument('--dry', dest = 'dryRun', help = 'dry run of processing', action = 'store_true')
 parser.add_argument('--nthreads', dest = 'nProcesses', help = 'number of processes to run in parallel', type = int, default = 1)
 parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int)
diff -r b735895c8815 -r 75a6ad604cc5 trafficintelligence/ml.py
--- a/trafficintelligence/ml.py	Wed Jul 04 17:39:39 2018 -0400
+++ b/trafficintelligence/ml.py	Thu Jul 05 17:06:40 2018 -0400
@@ -150,16 +150,13 @@
     code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
     return code,sigma
 
-def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
+def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0):
     '''Assigns instances to prototypes 
     if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
     and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
 
     labels are indices in the prototypeIndices'''
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
-
+    prototypeIndices = copy(initialPrototypeIndices)
     indices = [i for i in range(len(instances)) if i not in prototypeIndices]
     labels = [-1]*len(instances)
     assign = True
@@ -184,7 +181,7 @@
             indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
     return prototypeIndices, labels
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list)
 
@@ -205,22 +202,12 @@
     if len(instances) == 0:
         print('no instances to cluster (empty list)')
         return None
-    if similarityFunc is None:
-        print('similarityFunc is None')
-        return None
 
     # sort instances based on length
     indices = list(range(len(instances)))
     if randomInitialization or optimizeCentroid:
         indices = np.random.permutation(indices).tolist()
     else:
-        def compare(i, j):
-            if len(instances[i]) > len(instances[j]):
-                return -1
-            elif len(instances[i]) == len(instances[j]):
-                return 0
-            else:
-                return 1
         indices.sort(key=lambda i: len(instances[i]))
     # initialize clusters
     clusters = []
diff -r b735895c8815 -r 75a6ad604cc5 trafficintelligence/moving.py
--- a/trafficintelligence/moving.py	Wed Jul 04 17:39:39 2018 -0400
+++ b/trafficintelligence/moving.py	Thu Jul 05 17:06:40 2018 -0400
@@ -1368,7 +1368,7 @@
             tmp = utils.sortByLength(self.getFeatures(), reverse = True)
             return tmp[:min(len(tmp), nFeatures)]                                        
         
-    def getFeatureNumbers(self):
+    def getFeatureNumbersOverTime(self):
         '''Returns the number of features at each instant
         dict instant -> number of features'''
         if self.hasFeatures():
diff -r b735895c8815 -r 75a6ad604cc5 trafficintelligence/processing.py
--- a/trafficintelligence/processing.py	Wed Jul 04 17:39:39 2018 -0400
+++ b/trafficintelligence/processing.py	Thu Jul 05 17:06:40 2018 -0400
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from trafficintelligence import moving
+from trafficintelligence import ml
 
 def extractSpeeds(objects, zone):
     speeds = {}
@@ -17,3 +17,31 @@
         else:
             objectsNotInZone.append(o)
     return speeds, objectsNotInZone
+
+def learnAssignMotionPatterns(learn, assign, objects, similarities, minSimilarity, similarityFunc, minClusterSize = 0, optimizeCentroid = False, randomInitialization = False, removePrototypesAfterAssignment = False, initialPrototypes = None):
+    '''Learns prototypes (if learn) and/or assigns the trajectories to prototypes (if assign)
+
+    The clustered instances are the trajectories of initialPrototypes followed by the positions of objects:
+    returned indices smaller than len(initialPrototypes) therefore refer to the initial prototypes
+    During assignments, if using minClusterSize > 0, prototypes can change (be removed)
+    The argument removePrototypesAfterAssignment indicates whether the prototypes are removed or not
+
+    Returns (prototypeIndices, labels): labels is None if assign is False
+    prototypeIndices can be None (eg learn is False and there are no initialPrototypes)'''
+    if initialPrototypes is None: # None replaces a mutable default list that would be shared between calls
+        initialPrototypes = []
+    if len(initialPrototypes) > 0:
+        initialPrototypeIndices = list(range(len(initialPrototypes)))
+    else:
+        initialPrototypeIndices = None
+    trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]
+    trajectories.extend([o.getPositions().asArray().T for o in objects])
+
+    prototypeIndices = ml.prototypeCluster(trajectories, similarities, minSimilarity, similarityFunc, optimizeCentroid, randomInitialization, initialPrototypeIndices) if learn else initialPrototypeIndices
+    labels = None
+    if assign:
+        assignedPrototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
+        if minClusterSize > 0 and removePrototypesAfterAssignment: # otherwise the prototypes known before assignment are returned
+            prototypeIndices = assignedPrototypeIndices
+    return prototypeIndices, labels
+    
diff -r b735895c8815 -r 75a6ad604cc5 trafficintelligence/run-tests.sh
--- a/trafficintelligence/run-tests.sh	Wed Jul 04 17:39:39 2018 -0400
+++ b/trafficintelligence/run-tests.sh	Thu Jul 05 17:06:40 2018 -0400
@@ -4,7 +4,3 @@
 do
     python3 $f
 done
-for f in ./tests/*.py
-do
-    python3 $f
-done
diff -r b735895c8815 -r 75a6ad604cc5 trafficintelligence/storage.py
--- a/trafficintelligence/storage.py	Wed Jul 04 17:39:39 2018 -0400
+++ b/trafficintelligence/storage.py	Thu Jul 05 17:06:40 2018 -0400
@@ -247,24 +247,18 @@
             attributes[row[0]] = row[1]
     return attributes
 
-def loadTrajectoriesFromSqlite(filename, trajectoryType, objectNumbers = None, withFeatures = False, timeStep = None, maxNObjectFeatures = 1):
+def loadTrajectoriesFromSqlite(filename, trajectoryType, objectNumbers = None, withFeatures = False, timeStep = None, nLongestFeaturesPerObject = None):
     '''Loads the trajectories (in the general sense, 
-    either features, objects (feature groups), longest features per object, or bounding box series) 
+    either features, objects (feature groups), longest features per object, or bounding box series)
+    types are only feature or object
+    if object, features can be loaded either all (withFeatures) or only the n longest ones per object (nLongestFeaturesPerObject)
 
     The number loaded is either the first objectNumbers objects,
     or the indices in objectNumbers from the database'''
     objects = []
     with sqlite3.connect(filename) as connection:
-        if trajectoryType == 'objectfeature':
-            objectFeatureNumbers = loadObjectFeatureFrameNumbers(filename, objectNumbers)
-            featureNumbers = []
-            for numbers in objectFeatureNumbers.values():
-                featureNumbers += numbers[:min(len(numbers), maxNObjectFeatures)]
-            objects = loadTrajectoriesFromTable(connection, 'positions', 'feature', featureNumbers, timeStep)
-            objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', 'feature', featureNumbers, timeStep)
-        else:
-            objects = loadTrajectoriesFromTable(connection, 'positions', trajectoryType, objectNumbers, timeStep)
-            objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', trajectoryType, objectNumbers, timeStep)
+        objects = loadTrajectoriesFromTable(connection, 'positions', trajectoryType, objectNumbers, timeStep)
+        objectVelocities = loadTrajectoriesFromTable(connection, 'velocities', trajectoryType, objectNumbers, timeStep)
 
         if len(objectVelocities) > 0:
             for o,v in zip(objects, objectVelocities):
@@ -283,7 +277,7 @@
                     queryStatement += ' WHERE object_id '+getObjectCriteria(objectNumbers)
                 queryStatement += ' ORDER BY object_id' # order is important to group all features per object
                 logging.debug(queryStatement)
-                cursor.execute(queryStatement) 
+                cursor.execute(queryStatement)
 
                 featureNumbers = {}
                 for row in cursor:
@@ -303,13 +297,16 @@
                     obj.setUserType(userType)
                     obj.setNObjects(nObjects)
 
+                # add features
                 if withFeatures:
-                    nFeatures = 0
                     for obj in objects:
-                        nFeatures = max(nFeatures, max(obj.featureNumbers))
-                    features = loadTrajectoriesFromSqlite(filename, 'feature', nFeatures+1, timeStep = timeStep)
+                        obj.features = loadTrajectoriesFromSqlite(filename, 'feature', obj.featureNumbers, timeStep = timeStep)
+                elif nLongestFeaturesPerObject is not None:
                     for obj in objects:
-                        obj.setFeatures(features)
+                        queryStatement = 'SELECT trajectory_id, max(frame_number)-min(frame_number) AS length FROM positions WHERE trajectory_id '+getObjectCriteria(obj.featureNumbers)+' GROUP BY trajectory_id ORDER BY length DESC'
+                        logging.debug(queryStatement)
+                        cursor.execute(queryStatement)
+                        obj.features = loadTrajectoriesFromSqlite(filename, 'feature', [row[0] for i,row in enumerate(cursor) if i<nLongestFeaturesPerObject], timeStep = timeStep)
 
             except sqlite3.OperationalError as error:
                 printDBError(error)
@@ -338,12 +335,6 @@
             printDBError(error)
             return None
 
-def loadObjectTrajectoriesFromSqlite():
-    '''Loads object trajectories 
-    either simply objects or features (defaults to loadTrajectoriesFromSqlite) 
-    or the longest features for each object '''
-        
-
 def addCurvilinearTrajectoriesFromSqlite(filename, objects):
     '''Adds curvilinear positions (s_coordinate, y_coordinate, lane)
     from a database to an existing MovingObject dict (indexed by each objects's num)'''
diff -r b735895c8815 -r 75a6ad604cc5 trafficintelligence/tests/tutorials.py
--- a/trafficintelligence/tests/tutorials.py	Wed Jul 04 17:39:39 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-import unittest
-
-class TestNGSIM(unittest.TestCase):
-    'Tutorial example for NGSIM data'
-
-    def test_ex1(self):
-        from trafficintelligence import storage
-        objects = storage.loadTrajectoriesFromNgsimFile('../samples/trajectories-0400-0415.txt',100)
-        for o in objects: o.plot()
-
-class TestTrajectoryLoading(unittest.TestCase):
-    'Tutorial example for NGSIM data'
-
-    def test_ex1(self):
-        from trafficintelligence import storage
-        objects = storage.loadTrajectoriesFromSqlite('../samples/laurier.sqlite', 'object')
-
-        speed = objects[0].getVelocityAtInstant(10).norm2()
-        timeInterval = objects[0].getTimeInterval()
-        speeds = [objects[0].getVelocityAtInstant(t).norm2() for t in range(timeInterval.first, timeInterval.last)]
-        speeds = [v.norm2() for v in objects[0].getVelocities()]
-
-        from matplotlib.pyplot import plot, close, axis
-        plot(range(timeInterval.first, timeInterval.last+1), speeds)
-
-        close('all')
-        objects[0].plot()
-        axis('equal')
-
-        features = storage.loadTrajectoriesFromSqlite('../samples/laurier.sqlite', 'feature')
-        objects[0].setFeatures(features)
-
-        for f in objects[0].features:
-            f.plot()
-        axis('equal')
-
-
-if __name__ == '__main__':
-    unittest.main()