Mercurial > hg > nsaunier > traffic-intelligence
comparison scripts/dltrack.py @ 1246:2397de73770d
dltrack saves after projecting coordinates
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Fri, 09 Feb 2024 17:47:33 -0500 |
| parents | 371c718e57d7 |
| children | 439207b6c146 |
comparison — legend: equal, deleted, inserted, replaced
| 1245:371c718e57d7 | 1246:2397de73770d |
|---|---|
| 1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
| 2 # from https://docs.ultralytics.com/modes/track/ | 2 # from https://docs.ultralytics.com/modes/track/ |
| 3 import sys, argparse | 3 import sys, argparse |
| | 4 from math import inf |
| 4 from copy import copy | 5 from copy import copy |
| 5 from collections import Counter | 6 from collections import Counter |
| 6 import numpy as np | 7 import numpy as np |
| 7 from scipy.optimize import linear_sum_assignment | 8 from scipy.optimize import linear_sum_assignment |
| 8 from ultralytics import YOLO | 9 from ultralytics import YOLO |
| 10 from torchvision.ops import box_iou | 11 from torchvision.ops import box_iou |
| 11 import cv2 | 12 import cv2 |
| 12 | 13 |
| 13 from trafficintelligence import cvutils, moving, storage, utils | 14 from trafficintelligence import cvutils, moving, storage, utils |
| 14 | 15 |
| 15 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trakcers.') | 16 parser = argparse.ArgumentParser(description='The program tracks objects using the ultralytics models and trackers.', |
| | 17 epilog= '''The models can be found in the Ultralytics model zoo, |
| | 18 eg YOLOv8 (https://docs.ultralytics.com/models/yolov8/). |
| | 19 The tracking models can be found also online |
| | 20 (https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers). |
| | 21 The choice is to project the middle of the bottom line for persons, |
| | 22 and the bounding box center otherwise.''') |
| 16 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') | 23 parser.add_argument('--cfg', dest = 'configFilename', help = 'name of the configuration file') |
| 17 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') | 24 parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file (overrides the configuration file)') |
| 18 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') | 25 parser.add_argument('-i', dest = 'videoFilename', help = 'name of the video file (overrides the configuration file)') |
| 19 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) | 26 parser.add_argument('-m', dest = 'detectorFilename', help = 'name of the detection model file', required = True) |
| 20 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) | 27 parser.add_argument('-t', dest = 'trackerFilename', help = 'name of the tracker file', required = True) |
| 21 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix', default = 'homography.txt') | 28 parser.add_argument('-o', dest = 'homographyFilename', help = 'filename of the homography matrix') |
| 22 parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') | 29 #parser.add_argument('-k', dest = 'maskFilename', help = 'name of the mask file') |
| 23 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') | 30 parser.add_argument('--undistort', dest = 'undistort', help = 'undistort the video', action = 'store_true') |
| 24 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') | 31 parser.add_argument('--intrinsic', dest = 'intrinsicCameraMatrixFilename', help = 'name of the intrinsic camera file') |
| 25 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) | 32 parser.add_argument('--distortion-coefficients', dest = 'distortionCoefficients', help = 'distortion coefficients', nargs = '*', type = float) |
| 26 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') | 33 parser.add_argument('--display', dest = 'display', help = 'show the raw detection and tracking results', action = 'store_true') |
| 27 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') | 34 parser.add_argument('--no-image-coordinates', dest = 'notSavingImageCoordinates', help = 'not saving the raw detection and tracking results', action = 'store_true') |
| 28 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) | 35 parser.add_argument('-f', dest = 'firstFrameNum', help = 'number of first frame number to process', type = int, default = 0) |
| 29 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) | 36 parser.add_argument('-l', dest = 'lastFrameNum', help = 'number of last frame number to process', type = int, default = float('Inf')) |
| 30 parser.add_argument('--conf', dest = 'confindence', help = 'object confidence threshold for detection', type = float, default = 0.25) | 37 parser.add_argument('--conf', dest = 'confidence', help = 'object confidence threshold for detection', type = float, default = 0.25) |
| 31 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) | 38 parser.add_argument('--bike-prop', dest = 'bikeProportion', help = 'minimum proportion of time a person classified as bike or motorbike to be classified as cyclist', type = float, default = 0.2) |
| 32 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) | 39 parser.add_argument('--cyclist-iou', dest = 'cyclistIou', help = 'IoU threshold to associate a bike and ped bounding box', type = float, default = 0.15) |
| 33 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) | 40 parser.add_argument('--cyclist-match-prop', dest = 'cyclistMatchingProportion', help = 'minimum proportion of time a bike exists and is associated with a pedestrian to be merged as cyclist', type = float, default = 0.3) |
| 34 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) | 41 parser.add_argument('--max-temp-overal', dest = 'maxTemporalOverlap', help = 'maximum proportion of time to merge 2 bikes associated with same pedestrian', type = float, default = 0.05) |
| 35 | 42 |
| 36 args = parser.parse_args() | 43 args = parser.parse_args() |
| 37 params, videoFilename, databaseFilename, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) | 44 params, videoFilename, databaseFilename, homography, invHomography, intrinsicCameraMatrix, distortionCoefficients, undistortedImageMultiplication, undistort, firstFrameNum = storage.processVideoArguments(args) |
| 38 | 45 |
| | 46 if args.homographyFilename is not None: |
| | 47 homography = np.loadtxt(args.homographyFilename) |
| 39 if args.intrinsicCameraMatrixFilename is not None: | 48 if args.intrinsicCameraMatrixFilename is not None: |
| 40 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) | 49 intrinsicCameraMatrix = loadtxt(args.intrinsicCameraMatrixFilename) |
| 41 if args.distortionCoefficients is not None: | 50 if args.distortionCoefficients is not None: |
| 42 distortionCoefficients = args.distortionCoefficients | 51 distortionCoefficients = args.distortionCoefficients |
| 43 if args.firstFrameNum is not None: | 52 if args.firstFrameNum is not None: |
| 44 firstFrameNum = args.firstFrameNum | 53 firstFrameNum = args.firstFrameNum |
| 45 if args.lastFrameNum is not None: | 54 if args.lastFrameNum is not None: |
| 46 lastFrameNum = args.lastFrameNum | 55 lastFrameNum = args.lastFrameNum |
| 47 | 56 elif args.configFilename is not None: |
| 48 # TODO add option to refine position with mask for vehicles | 57 lastFrameNum = params.lastFrameNum |
| | 58 else: |
| | 59 lastFrameNum = inf |
| | 60 |
| | 61 # TODO use mask |
| | 62 # TODO add option to refine position with mask for vehicles, to save different positions |
| 49 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html | 63 # TODO work with optical flow (farneback or RAFT) https://pytorch.org/vision/main/models/raft.html |
| 50 | 64 |
| 51 # use 2 x bytetrack track buffer to remove objects from existing ones | 65 # use 2 x bytetrack track buffer to remove objects from existing ones |
| 52 | 66 |
| 53 # Load a model | 67 # Load a model |
| 54 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt | 68 model = YOLO(args.detectorFilename) # seg yolov8x-seg.pt |
| 55 # seg could be used on cropped image... if can be loaded and kept in memory | 69 # seg could be used on cropped image... if can be loaded and kept in memory |
| 56 # model = YOLO('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' | 70 # model = YOLOX('/home/nicolas/Research/Data/classification-models/yolo_nas_l.pt ') # AttributeError: 'YoloNAS_L' object has no attribute 'get' |
| 57 | 71 |
| 58 # Track with the model | 72 # Track with the model |
| 59 if args.display: | 73 if args.display: |
| 60 windowName = 'frame' | 74 windowName = 'frame' |
| 61 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) | 75 cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) |
| 62 | 76 |
| 63 capture = cv2.VideoCapture(args.videoFilename) | 77 capture = cv2.VideoCapture(videoFilename) |
| 64 objects = {} | 78 objects = {} |
| 65 featureNum = 1 | 79 featureNum = 1 |
| 66 frameNum = args.firstFrameNum | 80 frameNum = firstFrameNum |
| 67 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) | 81 capture.set(cv2.CAP_PROP_POS_FRAMES, frameNum) |
| 68 lastFrameNum = args.lastFrameNum | |
| 69 | 82 |
| 70 success, frame = capture.read() | 83 success, frame = capture.read() |
| 71 if not success: | 84 if not success: |
| 72 print('Input {} could not be read. Exiting'.format(args.videoFilename)) | 85 print('Input {} could not be read. Exiting'.format(args.videoFilename)) |
| 73 import sys; sys.exit() | 86 import sys; sys.exit() |
| 74 | 87 |
| 75 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), persist=True, verbose=False) | 88 results = model.track(frame, tracker=args.trackerFilename, classes=list(moving.cocoTypeNames.keys()), conf = args.confidence, persist=True, verbose=False) |
| 76 while capture.isOpened() and success and frameNum <= lastFrameNum: | 89 while capture.isOpened() and success and frameNum <= lastFrameNum: |
| 77 result = results[0] | 90 result = results[0] |
| 78 if frameNum %10 == 0: | 91 if frameNum %10 == 0: |
| 79 print(frameNum, len(result.boxes), 'objects') | 92 print(frameNum, len(result.boxes), 'objects') |
| 80 for box in result.boxes: | 93 for box in result.boxes: |
| 81 #print(box.cls, box.id, box.xyxy) | |
| 82 if box.id is not None: # None are objects with low confidence | 94 if box.id is not None: # None are objects with low confidence |
| 83 num = int(box.id.item()) | 95 num = int(box.id.item()) |
| 84 #xyxy = box.xyxy[0].tolist() | |
| 85 if num in objects: | 96 if num in objects: |
| 86 objects[num].timeInterval.last = frameNum | 97 objects[num].timeInterval.last = frameNum |
| 87 objects[num].features[0].timeInterval.last = frameNum | 98 objects[num].features[0].timeInterval.last = frameNum |
| 88 objects[num].features[1].timeInterval.last = frameNum | 99 objects[num].features[1].timeInterval.last = frameNum |
| 89 objects[num].bboxes[frameNum] = copy(box.xyxy) | 100 objects[num].bboxes[frameNum] = copy(box.xyxy) |
| 90 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) | 101 objects[num].userTypes.append(moving.coco2Types[int(box.cls.item())]) |
| 91 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) | 102 objects[num].features[0].tmpPositions[frameNum] = moving.Point(box.xyxy[0,0].item(), box.xyxy[0,1].item()) # min |
| 92 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) | 103 objects[num].features[1].tmpPositions[frameNum] = moving.Point(box.xyxy[0,2].item(), box.xyxy[0,3].item()) # max |
| 93 else: | 104 else: |
| 94 inter = moving.TimeInterval(frameNum, frameNum) | 105 inter = moving.TimeInterval(frameNum, frameNum) |
| 95 objects[num] = moving.MovingObject(num, inter) | 106 objects[num] = moving.MovingObject(num, inter) |
| 96 objects[num].bboxes = {frameNum: copy(box.xyxy)} | 107 objects[num].bboxes = {frameNum: copy(box.xyxy)} |
| 97 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] | 108 objects[num].userTypes = [moving.coco2Types[int(box.cls.item())]] |
| 111 | 122 |
| 112 # classification | 123 # classification |
| 113 for num, obj in objects.items(): | 124 for num, obj in objects.items(): |
| 114 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? | 125 obj.setUserType(utils.mostCommon(obj.userTypes)) # improve? mix with speed? |
| 115 | 126 |
| 116 # add quality control: avoid U-turns | 127 # TODO add quality control: avoid U-turns |
| 117 | 128 |
| 118 # merge bikes and people | 129 # merge bikes and people |
| 119 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] | 130 twowheels = [num for num, obj in objects.items() if obj.getUserType() in (moving.userType2Num['motorcyclist'],moving.userType2Num['cyclist'])] |
| 120 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] | 131 pedestrians = [num for num, obj in objects.items() if obj.getUserType() == moving.userType2Num['pedestrian']] |
| 121 | 132 |
| 186 ped = objects[pedestrians[pedInd]] | 197 ped = objects[pedestrians[pedInd]] |
| 187 mergeObjects(tw, ped) | 198 mergeObjects(tw, ped) |
| 188 del objects[pedestrians[pedInd]] | 199 del objects[pedestrians[pedInd]] |
| 189 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) | 200 #TODO Verif overlap piéton vélo : si long hors overlap, changement mode (trouver exemples) |
| 190 | 201 |
| 191 # interpolate and generate velocity (?) for the features (bboxes) before saving | 202 # interpolate and save image coordinates |
| 192 for num, obj in objects.items(): | 203 for num, obj in objects.items(): |
| 193 #obj.features[1].timeInterval = copy(obj.getTimeInterval()) | |
| 194 for f in obj.getFeatures(): | 204 for f in obj.getFeatures(): |
| 195 if f.length() != len(f.tmpPositions): # interpolate | 205 if f.length() != len(f.tmpPositions): # interpolate |
| 196 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) | 206 f.positions = moving.Trajectory.fromPointDict(f.tmpPositions) |
| 197 #obj.features[1].positions = moving.Trajectory.fromPointDict(obj.features[1].tmpPositions) | |
| 198 else: | 207 else: |
| 199 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) | 208 f.positions = moving.Trajectory.fromPointList(list(f.tmpPositions.values())) |
| 200 #obj.features[1].positions = moving.Trajectory.fromPointList(list(obj.features[1].tmpPositions.values())) | 209 if not args.notSavingImageCoordinates: |
| 201 | 210 storage.saveTrajectoriesToSqlite(utils.removeExtension(args.databaseFilename)+'-bb.sqlite', list(objects.values()), 'object') |
| | 211 # project, smooth and save |
| | 212 for num, obj in objects.items(): |
| | 213 features = obj.getFeatures() |
| | 214 if moving.userTypeNames[obj.getUserType()] == 'pedestrian': |
| | 215 assert len(features) == 2 |
| | 216 t1 = features[0].getPositions() |
| | 217 t2 = features[1].getPositions() |
| | 218 t = [[(p1.x+p2.x)/2., max(p1.y, p2.y)] for p1, p2 in zip(t1, t2)] |
| | 219 else: |
| | 220 t = [] |
| | 221 for instant in obj.getTimeInterval(): |
| | 222 points = [] |
| | 223 for f in features: |
| | 224 if f.existsAtInstant(instant): |
| | 225 points.append(f.getPositionAtInstant(instant)) |
| | 226 t.append(moving.Point.agg(points, np.mean).aslist()) |
| | 227 #t = sum([f.getPositions().asArray() for f in features])/len(features) |
| | 228 #t = (moving.Trajectory.add(t1, t2)*0.5).asArray() |
| | 229 projected = cvutils.imageToWorldProject(np.array(t).T, intrinsicCameraMatrix, distortionCoefficients, homography) |
| | 230 featureNum = features[0].getNum() |
| | 231 obj.features=[moving.MovingObject(featureNum, obj.getTimeInterval(), moving.Trajectory(projected.tolist()))] |
| | 232 obj.featureNumbers = [featureNum] |
| 202 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') | 233 storage.saveTrajectoriesToSqlite(args.databaseFilename, list(objects.values()), 'object') |
| 203 | 234 |
| 204 # todo save bbox and mask to study localization / representation | 235 # todo save bbox and mask to study localization / representation |
| 205 # apply quality checks deviation and acceleration bounds? | 236 # apply quality checks deviation and acceleration bounds? |
| 206 | 237 |
