commit ede337a2d8 ("Initial")

.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
/target
Cargo.toml (new file, 22 lines)
@@ -0,0 +1,22 @@
[package]
name = "deep-sort"
version = "0.1.16"
authors = ["Andrey Tkachenko <andreytkachenko64@gmail.com>"]
edition = "2018"

[dependencies]
grant-object-detector = {path = "../grant-object-detector"}
ndarray-linalg = {version = "0.12.0", features = ["openblas"]}
munkres = "0.5.1"
openblas-src = {version = "0.9.0", features = ["static"]}
onnx-model = {path = "../onnx-model"}
opencv = "=0.41.0"
err-derive = "0.2.4"

[dependencies.ndarray]
version = "0.13.1"
features = ["blas"]
default-features = false

[dev-dependencies]
anyhow = "1.0.31"
README.md (new file, 2 lines)
@@ -0,0 +1,2 @@
# DeepSort Rust Implementation
See the DeepSORT paper (Wojke et al., "Simple Online and Realtime Tracking with a Deep Association Metric", arXiv:1703.07402) for more information.
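For orientation, here is a minimal sketch of the tracking loop this crate exposes, condensed from examples/demo.rs later in this commit; the thresholds and the 512-dimensional dummy feature are illustrative values, not prescribed ones.

use deep_sort::sort;
use ndarray::Array1;

fn main() {
    // Same construction as in examples/demo.rs; thresholds are illustrative.
    let metric = sort::NearestNeighborDistanceMetric::new(
        sort::NearestNeighborMetricKind::CosineDistance,
        0.2,        // max cosine distance
        Some(100),  // nn budget
    );
    let mut tracker = sort::Tracker::new(metric, 0.7, 70, 3);

    // One Detection per object found in the current frame: an LTWH box,
    // a detector confidence and an optional ReID appearance feature.
    let detections = vec![sort::Detection {
        bbox: sort::BBox::ltwh(10.0, 20.0, 50.0, 100.0),
        confidence: 0.9,
        feature: Some(Array1::zeros(512)),
    }];

    // Kalman prediction followed by association with the new detections.
    tracker.predict();
    tracker.update(detections.as_slice());

    for t in tracker.tracks().iter().filter(|t| t.is_confirmed()) {
        println!("track {}: {:?}", t.track_id, t.bbox().as_ltwh());
    }
}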
examples/demo.rs (new file, 607 lines)
@@ -0,0 +1,607 @@
|
||||
use opencv::{
|
||||
dnn,
|
||||
core::{self, Mat, Scalar, Vector},
|
||||
highgui,
|
||||
prelude::*,
|
||||
videoio,
|
||||
};
|
||||
use deep_sort::{
|
||||
deep::ImageEncoder,
|
||||
sort,
|
||||
};
|
||||
use ndarray::prelude::*;
|
||||
|
||||
const CHANNELS: usize = 24;
|
||||
const CONFIDENCE_THRESHOLD: f32 = 0.6;
|
||||
const NMS_THRESHOLD: f32 = 0.4;
|
||||
pub const NAMES: [&'static str; 80] = [
|
||||
"person",
|
||||
"bicycle",
|
||||
"car",
|
||||
"motorbike",
|
||||
"aeroplane",
|
||||
"bus",
|
||||
"train",
|
||||
"truck",
|
||||
"boat",
|
||||
"traffic light",
|
||||
"fire hydrant",
|
||||
"stop sign",
|
||||
"parking meter",
|
||||
"bench",
|
||||
"bird",
|
||||
"cat",
|
||||
"dog",
|
||||
"horse",
|
||||
"sheep",
|
||||
"cow",
|
||||
"elephant",
|
||||
"bear",
|
||||
"zebra",
|
||||
"giraffe",
|
||||
"backpack",
|
||||
"umbrella",
|
||||
"handbag",
|
||||
"tie",
|
||||
"suitcase",
|
||||
"frisbee",
|
||||
"skis",
|
||||
"snowboard",
|
||||
"sports ball",
|
||||
"kite",
|
||||
"baseball bat",
|
||||
"baseball glove",
|
||||
"skateboard",
|
||||
"surfboard",
|
||||
"tennis racket",
|
||||
"bottle",
|
||||
"wine glass",
|
||||
"cup",
|
||||
"fork",
|
||||
"knife",
|
||||
"spoon",
|
||||
"bowl",
|
||||
"banana",
|
||||
"apple",
|
||||
"sandwich",
|
||||
"orange",
|
||||
"broccoli",
|
||||
"carrot",
|
||||
"hot dog",
|
||||
"pizza",
|
||||
"donut",
|
||||
"cake",
|
||||
"chair",
|
||||
"sofa",
|
||||
"pottedplant",
|
||||
"bed",
|
||||
"diningtable",
|
||||
"toilet",
|
||||
"tvmonitor",
|
||||
"laptop",
|
||||
"mouse",
|
||||
"remote",
|
||||
"keyboard",
|
||||
"cell phone",
|
||||
"microwave",
|
||||
"oven",
|
||||
"toaster",
|
||||
"sink",
|
||||
"refrigerator",
|
||||
"book",
|
||||
"clock",
|
||||
"vase",
|
||||
"scissors",
|
||||
"teddy bear",
|
||||
"hair drier",
|
||||
"toothbrush",
|
||||
];
|
||||
|
||||
fn run() -> opencv::Result<()> {
|
||||
let mut encoder = ImageEncoder::new("/home/andrey/workspace/ssl/deep_sort_pytorch/deep_sort/deep/reid1.onnx")?;
|
||||
let max_cosine_distance = 0.2;
|
||||
let nn_budget = 100;
|
||||
let max_age = 70;
|
||||
let max_iou_distance = 0.2;
|
||||
let n_init = 3;
|
||||
let kind = sort::NearestNeighborMetricKind::CosineDistance;
|
||||
let metric = sort::NearestNeighborDistanceMetric::new(kind, max_cosine_distance, Some(nn_budget));
|
||||
let mut tracker = sort::Tracker::new(metric, max_iou_distance, max_age, n_init);
|
||||
|
||||
let model = "/home/andrey/workspace/ssl/yolov3/yolov3.weights";
|
||||
let config = "/home/andrey/workspace/ssl/yolov3/yolov3.cfg";
|
||||
let framework = "";
|
||||
|
||||
let mut net = dnn::read_net(model, config, framework).unwrap();
|
||||
net.set_preferable_backend(dnn::DNN_BACKEND_DEFAULT);
|
||||
net.set_preferable_target(dnn::DNN_TARGET_CPU);
|
||||
|
||||
let layer_names = net.get_layer_names()?;
|
||||
let last_layer_id = net.get_layer_id(&layer_names.get(layer_names.len() - 1)?)?;
|
||||
let last_layer = net.get_layer(dnn::DictValue::from_i32(last_layer_id)?)?;
|
||||
let last_layer_type = last_layer.typ();
|
||||
|
||||
let out_names = net.get_unconnected_out_layers_names().unwrap();
|
||||
|
||||
let window = "video capture";
|
||||
highgui::named_window(window, 1)?;
|
||||
|
||||
let mut cam = videoio::VideoCapture::new(0, videoio::CAP_ANY)?; // 0 is the default camera
|
||||
|
||||
let opened = videoio::VideoCapture::is_opened(&cam)?;
|
||||
if !opened {
|
||||
panic!("Unable to open default camera!");
|
||||
}
|
||||
|
||||
let mut outs = core::Vector::<core::Mat>::new(); //core::Mat::default()?;
|
||||
|
||||
let mut frame = core::Mat::default()?;
|
||||
let mut flag = -1i64;
|
||||
|
||||
loop {
|
||||
cam.read(&mut frame)?;
|
||||
|
||||
// flag += 1;
|
||||
// if flag % 5 != 0 {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
let fsize = frame.size()?;
|
||||
|
||||
if fsize.width <= 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let frame_height = fsize.height;
|
||||
let frame_width = fsize.width;
|
||||
|
||||
// Create a 4D blob from a frame.
|
||||
let inp_width = 416;
|
||||
let inp_height = 416;
|
||||
let blob = dnn::blob_from_image(
|
||||
&frame,
|
||||
1.0 / 255.0,
|
||||
core::Size::new(inp_width, inp_height),
|
||||
core::Scalar::new(0., 0., 0., 0.),
|
||||
true,
|
||||
false,
|
||||
core::CV_32F)
|
||||
.unwrap();
|
||||
|
||||
// Run a model
|
||||
net.set_input(&blob, "", 1.0, core::Scalar::new(0.,0.,0.,0.));
|
||||
net.forward(&mut outs, &out_names).unwrap();
|
||||
|
||||
let fsize = frame.size()?;
|
||||
|
||||
let frame_height = fsize.height;
|
||||
let frame_width = fsize.width;
|
||||
// let mut objects = vec![];
|
||||
|
||||
match last_layer_type.as_str() {
|
||||
"Region" => {
|
||||
let mut detections = vec![];
|
||||
let bboxes = detect(&outs)?;
|
||||
|
||||
for bbox in bboxes {
|
||||
let rect = bbox.cv_rect(frame.cols(), frame.rows());
|
||||
|
||||
let roi = Mat::roi(&frame, rect)?;
|
||||
let blob = dnn::blob_from_image(
|
||||
&roi,
|
||||
1.0 / 255.0,
|
||||
core::Size::new(64, 128),
|
||||
core::Scalar::new(0., 0., 0., 0.),
|
||||
true,
|
||||
false,
|
||||
core::CV_32F)
|
||||
.unwrap();
|
||||
|
||||
let code = encoder.encode_batch(&blob)?.get(0)?;
|
||||
let core = code.into_typed::<f32>()?;
|
||||
let feature = arr1(core.data_typed()?);
|
||||
|
||||
detections.push(sort::Detection {
|
||||
bbox: sort::BBox::ltwh(
|
||||
rect.x as f32,
|
||||
rect.y as f32,
|
||||
rect.width as f32,
|
||||
rect.height as f32
|
||||
),
|
||||
confidence: bbox.class_confidence,
|
||||
feature: Some(feature)
|
||||
});
|
||||
|
||||
draw_pred(&mut frame, bbox)?;
|
||||
}
|
||||
|
||||
tracker.predict();
|
||||
tracker.update(detections.as_slice());
|
||||
|
||||
for t in tracker.tracks().iter().filter(|t|t.is_confirmed()) {
|
||||
draw_track(&mut frame, t.bbox().as_ltwh(), t.track_id);
|
||||
}
|
||||
},
|
||||
|
||||
_ => panic!("unknown last layer type"),
|
||||
}
|
||||
|
||||
highgui::imshow(window, &mut frame)?;
|
||||
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn draw_pred(frame: &mut Mat, bbox: BBox) -> opencv::Result<()> {
|
||||
let rect = bbox.cv_rect(frame.cols(), frame.rows());
|
||||
|
||||
// Draw a bounding box.
|
||||
opencv::imgproc::rectangle(
|
||||
frame,
|
||||
rect,
|
||||
core::Scalar::new(255.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
0
|
||||
)?;
|
||||
|
||||
// let label = format!("{} {:2}", NAMES[bbox.class_index], bbox.class_confidence);
|
||||
// let mut base_line = 0;
|
||||
// let label_size = opencv::imgproc::get_text_size(&label, opencv::imgproc::FONT_HERSHEY_SIMPLEX, 0.6, 1, &mut base_line)?;
|
||||
|
||||
// let label_rect = core::Rect::new(
|
||||
// rect.x,
|
||||
// rect.y - label_size.height - 8,
|
||||
// label_size.width + 8,
|
||||
// label_size.height + 8
|
||||
// );
|
||||
|
||||
// opencv::imgproc::rectangle(frame, label_rect, core::Scalar::new(255.0, 255.0, 0.0, 0.0), opencv::imgproc::FILLED, opencv::imgproc::LINE_8, 0)?;
|
||||
|
||||
// let pt = core::Point::new(rect.x, rect.y - 8);
|
||||
// opencv::imgproc::put_text(
|
||||
// frame,
|
||||
// &label,
|
||||
// pt,
|
||||
// opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
// 0.6,
|
||||
// core::Scalar::new(0.0, 0.0, 0.0, 0.0),
|
||||
// 1,
|
||||
// opencv::imgproc::LINE_8,
|
||||
// false
|
||||
// )?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn draw_track(frame: &mut Mat, bbox: sort::BBox<sort::Ltwh>, track_id: i32) -> opencv::Result<()> {
|
||||
let rect = opencv::core::Rect::new(
|
||||
bbox.left() as i32,
|
||||
bbox.top() as i32,
|
||||
bbox.width() as i32,
|
||||
bbox.height() as i32,
|
||||
);
|
||||
|
||||
// Draw a bounding box.
|
||||
opencv::imgproc::rectangle(
|
||||
frame,
|
||||
rect,
|
||||
core::Scalar::new(0.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
0
|
||||
)?;
|
||||
|
||||
let label = format!("[{}]", track_id);
|
||||
let mut base_line = 0;
|
||||
let label_size = opencv::imgproc::get_text_size(&label, opencv::imgproc::FONT_HERSHEY_SIMPLEX, 0.6, 1, &mut base_line)?;
|
||||
|
||||
let label_rect = core::Rect::new(
|
||||
rect.x,
|
||||
rect.y - label_size.height - 8,
|
||||
label_size.width + 8,
|
||||
label_size.height + 8
|
||||
);
|
||||
|
||||
opencv::imgproc::rectangle(frame, label_rect, core::Scalar::new(0.0, 255.0, 0.0, 0.0), opencv::imgproc::FILLED, opencv::imgproc::LINE_8, 0)?;
|
||||
|
||||
let pt = core::Point::new(rect.x, rect.y - 8);
|
||||
opencv::imgproc::put_text(
|
||||
frame,
|
||||
&label,
|
||||
pt,
|
||||
opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
0.6,
|
||||
core::Scalar::new(0.0, 0.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
false
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct BBox {
|
||||
xmin: f32,
|
||||
ymin: f32,
|
||||
xmax: f32,
|
||||
ymax: f32,
|
||||
confidence: f32,
|
||||
class_index: usize,
|
||||
class_confidence: f32,
|
||||
}
|
||||
|
||||
impl BBox {
|
||||
pub fn cv_rect(&self, frame_width: i32, frame_height: i32) -> opencv::core::Rect {
|
||||
let frame_width_f = frame_width as f32;
|
||||
let frame_height_f = frame_height as f32;
|
||||
|
||||
let left = ((self.xmin * frame_width_f) as i32).max(0).min(frame_width);
|
||||
let top = ((self.ymin * frame_height_f) as i32).max(0).min(frame_height);
|
||||
let mut width = (((self.xmax - self.xmin) * frame_width_f) as i32).max(0);
|
||||
let mut height = (((self.ymax - self.ymin) * frame_height_f) as i32).max(0);
|
||||
|
||||
if left + width > frame_width {
|
||||
width = frame_width - left;
|
||||
}
|
||||
|
||||
if top + height > frame_height {
|
||||
height = frame_height - top;
|
||||
}
|
||||
|
||||
core::Rect::new(
|
||||
left,
|
||||
top,
|
||||
width,
|
||||
height,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn detect(pred_events: &Vector<Mat>) -> opencv::Result<Vec<BBox>> {
|
||||
|
||||
// The bounding boxes grouped by (maximum) class index.
|
||||
let mut bboxes: Vec<(core::Vector<core::Rect2d>, core::Vector<f32>, Vec<BBox>)> = (0 .. 80)
|
||||
.map(|_| (core::Vector::new(), core::Vector::new(), vec![]))
|
||||
.collect();
|
||||
|
||||
for pred_event in pred_events {
|
||||
let fsize = pred_event.size()?;
|
||||
let npreds = pred_event.rows();
|
||||
let pred_size = pred_event.cols();
|
||||
let nclasses = (pred_size - 5) as usize;
|
||||
|
||||
// Extract the bounding boxes for which confidence is above the threshold.
|
||||
for index in 0 .. npreds {
|
||||
let pred = pred_event.row(index)?.into_typed::<f32>()?;
|
||||
let detection = pred.data_typed()?;
|
||||
|
||||
let (center_x, center_y, width, height, confidence) = match &detection[0 .. 5] {
|
||||
&[a,b,c,d,e] => (a,b,c,d,e),
|
||||
_ => unreachable!()
|
||||
};
|
||||
|
||||
let classes = &detection[5..];
|
||||
|
||||
if confidence > CONFIDENCE_THRESHOLD {
|
||||
let mut class_index = -1;
|
||||
let mut score = 0.0;
|
||||
|
||||
for (idx, &val) in classes.iter().enumerate() {
|
||||
if val > score {
|
||||
class_index = idx as i32;
|
||||
score = val;
|
||||
}
|
||||
}
|
||||
|
||||
if class_index > -1 && score > 0. {
|
||||
let entry = &mut bboxes[class_index as usize];
|
||||
|
||||
entry.0.push(core::Rect2d::new(
|
||||
(center_x - width / 2.) as f64,
|
||||
(center_y - height / 2.) as f64,
|
||||
width as f64,
|
||||
height as f64,
|
||||
));
|
||||
entry.1.push(score);
|
||||
entry.2.push(BBox {
|
||||
xmin: center_x - width / 2.,
|
||||
ymin: center_y - height / 2.,
|
||||
xmax: center_x + width / 2.,
|
||||
ymax: center_y + height / 2.,
|
||||
|
||||
confidence,
|
||||
class_index: class_index as _,
|
||||
class_confidence: score,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut events = vec![];
|
||||
|
||||
for (rects, scores, bboxes) in bboxes.iter_mut() {
|
||||
if bboxes.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut indices = core::Vector::<i32>::new();
|
||||
dnn::nms_boxes_f64(
|
||||
&rects,
|
||||
&scores,
|
||||
CONFIDENCE_THRESHOLD,
|
||||
NMS_THRESHOLD,
|
||||
&mut indices,
|
||||
1.0,
|
||||
0
|
||||
)?;
|
||||
|
||||
let mut indices = indices.to_vec();
|
||||
|
||||
events.extend(bboxes.drain(..)
|
||||
.enumerate()
|
||||
.filter_map(|(idx, item)| if indices.contains(&(idx as i32)) {Some(item)} else {None}));
|
||||
}
|
||||
|
||||
// Perform non-maximum suppression.
|
||||
// for (idx, (_, _, bboxes_for_class)) in bboxes.iter_mut().enumerate() {
|
||||
// if bboxes_for_class.is_empty() {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// bboxes_for_class.sort_unstable_by(|b1, b2| b2.confidence.partial_cmp(&b1.confidence).unwrap());
|
||||
// let mut current_index = 0;
|
||||
|
||||
// for index in 0 .. bboxes_for_class.len() {
|
||||
// let mut drop = false;
|
||||
// for prev_index in 0..current_index {
|
||||
// let iou = iou(&bboxes_for_class[prev_index], &bboxes_for_class[index]);
|
||||
// if iou > NMS_THRESHOLD {
|
||||
// drop = true;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if !drop {
|
||||
// bboxes_for_class.swap(current_index, index);
|
||||
// current_index += 1;
|
||||
// }
|
||||
// }
|
||||
|
||||
// bboxes_for_class.truncate(current_index);
|
||||
// }
|
||||
|
||||
for (class_index, (_, _, bboxes_for_class)) in bboxes.into_iter().enumerate() {
|
||||
if bboxes_for_class.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let clamp = |x| if x < 0.0 { 0.0 } else if x > 1.0 { 1.0 } else { x };
|
||||
|
||||
for bbox in bboxes_for_class {
|
||||
events.push(bbox);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
// Intersection over union of two bounding boxes.
|
||||
fn iou(b1: &BBox, b2: &BBox) -> f32 {
|
||||
let b1_area = (b1.xmax - b1.xmin + 1.) * (b1.ymax - b1.ymin + 1.);
|
||||
let b2_area = (b2.xmax - b2.xmin + 1.) * (b2.ymax - b2.ymin + 1.);
|
||||
let i_xmin = b1.xmin.max(b2.xmin);
|
||||
let i_xmax = b1.xmax.min(b2.xmax);
|
||||
let i_ymin = b1.ymin.max(b2.ymin);
|
||||
let i_ymax = b1.ymax.min(b2.ymax);
|
||||
let i_area = (i_xmax - i_xmin + 1.).max(0.) * (i_ymax - i_ymin + 1.).max(0.);
|
||||
i_area / (b1_area + b2_area - i_area)
|
||||
}
|
||||
|
||||
fn postprocess(frame: &mut Mat, outs: &core::Vector<Mat>, last_layer_type: &str) -> opencv::Result<()> {
|
||||
let fsize = frame.size()?;
|
||||
|
||||
let frame_height = fsize.height;
|
||||
let frame_width = fsize.width;
|
||||
// let mut objects = vec![];
|
||||
|
||||
match last_layer_type {
|
||||
"Region" => {
|
||||
// let bboxes = detect(&outs)?;
|
||||
|
||||
// for bbox in bboxes {
|
||||
// draw_pred(frame, bbox);
|
||||
// }
|
||||
},
|
||||
|
||||
_ => panic!("unknown last layer type"),
|
||||
}
|
||||
|
||||
// classIds = []
|
||||
// confidences = []
|
||||
// boxes = []
|
||||
// if lastLayer.type == 'DetectionOutput':
|
||||
// # Network produces output blob with a shape 1x1xNx7 where N is a number of
|
||||
// # detections and an every detection is a vector of values
|
||||
// # [batchId, classId, confidence, left, top, right, bottom]
|
||||
// for out in outs:
|
||||
// for detection in out[0, 0]:
|
||||
// confidence = detection[2]
|
||||
// if confidence > confThreshold:
|
||||
// left = int(detection[3])
|
||||
// top = int(detection[4])
|
||||
// right = int(detection[5])
|
||||
// bottom = int(detection[6])
|
||||
// width = right - left + 1
|
||||
// height = bottom - top + 1
|
||||
// if width <= 2 or height <= 2:
|
||||
// left = int(detection[3] * frameWidth)
|
||||
// top = int(detection[4] * frameHeight)
|
||||
// right = int(detection[5] * frameWidth)
|
||||
// bottom = int(detection[6] * frameHeight)
|
||||
// width = right - left + 1
|
||||
// height = bottom - top + 1
|
||||
// classIds.append(int(detection[1]) - 1) # Skip background label
|
||||
// confidences.append(float(confidence))
|
||||
// boxes.append([left, top, width, height])
|
||||
// elif lastLayer.type == 'Region':
|
||||
// # Network produces output blob with a shape NxC where N is a number of
|
||||
// # detected objects and C is a number of classes + 4 where the first 4
|
||||
// # numbers are [center_x, center_y, width, height]
|
||||
// for out in outs:
|
||||
// for detection in out:
|
||||
// scores = detection[5:]
|
||||
// classId = np.argmax(scores)
|
||||
// confidence = scores[classId]
|
||||
// if confidence > confThreshold:
|
||||
// center_x = int(detection[0] * frameWidth)
|
||||
// center_y = int(detection[1] * frameHeight)
|
||||
// width = int(detection[2] * frameWidth)
|
||||
// height = int(detection[3] * frameHeight)
|
||||
// left = int(center_x - width / 2)
|
||||
// top = int(center_y - height / 2)
|
||||
// classIds.append(classId)
|
||||
// confidences.append(float(confidence))
|
||||
// boxes.append([left, top, width, height])
|
||||
// else:
|
||||
// print('Unknown output layer type: ' + lastLayer.type)
|
||||
// exit()
|
||||
|
||||
// # NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample
|
||||
// # or NMS is required if number of outputs > 1
|
||||
// if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
|
||||
// indices = []
|
||||
// classIds = np.array(classIds)
|
||||
// boxes = np.array(boxes)
|
||||
// confidences = np.array(confidences)
|
||||
// unique_classes = set(classIds)
|
||||
// for cl in unique_classes:
|
||||
// class_indices = np.where(classIds == cl)[0]
|
||||
// conf = confidences[class_indices]
|
||||
// box = boxes[class_indices].tolist()
|
||||
// nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold)
|
||||
// nms_indices = nms_indices[:, 0] if len(nms_indices) else []
|
||||
// indices.extend(class_indices[nms_indices])
|
||||
// else:
|
||||
// indices = np.arange(0, len(classIds))
|
||||
|
||||
// for i in indices:
|
||||
// box = boxes[i]
|
||||
// left = box[0]
|
||||
// top = box[1]
|
||||
// width = box[2]
|
||||
// height = box[3]
|
||||
// drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() {
|
||||
run().unwrap()
|
||||
}
|
examples/file.rs (new file, 636 lines)
@@ -0,0 +1,636 @@
|
||||
use opencv::{
|
||||
dnn,
|
||||
core::{self, Mat, Scalar, Vector},
|
||||
highgui,
|
||||
prelude::*,
|
||||
videoio,
|
||||
};
|
||||
use deep_sort::{
|
||||
deep::{ImageEncoder, ORT_ENV},
|
||||
sort,
|
||||
};
|
||||
use ndarray::prelude::*;
|
||||
use onnxruntime::*;
|
||||
use std::borrow::Borrow;
|
||||
|
||||
const SIZE: usize = 416;
|
||||
const OUT_SIZE: usize = 10647; //6300; //16128 10647;
|
||||
|
||||
const CHANNELS: usize = 24;
|
||||
const CONFIDENCE_THRESHOLD: f32 = 0.5;
|
||||
const NMS_THRESHOLD: f32 = 0.4;
|
||||
pub const NAMES: [&'static str; 80] = [
|
||||
"person",
|
||||
"bicycle",
|
||||
"car",
|
||||
"motorbike",
|
||||
"aeroplane",
|
||||
"bus",
|
||||
"train",
|
||||
"truck",
|
||||
"boat",
|
||||
"traffic light",
|
||||
"fire hydrant",
|
||||
"stop sign",
|
||||
"parking meter",
|
||||
"bench",
|
||||
"bird",
|
||||
"cat",
|
||||
"dog",
|
||||
"horse",
|
||||
"sheep",
|
||||
"cow",
|
||||
"elephant",
|
||||
"bear",
|
||||
"zebra",
|
||||
"giraffe",
|
||||
"backpack",
|
||||
"umbrella",
|
||||
"handbag",
|
||||
"tie",
|
||||
"suitcase",
|
||||
"frisbee",
|
||||
"skis",
|
||||
"snowboard",
|
||||
"sports ball",
|
||||
"kite",
|
||||
"baseball bat",
|
||||
"baseball glove",
|
||||
"skateboard",
|
||||
"surfboard",
|
||||
"tennis racket",
|
||||
"bottle",
|
||||
"wine glass",
|
||||
"cup",
|
||||
"fork",
|
||||
"knife",
|
||||
"spoon",
|
||||
"bowl",
|
||||
"banana",
|
||||
"apple",
|
||||
"sandwich",
|
||||
"orange",
|
||||
"broccoli",
|
||||
"carrot",
|
||||
"hot dog",
|
||||
"pizza",
|
||||
"donut",
|
||||
"cake",
|
||||
"chair",
|
||||
"sofa",
|
||||
"pottedplant",
|
||||
"bed",
|
||||
"diningtable",
|
||||
"toilet",
|
||||
"tvmonitor",
|
||||
"laptop",
|
||||
"mouse",
|
||||
"remote",
|
||||
"keyboard",
|
||||
"cell phone",
|
||||
"microwave",
|
||||
"oven",
|
||||
"toaster",
|
||||
"sink",
|
||||
"refrigerator",
|
||||
"book",
|
||||
"clock",
|
||||
"vase",
|
||||
"scissors",
|
||||
"teddy bear",
|
||||
"hair drier",
|
||||
"toothbrush",
|
||||
];
|
||||
|
||||
pub enum Target {
|
||||
Cpu,
|
||||
Cuda,
|
||||
Tensorrt,
|
||||
Movidus
|
||||
}
|
||||
|
||||
struct Inference {
|
||||
|
||||
}
|
||||
|
||||
impl Inference {
|
||||
pub fn new(model: &str, config: &str) -> opencv::Result<Self> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn set_preferable_target(&mut self, target: Target) {
|
||||
|
||||
}
|
||||
|
||||
pub fn set_input(&mut self, name: &str, tensor: Tensor<f32>) {
|
||||
|
||||
}
|
||||
|
||||
pub fn get_output(&self, name: &str) {
|
||||
|
||||
}
|
||||
|
||||
pub fn forward(&mut self) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
OpenCv(opencv::Error),
|
||||
OnnxRuntime(onnxruntime::Error),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::OpenCv(i) => writeln!(f, "Error: {}", i),
|
||||
Self::OnnxRuntime(i) => writeln!(f, "Error: {}", i),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
impl From<opencv::Error> for Error {
|
||||
fn from(err: opencv::Error) -> Self {
|
||||
Self::OpenCv(err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl From<onnxruntime::Error> for Error {
|
||||
fn from(err: onnxruntime::Error) -> Self {
|
||||
Self::OnnxRuntime(err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn run() -> std::result::Result<(), Error> {
|
||||
let max_cosine_distance = 0.2;
|
||||
let nn_budget = 100;
|
||||
let max_age = 70;
|
||||
let max_iou_distance = 0.7;
|
||||
let n_init = 3;
|
||||
let kind = sort::NearestNeighborMetricKind::CosineDistance;
|
||||
let metric = sort::NearestNeighborDistanceMetric::new(kind, max_cosine_distance, Some(nn_budget));
|
||||
let mut tracker = sort::Tracker::new(metric, max_iou_distance, max_age, n_init);
|
||||
|
||||
let window = "video capture";
|
||||
highgui::named_window(window, 1)?;
|
||||
|
||||
let mut cam = videoio::VideoCapture::from_file("./videoplayback.mp4", videoio::CAP_ANY)?; // read frames from a video file
|
||||
cam.set(videoio::CAP_PROP_POS_FRAMES, 150.0);
|
||||
|
||||
let opened = videoio::VideoCapture::is_opened(&cam)?;
|
||||
if !opened {
|
||||
panic!("Unable to open default camera!");
|
||||
}
|
||||
|
||||
let mut frame = core::Mat::default()?;
|
||||
let mut flag = -1i64;
|
||||
|
||||
let (in_tx, in_rx): (std::sync::mpsc::Sender<(Tensor<f32>, Mat)>, _) = std::sync::mpsc::channel();
|
||||
let (out_tx, out_rx) = std::sync::mpsc::channel();
|
||||
let (bb_tx, bb_rx) = std::sync::mpsc::channel();
|
||||
|
||||
std::thread::spawn(move || {
|
||||
let mut so = SessionOptions::new().unwrap();
|
||||
let ro = RunOptions::new();
|
||||
|
||||
// so.set_execution_mode(ExecutionMode::Parallel).unwrap();
|
||||
// so.add_tensorrt(0);
|
||||
so.add_cuda(0);
|
||||
// so.add_cpu(true);
|
||||
|
||||
let session = Session::new(&ORT_ENV, "/home/andrey/workspace/ssl/yolov4/yolov4_416.onnx", &so).unwrap();
|
||||
let mut out_vals = Tensor::<f32>::init(&[1, OUT_SIZE as _, 84], 0.0).unwrap();
|
||||
|
||||
let input = std::ffi::CStr::from_bytes_with_nul(b"input\0").unwrap();
|
||||
let output = std::ffi::CStr::from_bytes_with_nul(b"output\0").unwrap();
|
||||
|
||||
while let Ok((in_vals, frame)) = in_rx.recv() {
|
||||
let in_vals: Tensor<f32> = in_vals;
|
||||
session
|
||||
.run_mut(&ro, &[input], &[in_vals.as_ref()], &[output], &mut [out_vals.as_mut()])
|
||||
.expect("run");
|
||||
|
||||
let xx: &[f32] = out_vals.borrow();
|
||||
|
||||
let arr = Array3::from_shape_vec([1, OUT_SIZE, 84], xx.to_vec()).unwrap();
|
||||
|
||||
out_tx.send((arr, frame)).unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
std::thread::spawn(move || {
|
||||
let mut encoder = ImageEncoder::new("/home/andrey/workspace/ssl/deep_sort_pytorch/deep_sort/deep/reid.onnx").unwrap();
|
||||
|
||||
while let Ok((out_vals, frame)) = out_rx.recv() {
|
||||
let mut preds = vec![];
|
||||
let mut tracks = vec![];
|
||||
|
||||
// let mut detections = vec![];
|
||||
// let out_shape = out_vals.shape();
|
||||
let bboxes = detect(out_vals.view()).unwrap();
|
||||
|
||||
let nboxes = bboxes.len();
|
||||
// let mut in_vals = Array4::from_elem([nboxes, 3, 128, 64], 0.0);
|
||||
|
||||
// for (index, bbox) in bboxes.iter().enumerate() {
|
||||
// let rect = bbox.cv_rect(frame.cols(), frame.rows());
|
||||
|
||||
// let roi = Mat::roi(&frame, rect).unwrap();
|
||||
// let blob = dnn::blob_from_image(
|
||||
// &roi,
|
||||
// 1.0 / 255.0,
|
||||
// core::Size::new(64, 128),
|
||||
// core::Scalar::new(0., 0., 0., 0.),
|
||||
// true,
|
||||
// false,
|
||||
// core::CV_32F)
|
||||
// .unwrap();
|
||||
|
||||
// let core = blob.into_typed::<f32>().unwrap();
|
||||
// let data: &[f32] = core.data_typed().unwrap();
|
||||
|
||||
// let a = aview1(data).into_shape((3, 128, 64)).unwrap();
|
||||
// in_vals.index_axis_mut(Axis(0), index)
|
||||
// .assign(&a)
|
||||
// }
|
||||
|
||||
// let t = TensorView::new(in_vals.shape(), in_vals.as_slice().unwrap());
|
||||
// let code = encoder.encode_batch(t).unwrap();
|
||||
// let features = aview1(code.borrow()).into_shape((nboxes, 512)).unwrap();
|
||||
|
||||
for (i, bbox) in bboxes.into_iter().enumerate() {
|
||||
let rect = bbox.cv_rect(frame.cols(), frame.rows());
|
||||
// let feature = features.index_axis(Axis(0), i);
|
||||
|
||||
if bbox.class_index <= 8 {
|
||||
// detections.push(sort::Detection {
|
||||
// bbox: sort::BBox::ltwh(
|
||||
// rect.x as f32,
|
||||
// rect.y as f32,
|
||||
// rect.width as f32,
|
||||
// rect.height as f32
|
||||
// ),
|
||||
// confidence: bbox.confidence,
|
||||
// feature: Some(feature.into_owned())
|
||||
// });
|
||||
|
||||
preds.push(bbox);
|
||||
}
|
||||
}
|
||||
|
||||
// tracker.predict();
|
||||
// tracker.update(detections.as_slice());
|
||||
|
||||
// for t in tracker.tracks().iter().filter(|t| t.is_confirmed() && t.time_since_update <= 1) {
|
||||
// tracks.push((t.bbox().as_ltwh(), t.track_id));
|
||||
// // draw_track(&mut frame, t.bbox().as_ltwh(), t.track_id);
|
||||
// }
|
||||
|
||||
bb_tx.send((preds, tracks)).unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
loop {
|
||||
let begin = std::time::Instant::now();
|
||||
cam.read(&mut frame)?;
|
||||
|
||||
let fsize = frame.size()?;
|
||||
|
||||
if fsize.width <= 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let frame_height = fsize.height;
|
||||
let frame_width = fsize.width;
|
||||
|
||||
// Create a 4D blob from a frame.
|
||||
let inp_width = SIZE as _;
|
||||
let inp_height = SIZE as _;
|
||||
let blob = dnn::blob_from_image(
|
||||
&frame,
|
||||
1.0 / 255.0,
|
||||
core::Size::new(inp_width, inp_height),
|
||||
core::Scalar::new(0., 0., 0., 0.),
|
||||
true,
|
||||
false,
|
||||
core::CV_32F)
|
||||
.unwrap();
|
||||
|
||||
let core = blob.try_into_typed::<f32>()?;
|
||||
let data: &[f32] = core.data_typed()?;
|
||||
let in_vals = Tensor::new(&[1, 3, SIZE as _, SIZE as _], data.to_vec()).unwrap();
|
||||
|
||||
// // Run a model
|
||||
|
||||
in_tx.send((in_vals, frame.clone()?)).unwrap();
|
||||
let (preds, tracks) = bb_rx.recv().unwrap();
|
||||
|
||||
for p in preds {
|
||||
draw_pred(&mut frame, p)?;
|
||||
}
|
||||
|
||||
for (t, i) in tracks {
|
||||
draw_track(&mut frame, t, i);
|
||||
}
|
||||
|
||||
// let mut objects = vec![];
|
||||
|
||||
// let mut detections = vec![];
|
||||
// // let out_shape = out_vals.dims();
|
||||
// let bboxes = detect(aview1(out_vals.borrow())
|
||||
// .into_shape([out_shape[0] as usize, out_shape[1] as usize, out_shape[2]as usize]).unwrap())?;
|
||||
|
||||
// for bbox in bboxes {
|
||||
// let rect = bbox.cv_rect(frame.cols(), frame.rows());
|
||||
|
||||
// let roi = Mat::roi(&frame, rect)?;
|
||||
// let blob = dnn::blob_from_image(
|
||||
// &roi,
|
||||
// 1.0 / 255.0,
|
||||
// core::Size::new(64, 128),
|
||||
// core::Scalar::new(0., 0., 0., 0.),
|
||||
// true,
|
||||
// false,
|
||||
// core::CV_32F)
|
||||
// .unwrap();
|
||||
|
||||
// let code = encoder.encode_batch(&blob)?.get(0)?;
|
||||
// let core = code.into_typed::<f32>()?;
|
||||
// let feature = arr1(core.data_typed()?);
|
||||
|
||||
// if bbox.class_index <= 8 {
|
||||
// detections.push(sort::Detection {
|
||||
// bbox: sort::BBox::ltwh(
|
||||
// rect.x as f32,
|
||||
// rect.y as f32,
|
||||
// rect.width as f32,
|
||||
// rect.height as f32
|
||||
// ),
|
||||
// confidence: bbox.confidence,
|
||||
// feature: Some(feature)
|
||||
// });
|
||||
|
||||
// draw_pred(&mut frame, bbox)?;
|
||||
// }
|
||||
// }
|
||||
|
||||
// tracker.predict();
|
||||
// tracker.update(detections.as_slice());
|
||||
|
||||
// for t in tracker.tracks().iter().filter(|t| t.is_confirmed() && t.time_since_update <= 1) {
|
||||
// draw_track(&mut frame, t.bbox().as_ltwh(), t.track_id);
|
||||
// }
|
||||
|
||||
let diff = std::time::Instant::now() - begin;
|
||||
let label = format!("{:?}", 1.0 / ((diff.as_millis() as f32) * 0.001));
|
||||
|
||||
opencv::imgproc::put_text(
|
||||
&mut frame,
|
||||
&label,
|
||||
core::Point::new(30, 30),
|
||||
opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
0.6,
|
||||
core::Scalar::new(0.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
false
|
||||
)?;
|
||||
|
||||
highgui::imshow(window, &mut frame)?;
|
||||
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
fn detect(pred_events: &ArrayView3<'_, f32>) -> opencv::Result<Vec<BBox>> {
|
||||
|
||||
// The bounding boxes grouped by (maximum) class index.
|
||||
let mut bboxes: Vec<(core::Vector<core::Rect2d>, core::Vector<f32>, Vec<BBox>)> = (0 .. 80)
|
||||
.map(|_| (core::Vector::new(), core::Vector::new(), vec![]))
|
||||
.collect();
|
||||
|
||||
for pred_event in pred_events {
|
||||
let fsize = pred_event.size()?;
|
||||
let npreds = pred_event.rows();
|
||||
let pred_size = pred_event.cols();
|
||||
let nclasses = (pred_size - 5) as usize;
|
||||
|
||||
// Extract the bounding boxes for which confidence is above the threshold.
|
||||
for index in 0 .. npreds {
|
||||
let pred = pred_event.row(index)?.try_into_typed::<f32>()?;
|
||||
let detection = pred.data_typed()?;
|
||||
|
||||
let (center_x, center_y, width, height, confidence) = match &detection[0 .. 5] {
|
||||
&[a,b,c,d,e] => (a,b,c,d,e),
|
||||
_ => unreachable!()
|
||||
};
|
||||
|
||||
let classes = &detection[5..];
|
||||
|
||||
if confidence > CONFIDENCE_THRESHOLD {
|
||||
let mut class_index = -1;
|
||||
let mut score = 0.0;
|
||||
|
||||
for (idx, &val) in classes.iter().enumerate() {
|
||||
if val > score {
|
||||
class_index = idx as i32;
|
||||
score = val;
|
||||
}
|
||||
}
|
||||
|
||||
if class_index > -1 && score > 0. {
|
||||
let entry = &mut bboxes[class_index as usize];
|
||||
|
||||
entry.0.push(core::Rect2d::new(
|
||||
(center_x - width / 2.) as f64,
|
||||
(center_y - height / 2.) as f64,
|
||||
width as f64,
|
||||
height as f64,
|
||||
));
|
||||
entry.1.push(score);
|
||||
entry.2.push(BBox {
|
||||
xmin: center_x - width / 2.,
|
||||
ymin: center_y - height / 2.,
|
||||
xmax: center_x + width / 2.,
|
||||
ymax: center_y + height / 2.,
|
||||
|
||||
confidence,
|
||||
class_index: class_index as _,
|
||||
class_confidence: score,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut events = vec![];
|
||||
|
||||
for (rects, scores, bboxes) in bboxes.iter_mut() {
|
||||
if bboxes.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut indices = core::Vector::<i32>::new();
|
||||
dnn::nms_boxes_f64(
|
||||
&rects,
|
||||
&scores,
|
||||
CONFIDENCE_THRESHOLD,
|
||||
NMS_THRESHOLD,
|
||||
&mut indices,
|
||||
1.0,
|
||||
0
|
||||
)?;
|
||||
|
||||
let mut indices = indices.to_vec();
|
||||
|
||||
events.extend(bboxes.drain(..)
|
||||
.enumerate()
|
||||
.filter_map(|(idx, item)| if indices.contains(&(idx as i32)) {Some(item)} else {None}));
|
||||
}
|
||||
|
||||
// Perform non-maximum suppression.
|
||||
// for (idx, (_, _, bboxes_for_class)) in bboxes.iter_mut().enumerate() {
|
||||
// if bboxes_for_class.is_empty() {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// bboxes_for_class.sort_unstable_by(|b1, b2| b2.confidence.partial_cmp(&b1.confidence).unwrap());
|
||||
// let mut current_index = 0;
|
||||
|
||||
// for index in 0 .. bboxes_for_class.len() {
|
||||
// let mut drop = false;
|
||||
// for prev_index in 0..current_index {
|
||||
// let iou = iou(&bboxes_for_class[prev_index], &bboxes_for_class[index]);
|
||||
// if iou > NMS_THRESHOLD {
|
||||
// drop = true;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if !drop {
|
||||
// bboxes_for_class.swap(current_index, index);
|
||||
// current_index += 1;
|
||||
// }
|
||||
// }
|
||||
|
||||
// bboxes_for_class.truncate(current_index);
|
||||
// }
|
||||
|
||||
for (class_index, (_, _, bboxes_for_class)) in bboxes.into_iter().enumerate() {
|
||||
if bboxes_for_class.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let clamp = |x| if x < 0.0 { 0.0 } else if x > 1.0 { 1.0 } else { x };
|
||||
|
||||
for bbox in bboxes_for_class {
|
||||
events.push(bbox);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
fn draw_pred(frame: &mut Mat, bbox: BBox) -> opencv::Result<()> {
|
||||
let rect = bbox.cv_rect(frame.cols(), frame.rows());
|
||||
|
||||
// Draw a bounding box.
|
||||
opencv::imgproc::rectangle(
|
||||
frame,
|
||||
rect,
|
||||
core::Scalar::new(255.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
0
|
||||
)?;
|
||||
|
||||
// let label = format!("{} {:2}", NAMES[bbox.class_index], bbox.class_confidence);
|
||||
// let mut base_line = 0;
|
||||
// let label_size = opencv::imgproc::get_text_size(&label, opencv::imgproc::FONT_HERSHEY_SIMPLEX, 0.6, 1, &mut base_line)?;
|
||||
|
||||
// let label_rect = core::Rect::new(
|
||||
// rect.x,
|
||||
// rect.y - label_size.height - 8,
|
||||
// label_size.width + 8,
|
||||
// label_size.height + 8
|
||||
// );
|
||||
|
||||
// opencv::imgproc::rectangle(frame, label_rect, core::Scalar::new(255.0, 255.0, 0.0, 0.0), opencv::imgproc::FILLED, opencv::imgproc::LINE_8, 0)?;
|
||||
|
||||
// let pt = core::Point::new(rect.x, rect.y - 8);
|
||||
// opencv::imgproc::put_text(
|
||||
// frame,
|
||||
// &label,
|
||||
// pt,
|
||||
// opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
// 0.6,
|
||||
// core::Scalar::new(0.0, 0.0, 0.0, 0.0),
|
||||
// 1,
|
||||
// opencv::imgproc::LINE_8,
|
||||
// false
|
||||
// )?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn draw_track(frame: &mut Mat, bbox: sort::BBox<sort::Ltwh>, track_id: i32) -> opencv::Result<()> {
|
||||
let rect = opencv::core::Rect::new(
|
||||
bbox.left() as i32,
|
||||
bbox.top() as i32,
|
||||
bbox.width() as i32,
|
||||
bbox.height() as i32,
|
||||
);
|
||||
|
||||
// Draw a bounding box.
|
||||
opencv::imgproc::rectangle(
|
||||
frame,
|
||||
rect,
|
||||
core::Scalar::new(0.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
0
|
||||
)?;
|
||||
|
||||
let label = format!("[{}]", track_id);
|
||||
let mut base_line = 0;
|
||||
let label_size = opencv::imgproc::get_text_size(&label, opencv::imgproc::FONT_HERSHEY_SIMPLEX, 0.6, 1, &mut base_line)?;
|
||||
|
||||
let label_rect = core::Rect::new(
|
||||
rect.x,
|
||||
rect.y - label_size.height - 8,
|
||||
label_size.width + 8,
|
||||
label_size.height + 8
|
||||
);
|
||||
|
||||
opencv::imgproc::rectangle(frame, label_rect, core::Scalar::new(0.0, 255.0, 0.0, 0.0), opencv::imgproc::FILLED, opencv::imgproc::LINE_8, 0)?;
|
||||
|
||||
let pt = core::Point::new(rect.x, rect.y - 8);
|
||||
opencv::imgproc::put_text(
|
||||
frame,
|
||||
&label,
|
||||
pt,
|
||||
opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
0.6,
|
||||
core::Scalar::new(0.0, 0.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
false
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() {
|
||||
run().unwrap()
|
||||
}
|
examples/yolo.rs (new file, 211 lines)
@@ -0,0 +1,211 @@
|
||||
use anyhow::Error;
|
||||
use grant_object_detector::{Detection, YoloDetector, YoloDetectorConfig};
|
||||
use deep_sort::{sort, DeepSortConfig, DeepSort, Track};
|
||||
use ndarray::prelude::*;
|
||||
use opencv::{
|
||||
dnn,
|
||||
core::{self, Mat, Scalar, Vector},
|
||||
highgui,
|
||||
prelude::*,
|
||||
videoio,
|
||||
};
|
||||
|
||||
const PALLETE: (i32, i32, i32) = (2047, 32767, 1048575);
|
||||
|
||||
fn compute_color_for_labels(label: i32) -> (f64, f64, f64) {
|
||||
let c = label * label - label + 1;
|
||||
|
||||
(
|
||||
((PALLETE.0 * c) % 255) as _,
|
||||
((PALLETE.1 * c) % 255) as _,
|
||||
((PALLETE.2 * c) % 255) as _,
|
||||
)
|
||||
}
|
||||
|
||||
pub struct YoloDeepSort {
|
||||
yolo: YoloDetector,
|
||||
deep_sort: DeepSort,
|
||||
}
|
||||
|
||||
impl YoloDeepSort {
|
||||
fn new(yolo: &str, reid: &str) -> Result<Self, Error> {
|
||||
let device = onnx_model::get_cuda_if_available();
|
||||
let mut config = YoloDetectorConfig::new(vec![0, 2, 3, 5, 7]);
|
||||
config.confidence_threshold = 0.2;
|
||||
|
||||
Ok(Self {
|
||||
yolo: YoloDetector::new(yolo, config, device)?,
|
||||
deep_sort: DeepSort::new(DeepSortConfig::new(reid.to_string()))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn detectect(&mut self, frames: &[Mat]) -> Result<Vec<Vec<Detection>>, Error> {
|
||||
let (mut frame_width, mut frame_height) = (0i32, 0i32);
|
||||
const SIZE: usize = 416;
|
||||
|
||||
let mut inpt = unsafe { Array4::uninitialized([frames.len(), 3, SIZE, SIZE]) };
|
||||
for (idx, frame) in frames.iter().enumerate() {
|
||||
let fsize = frame.size()?;
|
||||
|
||||
if fsize.width <= 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
frame_height = fsize.height;
|
||||
frame_width = fsize.width;
|
||||
|
||||
// Create a 4D blob from a frame.
|
||||
let inp_width = SIZE as _;
|
||||
let inp_height = SIZE as _;
|
||||
let blob = dnn::blob_from_image(
|
||||
&frame,
|
||||
1.0 / 255.0,
|
||||
core::Size::new(inp_width, inp_height),
|
||||
core::Scalar::new(0., 0., 0., 0.),
|
||||
true,
|
||||
false,
|
||||
core::CV_32F)
|
||||
.unwrap();
|
||||
|
||||
let core = blob.try_into_typed::<f32>()?;
|
||||
let view = aview1(core.data_typed()?).into_shape([3, SIZE, SIZE]).unwrap();
|
||||
inpt.index_axis_mut(Axis(0), idx).assign(&view);
|
||||
}
|
||||
|
||||
let detections = self.yolo.detect(inpt.view(), frame_width, frame_height)?;
|
||||
|
||||
Ok(detections)
|
||||
}
|
||||
|
||||
pub fn track(&mut self, frames: &[Mat], detections: &[Vec<Detection>]) -> Result<&[Track], Error> {
|
||||
self.deep_sort.update(frames, detections)?;
|
||||
|
||||
Ok(self.deep_sort.tracks())
|
||||
}
|
||||
}
|
||||
|
||||
fn draw_pred(frame: &mut Mat, det: Detection) -> opencv::Result<()> {
|
||||
let rect = core::Rect::new(det.xmin, det.ymin, det.xmax - det.xmin, det.ymax - det.ymin);
|
||||
|
||||
// Draw a bounding box.
|
||||
opencv::imgproc::rectangle(
|
||||
frame,
|
||||
rect,
|
||||
core::Scalar::new(255.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
0
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
fn draw_track(frame: &mut Mat, bbox: sort::BBox<sort::Ltwh>, track_id: i32, color: (f64, f64, f64)) -> opencv::Result<()> {
|
||||
let rect = opencv::core::Rect::new(
|
||||
bbox.left() as i32,
|
||||
bbox.top() as i32,
|
||||
bbox.width() as i32,
|
||||
bbox.height() as i32,
|
||||
);
|
||||
|
||||
// Draw a bounding box.
|
||||
opencv::imgproc::rectangle(
|
||||
frame,
|
||||
rect,
|
||||
core::Scalar::new(color.0, color.1, color.2, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
0
|
||||
)?;
|
||||
|
||||
// let label = format!("[{}]", track_id);
|
||||
// let mut base_line = 0;
|
||||
// let label_size = opencv::imgproc::get_text_size(&label, opencv::imgproc::FONT_HERSHEY_SIMPLEX, 0.6, 1, &mut base_line)?;
|
||||
|
||||
// let label_rect = core::Rect::new(
|
||||
// rect.x,
|
||||
// rect.y - label_size.height - 8,
|
||||
// label_size.width + 8,
|
||||
// label_size.height + 8
|
||||
// );
|
||||
|
||||
// opencv::imgproc::rectangle(frame, label_rect, core::Scalar::new(0.0, 255.0, 0.0, 0.0), opencv::imgproc::FILLED, opencv::imgproc::LINE_8, 0)?;
|
||||
|
||||
// let pt = core::Point::new(rect.x, rect.y - 8);
|
||||
// opencv::imgproc::put_text(
|
||||
// frame,
|
||||
// &label,
|
||||
// pt,
|
||||
// opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
// 0.6,
|
||||
// core::Scalar::new(0.0, 0.0, 0.0, 0.0),
|
||||
// 1,
|
||||
// opencv::imgproc::LINE_8,
|
||||
// false
|
||||
// )?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
fn main() -> Result<(), anyhow::Error> {
|
||||
let mut tracker = YoloDeepSort::new(
|
||||
"/home/andrey/workspace/ssl/yolov4/yolov4_416.onnx",
|
||||
// "/home/andrey/workspace/ssl/reid/onnx_model.onnx",
|
||||
"/home/andrey/workspace/ssl/grant/models/model-96.onnx",
|
||||
// "/home/andrey/workspace/ssl/deep_sort_pytorch/deep_sort/deep/reid.onnx",
|
||||
)?;
|
||||
|
||||
let window = "video capture";
|
||||
highgui::named_window(window, 1)?;
|
||||
|
||||
let mut cam = videoio::VideoCapture::from_file("../videoplayback_6.avi", videoio::CAP_ANY)?; // read frames from a video file
|
||||
// cam.set(videoio::CAP_PROP_POS_FRAMES, 150.0);
|
||||
|
||||
let opened = videoio::VideoCapture::is_opened(&cam)?;
|
||||
if !opened {
|
||||
panic!("Unable to open default camera!");
|
||||
}
|
||||
|
||||
let mut frames = [core::Mat::default()?];
|
||||
loop {
|
||||
let begin = std::time::Instant::now();
|
||||
cam.read(&mut frames[0])?;
|
||||
|
||||
let detections = tracker.detectect(&frames)?;
|
||||
// for d in detections.iter().cloned() {
|
||||
// for d in d {
|
||||
// draw_pred(&mut frames[0], d);
|
||||
// }
|
||||
// }
|
||||
|
||||
let tracks = tracker.track(&frames, detections.as_slice())?;
|
||||
for t in tracks.iter().filter(|t| t.is_confirmed() && t.time_since_update <= 1) {
|
||||
draw_track(&mut frames[0], t.bbox().as_ltwh(), t.track_id, compute_color_for_labels(t.track_id));
|
||||
}
|
||||
|
||||
let diff = std::time::Instant::now() - begin;
|
||||
let label = format!("{:?}", 1.0 / ((diff.as_millis() as f32) * 0.001));
|
||||
|
||||
opencv::imgproc::put_text(
|
||||
&mut frames[0],
|
||||
&label,
|
||||
core::Point::new(30, 30),
|
||||
opencv::imgproc::FONT_HERSHEY_SIMPLEX,
|
||||
0.6,
|
||||
core::Scalar::new(0.0, 255.0, 0.0, 0.0),
|
||||
1,
|
||||
opencv::imgproc::LINE_8,
|
||||
false
|
||||
)?;
|
||||
|
||||
highgui::imshow(window, &mut frames[0])?;
|
||||
|
||||
let key = highgui::wait_key(10)?;
|
||||
if key > 0 && key != 255 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
src/deep/image_encoder.rs (new file, 38 lines)
@@ -0,0 +1,38 @@
use crate::error::Error;
use onnx_model::{OnnxInferenceModel, OnnxInferenceDevice, TensorView};
use ndarray::prelude::*;

pub struct ImageEncoder {
    model: OnnxInferenceModel,
}

impl ImageEncoder {
    pub fn new(model_filename: &str, device: OnnxInferenceDevice) -> Result<Self, Error> {
        Ok(Self {
            model: OnnxInferenceModel::new(model_filename, device)?,
        })
    }

    #[inline]
    pub fn input_shape(&self) -> &[i64] {
        let inpt = &self.model.get_input_infos()[0];

        &inpt.shape.dims[1..]
    }

    #[inline]
    pub fn output_shape(&self) -> &[i64] {
        let otpt = &self.model.get_output_infos()[0];

        &otpt.shape.dims[1..]
    }

    pub fn encode_batch(&mut self, in_vals: ArrayView4<'_, f32>) -> std::result::Result<Array2<f32>, Error> {
        let inpt = TensorView::new(in_vals.shape(), in_vals.as_slice().unwrap());
        let otpt = self.model.run(&[inpt])?.pop().unwrap();
        let shape = otpt.dims().clone();
        let features = Array2::from_shape_vec([shape[0] as usize, shape[1] as usize], otpt.to_vec()).unwrap();

        Ok(features)
    }
}
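A hedged usage sketch (not part of this commit): how encode_batch is meant to be driven, mirroring the crop-to-feature path in src/lib.rs below. The model path is hypothetical, and the (C, H, W) layout is read back from input_shape().

use deep_sort::deep::ImageEncoder;
use ndarray::Array4;

fn encode_example() -> Result<(), deep_sort::error::Error> {
    // Hypothetical ReID model path; device selection follows src/lib.rs.
    let device = onnx_model::get_cuda_if_available();
    let mut encoder = ImageEncoder::new("models/reid.onnx", device)?;

    // input_shape() drops the batch dimension, e.g. [3, 128, 64] for a ReID net.
    let dims = encoder.input_shape();
    let (c, h, w) = (dims[0] as usize, dims[1] as usize, dims[2] as usize);

    // A batch of two image crops, already resized and scaled to [0, 1].
    let batch = Array4::<f32>::zeros([2, c, h, w]);
    let features = encoder.encode_batch(batch.view())?; // shape (2, feature_dim)
    assert_eq!(features.shape()[0], 2);
    Ok(())
}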
src/deep/mod.rs (new file, 3 lines)
@@ -0,0 +1,3 @@
mod image_encoder;

pub use image_encoder::ImageEncoder;
src/error.rs (new file, 13 lines)
@@ -0,0 +1,13 @@
use err_derive::Error;

#[derive(Debug, Error)]
pub enum Error {
    #[error(display = "OnnxModel Error: {}", _0)]
    OnnxModelError(onnx_model::error::Error),
}

impl From<onnx_model::error::Error> for Error {
    fn from(err: onnx_model::error::Error) -> Self {
        Self::OnnxModelError(err)
    }
}
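A small illustration (not in the file above): the From impl is what lets onnx-model failures bubble up through the ? operator inside this crate, so a hypothetical caller only ever handles deep_sort::error::Error.

use deep_sort::{deep::ImageEncoder, error::Error};

// Hypothetical helper: any onnx_model::error::Error raised while loading the
// model is converted by the From impl above, so `?` crosses the boundary.
fn load_encoder(path: &str) -> Result<ImageEncoder, Error> {
    let device = onnx_model::get_cuda_if_available();
    let encoder = ImageEncoder::new(path, device)?;
    Ok(encoder)
}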
src/lib.rs (new file, 165 lines)
@@ -0,0 +1,165 @@
pub mod deep;
pub mod sort;
pub mod error;

pub use sort::Track;
use deep::ImageEncoder;
use sort::{Tracker, NearestNeighborMetricKind, NearestNeighborDistanceMetric};
use ndarray::prelude::*;
use opencv::core::{self, Mat, Rect};
use opencv::core::MatTrait;

use opencv::dnn;
use error::Error;
use grant_object_detector as detector;

use std::collections::HashMap;

fn get_detection_cv_rect(det: &detector::Detection, frame_width: i32, frame_height: i32) -> Rect {
    let left = det.xmin.max(0).min(frame_width);
    let top = det.ymin.max(0).min(frame_height);

    let mut width = (det.xmax - det.xmin).max(0);
    let mut height = (det.ymax - det.ymin).max(0);

    if left + width > frame_width {
        width = frame_width - left;
    }

    if top + height > frame_height {
        height = frame_height - top;
    }

    Rect::new(
        left,
        top,
        width,
        height,
    )
}

pub struct DeepSortConfig {
    pub reid_model_path: String,
    pub max_cosine_distance: f32,
    pub nn_budget: Option<usize>,
    pub max_age: i32,
    pub max_iou_distance: f32,
    pub n_init: i32,
}

impl DeepSortConfig {
    pub fn new(reid_model_path: String) -> Self {
        Self {
            reid_model_path,
            max_cosine_distance: 0.2,
            nn_budget: Some(100),
            max_age: 70,
            max_iou_distance: 0.7,
            n_init: 3
        }
    }
}

pub struct DeepSort {
    device: onnx_model::OnnxInferenceDevice,
    encoder: ImageEncoder,
    sample_tracker: Tracker<NearestNeighborDistanceMetric>,
    trackers: HashMap<String, Tracker<NearestNeighborDistanceMetric>>,
}

impl DeepSort {
    pub fn new(config: DeepSortConfig) -> Result<Self, Error> {
        let metric = NearestNeighborDistanceMetric::new(
            NearestNeighborMetricKind::CosineDistance,
            config.max_cosine_distance,
            config.nn_budget
        );

        let device = onnx_model::get_cuda_if_available();

        Ok(Self {
            device,
            sample_tracker: Tracker::new(
                metric,
                config.max_iou_distance,
                config.max_age,
                config.n_init
            ),
            encoder: ImageEncoder::new(&config.reid_model_path, device)?,
            trackers: HashMap::<String, Tracker<NearestNeighborDistanceMetric>>::new(),
        })
    }

    #[inline]
    pub fn tracks(&self, src_url: String) -> &[sort::Track] {
        self.trackers.get(&src_url)
            .unwrap()
            .tracks()
    }

    pub fn update(&mut self, frames: &[Mat], dets: &[Vec<detector::Detection>], src_url: String) -> Result<(), Error> {
        let total_count = dets.iter().map(|i| i.len()).sum::<usize>();

        let tracker = self.trackers.entry(src_url.clone()).or_insert(self.sample_tracker.clone());

        if total_count == 0 {
            return Ok(());
        }

        let input_shape = self.encoder.input_shape();
        let frag_channs = input_shape[0] as usize;
        let frag_height = input_shape[1] as usize;
        let frag_width = input_shape[2] as usize;

        let mut idets = unsafe { Array4::uninitialized([total_count, frag_channs, frag_height, frag_width]) };
        let mut index = 0usize;
        for (frame, dets) in frames.iter().zip(dets.iter()) {
            for det in dets {
                let rect = get_detection_cv_rect(&det, frame.cols(), frame.rows());
                let roi = Mat::roi(&frame, rect).unwrap();
                let blob = dnn::blob_from_image(
                    &roi,
                    1.0 / 255.0,
                    core::Size::new(frag_width as i32, frag_height as i32),
                    core::Scalar::new(0., 0., 0., 0.),
                    true,
                    false,
                    core::CV_32F).unwrap();

                let core = blob.try_into_typed::<f32>().unwrap();
                let data: &[f32] = core.data_typed().unwrap();
                let a = aview1(data).into_shape((frag_channs, frag_height, frag_width)).unwrap();
                idets.index_axis_mut(Axis(0), index).assign(&a);
                index += 1;
            }
        }

        let features = self.encoder.encode_batch(idets.view())?;
        // let features = Array2::zeros((dets.len(), 512));
        let mut detections = vec![];

        // Feature rows are stacked across all frames in detection order, so keep
        // consuming them in order instead of restarting at row 0 for every frame.
        let mut feature_rows = features.axis_iter(Axis(0));

        for dets in dets.iter() {
            detections.clear();
            for (feature, det) in (&mut feature_rows).zip(dets.iter()) {
                let x = sort::Detection {
                    bbox: sort::BBox::ltwh(
                        det.xmin as _,
                        det.ymin as _,
                        (det.xmax - det.xmin) as _,
                        (det.ymax - det.ymin) as _,
                    ),
                    confidence: det.confidence,
                    feature: Some(feature.into_owned())
                };

                detections.push(x);
            }

            tracker.predict();
            tracker.update(detections.as_slice());
        }

        Ok(())
    }
}
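A hedged usage sketch, not part of the commit: how DeepSort::update and DeepSort::tracks compose per the signatures above. The ReID model path and the source-URL key are illustrative; one tracker is kept per source URL, so several streams can share a single DeepSort instance.

use deep_sort::{DeepSort, DeepSortConfig};
use grant_object_detector as detector;
use opencv::core::Mat;

fn update_example(frames: &[Mat], dets: &[Vec<detector::Detection>]) -> Result<(), deep_sort::error::Error> {
    // Hypothetical ReID model path; the defaults from DeepSortConfig::new are kept.
    let mut deep_sort = DeepSort::new(DeepSortConfig::new("models/reid.onnx".to_string()))?;

    // Associate this batch of frames/detections with one video source.
    deep_sort.update(frames, dets, "camera-0".to_string())?;

    for track in deep_sort.tracks("camera-0".to_string()).iter().filter(|t| t.is_confirmed()) {
        println!("[{}] {:?}", track.track_id, track.bbox().as_ltwh());
    }
    Ok(())
}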
src/sort/detection.rs (new file, 17 lines)
@@ -0,0 +1,17 @@
use ndarray::prelude::*;
use crate::sort::{BBox, Ltwh};

///
/// Represents a bounding box detection in a single frame.
///
/// Fields
///
/// bbox : BBox<Ltwh> - Bounding box in `(x, y, w, h)` format.
/// confidence : f32 - Detector confidence score.
/// feature : Option<Array1<f32>> - A feature vector that describes the object contained in this box.
///
#[derive(Debug, Clone)]
pub struct Detection {
    pub bbox: BBox<Ltwh>,
    pub confidence: f32,
    pub feature: Option<Array1<f32>>
}
src/sort/iou_matching.rs (new file, 89 lines)
@@ -0,0 +1,89 @@
use ndarray::prelude::*;

use crate::sort::linear_assignment::INFTY_COST;
use crate::sort::{Track, Detection, BBox, Ltwh};

/// Compute intersection over union.
/// Parameters
/// ----------
/// bbox : BBox<Ltwh>
///     A bounding box in format `(top left x, top left y, width, height)`.
/// candidates : &[BBox<Ltwh>]
///     Candidate bounding boxes, in the same format as `bbox`.
/// Returns
/// -------
/// Array1<f32>
///     The intersection over union in [0, 1] between the `bbox` and each
///     candidate. A higher score means a larger fraction of the `bbox` is
///     occluded by the candidate.
pub fn iou(bbox: &BBox<Ltwh>, candidates: &[BBox<Ltwh>]) -> Array1<f32> {
    let bbox_area = bbox.width() * bbox.height();

    candidates
        .iter()
        .map(|c_ltwh| {
            let b1 = bbox.as_ltrb();
            let b2 = c_ltwh.as_ltrb();

            let i_xmin = b1.left().max(b2.left());
            let i_ymin = b1.top().max(b2.top());

            let i_xmax = b1.right().min(b2.right());
            let i_ymax = b1.bottom().min(b2.bottom());

            let intersection_area = (i_xmax - i_xmin).max(0.0) * (i_ymax - i_ymin).max(0.0);
            let candidate_area = c_ltwh.width() * c_ltwh.height();

            intersection_area / (bbox_area + candidate_area - intersection_area)
        })
        .collect()
}

///
/// An intersection over union distance metric.
/// Parameters
/// ----------
/// tracks : &[Track]
///     A list of tracks.
/// detections : &[Detection]
///     A list of detections.
/// track_indices : &[usize]
///     Indices of the tracks that should be matched.
/// detection_indices : &[usize]
///     Indices of the detections that should be matched.
/// Returns
/// -------
/// Array2<f32>
///     A square cost matrix of size max(len(track_indices), len(detection_indices)),
///     where entry (i, j) is
///     `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`
///     for valid pairs and 1 otherwise.
///
pub fn iou_cost(tracks: &[Track], detections: &[Detection], track_indices: &[usize], detection_indices: &[usize]) -> Array2<f32> {
    let track_n = track_indices.len();
    let det_n = detection_indices.len();
    let n = track_n.max(det_n);

    let mut cost_matrix = Array2::from_elem((n, n), 1.0);

    for (row, &track_idx) in track_indices.iter().enumerate() {
        let track = &tracks[track_idx];

        if track.time_since_update > 1 {
            continue;
        }

        let bbox = track.bbox().as_ltwh();
        let candidates: Vec<_> = detection_indices
            .iter()
            .map(|&i| detections[i].bbox.clone())
            .collect();

        cost_matrix.slice_mut(s![row, ..det_n]).assign(&(1.0 - iou(&bbox, &candidates)));
    }

    cost_matrix
}
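For reference (not part of the file above), the quantity iou() computes and iou_cost() turns into an association cost is the standard intersection over union, written here in the LTRB convention produced by as_ltrb():

\[
\mathrm{IoU}(b, c) = \frac{A_{\cap}}{A_b + A_c - A_{\cap}}, \qquad
A_{\cap} = \max\!\big(0,\ \min(r_b, r_c) - \max(l_b, l_c)\big)\cdot
\max\!\big(0,\ \min(\mathrm{bot}_b, \mathrm{bot}_c) - \max(t_b, t_c)\big),
\]

and each cost-matrix entry is \(1 - \mathrm{IoU}\); rows belonging to tracks not updated within the last frame are left at the maximum cost of 1.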
src/sort/kalman_filter.rs (new file, 318 lines)
@@ -0,0 +1,318 @@
|
||||
use crate::sort::{BBox, Xyah};
|
||||
use ndarray::prelude::*;
|
||||
use ndarray_linalg::cholesky::*;
|
||||
use ndarray_linalg::triangular::*;
|
||||
|
||||
///
|
||||
/// Table for the 0.95 quantile of the chi-square distribution with N degrees of
|
||||
/// freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
|
||||
/// function and used as Mahalanobis gating threshold.
|
||||
///
|
||||
pub const CHI_2_INV_95: [f32; 9] = [
|
||||
3.8415, // 1
|
||||
5.9915, // 2
|
||||
7.8147, // 3
|
||||
9.4877, // 4
|
||||
11.070, // 5
|
||||
12.592, // 6
|
||||
14.067, // 7
|
||||
15.507, // 8
|
||||
16.919, // 9
|
||||
];
|
||||
|
||||
/// A simple Kalman filter for tracking bounding boxes in image space.
|
||||
///
|
||||
/// The 8-dimensional state space
|
||||
///
|
||||
/// x, y, a, h, vx, vy, va, vh
|
||||
///
|
||||
/// contains the bounding box center position (x, y), aspect ratio a, height h,
|
||||
/// and their respective velocities.
|
||||
///
|
||||
/// Object motion follows a constant velocity model. The bounding box location
|
||||
/// (x, y, a, h) is taken as direct observation of the state space (linear
|
||||
/// observation model).
|
||||
///
|
||||
/// update_mat: Array2<f32> of shape (4, 8)
|
||||
///
|
||||
#[derive(Clone)]
|
||||
pub struct KalmanFilter {
|
||||
ndim: usize,
|
||||
dt: f32,
|
||||
motion_mat: Array2<f32>,
|
||||
update_mat: Array2<f32>,
|
||||
std_weight_position: f32,
|
||||
std_weight_velocity: f32,
|
||||
}
|
||||
|
||||
impl Default for KalmanFilter {
|
||||
fn default() -> Self {
|
||||
let (ndim, dt) = (4, 1.);
|
||||
|
||||
// Create Kalman filter model matrices.
|
||||
let mut motion_mat = Array2::eye(2 * ndim);
|
||||
|
||||
for i in 0..ndim {
|
||||
motion_mat[(i, ndim + i)] = dt;
|
||||
}
|
||||
|
||||
let mut update_mat = Array2::zeros((ndim, 2 * ndim));
|
||||
|
||||
for i in 0..ndim {
|
||||
update_mat[(i, i)] = 1.0;
|
||||
}
|
||||
|
||||
// Motion and observation uncertainty are chosen relative to the current
|
||||
// state estimate. These weights control the amount of uncertainty in
|
||||
// the model. This is a bit hacky.
|
||||
let std_weight_position = 1.0 / 20.0;
|
||||
let std_weight_velocity = 1.0 / 160.0;
|
||||
|
||||
Self {
|
||||
ndim,
|
||||
dt,
|
||||
motion_mat,
|
||||
update_mat,
|
||||
std_weight_position,
|
||||
std_weight_velocity,
|
||||
}
|
||||
}
|
||||
}
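
// Shape sketch (derivable from the Default impl above): with ndim = 4 and dt = 1,
//
//   motion_mat = [ I4  I4 ]        update_mat = [ I4  0 ]
//                [ 0   I4 ]
//
// i.e. a constant-velocity transition that adds each velocity onto its position component
// once per step, and an observation model that reads back (x, y, a, h).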
|
||||
|
||||
impl KalmanFilter {
|
||||
///
|
||||
/// Create track from unassociated measurement.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// measurement : ndarray
|
||||
/// Bounding box coordinates (x, y, a, h) with center position (x, y),
|
||||
/// aspect ratio a, and height h.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// (ndarray, ndarray)
|
||||
/// Returns the mean vector (8 dimensional) and covariance matrix (8x8
|
||||
/// dimensional) of the new track. Unobserved velocities are initialized
|
||||
/// to 0 mean.
|
||||
///
|
||||
pub fn initiate(&self, measurement: BBox<Xyah>) -> (Array1<f32>, Array2<f32>) {
|
||||
let mut mean = Array1::zeros((8,));
|
||||
mean.slice_mut(s![..4]).assign(&measurement.as_view());
|
||||
|
||||
let std = arr1(&[
|
||||
2.0 * self.std_weight_position * measurement.height(),
|
||||
2.0 * self.std_weight_position * measurement.height(),
|
||||
1.0e-2,
|
||||
2.0 * self.std_weight_position * measurement.height(),
|
||||
10.0 * self.std_weight_velocity * measurement.height(),
|
||||
10.0 * self.std_weight_velocity * measurement.height(),
|
||||
1.0e-5,
|
||||
10.0 * self.std_weight_velocity * measurement.height(),
|
||||
]);
|
||||
|
||||
let covariance = Array2::from_diag(&(&std * &std));
|
||||
|
||||
(mean, covariance)
|
||||
}
|
||||
|
||||
/// Run Kalman filter prediction step.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// mean : ndarray
|
||||
/// The 8 dimensional mean vector of the object state at the previous
|
||||
/// time step.
|
||||
/// covariance : ndarray
|
||||
/// The 8x8 dimensional covariance matrix of the object state at the
|
||||
/// previous time step.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// (ndarray, ndarray)
|
||||
/// Returns the mean vector and covariance matrix of the predicted
|
||||
/// state. Unobserved velocities are initialized to 0 mean.
|
||||
///
|
||||
pub fn predict(&self, mean: ArrayView1<'_, f32>, covariance: ArrayView2<'_, f32>) -> (Array1<f32>, Array2<f32>) {
|
||||
let std = arr1(&[
|
||||
// position
|
||||
self.std_weight_position * mean[3],
|
||||
self.std_weight_position * mean[3],
|
||||
1e-2,
|
||||
self.std_weight_position * mean[3],
|
||||
|
||||
// velocity
|
||||
self.std_weight_velocity * mean[3],
|
||||
self.std_weight_velocity * mean[3],
|
||||
1e-5,
|
||||
self.std_weight_velocity * mean[3],
|
||||
]);
|
||||
|
||||
let motion_cov = Array2::from_diag(&(&std * &std));
|
||||
let mean = self.motion_mat.dot(&mean);
|
||||
|
||||
let covariance = self.motion_mat.dot(&covariance).dot(&self.motion_mat.t());
|
||||
|
||||
(mean, covariance + motion_cov)
|
||||
}
|
||||
|
||||
// Project state distribution to measurement space.
|
||||
//
|
||||
// Parameters
|
||||
// ----------
|
||||
// mean : ndarray
|
||||
// The state's mean vector (8 dimensional array).
|
||||
// covariance : ndarray
|
||||
// The state's covariance matrix (8x8 dimensional).
|
||||
//
|
||||
// Returns
|
||||
// -------
|
||||
// (ndarray, ndarray)
|
||||
// Returns the projected mean and covariance matrix of the given state
|
||||
// estimate.
|
||||
//
|
||||
fn project(&self, mean: ArrayView1<'_, f32>, covariance: ArrayView2<'_, f32>) -> (Array1<f32>, Array2<f32>) {
|
||||
let std = arr1(&[
|
||||
self.std_weight_position * mean[3],
|
||||
self.std_weight_position * mean[3],
|
||||
1e-1,
|
||||
self.std_weight_position * mean[3],
|
||||
]);
|
||||
|
||||
let innovation_cov = Array2::from_diag(&(&std * &std));
|
||||
let mean = self.update_mat.dot(&mean);
|
||||
let covariance = self.update_mat.dot(&covariance).dot(&self.update_mat.t());
|
||||
|
||||
(mean, covariance + innovation_cov)
|
||||
}
|
||||
|
||||
/// Run Kalman filter correction step.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// mean : ndarray
|
||||
/// The predicted state's mean vector (8 dimensional).
|
||||
/// covariance : ndarray
|
||||
/// The state's covariance matrix (8x8 dimensional).
|
||||
/// measurement : ndarray
|
||||
/// The 4 dimensional measurement vector (x, y, a, h), where (x, y)
|
||||
/// is the center position, a the aspect ratio, and h the height of the
|
||||
/// bounding box.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// (ndarray, ndarray)
|
||||
/// Returns the measurement-corrected state distribution.
|
||||
///
|
||||
pub fn update(
|
||||
&self,
|
||||
mean: ArrayView1<'_, f32>,
|
||||
covariance: ArrayView2<'_, f32>,
|
||||
measurement: ArrayView1<'_, f32>,
|
||||
) -> (Array1<f32>, Array2<f32>) {
|
||||
let (projected_mean, projected_cov) = self.project(mean, covariance);
|
||||
|
||||
// chol shape (4, 4)
|
||||
let chol = projected_cov.factorizec(UPLO::Lower).unwrap();
|
||||
|
||||
// (8, 4)
|
||||
let mut kalman_gain = covariance.dot(&self.update_mat.t());
|
||||
|
||||
for mut axis in kalman_gain.axis_iter_mut(Axis(0)) {
|
||||
// axis shape (4, )
|
||||
chol.solvec_inplace(&mut axis).unwrap();
|
||||
}
|
||||
|
||||
let innovation = &measurement - &projected_mean;
|
||||
let new_mean = &mean + &innovation.dot(&kalman_gain.t());
|
||||
let new_covariance = &covariance - &kalman_gain.dot(&projected_cov).dot(&kalman_gain.t());
|
||||
|
||||
(new_mean, new_covariance)
|
||||
}
|
||||
|
||||
/// Compute gating distance between state distribution and measurements.
|
||||
///
|
||||
/// A suitable distance threshold can be obtained from `CHI_2_INV_95`. If
|
||||
/// `only_position` is False, the chi-square distribution has 4 degrees of
|
||||
/// freedom, otherwise 2.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// mean : ndarray
|
||||
/// Mean vector over the state distribution (8 dimensional).
|
||||
/// covariance : ndarray
|
||||
/// Covariance of the state distribution (8x8 dimensional).
|
||||
/// measurements : ndarray
|
||||
/// An Nx4 dimensional matrix of N measurements, each in
|
||||
/// format (x, y, a, h) where (x, y) is the bounding box center
|
||||
/// position, a the aspect ratio, and h the height.
|
||||
/// only_position : Optional[bool]
|
||||
/// If True, distance computation is done with respect to the bounding
|
||||
/// box center position only.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// Returns an array of length N, where the i-th element contains the
|
||||
/// squared Mahalanobis distance between (mean, covariance) and
|
||||
/// `measurements[i]`.
|
||||
///
|
||||
pub fn gating_distance(
|
||||
&self,
|
||||
mean: ArrayView1<'_, f32>,
|
||||
covariance: ArrayView2<'_, f32>,
|
||||
measurements: ArrayView2<'_, f32>,
|
||||
only_position: bool,
|
||||
) -> Array1<f32> {
|
||||
let (mean, covariance) = self.project(mean, covariance);
|
||||
|
||||
let (mean, covariance, measurements) = if only_position {
|
||||
(
|
||||
mean.slice(s!(..2)),
|
||||
covariance.slice(s!(..2, ..2)),
|
||||
measurements.slice(s!(.., ..2)),
|
||||
)
|
||||
} else {
|
||||
(mean.view(), covariance.view(), measurements.view())
|
||||
};
|
||||
|
||||
let d = &measurements - &mean;
|
||||
|
||||
let cholesky_lower = covariance.cholesky(UPLO::Lower).unwrap();
|
||||
let z = cholesky_lower
|
||||
.solve_triangular_into(UPLO::Lower, Diag::NonUnit, d.reversed_axes())
|
||||
.unwrap();
|
||||
|
||||
(&z * &z).sum_axis(Axis(0))
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_kalman() {
|
||||
let mut kl = KalmanFilter::default();
|
||||
let (m, c) = kl.initiate(BBox::xyah(128.0, 128.0, 0.5, 64.0));
|
||||
|
||||
// Reference values for `predict` applied to the freshly initiated state (see the
// commented println below): the mean is unchanged and the position/velocity variances
// become coupled.
//
//   mean = [128, 128, 0.5, 64, 0, 0, 0, 0]
//
//   covariance =
//     [[67.2,  0,      0,      0,     16,     0,      0,  0    ],
//      [ 0,    67.2,   0,      0,      0,    16,      0,  0    ],
//      [ 0,    0,      0.0002, 0,      0,     0,      0,  0    ],
//      [ 0,    0,      0,     67.2,    0,     0,      0, 16    ],
//      [16,    0,      0,      0,     16.16,  0,      0,  0    ],
//      [ 0,   16,      0,      0,      0,    16.16,   0,  0    ],
//      [ 0,    0,      0,      0,      0,     0,      0,  0    ],
//      [ 0,    0,      0,      0,      0,     0,      0, 16.16 ]]
//
// println!("{:?}", kl.predict(m.view(), c.view()));
|
||||
|
||||
let (m, c) = kl.update(m.view(),c.view(),aview1(&[192.0, 192.0, 0.5, 68.0]));
|
||||
|
||||
println!("{:?}", kl.gating_distance(m.view(), c.view(), aview2(&[[256.0, 256.0, 0.5, 80.0]]), false));
|
||||
|
||||
}
|
269
src/sort/linear_assignment.rs
Normal file
@ -0,0 +1,269 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use ndarray::prelude::*;
|
||||
use crate::sort::{Track, Detection, KalmanFilter};
|
||||
|
||||
pub const INFTY_COST: f32 = 1e+5;
|
||||
|
||||
|
||||
/// Solve linear assignment problem.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
|
||||
/// The distance metric is given a list of tracks and detections as well as
|
||||
/// a list of N track indices and M detection indices. The metric should
|
||||
/// return the NxM dimensional cost matrix, where element (i, j) is the
|
||||
/// association cost between the i-th track in the given track indices and
|
||||
/// the j-th detection in the given detection_indices.
|
||||
/// max_distance : float
|
||||
/// Gating threshold. Associations with cost larger than this value are
|
||||
/// disregarded.
|
||||
/// tracks : List[track.Track]
|
||||
/// A list of predicted tracks at the current time step.
|
||||
/// detections : List[detection.Detection]
|
||||
/// A list of detections at the current time step.
|
||||
/// track_indices : List[int]
|
||||
/// List of track indices that maps rows in `cost_matrix` to tracks in
|
||||
/// `tracks` (see description above).
|
||||
/// detection_indices : List[int]
|
||||
/// List of detection indices that maps columns in `cost_matrix` to
|
||||
/// detections in `detections` (see description above).
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// (List[(int, int)], List[int], List[int])
|
||||
/// Returns a tuple with the following three entries:
|
||||
/// * A list of matched track and detection indices.
|
||||
/// * A list of unmatched track indices.
|
||||
/// * A list of unmatched detection indices.
|
||||
///
|
||||
pub fn min_cost_matching<D: Fn(&[Track], &[Detection], &[usize], &[usize]) -> Array2<f32>>(
|
||||
distance_metric: &D,
|
||||
max_distance: f32,
|
||||
tracks: &[Track],
|
||||
detections: &[Detection],
|
||||
track_indices: Option<Vec<usize>>,
|
||||
detection_indices: Option<Vec<usize>>
|
||||
) -> (Vec<(usize, usize)>, Vec<usize>, Vec<usize>) {
|
||||
let track_indices = track_indices.unwrap_or_else(||(0..tracks.len()).collect());
|
||||
let detection_indices = detection_indices.unwrap_or_else(||(0..detections.len()).collect());
|
||||
|
||||
if detection_indices.is_empty() || track_indices.is_empty() {
|
||||
return (vec![], track_indices, detection_indices); // Nothing to match.
|
||||
}
|
||||
|
||||
let mut cost_matrix = distance_metric(tracks, detections, &track_indices, &detection_indices);
|
||||
cost_matrix.mapv_inplace(|x| if x > max_distance { max_distance + 1.0e-5 } else { x });
|
||||
|
||||
let mut weights = munkres::WeightMatrix::from_row_vec(cost_matrix.nrows(), cost_matrix.iter().copied().collect());
|
||||
let indices = munkres::solve_assignment(&mut weights).unwrap();
|
||||
|
||||
let (mut matches, mut unmatched_tracks, mut unmatched_detections) = (vec![], vec![], vec![]);
|
||||
|
||||
for (idx, &detection_idx) in detection_indices.iter().enumerate() {
|
||||
let mut present = false;
|
||||
|
||||
for pos in indices.iter() {
|
||||
if idx == pos.column && pos.row < track_indices.len() {
|
||||
present = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !present {
|
||||
unmatched_detections.push(detection_idx);
|
||||
}
|
||||
}
|
||||
|
||||
for (idx, &track_idx) in track_indices.iter().enumerate() {
|
||||
let mut present = false;
|
||||
|
||||
for pos in indices.iter() {
|
||||
if idx == pos.row && pos.column < detection_indices.len() {
|
||||
present = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !present {
|
||||
unmatched_tracks.push(track_idx);
|
||||
}
|
||||
}
|
||||
|
||||
for pos in indices.into_iter() {
|
||||
if pos.row < track_indices.len() &&
|
||||
pos.column < detection_indices.len() {
|
||||
|
||||
let track_idx = track_indices[pos.row];
|
||||
let detection_idx = detection_indices[pos.column];
|
||||
|
||||
if cost_matrix[(pos.row, pos.column)] > max_distance {
|
||||
unmatched_tracks.push(track_idx);
|
||||
unmatched_detections.push(detection_idx);
|
||||
} else {
|
||||
matches.push((track_idx, detection_idx))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(matches, unmatched_tracks, unmatched_detections)
|
||||
}
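
// Design note (reflects the code above and the metrics that feed it): the distance metrics
// in this crate return a square n x n matrix with n = max(#tracks, #detections), padded
// with a maximal cost, so the Munkres solver always sees a square problem. Assignments
// that land in a padded row/column, or whose cost exceeds `max_distance`, are reported
// back as unmatched rather than as matches.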
|
||||
|
||||
/// Run matching cascade.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
|
||||
/// The distance metric is given a list of tracks and detections as well as
|
||||
/// a list of N track indices and M detection indices. The metric should
|
||||
/// return the NxM dimensional cost matrix, where element (i, j) is the
|
||||
/// association cost between the i-th track in the given track indices and
|
||||
/// the j-th detection in the given detection indices.
|
||||
/// max_distance : float
|
||||
/// Gating threshold. Associations with cost larger than this value are
|
||||
/// disregarded.
|
||||
/// cascade_depth: int
|
||||
/// The cascade depth; this should be set to the maximum track age.
|
||||
/// tracks : List[track.Track]
|
||||
/// A list of predicted tracks at the current time step.
|
||||
/// detections : List[detection.Detection]
|
||||
/// A list of detections at the current time step.
|
||||
/// track_indices : Optional[List[int]]
|
||||
/// List of track indices that maps rows in `cost_matrix` to tracks in
|
||||
/// `tracks` (see description above). Defaults to all tracks.
|
||||
/// detection_indices : Optional[List[int]]
|
||||
/// List of detection indices that maps columns in `cost_matrix` to
|
||||
/// detections in `detections` (see description above). Defaults to all
|
||||
/// detections.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// (List[(int, int)], List[int], List[int])
|
||||
/// Returns a tuple with the following three entries:
|
||||
/// * A list of matched track and detection indices.
|
||||
/// * A list of unmatched track indices.
|
||||
/// * A list of unmatched detection indices.
|
||||
///
|
||||
pub fn matching_cascade<D: Fn(&[Track], &[Detection], &[usize], &[usize]) -> Array2<f32>>(
|
||||
distance_metric: &D,
|
||||
max_distance: f32,
|
||||
cascade_depth: i32,
|
||||
tracks: &[Track],
|
||||
detections: &[Detection],
|
||||
track_indices: Option<Vec<usize>>,
|
||||
detection_indices: Option<Vec<usize>>) -> (Vec<(usize, usize)>, Vec<usize>, Vec<usize>)
|
||||
{
|
||||
let track_indices = track_indices.unwrap_or_else(||(0..tracks.len()).collect());
|
||||
let detection_indices = detection_indices.unwrap_or_else(||(0..detections.len()).collect());
|
||||
let mut unmatched_detections = detection_indices.clone();
|
||||
let mut matches = vec![];
|
||||
|
||||
for level in 0..cascade_depth {
|
||||
if unmatched_detections.is_empty() { // No detections left
|
||||
break;
|
||||
}
|
||||
|
||||
let track_indices_l: Vec<_> = track_indices
|
||||
.iter()
|
||||
.copied()
|
||||
.filter(|&idx|tracks[idx].time_since_update == 1 + level)
|
||||
.collect();
|
||||
|
||||
if track_indices_l.is_empty() { // Nothing to match at this level
|
||||
continue;
|
||||
}
|
||||
|
||||
let (mut matches_l, _, unmatched_detections_new) = min_cost_matching(
|
||||
distance_metric, max_distance, tracks, detections,
|
||||
Some(track_indices_l), Some(unmatched_detections.clone()));
|
||||
|
||||
unmatched_detections = unmatched_detections_new;
|
||||
|
||||
matches.append(&mut matches_l);
|
||||
}
|
||||
|
||||
let track_indices_set: HashSet<_> = track_indices.into_iter().collect();
|
||||
let matches_track_indices_set: HashSet<_> = matches.iter().map(|&(k, _)|k).collect();
|
||||
|
||||
let unmatched_tracks: Vec<usize> = track_indices_set.difference(&matches_track_indices_set)
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
(matches, unmatched_tracks, unmatched_detections)
|
||||
}
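
// Cascade note: level 0 only considers tracks with time_since_update == 1 (seen in the
// previous frame), level 1 those missed for one frame, and so on up to `cascade_depth`.
// Recently updated tracks therefore get first claim on the detections, and whatever is
// still unmatched trickles down to older tracks.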
|
||||
|
||||
/// Invalidate infeasible entries in cost matrix based on the state distributions obtained by Kalman filtering.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// kf : The Kalman filter.
|
||||
/// cost_matrix : ndarray
|
||||
/// The NxM dimensional cost matrix, where N is the number of track indices
|
||||
/// and M is the number of detection indices, such that entry (i, j) is the
|
||||
/// association cost between `tracks[track_indices[i]]` and
|
||||
/// `detections[detection_indices[j]]`.
|
||||
/// tracks : List[track.Track]
|
||||
/// A list of predicted tracks at the current time step.
|
||||
/// detections : List[detection.Detection]
|
||||
/// A list of detections at the current time step.
|
||||
/// track_indices : List[int]
|
||||
/// List of track indices that maps rows in `cost_matrix` to tracks in
|
||||
/// `tracks` (see description above).
|
||||
/// detection_indices : List[int]
|
||||
/// List of detection indices that maps columns in `cost_matrix` to
|
||||
/// detections in `detections` (see description above).
|
||||
/// gated_cost : Optional[float]
|
||||
/// Entries in the cost matrix corresponding to infeasible associations are
|
||||
/// set this value. Defaults to a very large value.
|
||||
/// only_position : Optional[bool]
|
||||
/// If True, only the x, y position of the state distribution is considered
|
||||
/// during gating. Defaults to False.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// Returns the modified cost matrix.
|
||||
///
|
||||
pub fn gate_cost_matrix(
|
||||
kf: &KalmanFilter,
|
||||
mut cost_matrix: ArrayViewMut2<'_, f32>,
|
||||
tracks: &[Track],
|
||||
detections: &[Detection],
|
||||
track_indices: &[usize],
|
||||
detection_indices: &[usize],
|
||||
gated_cost: Option<f32>,
|
||||
only_position: Option<bool>)
|
||||
{
|
||||
let gated_cost = gated_cost.unwrap_or(INFTY_COST);
|
||||
let only_position = only_position.unwrap_or(false);
|
||||
let gating_dim = if only_position { 1 } else { 3 }; // indices of the 2- and 4-dof thresholds respectively
|
||||
let gating_threshold = crate::sort::kalman_filter::CHI_2_INV_95[gating_dim];
|
||||
|
||||
let mut measurements: Array2<f32> = unsafe { Array2::uninitialized((detection_indices.len(), 4)) };
|
||||
|
||||
for (mut row, &idx) in measurements.axis_iter_mut(Axis(0)).zip(detection_indices.iter()) {
|
||||
let bbox = &detections[idx].bbox.as_xyah();
|
||||
|
||||
row.assign(&bbox.as_view());
|
||||
}
|
||||
|
||||
for (row, &track_idx) in track_indices.iter().enumerate() {
|
||||
let track = &tracks[track_idx];
|
||||
let gating_distance = kf.gating_distance(
|
||||
track.mean(), track.covariance(), measurements.view(), only_position);
|
||||
|
||||
let mut axis = cost_matrix
|
||||
.index_axis_mut(Axis(0), row);
|
||||
|
||||
for (idx, val) in axis.indexed_iter_mut() {
|
||||
if idx >= gating_distance.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
if gating_distance[idx] > gating_threshold {
|
||||
*val = gated_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
246
src/sort/mod.rs
Normal file
@ -0,0 +1,246 @@
|
||||
pub mod detection;
|
||||
pub mod iou_matching;
|
||||
pub mod kalman_filter;
|
||||
pub mod nn_matching;
|
||||
pub mod linear_assignment;
|
||||
pub mod tracker;
|
||||
pub mod track;
|
||||
|
||||
pub use detection::Detection;
|
||||
pub use kalman_filter::KalmanFilter;
|
||||
pub use tracker::Tracker;
|
||||
pub use track::{Track, TrackState};
|
||||
pub use nn_matching::*;
|
||||
|
||||
use core::marker::PhantomData;
|
||||
use ndarray::prelude::*;
|
||||
|
||||
pub trait BBoxFormat: std::fmt::Debug {}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct Ltwh;
|
||||
impl BBoxFormat for Ltwh {}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct Xyah;
|
||||
impl BBoxFormat for Xyah {}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct Ltrb;
|
||||
impl BBoxFormat for Ltrb {}
|
||||
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BBox<F: BBoxFormat>([f32; 4], PhantomData<F>);
|
||||
impl<F: BBoxFormat> BBox<F> {
|
||||
#[inline]
|
||||
pub fn as_view(&self) -> ArrayView1<'_, f32> {
|
||||
aview1(&self.0)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn xyah(x1: f32, x2: f32, x3: f32, x4: f32) -> Self {
|
||||
BBox(
|
||||
[x1, x2, x3, x4],
|
||||
Default::default(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl BBox<Ltwh> {
|
||||
#[inline(always)]
|
||||
pub fn left(&self) -> f32 {
|
||||
self.0[0]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn top(&self) -> f32 {
|
||||
self.0[1]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn width(&self) -> f32 {
|
||||
self.0[2]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn height(&self) -> f32 {
|
||||
self.0[3]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_xyah(&self) -> BBox<Xyah> {
|
||||
self.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_ltrb(&self) -> BBox<Ltrb> {
|
||||
self.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn ltwh(x1: f32, x2: f32, x3: f32, x4: f32) -> Self {
|
||||
BBox(
|
||||
[x1, x2, x3, x4],
|
||||
Default::default(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl BBox<Ltrb> {
|
||||
#[inline]
|
||||
pub fn ltrb(x1: f32, x2: f32, x3: f32, x4: f32) -> Self {
|
||||
BBox(
|
||||
[x1, x2, x3, x4],
|
||||
Default::default(),
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_ltwh(&self) -> BBox<Ltwh> {
|
||||
self.into()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn left(&self) -> f32 {
|
||||
self.0[0]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn top(&self) -> f32 {
|
||||
self.0[1]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn right(&self) -> f32 {
|
||||
self.0[2]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn bottom(&self) -> f32 {
|
||||
self.0[3]
|
||||
}
|
||||
}
|
||||
|
||||
impl BBox<Xyah> {
|
||||
#[inline(always)]
|
||||
pub fn as_ltrb(&self) -> BBox<Ltrb> {
|
||||
self.into()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn as_ltwh(&self) -> BBox<Ltwh> {
|
||||
self.into()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn cx(&self) -> f32 {
|
||||
self.0[0]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn cy(&self) -> f32 {
|
||||
self.0[1]
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn height(&self) -> f32 {
|
||||
self.0[3]
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> From<&'a BBox<Ltwh>> for BBox<Xyah> {
|
||||
#[inline]
|
||||
fn from(v: &'a BBox<Ltwh>) -> Self {
|
||||
Self([
|
||||
v.0[0] + v.0[2] / 2.0,
|
||||
v.0[1] + v.0[3] / 2.0,
|
||||
v.0[2] / v.0[3],
|
||||
v.0[3],
|
||||
], Default::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> From<&'a BBox<Ltwh>> for BBox<Ltrb> {
|
||||
#[inline]
|
||||
fn from(v: &'a BBox<Ltwh>) -> Self {
|
||||
Self([
|
||||
v.0[0],
|
||||
v.0[1],
|
||||
v.0[2] + v.0[0],
|
||||
v.0[3] + v.0[1],
|
||||
], Default::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> From<&'a BBox<Ltrb>> for BBox<Ltwh> {
|
||||
#[inline]
|
||||
fn from(v: &'a BBox<Ltrb>) -> Self {
|
||||
Self([
|
||||
v.0[0],
|
||||
v.0[1],
|
||||
v.0[2] - v.0[0],
|
||||
v.0[3] - v.0[1],
|
||||
], Default::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> From<&'a BBox<Xyah>> for BBox<Ltwh> {
|
||||
#[inline]
|
||||
fn from(v: &'a BBox<Xyah>) -> Self {
|
||||
let height = v.0[3];
|
||||
let width = v.0[2] * height;
|
||||
|
||||
Self([
|
||||
v.0[0] - width / 2.0,
|
||||
v.0[1] - height / 2.0,
|
||||
width,
|
||||
height,
|
||||
], Default::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl <'a> From<&'a BBox<Xyah>> for BBox<Ltrb> {
|
||||
#[inline]
|
||||
fn from(v: &'a BBox<Xyah>) -> Self {
|
||||
(&v.as_ltwh()).into()
|
||||
}
|
||||
}
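
// A small test-style sketch (added for illustration, not in the original source) of the
// conversions above: an LTWH box converted to center/aspect/height form and back should
// reproduce the original coordinates.
#[test]
fn bbox_ltwh_xyah_roundtrip() {
    let ltwh = BBox::ltwh(10.0, 20.0, 30.0, 60.0);

    // center x = 10 + 30/2 = 25, center y = 20 + 60/2 = 50, aspect = 30/60 = 0.5, height = 60
    let xyah = ltwh.as_xyah();
    assert!((xyah.cx() - 25.0).abs() < 1e-6);
    assert!((xyah.cy() - 50.0).abs() < 1e-6);
    assert!((xyah.height() - 60.0).abs() < 1e-6);

    let back = xyah.as_ltwh();
    assert!((back.left() - 10.0).abs() < 1e-6);
    assert!((back.top() - 20.0).abs() < 1e-6);
    assert!((back.width() - 30.0).abs() < 1e-6);
    assert!((back.height() - 60.0).abs() < 1e-6);
}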
|
||||
|
||||
pub trait DistanceMetric {
|
||||
|
||||
/// Returns the matching threshold.
|
||||
///
|
||||
fn matching_threshold(&self) -> f32;
|
||||
|
||||
/// Update the distance metric with new data.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// features : ndarray
|
||||
/// An NxM matrix of N features of dimensionality M.
|
||||
/// targets : ndarray
|
||||
/// An integer array of associated target identities.
|
||||
/// active_targets : List[int]
|
||||
/// A list of targets that are currently present in the scene.
|
||||
///
|
||||
fn partial_fit(&mut self, features: Vec<Array1<f32>>, targets: Vec<i32>, active_targets: Vec<i32>);
|
||||
|
||||
/// Compute distance between features and targets.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// features : ndarray
|
||||
/// An NxM matrix of N features of dimensionality M.
|
||||
/// targets : List[int]
|
||||
/// A list of targets to match the given `features` against.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// Returns a cost matrix of shape len(targets), len(features), where
|
||||
/// element (i, j) contains the closest squared distance between
|
||||
/// `targets[i]` and `features[j]`.
|
||||
///
|
||||
fn distance(&self, features: ArrayView2<'_, f32>, targets: Vec<i32>) -> Array2<f32>;
|
||||
}
|
249
src/sort/nn_matching.rs
Normal file
@ -0,0 +1,249 @@
|
||||
use std::collections::{VecDeque, HashMap};
|
||||
use ndarray::prelude::*;
|
||||
use crate::sort::DistanceMetric;
|
||||
use crate::sort::linear_assignment::INFTY_COST;
|
||||
|
||||
// Compute pair-wise squared distance between points in `a` and `b`.
|
||||
//
|
||||
// Parameters
|
||||
// ----------
|
||||
// a : array_like
|
||||
// An NxM matrix of N samples of dimensionality M.
|
||||
// b : array_like
|
||||
// An LxM matrix of L samples of dimensionality M.
|
||||
//
|
||||
// Returns
|
||||
// -------
|
||||
// ndarray
|
||||
// Returns a matrix of size len(a), len(b) such that element (i, j)
|
||||
// contains the squared distance between `a[i]` and `b[j]`.
|
||||
//
|
||||
fn pdist(a: ArrayView2<'_, f32>, b: ArrayView2<'_, f32>) -> Array2<f32> {
|
||||
if a.is_empty() || b.is_empty() {
|
||||
return Array2::zeros((a.len(), b.len()));
|
||||
}
|
||||
|
||||
let (a2, b2) = (
|
||||
(&a * &a).sum_axis(Axis(1)).insert_axis(Axis(1)),
|
||||
(&b * &b).sum_axis(Axis(1)).insert_axis(Axis(0))
|
||||
);
|
||||
|
||||
let mut r2 = -2.0 * a.dot(&b.t()) + a2 + b2;
|
||||
|
||||
r2.mapv_inplace(|x| if x < 0.0 {
|
||||
0.0
|
||||
} else {
|
||||
x
|
||||
});
|
||||
|
||||
r2
|
||||
}
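
// The identity `pdist` relies on: for row vectors a_i and b_j,
//   ||a_i - b_j||^2 = ||a_i||^2 + ||b_j||^2 - 2 * (a_i . b_j),
// which is exactly what `-2.0 * a.dot(&b.t()) + a2 + b2` evaluates for every pair at once;
// the clamp at 0 only absorbs small negative values caused by floating point error.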
|
||||
|
||||
/// Compute pair-wise cosine distance between points in `a` and `b`.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// a : array_like
|
||||
/// An NxM matrix of N samples of dimensionality M.
|
||||
/// b : array_like
|
||||
/// An LxM matrix of L samples of dimensionality M.
|
||||
/// data_is_normalized : Optional[bool]
|
||||
/// If True, assumes rows in a and b are unit length vectors.
|
||||
/// Otherwise, a and b are explicitly normalized to length 1.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// Returns a matrix of size len(a), len(b) such that element (i, j)
/// contains the cosine distance between `a[i]` and `b[j]`.
|
||||
///
|
||||
fn cosine_distance(a: ArrayView2<'_, f32>, b: ArrayView2<'_, f32>, data_is_normalized: bool) -> Array2<f32> {
|
||||
if data_is_normalized {
|
||||
-a.dot(&b.t()) + 1.0
|
||||
} else {
|
||||
let length_a = a.map_axis(Axis(1), |x|x.fold(0.0, |a, x|a + x*x).sqrt());
|
||||
let length_b = b.map_axis(Axis(1), |x|x.fold(0.0, |a, x|a + x*x).sqrt());
|
||||
|
||||
let a = &a / &length_a.insert_axis(Axis(1));
|
||||
let b = &b / &length_b.insert_axis(Axis(1));
|
||||
|
||||
-a.dot(&b.t()) + 1.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn cosine_distance_test() {
|
||||
println!("{:?}", cosine_distance(aview2(&[[1.0f32,2.,3.,4.,5.,6.]]), aview2(&[[5.,6.,7.,8.,9.,10.]]), false));
|
||||
}
|
||||
|
||||
/// Helper function for nearest neighbor distance metric (Euclidean).
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// x : ndarray
|
||||
/// A matrix of N row-vectors (sample points).
|
||||
/// y : ndarray
|
||||
/// A matrix of M row-vectors (query points).
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// A vector of length M that contains for each entry in `y` the
|
||||
/// smallest Euclidean distance to a sample in `x`.
|
||||
///
|
||||
fn nn_euclidean_distance(x: ArrayView2<'_, f32>, y: ArrayView2<'_, f32>) -> Array1<f32> {
|
||||
let distances = pdist(x, y);
|
||||
|
||||
distances.map_axis(Axis(0), |view|view.fold(f32::MAX, |a, &x| if x < a { x } else { a }))
|
||||
.mapv_into(|x|x.max(0.0))
|
||||
}
|
||||
|
||||
/// Helper function for nearest neighbor distance metric (cosine).
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// x : ndarray
|
||||
/// A matrix of N row-vectors (sample points).
|
||||
/// y : ndarray
|
||||
/// A matrix of M row-vectors (query points).
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// A vector of length M that contains for each entry in `y` the
|
||||
/// smallest cosine distance to a sample in `x`.
|
||||
///
|
||||
fn nn_cosine_distance(x: ArrayView2<'_, f32>, y: ArrayView2<'_, f32>) -> Array1<f32> {
|
||||
let distances = cosine_distance(x, y, false);
|
||||
|
||||
distances.map_axis(Axis(0), |view|view.fold(f32::MAX, |a, &x| if x < a { x } else { a }))
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum NearestNeighborMetricKind {
|
||||
EuclideanDistance,
|
||||
CosineDistance,
|
||||
}
|
||||
|
||||
///
|
||||
/// A nearest neighbor distance metric that, for each target, returns
|
||||
/// the closest distance to any sample that has been observed so far.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// metric : str
|
||||
/// Either "euclidean" or "cosine".
|
||||
/// matching_threshold: float
|
||||
/// The matching threshold. Samples with larger distance are considered an
|
||||
/// invalid match.
|
||||
/// budget : Optional[int]
|
||||
/// If not None, fix samples per class to at most this number. Removes
|
||||
/// the oldest samples when the budget is reached.
|
||||
///
|
||||
/// Attributes
|
||||
/// ----------
|
||||
/// samples : Dict[int -> List[ndarray]]
|
||||
/// A dictionary that maps from target identities to the list of samples
|
||||
/// that have been observed so far.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
pub struct NearestNeighborDistanceMetric {
|
||||
metric_kind: NearestNeighborMetricKind,
|
||||
matching_threshold: f32,
|
||||
budget: Option<usize>,
|
||||
samples: HashMap<i32, VecDeque<Array1<f32>>>,
|
||||
}
|
||||
|
||||
impl NearestNeighborDistanceMetric {
|
||||
pub fn new(metric_kind: NearestNeighborMetricKind, matching_threshold: f32, budget: Option<usize>) -> Self {
|
||||
Self {
|
||||
metric_kind,
|
||||
matching_threshold,
|
||||
budget,
|
||||
samples: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DistanceMetric for NearestNeighborDistanceMetric {
|
||||
#[inline]
|
||||
fn matching_threshold(&self) -> f32 {
|
||||
self.matching_threshold
|
||||
}
|
||||
|
||||
/// Update the distance metric with new data.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// features : ndarray
|
||||
/// An NxM matrix of N features of dimensionality M.
|
||||
/// targets : ndarray
|
||||
/// An integer array of associated target identities.
|
||||
/// active_targets : List[int]
|
||||
/// A list of targets that are currently present in the scene.
|
||||
///
|
||||
fn partial_fit(&mut self, features: Vec<Array1<f32>>, targets: Vec<i32>, active_targets: Vec<i32>) {
|
||||
for (feature, target) in features.into_iter().zip(targets.into_iter()) {
|
||||
let deque = self.samples
|
||||
.entry(target)
|
||||
.or_insert_with(VecDeque::new);
|
||||
|
||||
deque.push_front(feature);
|
||||
|
||||
if let Some(budget) = self.budget {
|
||||
deque.truncate(budget);
|
||||
}
|
||||
}
|
||||
|
||||
let new_samples = active_targets
|
||||
.into_iter()
|
||||
.filter_map(|k| Some((k, self.samples.remove(&k)?)))
|
||||
.collect();
|
||||
|
||||
self.samples = new_samples;
|
||||
}
|
||||
|
||||
/// Compute distance between features and targets.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// features : ndarray
|
||||
/// An NxM matrix of N features of dimensionality M.
|
||||
/// targets : List[int]
|
||||
/// A list of targets to match the given `features` against.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// Returns a cost matrix of shape len(targets), len(features), where
|
||||
/// element (i, j) contains the closest squared distance between
|
||||
/// `targets[i]` and `features[j]`.
|
||||
///
|
||||
fn distance(&self, features: ArrayView2<'_, f32>, targets: Vec<i32>) -> Array2<f32> {
|
||||
let ntargets = targets.len();
|
||||
let nfeatures = features.nrows();
|
||||
let n = nfeatures.max(ntargets);
|
||||
|
||||
let mut cost_matrix = Array2::from_elem((n, n), INFTY_COST);
|
||||
|
||||
for (i, target) in targets.into_iter().enumerate() {
|
||||
let sample_features_deq = &self.samples[&target];
|
||||
|
||||
let mut sample_features = unsafe { Array::uninitialized((sample_features_deq.len(), features.ncols())) };
|
||||
sample_features_deq
|
||||
.iter()
|
||||
.enumerate()
|
||||
.for_each(|(idx, arr)| sample_features.index_axis_mut(Axis(0), idx).assign(&arr));
|
||||
|
||||
cost_matrix
|
||||
.slice_mut(s![i, ..nfeatures])
|
||||
.assign(&match self.metric_kind {
|
||||
NearestNeighborMetricKind::EuclideanDistance => nn_euclidean_distance(sample_features.view(), features.view()),
|
||||
NearestNeighborMetricKind::CosineDistance => nn_cosine_distance(sample_features.view(), features.view()),
|
||||
});
|
||||
}
|
||||
|
||||
cost_matrix
|
||||
}
|
||||
}
|
198
src/sort/track.rs
Normal file
@ -0,0 +1,198 @@
|
||||
use ndarray::prelude::*;
|
||||
use crate::sort::{Detection, KalmanFilter, BBox, Xyah};
|
||||
|
||||
///
|
||||
/// Enumeration type for the single target track state. Newly created tracks are
|
||||
/// classified as `tentative` until enough evidence has been collected. Then,
|
||||
/// the track state is changed to `confirmed`. Tracks that are no longer alive
|
||||
/// are classified as `deleted` to mark them for removal from the set of active
|
||||
/// tracks.
|
||||
///
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum TrackState {
|
||||
Tentative,
|
||||
Confirmed,
|
||||
Deleted,
|
||||
}
|
||||
|
||||
///
|
||||
/// A single target track with state space `(x, y, a, h)` and associated
|
||||
/// velocities, where `(x, y)` is the center of the bounding box, `a` is the
|
||||
/// aspect ratio and `h` is the height.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// mean : ndarray
|
||||
/// Mean vector of the initial state distribution.
|
||||
/// covariance : ndarray
|
||||
/// Covariance matrix of the initial state distribution.
|
||||
/// track_id : int
|
||||
/// A unique track identifier.
|
||||
/// n_init : int
|
||||
/// Number of consecutive detections before the track is confirmed. The
|
||||
/// track state is set to `Deleted` if a miss occurs within the first
|
||||
/// `n_init` frames.
|
||||
/// max_age : int
|
||||
/// The maximum number of consecutive misses before the track state is
|
||||
/// set to `Deleted`.
|
||||
/// feature : Optional[ndarray]
|
||||
/// Feature vector of the detection this track originates from. If not None,
|
||||
/// this feature is added to the `features` cache.
|
||||
///
|
||||
/// Attributes
|
||||
/// ----------
|
||||
/// mean : ndarray
|
||||
/// Mean vector of the initial state distribution.
|
||||
/// covariance : ndarray
|
||||
/// Covariance matrix of the initial state distribution.
|
||||
/// track_id : int
|
||||
/// A unique track identifier.
|
||||
/// hits : int
|
||||
/// Total number of measurement updates.
|
||||
/// age : int
|
||||
/// Total number of frames since first occurrence.
|
||||
/// time_since_update : int
|
||||
/// Total number of frames since last measurement update.
|
||||
/// state : TrackState
|
||||
/// The current track state.
|
||||
/// features : List[ndarray]
|
||||
/// A cache of features. On each measurement update, the associated feature
|
||||
/// vector is added to this list.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
pub struct Track {
|
||||
pub track_id: i32,
|
||||
pub time_since_update: i32,
|
||||
pub features: Vec<Array1<f32>>,
|
||||
|
||||
covariance: Array2<f32>,
|
||||
mean: Array1<f32>,
|
||||
hits: i32,
|
||||
age: i32,
|
||||
state: TrackState,
|
||||
n_init: i32,
|
||||
max_age: i32,
|
||||
}
|
||||
|
||||
impl Track {
|
||||
pub fn new(mean: Array1<f32>, covariance: Array2<f32>, track_id: i32, n_init: i32, max_age: i32, feature: Option<Array1<f32>>) -> Self {
|
||||
Self {
|
||||
track_id,
|
||||
mean,
|
||||
covariance,
|
||||
hits: 1,
|
||||
age: 1,
|
||||
time_since_update: 0,
|
||||
state: TrackState::Tentative,
|
||||
features: feature.into_iter().collect(),
|
||||
n_init,
|
||||
max_age,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current position in bounding box format `(center x, center y, aspect ratio, height)`.
|
||||
///
|
||||
/// Returns
|
||||
/// -------
|
||||
/// ndarray
|
||||
/// The bounding box.
|
||||
///
|
||||
#[inline]
|
||||
pub fn bbox(&self) -> BBox<Xyah> {
|
||||
BBox::xyah(
|
||||
self.mean[0],
|
||||
self.mean[1],
|
||||
self.mean[2],
|
||||
self.mean[3],
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn mean(&self) -> ArrayView1<'_, f32> {
|
||||
self.mean.view()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn covariance(&self) -> ArrayView2<'_, f32> {
|
||||
self.covariance.view()
|
||||
}
|
||||
|
||||
/// Propagate the state distribution to the current time step using a
|
||||
/// Kalman filter prediction step.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// kf : kalman_filter.KalmanFilter
|
||||
/// The Kalman filter.
|
||||
///
|
||||
pub fn predict(&mut self, kf: &mut KalmanFilter) {
|
||||
let (mean, covariance) = kf.predict(self.mean.view(), self.covariance.view());
|
||||
self.mean = mean;
|
||||
self.covariance = covariance;
|
||||
self.age += 1;
|
||||
self.time_since_update += 1;
|
||||
}
|
||||
|
||||
/// Perform Kalman filter measurement update step and update the feature cache.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// kf : kalman_filter.KalmanFilter
|
||||
/// The Kalman filter.
|
||||
/// detection : Detection
|
||||
/// The associated detection.
|
||||
///
|
||||
pub fn update(&mut self, kf: &mut KalmanFilter, detection: &Detection) {
|
||||
let (mean, covariance) = kf.update(
|
||||
self.mean.view(),
|
||||
self.covariance.view(),
|
||||
detection.bbox.as_xyah().as_view()
|
||||
);
|
||||
|
||||
self.mean = mean;
|
||||
self.covariance = covariance;
|
||||
self.features.extend(detection.feature.clone().into_iter());
|
||||
|
||||
self.hits += 1;
|
||||
self.time_since_update = 0;
|
||||
|
||||
if self.state == TrackState::Tentative && self.hits >= self.n_init {
|
||||
self.state = TrackState::Confirmed;
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Mark this track as missed (no association at the current time step).
|
||||
///
|
||||
#[inline]
|
||||
pub fn mark_missed(&mut self) {
|
||||
if self.state == TrackState::Tentative {
|
||||
self.state = TrackState::Deleted;
|
||||
} else if self.time_since_update > self.max_age {
|
||||
self.state = TrackState::Deleted;
|
||||
}
|
||||
}
|
||||
///
|
||||
/// Returns True if this track is tentative (unconfirmed).
|
||||
///
|
||||
#[inline]
|
||||
pub fn is_tentative(&self) -> bool {
|
||||
self.state == TrackState::Tentative
|
||||
}
|
||||
|
||||
///
|
||||
/// Returns True if this track is confirmed.
|
||||
///
|
||||
#[inline]
|
||||
pub fn is_confirmed(&self) -> bool {
|
||||
self.state == TrackState::Confirmed
|
||||
}
|
||||
|
||||
///
|
||||
/// Returns True if this track is dead and should be deleted.
|
||||
///
|
||||
#[inline]
|
||||
pub fn is_deleted(&self) -> bool {
|
||||
self.state == TrackState::Deleted
|
||||
}
|
||||
}
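
// Lifecycle summary (follows from `update` and `mark_missed` above):
//   Tentative --(hits >= n_init)--> Confirmed
//   Tentative --(any missed frame)--> Deleted
//   Confirmed --(time_since_update > max_age)--> Deleted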
|
210
src/sort/tracker.rs
Normal file
@ -0,0 +1,210 @@
|
||||
use ndarray::prelude::*;
|
||||
use std::collections::HashSet;
|
||||
use crate::sort::{Track, Detection, KalmanFilter, DistanceMetric};
|
||||
|
||||
/// This is the multi-target tracker.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// metric : nn_matching.NearestNeighborDistanceMetric
|
||||
/// A distance metric for measurement-to-track association.
|
||||
/// max_age : int
|
||||
/// Maximum number of consecutive misses before a track is deleted.
|
||||
/// n_init : int
|
||||
/// Number of consecutive detections before the track is confirmed. The
|
||||
/// track state is set to `Deleted` if a miss occurs within the first
|
||||
/// `n_init` frames.
|
||||
///
|
||||
/// Attributes
|
||||
/// ----------
|
||||
/// metric : nn_matching.NearestNeighborDistanceMetric
|
||||
/// The distance metric used for measurement to track association.
|
||||
/// max_age : int
|
||||
/// Maximum number of consecutive misses before a track is deleted.
|
||||
/// n_init : int
|
||||
/// Number of frames that a track remains in initialization phase.
|
||||
/// kf : kalman_filter.KalmanFilter
|
||||
/// A Kalman filter to filter target trajectories in image space.
|
||||
/// tracks : List[Track]
|
||||
/// The list of active tracks at the current time step.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
pub struct Tracker<M: DistanceMetric> {
|
||||
metric: M,
|
||||
max_iou_distance: f32,
|
||||
max_age: i32,
|
||||
n_init: i32,
|
||||
kf: KalmanFilter,
|
||||
tracks: Vec<Track>,
|
||||
next_id: i32,
|
||||
}
|
||||
|
||||
impl<M: DistanceMetric> Tracker<M> {
|
||||
pub fn new(metric: M, max_iou_distance: f32 /*=0.7*/, max_age: i32/*=70*/, n_init: i32/*=3*/) -> Self {
|
||||
Self {
|
||||
metric,
|
||||
max_iou_distance,
|
||||
max_age,
|
||||
n_init,
|
||||
kf: Default::default(),
|
||||
next_id: 1,
|
||||
tracks: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn tracks(&self) -> &[Track] {
|
||||
self.tracks.as_slice()
|
||||
}
|
||||
|
||||
///
|
||||
/// Propagate track state distributions one time step forward.
|
||||
///
|
||||
/// This function should be called once every time step, before `update`.
|
||||
///
|
||||
pub fn predict(&mut self) {
|
||||
for track in &mut self.tracks {
|
||||
track.predict(&mut self.kf);
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform measurement update and track management.
|
||||
///
|
||||
/// Parameters
|
||||
/// ----------
|
||||
/// detections : List[deep_sort.detection.Detection]
|
||||
/// A list of detections at the current time step.
|
||||
///
|
||||
pub fn update(&mut self, detections: &[Detection]) {
|
||||
let (matches, unmatched_tracks, unmatched_detections) = self.do_match(&detections);
|
||||
|
||||
for (track_idx, detection_idx) in matches {
|
||||
self.tracks[track_idx].update(
|
||||
&mut self.kf, &detections[detection_idx]);
|
||||
}
|
||||
|
||||
for track_idx in unmatched_tracks {
|
||||
self.tracks[track_idx].mark_missed();
|
||||
}
|
||||
|
||||
for detection_idx in unmatched_detections {
|
||||
self.initiate_track(&detections[detection_idx]);
|
||||
}
|
||||
|
||||
self.tracks.retain(|t|!t.is_deleted());
|
||||
|
||||
let (
|
||||
mut features,
|
||||
mut targets,
|
||||
mut active_targets) = (vec![], vec![], vec![]);
|
||||
|
||||
for track in &mut self.tracks {
|
||||
if !track.is_confirmed() {
|
||||
continue;
|
||||
}
|
||||
|
||||
active_targets.push(track.track_id);
|
||||
|
||||
for feature in track.features.iter() {
|
||||
targets.push(track.track_id);
|
||||
features.push(feature.clone());
|
||||
}
|
||||
}
|
||||
|
||||
self.metric.partial_fit(
|
||||
features,
|
||||
targets,
|
||||
active_targets
|
||||
)
|
||||
}
|
||||
|
||||
fn do_match(&mut self, detections: &[Detection]) -> (Vec<(usize, usize)>, Vec<usize>, Vec<usize>) {
|
||||
let gated_metric = |tracks: &[Track], dets: &[Detection], track_indices: &[usize], detection_indices: &[usize]| {
|
||||
let mut features = unsafe { Array2::uninitialized((detection_indices.len(), dets[0].feature.as_ref().unwrap().len())) };
|
||||
|
||||
for (idx, mut axis) in features.axis_iter_mut(Axis(0)).enumerate() {
|
||||
let index = dets[detection_indices[idx]].feature.as_ref().unwrap();
|
||||
axis.assign(index);
|
||||
}
|
||||
|
||||
let targets: Vec<_> = track_indices.iter().map(|&i|tracks[i].track_id).collect();
|
||||
let mut cost_matrix = self.metric.distance(features.view(), targets);
|
||||
|
||||
crate::sort::linear_assignment::gate_cost_matrix(
|
||||
&self.kf,
|
||||
cost_matrix.view_mut(),
|
||||
&tracks,
|
||||
&dets,
|
||||
&track_indices,
|
||||
detection_indices,
|
||||
None,
|
||||
None
|
||||
);
|
||||
|
||||
cost_matrix
|
||||
};
|
||||
|
||||
// Split track set into confirmed and unconfirmed tracks.
|
||||
let (mut confirmed_tracks, mut unconfirmed_tracks) = (vec![], vec![]);
|
||||
for (i, t) in self.tracks.iter().enumerate() {
|
||||
if t.is_confirmed() {
|
||||
confirmed_tracks.push(i);
|
||||
} else {
|
||||
unconfirmed_tracks.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
let matching_threshold = self.metric.matching_threshold();
|
||||
let max_age = self.max_age;
|
||||
|
||||
// Associate confirmed tracks using appearance features.
|
||||
let (matches_a, unmatched_tracks_a, unmatched_detections) =
|
||||
crate::sort::linear_assignment::matching_cascade(
|
||||
&gated_metric,
|
||||
matching_threshold,
|
||||
max_age,
|
||||
&self.tracks,
|
||||
detections,
|
||||
Some(confirmed_tracks),
|
||||
None);
|
||||
|
||||
// Associate remaining tracks together with unconfirmed tracks using IOU.
|
||||
let (iou_track_candidates, unmatched_tracks_a): (Vec<_>, Vec<_>) = unmatched_tracks_a
|
||||
.into_iter()
|
||||
.partition(|&k|self.tracks[k].time_since_update == 1);
|
||||
|
||||
let iou_track_candidates = [unconfirmed_tracks.as_slice(), iou_track_candidates.as_slice()].concat();
|
||||
|
||||
let (matches_b, unmatched_tracks_b, unmatched_detections) =
|
||||
crate::sort::linear_assignment::min_cost_matching(
|
||||
&crate::sort::iou_matching::iou_cost,
|
||||
self.max_iou_distance,
|
||||
&self.tracks,
|
||||
&detections,
|
||||
Some(iou_track_candidates),
|
||||
Some(unmatched_detections));
|
||||
|
||||
let matches = [matches_a, matches_b].concat();
|
||||
let unmatched_tracks: HashSet<_> = unmatched_tracks_a
|
||||
.into_iter()
|
||||
.chain(unmatched_tracks_b.into_iter())
|
||||
.collect();
|
||||
|
||||
(matches, unmatched_tracks.into_iter().collect(), unmatched_detections)
|
||||
}
|
||||
|
||||
fn initiate_track(&mut self, detection: &Detection) {
|
||||
let (mean, covariance) = self.kf.initiate(detection.bbox.as_xyah());
|
||||
|
||||
self.tracks.push(Track::new(
|
||||
mean,
|
||||
covariance,
|
||||
self.next_id,
|
||||
self.n_init,
|
||||
self.max_age,
|
||||
detection.feature.clone()
|
||||
));
|
||||
|
||||
self.next_id += 1;
|
||||
}
|
||||
}
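
// Minimal per-frame driving loop (a sketch; `detections` is assumed to come from an
// upstream detector plus appearance encoder, as in examples/demo.rs):
//
//     tracker.predict();            // propagate every track one step forward
//     tracker.update(&detections);  // associate, correct, spawn and prune tracks
//
//     for track in tracker.tracks().iter().filter(|t| t.is_confirmed()) {
//         let bbox = track.bbox().as_ltwh();
//         // draw or log `bbox` together with `track.track_id`
//     }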
|