[ { "Average accuracy of 3 splits": 82.48, "code_links": [], "date": "2019-06-13", "date2": 20190613, "model": "HAF+BoW/FV halluc", "paper": { "title": "Hallucinating IDT Descriptors and I3D Optical Flow Features for Action Recognition with CNNs", "url": "https://cknow.io/lib/45901c51dd6467aa" }, "paper_data_uoa": "45901c51dd6467aa" }, { "Average accuracy of 3 splits": 82.1, "code_links": [ { "title": "piergiaj/evanet-iccv19", "url": "https://github.com/piergiaj/evanet-iccv19" } ], "date": "2018-11-26", "date2": 20181126, "model": "EvaNet", "paper": { "title": "Evolving Space-Time Neural Architectures for Videos", "url": "https://cknow.io/lib/a44e4af3371c9b83" }, "paper_data_uoa": "a44e4af3371c9b83" }, { "Average accuracy of 3 splits": 81.9, "code_links": [], "date": "2019-08-27", "date2": 20190827, "model": "CCS + TSN (ImageNet+Kinetics pretrained)", "paper": { "title": "Cooperative Cross-Stream Network for Discriminative Action Representation", "url": "https://cknow.io/lib/09a74f888e9817da" }, "paper_data_uoa": "09a74f888e9817da" }, { "Average accuracy of 3 splits": 81.1, "code_links": [ { "title": "piergiaj/representation-flow-cvpr19", "url": "https://github.com/piergiaj/representation-flow-cvpr19" } ], "date": "2018-10-02", "date2": 20181002, "model": "RepFlow-50", "paper": { "title": "Representation Flow for Action Recognition", "url": "https://cknow.io/lib/5354d908b72741e5" }, "paper_data_uoa": "5354d908b72741e5" }, { "Average accuracy of 3 splits": 80.92, "code_links": [], "date": "2019-03-20", "date2": 20190320, "model": "Multi-stream I3D ", "paper": { "title": "Contextual Action Cues from Camera Sensor for Multi-Stream Action Recognition", "url": "https://cknow.io/lib/e85e30290a1982be" }, "paper_data_uoa": "e85e30290a1982be" }, { "Average accuracy of 3 splits": 80.9, "code_links": [ { "title": "craston/MARS", "url": "https://github.com/craston/MARS" } ], "date": "2019-06-01", "date2": 20190601, "model": "MARS+RGB+FLow (64 frames, Kinetics pretrained)", "paper": { "title": "MARS: Motion-Augmented RGB Stream for Action Recognition", "url": "https://cknow.io/lib/e917d94a27b04d45" }, "paper_data_uoa": "e917d94a27b04d45" }, { "Average accuracy of 3 splits": 78.7, "code_links": [ { "title": "bryanyzhu/two-stream-pytorch", "url": "https://github.com/bryanyzhu/two-stream-pytorch" }, { "title": "bryanyzhu/Hidden-Two-Stream", "url": "https://github.com/bryanyzhu/Hidden-Two-Stream" }, { "title": "AbdalaDiasse/Video-classification-for-oil-quality-estimation", "url": "https://github.com/AbdalaDiasse/Video-classification-for-oil-quality-estimation" } ], "date": "2017-04-02", "date2": 20170402, "model": "Hidden Two-Stream", "paper": { "title": "Hidden Two-Stream Convolutional Networks for Action Recognition", "url": "https://cknow.io/lib/1b3bdadb4a42b57d" }, "paper_data_uoa": "1b3bdadb4a42b57d" }, { "Average accuracy of 3 splits": 76.2, "code_links": [ { "title": "holistic-video-understanding/Mini-HVU", "url": "https://github.com/holistic-video-understanding/Mini-HVU" }, { "title": "holistic-video-understanding/HVU-Dataset", "url": "https://github.com/holistic-video-understanding/HVU-Dataset" } ], "date": "2019-04-25", "date2": 20190425, "model": "HATNet (32 frames, HVU pretrained)", "paper": { "title": "Large Scale Holistic Video Understanding", "url": "https://cknow.io/lib/5b16b09584bdea1a" }, "paper_data_uoa": "5b16b09584bdea1a" }, { "Average accuracy of 3 splits": 75.7, "code_links": [], "date": "2019-06-10", "date2": 20190610, "model": "FASTER32 (Kinetics pretrain)", "paper": { "title": "FASTER Recurrent Networks for Efficient Video Classification", "url": "https://cknow.io/lib/474ff5967144a33a" }, "paper_data_uoa": "474ff5967144a33a" }, { "Average accuracy of 3 splits": 74.3, "code_links": [], "date": "2018-07-24", "date2": 20180724, "model": "ADL+ResNet+IDT", "paper": { "title": "Contrastive Video Representation Learning via Adversarial Perturbations", "url": "https://cknow.io/lib/1e9f286c44f46a11" }, "paper_data_uoa": "1e9f286c44f46a11" }, { "Average accuracy of 3 splits": 72.2, "code_links": [ { "title": "feichtenhofer/st-resnet", "url": "https://github.com/feichtenhofer/st-resnet" } ], "date": "2017-07-01", "date2": 20170701, "model": "STM Network+IDT", "paper": { "title": "Spatiotemporal Multiplier Networks for Video Action Recognition", "url": "https://cknow.io/lib/002d244a408c4574" }, "paper_data_uoa": "002d244a408c4574" }, { "Average accuracy of 3 splits": 72.2, "code_links": [], "date": "2019-08-07", "date2": 20190807, "model": "STM (ImageNet+Kinetics pretrain)", "paper": { "title": "STM: SpatioTemporal and Motion Encoding for Action Recognition", "url": "https://cknow.io/lib/dbf65072610598a3" }, "paper_data_uoa": "dbf65072610598a3" }, { "Average accuracy of 3 splits": 72, "code_links": [], "date": "2019-04-05", "date2": 20190405, "model": "Prob-Distill", "paper": { "title": "Paying More Attention to Motion: Attention Distillation for Learning Video Representations", "url": "https://cknow.io/lib/b222433297a9a5c3" }, "paper_data_uoa": "b222433297a9a5c3" }, { "Average accuracy of 3 splits": 71.13, "code_links": [], "date": "2019-05-29", "date2": 20190529, "model": "HF-ECOLite (ImageNet+Kinetics pretrain)", "paper": { "title": "Hierarchical Feature Aggregation Networks for Video Action Recognition", "url": "https://cknow.io/lib/dd1247b8980e18a8" }, "paper_data_uoa": "dd1247b8980e18a8" }, { "Average accuracy of 3 splits": 62.8, "code_links": [], "date": "2019-06-19", "date2": 20190619, "model": "R-STAN-50", "paper": { "title": "R-STAN: Residual Spatial-Temporal Attention Network for Action Recognition", "url": "https://cknow.io/lib/1c94b929dfcdb35e" }, "paper_data_uoa": "1c94b929dfcdb35e" }, { "Average accuracy of 3 splits": 62.7, "code_links": [], "date": "2018-12-03", "date2": 20181203, "model": "SUSiNet (multi, Kinetics pretrained)", "paper": { "title": "SUSiNet: See, Understand and Summarize it", "url": "https://cknow.io/lib/2c70a2a80d451699" }, "paper_data_uoa": "2c70a2a80d451699" }, { "Average accuracy of 3 splits": 55.16, "code_links": [], "date": "2019-06-19", "date2": 20190619, "model": "R-STAN-152", "paper": { "title": "R-STAN: Residual Spatial-Temporal Attention Network for Action Recognition", "url": "https://cknow.io/lib/1c94b929dfcdb35e" }, "paper_data_uoa": "1c94b929dfcdb35e" }, { "Average accuracy of 3 splits": 54.8, "code_links": [], "date": "2019-01-26", "date2": 20190126, "model": "R(2+1)D-18 (DistInit pretraining)", "paper": { "title": "DistInit: Learning Video Representations Without a Single Labeled Video", "url": "https://cknow.io/lib/a410b42da91f2d04" }, "paper_data_uoa": "a410b42da91f2d04" }, { "Average accuracy of 3 splits": 51.8, "code_links": [], "date": "2018-03-22", "date2": 20180322, "model": "CD-UAR", "paper": { "title": "Towards Universal Representation for Unseen Action Recognition", "url": "https://cknow.io/lib/9c5347f4953a03dc" }, "paper_data_uoa": "9c5347f4953a03dc" }, { "Average accuracy of 3 splits": 33.4, "code_links": [ { "title": "laura-wang/video_repres_mas", "url": "https://github.com/laura-wang/video_repres_mas" } ], "date": "2019-04-07", "date2": 20190407, "model": "Pretrained on Kinetics", "paper": { "title": "Self-supervised Spatio-temporal Representation Learning for Videos by Predicting Motion and Appearance Statistics", "url": "https://cknow.io/lib/510450184304f519" }, "paper_data_uoa": "510450184304f519" } ]