[ { "Top-1 Accuracy": 66.6, "Top-5 Accuracy": 91.3, "code_links": [ { "title": "MIT-HAN-LAB/temporal-shift-module", "url": "https://github.com/MIT-HAN-LAB/temporal-shift-module" }, { "title": "PaParaZz1/TemporalShiftModule", "url": "https://github.com/PaParaZz1/TemporalShiftModule" }, { "title": "WavesUR/embedded_TSM", "url": "https://github.com/WavesUR/embedded_TSM" }, { "title": "niveditarahurkar/CS231N-ActionRecognition", "url": "https://github.com/niveditarahurkar/CS231N-ActionRecognition" } ], "date": "2018-11-20", "date2": 20181120, "model": "TSM (RGB + Flow)", "paper": { "title": "TSM: Temporal Shift Module for Efficient Video Understanding", "url": "https://cknow.io/lib/0443f1df43248fc9" }, "paper_data_uoa": "0443f1df43248fc9" }, { "Top-1 Accuracy": 64.2, "Top-5 Accuracy": 89.8, "code_links": [], "date": "2019-08-07", "date2": 20190807, "model": "STM (16 frames, ImageNet pretraining)", "paper": { "title": "STM: SpatioTemporal and Motion Encoding for Action Recognition", "url": "https://cknow.io/lib/dbf65072610598a3" }, "paper_data_uoa": "dbf65072610598a3" }, { "Top-1 Accuracy": 62.2, "Top-5 Accuracy": 90.3, "code_links": [], "date": "2019-08-27", "date2": 20190827, "model": "TRG-ResNet-50", "paper": { "title": "Temporal Reasoning Graph for Activity Recognition", "url": "https://cknow.io/lib/48eac55d1d0207b6" }, "paper_data_uoa": "48eac55d1d0207b6" }, { "Top-1 Accuracy": 62, "code_links": [], "date": "2020-04-07", "date2": 20200407, "model": "TPN (TSM-50)", "paper": { "title": "Temporal Pyramid Network for Action Recognition", "url": "https://cknow.io/lib/f3df98b54f0c923e" }, "paper_data_uoa": "f3df98b54f0c923e" }, { "Top-1 Accuracy": 61.3, "Top-5 Accuracy": 91.4, "code_links": [], "date": "2019-08-27", "date2": 20190827, "model": "TRG-Inception-V3", "paper": { "title": "Temporal Reasoning Graph for Activity Recognition", "url": "https://cknow.io/lib/48eac55d1d0207b6" }, "paper_data_uoa": "48eac55d1d0207b6" }, { "Top-1 Accuracy": 61.2, "Top-5 Accuracy": 
89.3, "code_links": [], "date": "2019-08-27", "date2": 20190827, "model": "CCS + two-stream + TRN", "paper": { "title": "Cooperative Cross-Stream Network for Discriminative Action Representation", "url": "https://cknow.io/lib/09a74f888e9817da" }, "paper_data_uoa": "09a74f888e9817da" }, { "Top-1 Accuracy": 57.65, "Top-5 Accuracy": 83.95, "code_links": [ { "title": "xingyul/cpnet", "url": "https://github.com/xingyul/cpnet" }, { "title": "xingyul/meteornet", "url": "https://github.com/xingyul/meteornet" } ], "date": "2019-05-20", "date2": 20190520, "model": "CPNet Res34, 5 CP", "paper": { "title": "Learning Video Representations from Correspondence Proposals", "url": "https://cknow.io/lib/9e742789227dfb82" }, "paper_data_uoa": "9e742789227dfb82" }, { "Top-1 Accuracy": 55.52, "Top-5 Accuracy": 83.06, "code_links": [ { "title": "metalbubble/TRN-pytorch", "url": "https://github.com/metalbubble/TRN-pytorch" }, { "title": "okankop/MFF-pytorch", "url": "https://github.com/okankop/MFF-pytorch" } ], "date": "2017-11-22", "date2": 20171122, "model": "2-Stream TRN", "paper": { "title": "Temporal Relational Reasoning in Videos", "url": "https://cknow.io/lib/ab89498599fa31b3" }, "paper_data_uoa": "ab89498599fa31b3" }, { "Top-1 Accuracy": 52.3, "code_links": [], "date": "2019-06-27", "date2": 20190627, "model": "TAM (5-shot)", "paper": { "title": "Few-Shot Video Classification via Temporal Alignment", "url": "https://cknow.io/lib/46c481c9757c6ff1" }, "paper_data_uoa": "46c481c9757c6ff1" }, { "Top-1 Accuracy": 51.33, "Top-5 Accuracy": 80.46, "code_links": [ { "title": "TwentyBN/smth-smth-v2-baseline-with-models", "url": "https://github.com/TwentyBN/smth-smth-v2-baseline-with-models" }, { "title": "TwentyBN/something-something-v2-baseline", "url": "https://github.com/TwentyBN/something-something-v2-baseline" }, { "title": "caspillaga/Conv3DSelfAttention", "url": "https://github.com/caspillaga/Conv3DSelfAttention" } ], "date": "2017-06-13", "date2": 20170613,
"model": "model3D_1 with left-right augmentation and fps jitter", "paper": { "title": "The \"something something\" video database for learning and evaluating visual common sense", "url": "https://cknow.io/lib/25d6c0d541fdb268" }, "paper_data_uoa": "25d6c0d541fdb268" }, { "Top-1 Accuracy": 49.9, "Top-5 Accuracy": 79.1, "code_links": [], "date": "2019-04-05", "date2": 20190405, "model": "Prob-Distill", "paper": { "title": "Paying More Attention to Motion: Attention Distillation for Learning Video Representations", "url": "https://cknow.io/lib/b222433297a9a5c3" }, "paper_data_uoa": "b222433297a9a5c3" }, { "Top-1 Accuracy": 47.73, "code_links": [ { "title": "fubel/stmodeling", "url": "https://github.com/fubel/stmodeling" } ], "date": "2019-09-11", "date2": 20190911, "model": "STM + TRNMultiscale", "paper": { "title": "Comparative Analysis of CNN-based Spatiotemporal Reasoning in Videos", "url": "https://cknow.io/lib/fbe3c24794e9cae7" }, "paper_data_uoa": "fbe3c24794e9cae7" } ]