[ { "Percentage error": 5.5, "code_links": [], "date": "2017-03-06", "date2": 20170306, "model": "ResNet + BiLSTMs acoustic model", "paper": { "title": "English Conversational Telephone Speech Recognition by Humans and Machines", "url": "https://cknow.io/lib/d65a228a6d6d00a9" }, "paper_data_uoa": "d65a228a6d6d00a9" }, { "Percentage error": 5.8, "code_links": [], "date": "2016-10-17", "date2": 20161017, "model": "Microsoft 2016b", "paper": { "title": "Achieving Human Parity in Conversational Speech Recognition", "url": "https://cknow.io/lib/7f54ea194dbda268" }, "paper_data_uoa": "7f54ea194dbda268" }, { "Percentage error": 6.2, "code_links": [], "date": "2016-09-12", "date2": 20160912, "model": "Microsoft 2016", "paper": { "title": "The Microsoft 2016 Conversational Speech Recognition System", "url": "https://cknow.io/lib/1ba47009467bdc18" }, "paper_data_uoa": "1ba47009467bdc18" }, { "Percentage error": 6.3, "code_links": [], "date": "2016-09-12", "date2": 20160912, "model": "VGG/Resnet/LACE/BiLSTM acoustic model trained on SWB+Fisher+CH, N-gram + RNNLM language model trained on Switchboard+Fisher+Gigaword+Broadcast", "paper": { "title": "The Microsoft 2016 Conversational Speech Recognition System", "url": "https://cknow.io/lib/1ba47009467bdc18" }, "paper_data_uoa": "1ba47009467bdc18" }, { "Percentage error": 6.6, "code_links": [], "date": "2016-04-27", "date2": 20160427, "model": "RNN + VGG + LSTM acoustic model trained on SWB+Fisher+CH, N-gram + \"model M\" + NNLM language model", "paper": { "title": "The IBM 2016 English Conversational Telephone Speech Recognition System", "url": "https://cknow.io/lib/68870341a9802e1a" }, "paper_data_uoa": "68870341a9802e1a" }, { "Percentage error": 6.6, "code_links": [], "date": "2016-10-17", "date2": 20161017, "model": "CNN-LSTM", "paper": { "title": "Achieving Human Parity in Conversational Speech Recognition", "url": "https://cknow.io/lib/7f54ea194dbda268" }, "paper_data_uoa": "7f54ea194dbda268" }, { "Percentage error": 6.8, "code_links": [ { "title": "mozilla/DeepSpeech", "url": "https://github.com/mozilla/DeepSpeech" }, { "title": "shelling203/SpecAugment", "url": "https://github.com/shelling203/SpecAugment" }, { "title": "DemisEom/SpecAugment", "url": "https://github.com/DemisEom/SpecAugment" }, { "title": "lRomul/argus-freesound", "url": "https://github.com/lRomul/argus-freesound" }, { "title": "ebouteillon/freesound-audio-tagging-2019", "url": "https://github.com/ebouteillon/freesound-audio-tagging-2019" }, { "title": "sh951011/Korean-Speech-Recognition", "url": "https://github.com/sh951011/Korean-Speech-Recognition/blob/master/package/feature.py" }, { "title": "sh951011/Korean-Speech-Recognition", "url": "https://github.com/sh951011/Korean-Speech-Recognition" }, { "title": "KimJeongSun/SpecAugment_numpy_scipy", "url": "https://github.com/KimJeongSun/SpecAugment_numpy_scipy" }, { "title": "hgstudent/las", "url": "https://github.com/hgstudent/las" }, { "title": "viig99/mixmatch-freesound", "url": "https://github.com/viig99/mixmatch-freesound" }, { "title": "knlee-voice/PaperNotes", "url": "https://github.com/knlee-voice/PaperNotes" } ], "date": "2019-04-18", "date2": 20190418, "model": "LAS + SpecAugment (SM)", "paper": { "title": "SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition", "url": "https://cknow.io/lib/15408ea14d4c82f9" }, "paper_data_uoa": "15408ea14d4c82f9" }, { "Percentage error": 6.9, "code_links": [], "date": "2016-04-27", "date2": 20160427, "model": "IBM 2016", "paper": { "title": "The IBM 2016 English Conversational Telephone Speech Recognition System", "url": "https://cknow.io/lib/68870341a9802e1a" }, "paper_data_uoa": "68870341a9802e1a" }, { "Percentage error": 6.9, "code_links": [], "date": "2016-09-12", "date2": 20160912, "model": "RNNLM", "paper": { "title": "The Microsoft 2016 Conversational Speech Recognition System", "url": "https://cknow.io/lib/1ba47009467bdc18" }, "paper_data_uoa": "1ba47009467bdc18" }, { "Percentage error": 8, "code_links": [], "date": "2015-05-21", "date2": 20150521, "model": "IBM 2015", "paper": { "title": "The IBM 2015 English Conversational Telephone Speech Recognition System", "url": "https://cknow.io/lib/4d8bf3030d142d17" }, "paper_data_uoa": "4d8bf3030d142d17" }, { "Percentage error": 12.2, "code_links": [], "date": "2015-09-29", "date2": 20150929, "model": "Deep CNN (10 conv, 4 FC layers), multi-scale feature maps", "paper": { "title": "Very Deep Multilingual Convolutional Neural Networks for LVCSR", "url": "https://cknow.io/lib/bfdb5a624b4b9299" }, "paper_data_uoa": "bfdb5a624b4b9299" }, { "Percentage error": 12.6, "code_links": [ { "title": "mozilla/DeepSpeech", "url": "https://github.com/mozilla/DeepSpeech" }, { "title": "robmsmt/KerasDeepSpeech", "url": "https://github.com/robmsmt/KerasDeepSpeech" }, { "title": "myrtleSoftware/deepspeech", "url": "https://github.com/myrtleSoftware/deepspeech" }, { "title": "IBM/MAX-Speech-to-Text-Converter", "url": "https://github.com/IBM/MAX-Speech-to-Text-Converter" }, { "title": "mangushev/deep_speech", "url": "https://github.com/mangushev/deep_speech" }, { "title": "RashadGarayev/TRSpeech-to-text", "url": "https://github.com/RashadGarayev/TRSpeech-to-text" }, { "title": "WalterJohnson0/DeepSpeech-KerasRebuild", "url": "https://github.com/WalterJohnson0/DeepSpeech-KerasRebuild" }, { "title": "Loghijiaha/DeepSpeech-Indo", "url": "https://github.com/Loghijiaha/DeepSpeech-Indo" } ], "date": "2014-12-17", "date2": 20141217, "model": "Deep Speech + FSH", "paper": { "title": "Deep Speech: Scaling up end-to-end speech recognition", "url": "https://cknow.io/lib/23ae74412eb5aaa6" }, "paper_data_uoa": "23ae74412eb5aaa6" }, { "Percentage error": 12.6, "code_links": [ { "title": "mozilla/DeepSpeech", "url": "https://github.com/mozilla/DeepSpeech" }, { "title": "robmsmt/KerasDeepSpeech", "url": "https://github.com/robmsmt/KerasDeepSpeech" }, { "title": "myrtleSoftware/deepspeech", "url": "https://github.com/myrtleSoftware/deepspeech" }, { "title": "IBM/MAX-Speech-to-Text-Converter", "url": "https://github.com/IBM/MAX-Speech-to-Text-Converter" }, { "title": "mangushev/deep_speech", "url": "https://github.com/mangushev/deep_speech" }, { "title": "RashadGarayev/TRSpeech-to-text", "url": "https://github.com/RashadGarayev/TRSpeech-to-text" }, { "title": "WalterJohnson0/DeepSpeech-KerasRebuild", "url": "https://github.com/WalterJohnson0/DeepSpeech-KerasRebuild" }, { "title": "Loghijiaha/DeepSpeech-Indo", "url": "https://github.com/Loghijiaha/DeepSpeech-Indo" } ], "date": "2014-12-17", "date2": 20141217, "model": "CNN + Bi-RNN + CTC (speech to letters), 25.9% WER if trainedonlyon SWB", "paper": { "title": "Deep Speech: Scaling up end-to-end speech recognition", "url": "https://cknow.io/lib/23ae74412eb5aaa6" }, "paper_data_uoa": "23ae74412eb5aaa6" }, { "Percentage error": 15, "code_links": [], "date": "2014-06-30", "date2": 20140630, "model": "DNN + Dropout", "paper": { "title": "Building DNN Acoustic Models for Large Vocabulary Speech Recognition", "url": "https://cknow.io/lib/1be14a5be24c6385" }, "paper_data_uoa": "1be14a5be24c6385" }, { "Percentage error": 16, "code_links": [], "date": "2014-06-30", "date2": 20140630, "model": "DNN", "paper": { "title": "Building DNN Acoustic Models for Large Vocabulary Speech Recognition", "url": "https://cknow.io/lib/1be14a5be24c6385" }, "paper_data_uoa": "1be14a5be24c6385" }, { "Percentage error": 20, "code_links": [ { "title": "mozilla/DeepSpeech", "url": "https://github.com/mozilla/DeepSpeech" }, { "title": "robmsmt/KerasDeepSpeech", "url": "https://github.com/robmsmt/KerasDeepSpeech" }, { "title": "myrtleSoftware/deepspeech", "url": "https://github.com/myrtleSoftware/deepspeech" }, { "title": "IBM/MAX-Speech-to-Text-Converter", "url": "https://github.com/IBM/MAX-Speech-to-Text-Converter" }, { "title": "mangushev/deep_speech", "url": "https://github.com/mangushev/deep_speech" }, { "title": "RashadGarayev/TRSpeech-to-text", "url": "https://github.com/RashadGarayev/TRSpeech-to-text" }, { "title": "WalterJohnson0/DeepSpeech-KerasRebuild", "url": "https://github.com/WalterJohnson0/DeepSpeech-KerasRebuild" }, { "title": "Loghijiaha/DeepSpeech-Indo", "url": "https://github.com/Loghijiaha/DeepSpeech-Indo" } ], "date": "2014-12-17", "date2": 20141217, "model": "Deep Speech", "paper": { "title": "Deep Speech: Scaling up end-to-end speech recognition", "url": "https://cknow.io/lib/23ae74412eb5aaa6" }, "paper_data_uoa": "23ae74412eb5aaa6" } ]