| Sample1 | |
|---|---|
| Raw | Proposed MbG-ExcitNet |
| Baseline WaveNet | Baseline ExcitNet |
| Baseline G-WaveNet | Baseline G-ExcitNet |
| Sample2 | |
| Raw | Proposed MbG-ExcitNet |
| Baseline WaveNet | Baseline ExcitNet |
| Baseline G-WaveNet | Baseline G-ExcitNet |
| Sample3 | |
| Raw | Proposed MbG-ExcitNet |
| Baseline WaveNet | Baseline ExcitNet |
| Baseline G-WaveNet | Baseline G-ExcitNet |
| Sample4 | |
| Raw | Proposed MbG-ExcitNet |
| Baseline WaveNet | Baseline ExcitNet |
| Baseline G-WaveNet | Baseline G-ExcitNet |
@inproceedings{song2020neural,
  author    = {Song, Eunwoo and Hwang, Min-Jae and Yamamoto, Ryuichi and Kim, Jin-Seob and Kwon, Ohsung and Kim, Jae-Min},
  title     = {Neural text-to-speech with a modeling-by-generation excitation vocoder},
  booktitle = {Proc. INTERSPEECH},
  year      = {2020},
  pages     = {3570--3574},
}