@inproceedings{6f01a01c0e4f4f87b8a1f40889c6fc4d,
title = "Speaker Classification from vowel sound segments",
abstract = "This study proposes a speaker detection method based on vowel segments and transfer learning using VGGish and YAMNet networks. The implementation of an X-vector based system was also explored, which did not give good results with the samples it was trained on. A compact system for isolating vowel segments from audio recordings was also implemented. The classification system using Parselmouth and a neural network showed effectiveness with an average accuracy of 89.81%. The DIMEx100 corpus served as a consistent database for training, and Parselmouth demonstrated its effectiveness for audio analysis and acoustic feature extraction. Transfer learning applied to the VGGish and YAMNet networks proved to be effective, adapting to the specific task and achieving significant levels of accuracy in vowel classification. Variations in accuracy were observed depending on the vowels, with some exceeding 98% and others hovering around 94-95%. The results confirm the applicability of transfer learning in the classification of speakers and vowel segments, opening new lines of research in the field of speaker identification in Spanish.",
author = "Vargas, {Andr{\'e}s G.D.} and Florez, {Johana M.L.} and {Pedro Vizcaya}, G.",
note = "Publisher Copyright: {\textcopyright} AES New York 2023. All rights reserved.; AES New York 2023: 155th Audio Engineering Society Convention ; Conference date: 25-10-2023 Through 27-10-2023",
year = "2023",
language = "English",
series = "AES New York 2023: 155th Audio Engineering Society Convention",
publisher = "Audio Engineering Society",
editor = "Areti Andreopoulou and Braxton Boren",
booktitle = "AES New York 2023",
}