2016
|
del-Agua, Miguel Ángel; Martínez-Villaronga, Adrià; Giménez, Adrià; Sanchis, Alberto; Civera, Jorge; Juan, Alfons The MLLP system for the 4th CHiME Challenge Inproceedings Proc. of the 4th Intl. Workshop on Speech Processing in Everyday Environments (CHiME 2016), pp. 57–59, San Francisco (USA), 2016. Abstract | Links | BibTeX | Tags:
@inproceedings{del-Aguadel-Agua2016,
  title     = {The {MLLP} system for the 4th {CHiME} Challenge},
  author    = {del-Agua, Miguel Ángel and Martínez-Villaronga, Adrià and Giménez, Adrià and Sanchis, Alberto and Civera, Jorge and Juan, Alfons},
  url       = {http://www.mllp.upv.es/wp-content/uploads/2017/11/DelAgua2016-The_MLLP_system_for_the_4th_CHiME_Challenge.pdf
http://hdl.handle.net/10251/177497
http://spandh.dcs.shef.ac.uk/chime_workshop/chime2016/chime2016proceedings.pdf},
  year      = {2016},
  booktitle = {Proc. of the 4th Intl. Workshop on Speech Processing in Everyday Environments ({CHiME} 2016)},
  pages     = {57--59},
  address   = {San Francisco (USA)},
  abstract  = {The MLLP's CHiME-4 system is presented in this paper. It has been built using the transLectures-UPV toolkit (TLK), developed by the MLLP research group, which makes use of state-of-the-art speech techniques. Our best system built for the CHiME-4 challenge consists of the combination of different sub-systems in order to deal with the variety of acoustic conditions. Each sub-system, in turn, follows a hybrid approach with different acoustic models, such as Deep Neural Networks or BLSTM Networks.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
The MLLP's CHiME-4 system is presented in this paper. It has been built using the transLectures-UPV toolkit (TLK), developed by the MLLP research group, which makes use of state-of-the-art speech techniques. Our best system built for the CHiME-4 challenge consists of the combination of different sub-systems in order to deal with the variety of acoustic conditions. Each sub-system, in turn, follows a hybrid approach with different acoustic models, such as Deep Neural Networks or BLSTM Networks. |
2015
|
del-Agua, Miguel Ángel; Martínez-Villaronga, Adrià; Piqueras, Santiago; Giménez, Adrià; Sanchis, Alberto; Civera, Jorge; Juan, Alfons The MLLP ASR Systems for IWSLT 2015 Inproceedings Proc. of 12th Intl. Workshop on Spoken Language Translation (IWSLT 2015), pp. 39–44, Da Nang (Vietnam), 2015. Abstract | Links | BibTeX | Tags:
@inproceedings{delAgua15,
  title     = {The {MLLP} {ASR} Systems for {IWSLT} 2015},
  author    = {del-Agua, Miguel Ángel and Martínez-Villaronga, Adrià and Piqueras, Santiago and Giménez, Adrià and Sanchis, Alberto and Civera, Jorge and Juan, Alfons},
  url       = {https://aclanthology.org/2015.iwslt-evaluation.5/},
  year      = {2015},
  date      = {2015-12-03},
  booktitle = {Proc. of 12th Intl. Workshop on Spoken Language Translation ({IWSLT} 2015)},
  pages     = {39--44},
  address   = {Da Nang (Vietnam)},
  abstract  = {This paper describes the Machine Learning and Language Processing (MLLP) ASR systems for the 2015 IWSLT evaluation campaign. The English system is based on the combination of five different subsystems which consist of two types of Neural Networks architectures (Deep feed-forward and Convolutional), two types of activation functions (sigmoid and rectified linear) and two types of input features (fMLLR and FBANK). All subsystems perform a speaker adaptation step based on confidence measures, the output of which is then combined with ROVER. This system achieves a Word Error Rate (WER) of 13.3% on the official IWSLT 2015 English test set.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
This paper describes the Machine Learning and Language Processing (MLLP) ASR systems for the 2015 IWSLT evaluation campaign. The English system is based on the combination of five different subsystems which consist of two types of Neural Networks architectures (Deep feed-forward and Convolutional), two types of activation functions (sigmoid and rectified linear) and two types of input features (fMLLR and FBANK). All subsystems perform a speaker adaptation step based on confidence measures, the output of which is then combined with ROVER. This system achieves a Word Error Rate (WER) of 13.3% on the official IWSLT 2015 English test set. |
2014
|
Wuebker, Joern; Ney, Hermann; Martínez-Villaronga, Adrià; Giménez, Adrià; Juan, Alfons; Servan, Christophe; Dymetman, Marc; Mirkin, Shachar Comparison of Data Selection Techniques for the Translation of Video Lectures Inproceedings Proc. of the Eleventh Biennial Conf. of the Association for Machine Translation in the Americas (AMTA-2014), pp. 193–207, Vancouver (Canada), 2014. Links | BibTeX | Tags:
@inproceedings{WueMarSer14,
  title     = {Comparison of Data Selection Techniques for the Translation of Video Lectures},
  author    = {Wuebker, Joern and Ney, Hermann and Martínez-Villaronga, Adrià and Giménez, Adrià and Juan, Alfons and Servan, Christophe and Dymetman, Marc and Mirkin, Shachar},
  url       = {https://aclanthology.org/2014.amta-researchers.15/},
  year      = {2014},
  booktitle = {Proc. of the Eleventh Biennial Conf. of the Association for Machine Translation in the Americas ({AMTA}-2014)},
  pages     = {193--207},
  address   = {Vancouver (Canada)},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2013
|
Martínez-Villaronga, Adrià Language model adaptation for video lecture transcription Masters Thesis Universitat Politècnica de València, 2013. Abstract | Links | BibTeX | Tags: Automatic Speech Recognition, language model adaptation, Video Lectures
@mastersthesis{Martínez-Villaronga2013,
  author    = {Adrià Martínez-Villaronga},
  title     = {Language model adaptation for video lecture transcription},
  school    = {Universitat Politècnica de València},
  year      = {2013},
  date      = {2013-09-25},
  url       = {http://hdl.handle.net/10251/37114},
  abstract  = {In this work we propose a method to adapt language models to specific lectures in the context of video lecture automatic transcriptions. We explore different variations of the adaptation technique obtaining a significant WER reduction for the Spanish repository Polimedia.},
  keywords  = {Automatic Speech Recognition, language model adaptation, Video Lectures},
  pubstate  = {published},
  tppubtype = {mastersthesis}
}
In this work we propose a method to adapt language models to specific lectures in the context of video lecture automatic transcriptions. We explore different variations of the adaptation technique obtaining a significant WER reduction for the Spanish repository Polimedia. |
2012
|
Martínez-Villaronga, Adrià Adaptació dels models de llenguatge per a la transcripció de vídeos de Polimedia Miscellaneous Final Year Project (Computer Science and Engineering at Universitat Politècnica de València), 2012. Links | BibTeX | Tags: Automatic Speech Recognition, language model adaptation
@misc{Martínez-Villaronga2012,
  author       = {Adrià Martínez-Villaronga},
  title        = {Adaptació dels models de llenguatge per a la transcripció de vídeos de Polimedia},
  howpublished = {Final Year Project (Computer Science and Engineering at Universitat Politècnica de València)},
  year         = {2012},
  date         = {2012-07-30},
  url          = {http://hdl.handle.net/10251/16936},
  keywords     = {Automatic Speech Recognition, language model adaptation},
  pubstate     = {published},
  tppubtype    = {misc}
}
|