% Journal article (J. Comput. Syst. Sci. 80(1), 2014). The doi field holds the
% bare DOI (styles add the resolver); the page range uses a double hyphen.
@article{OTERO201488,
  author   = {Otero, Jos{\'e} and S{\'a}nchez, Luciano and Couso, In{\'e}s and Palacios, Ana},
  title    = {Bootstrap analysis of multiple repetitions of experiments using an interval-valued multiple comparison procedure},
  journal  = {Journal of Computer and System Sciences},
  volume   = {80},
  number   = {1},
  pages    = {88--100},
  year     = {2014},
  issn     = {0022-0000},
  doi      = {10.1016/j.jcss.2013.03.009},
  url      = {http://www.sciencedirect.com/science/article/pii/S0022000013000731},
  keywords = {Cross validation, Statistical comparisons of algorithms, Tests for interval-valued data},
  abstract = {A new bootstrap test is introduced that allows for assessing the significance of the differences between stochastic algorithms in a cross-validation with repeated folds experimental setup. Intervals are used for modeling the variability of the data that can be attributed to the repetition of learning and testing stages over the same folds in cross validation. Numerical experiments are provided that support the following three claims: (1) Bootstrap tests can be more powerful than ANOVA or Friedman test for comparing multiple classifiers. (2) In the presence of outliers, interval-valued bootstrap tests achieve a better discrimination between stochastic algorithms than nonparametric tests. (3) Choosing ANOVA, Friedman or Bootstrap can produce different conclusions in experiments involving actual data from machine learning tasks.},
}