% Publications on classification with imbalanced data sets (2006-2015).
%
% Conventions used in this file:
%   - one field per line, author names in "von Last, First" form;
%   - page ranges use a double hyphen, months use the standard three-letter macros;
%   - acronyms and camel-case names in titles are brace-protected against recasing;
%   - abstracts are kept verbatim as exported.
%
% Citation keys: the generic keys "inproceedings" and "article" were each used by
% two entries, so the later entry of each pair was rejected as a repeated entry.
% The first holder of each key keeps it; the later ones were given unique keys
% (lopez2012sistema, fernandez2006primer).

% IDEAL 2015 paper. The former organization field only repeated the publisher
% name and has been dropped to avoid printing it twice.
@inproceedings{10.1007/978-3-319-24834-9_5,
  author    = {Fern{\'a}ndez, Alberto and del Jesus, M. J. and Herrera, F.},
  editor    = {Jackowski, Konrad and Burduk, Robert and Walkowiak, Krzysztof and Wozniak, Michal and Yin, Hujun},
  title     = {Addressing Overlapping in Classification with Imbalanced Datasets: A First Multi-objective Approach for Feature and Instance Selection},
  booktitle = {Intelligent Data Engineering and Automated Learning {\textendash} {IDEAL} 2015},
  year      = {2015},
  pages     = {36--44},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-319-24834-9},
  abstract  = {In classification tasks with imbalanced datasets the distribution of examples between the classes is uneven. However, it is not the imbalance itself which hinders the performance, but there are other related intrinsic data characteristics which have a significance in the final accuracy. Among all, the overlapping between the classes is possibly the most significant one for a correct discrimination between the classes.},
}

% WIREs overview article on Big Data and Cloud Computing.
@article{doi:10.1002/widm.1134,
  author   = {Fern{\'a}ndez, Alberto and del R{\'\i}o, Sara and L{\'o}pez, Victoria and Bawakid, Abdullah and del Jesus, M. J. and Benitez, Jose M. and Herrera, F.},
  title    = {Big Data with Cloud Computing: an insight on the computing environment, {MapReduce}, and programming frameworks},
  journal  = {WIREs Data Mining and Knowledge Discovery},
  volume   = {4},
  number   = {5},
  year     = {2014},
  pages    = {380--409},
  issn     = {1942-4787},
  doi      = {10.1002/widm.1134},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1134},
  abstract = {The term {\textquoteleft}Big Data{\textquoteright} has spread rapidly in the framework of Data Mining and Business Intelligence. This new scenario can be defined by means of those problems that cannot be effectively or efficiently addressed using the standard computing resources that we currently have. We must emphasize that Big Data does not just imply large volumes of data but also the necessity for scalability, i.e., to ensure a response in an acceptable elapsed time. When the scalability term is considered, usually traditional parallel-type solutions are contemplated, such as the Message Passing Interface or high performance and distributed Database Management Systems. Nowadays there is a new paradigm that has gained popularity over the latter due to the number of benefits it offers. This model is Cloud Computing, and among its main features we has to stress its elasticity in the use of computing resources and space, less management effort, and flexible costs. In this article, we provide an overview on the topic of Big Data, and how the current problem can be addressed from the perspective of Cloud Computing and its programming frameworks. In particular, we focus on those systems for large-scale analytics based on the MapReduce scheme and Hadoop, its open-source implementation. We identify several libraries and software projects that have been developed for aiding practitioners to address this new programming model. We also analyze the advantages and disadvantages of MapReduce, in contrast to the classical solutions in this field. Finally, we present a number of programming frameworks that have been proposed as an alternative to MapReduce, developed under the premise of solving the shortcomings of this model in certain scenarios and platforms. WIREs Data Mining Knowl Discov 2014, 4:380{\textendash}409. doi: 10.1002/widm.1134 This article is categorized under: Technologies > Classification Technologies > Computer Architectures for Data Mining},
}

% ICPRAM 2012 paper. First holder of the key "inproceedings"; key kept unchanged.
@inproceedings{inproceedings,
  author    = {L{\'o}pez, Victoria and Fern{\'a}ndez, Alberto and del Jesus, M. J. and Herrera, F.},
  title     = {Cost Sensitive and Preprocessing for Classification with Imbalanced Data-sets: Similar Behaviour and Potential Hybridizations},
  booktitle = {{ICPRAM} 2012 - Proceedings of the 1st International Conference on Pattern Recognition Applications and Methods},
  volume    = {2},
  year      = {2012},
  month     = feb,
}

% Previously shared the key "inproceedings" with the entry above.
% TODO(review): booktitle is required for this entry type but is absent from
% the exported record; supply the proceedings title once confirmed.
@inproceedings{lopez2012sistema,
  author = {L{\'o}pez, Victoria and Fern{\'a}ndez, Alberto and del Jesus, M. J. and Herrera, F.},
  title  = {Un sistema de clasificaci{\'o}n basado en reglas difusas jer{\'a}rquico con programaci{\'o}n gen{\'e}tica para problemas de clasificaci{\'o}n altamente no balanceados},
  year   = {2012},
  month  = feb,
}

% Pattern Recognition Letters article. First holder of the key "article"; key kept unchanged.
@article{article,
  author  = {P{\'e}rez-Godoy, M. D. and Fern{\'a}ndez, Alberto and Rivera Rivas, Antonio and del Jesus, M. J.},
  title   = {Analysis of an evolutionary {RBFN} design algorithm, {CO2RBFN}, for imbalanced data sets},
  journal = {Pattern Recognition Letters},
  volume  = {31},
  number  = {15},
  year    = {2010},
  month   = nov,
  pages   = {2375--2388},
  issn    = {0167-8655},
  doi     = {10.1016/j.patrec.2010.07.010},
}

% Previously shared the key "article" with the entry above.
% The address holds the congress location as exported; the country name was
% corrected from the language name to the country.
@inproceedings{fernandez2006primer,
  author    = {Fern{\'a}ndez, Alberto and Garc{\'\i}a, Salvador and Herrera, F. and del Jesus, M. J.},
  title     = {Un primer estudio sobre el uso de los sistemas de clasificaci{\'o}n basados en reglas difusas en problemas de clasificaci{\'o}n con clases no balanceadas},
  booktitle = {XIV Congreso Espa{\~n}ol sobre tecnolog{\'\i}as y l{\'o}gica fuzzy},
  year      = {2006},
  month     = jan,
  address   = {Ciudad Real (Espa{\~n}a)},
}