@comment{
  Publication records, one entry per work, one field per line.
  Conventions: bare DOIs (no resolver prefix), double-hyphen page ranges,
  unquoted three-letter month macros, authors written as "von Last, First".
  Citation keys are kept exactly as exported so existing citations resolve.
}

@article{795,
  author   = {Segura-Delgado, Alberto and Gacto, M. J. and Alcal{\'a}, Rafael and Alcal{\'a}-Fdez, J.},
  title    = {Temporal association rule mining: An overview considering the time variable as an integral or implied component},
  journal  = {WIREs Data Mining and Knowledge Discovery},
  volume   = {10},
  year     = {2020},
  month    = apr,
  doi      = {10.1002/widm.1367},
  keywords = {calendar association rules, cyclic association rules, data mining, sequential association rules, temporal association rules},
  note     = {TIN2017-89517-P; P18-RT-2248},
  abstract = {Association rules are commonly used to provide decision-makers with knowledge that helps them to make good decisions. Most of the published proposals mine association rules without paying particular attention to temporal information. However, in real-life applications data usually change over time or presenting different temporal situations. Therefore, the extracted knowledge may not be useful, since we may not know whether the rules are currently applicable or whether they will be applicable in the future. For this reason, in recent years, many methods have been proposed in the literature for mining temporal association rules, which introduce a greater predictive and descriptive power providing an additional degree of interestingness. One of the main problems in this research field is the lack of visibility most works suffer since there is no standard terminology to refer to it, making it difficult to find and compare proposals and studies in the field. This contribution attempts to offer a well-defined framework that allows researchers both to easily locate the previous proposals and to propose well-grounded methods in the future. To accomplish both objectives, a two-level taxonomy is proposed according to whether the time variable is considered to provide order to the data collection and to locate some temporal constraints, or whether it is considered as an attribute within the learning process. Some recent applications, available software tools, and a bibliographical analysis in accordance with the Web of Science are also shown. Finally, some critical considerations and potential further directions are discussed.
This article is categorized under: Technologies > Association Rules Algorithmic Development > Association Rules},
}

@article{794,
  author   = {Gacto, M. J. and Soto-Hidalgo, Jose Manuel and Alcal{\'a}-Fdez, J. and Alcal{\'a}, Rafael},
  title    = {Experimental Study on 164 Algorithms Available in Software Tools for Solving Standard Non-Linear Regression Problems},
  journal  = {IEEE Access},
  volume   = {7},
  pages    = {108916--108939},
  year     = {2019},
  month    = aug,
  doi      = {10.1109/ACCESS.2019.2933261},
  keywords = {data mining, experimental study, regression algorithms, supervised learning},
  abstract = {In the specialized literature, researchers can find a large number of proposals for solving regression problems that come from different research areas. However, researchers tend to use only proposals from the area in which they are experts. This paper analyses the performance of a large number of the available regression algorithms from some of the most known and widely used software tools in order to help non-expert users from other areas to properly solve their own regression problems and to help specialized researchers developing well-founded future proposals by properly comparing and identifying algorithms that will enable them to focus on significant further developments. To sum up, we have analyzed 164 algorithms that come from 14 main different families available in 6 software tools (Neural Networks, Support Vector Machines, Regression Trees, Rule-Based Methods, Stacking, Random Forests, Model trees, Generalized Linear Models, Nearest Neighbor methods, Partial Least Squares and Principal Component Regression, Multivariate Adaptive Regression Splines, Bagging, Boosting, and other methods) over 52 datasets. A new measure has also been proposed to show the goodness of each algorithm with respect to the others.
Finally, a statistical analysis by non-parametric tests has been carried out over all the algorithms and on the best 30 algorithms, both with and without bagging. Results show that the algorithms from Random Forest, Model Tree and Support Vector Machine families get the best positions in the rankings obtained by the statistical tests when bagging is not considered. In addition, the use of bagging techniques significantly improves the performance of the algorithms without excessive increase in computational times.},
}

@article{8049471,
  author   = {Luna, J. M. and Pechenizkiy, M. and del Jesus, M. J. and Ventura, S.},
  title    = {Mining Context-Aware Association Rules Using Grammar-Based Genetic Programming},
  journal  = {IEEE Transactions on Cybernetics},
  volume   = {48},
  number   = {11},
  pages    = {3030--3044},
  year     = {2018},
  month    = nov,
  issn     = {2168-2267},
  doi      = {10.1109/TCYB.2017.2750919},
  keywords = {Association rules, Computer science, context awareness, context-aware association rules mining, context-sensitive factors, contextual features, contextual information, contextual-sensitive features, data mining, Feature extraction, genetic algorithms, Genetic programming, Grammar, grammar-based genetic programming methodology, grammars, pattern mining process, Proposals, ubiquitous computing},
  abstract = {Real-world data usually comprise features whose interpretation depends on some contextual information. Such contextual-sensitive features and patterns are of high interest to be discovered and analyzed in order to obtain the right meaning. This paper formulates the problem of mining context-aware association rules, which refers to the search for associations between itemsets such that the strength of their implication depends on a contextual feature. For the discovery of this type of associations, a model that restricts the search space and includes syntax constraints by means of a grammar-based genetic programming methodology is proposed. Grammars can be considered as a useful way of introducing subjective knowledge to the pattern mining process as they are highly related to the background knowledge of the user. The performance and usefulness of the proposed approach is examined by considering synthetically generated datasets.
A posteriori analysis on different domains is also carried out to demonstrate the utility of this kind of associations. For example, in educational domains, it is essential to identify and understand contextual and context-sensitive factors that affect overall and individual student behavior and performance. The results of the experiments suggest that the approach is feasible and it automatically identifies interesting context-aware associations from real-world datasets.},
}

@inproceedings{8015572,
  author    = {S{\'a}nchez, O. and Moyano, J. M. and S{\'a}nchez, L. and Alcal{\'a}-Fdez, J.},
  title     = {Mining association rules in {R} using the package {RKEEL}},
  booktitle = {2017 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)},
  pages     = {1--6},
  year      = {2017},
  month     = jul,
  issn      = {1558-4739},
  doi       = {10.1109/FUZZ-IEEE.2017.8015572},
  keywords  = {Computer science, data mining, data mining methods, data mining workflow, Electronic mail, fuzzy associations, fuzzy set theory, knowledge extraction, large data sets, Measurement, mining association rules, Open source software, open source software suites, programming language R, programming languages, public domain software, real-world datasets, RKEEL package, Software algorithms, software environments, software packages, Tools},
  abstract  = {The discovery of fuzzy associations comprises a collection of data mining methods used to extract knowledge from large data sets. Although there is an extensive catalog of specialized algorithms that cover different aspects of the problem, the most recent approaches are not yet packaged in mainstream software environments. This makes it difficult to incorporate novel association rules methods to the data mining workflow. In this paper an extension of the RKEEL package is described that allows calling from the programming language R to those association rules methods contained in KEEL, which is one of the most comprehensive open source software suites.
The potential of the proposed tool is illustrated through a case study comprising seven real-world datasets.},
}

@article{PALACIOS2015358,
  author   = {Palacios, Ana Mar{\'\i}a and Palacios, Jos{\'e} Luis and S{\'a}nchez, Luciano and Alcal{\'a}-Fdez, Jes{\'u}s},
  title    = {Genetic learning of the membership functions for mining fuzzy association rules from low quality data},
  journal  = {Information Sciences},
  volume   = {295},
  pages    = {358--378},
  year     = {2015},
  issn     = {0020-0255},
  doi      = {10.1016/j.ins.2014.10.027},
  url      = {http://www.sciencedirect.com/science/article/pii/S002002551401010X},
  keywords = {3-tuples linguistic representation, data mining, Fuzzy association rules, Genetic Fuzzy Systems, GM3M measure, Imprecise data},
  abstract = {Many methods have been proposed to mine fuzzy association rules from databases with crisp values in order to help decision-makers make good decisions and tackle new types of problems. However, most real-world problems present a certain degree of imprecision. Various studies have been proposed to mine fuzzy association rules from imprecise data but they assume that the membership functions are known in advance and it is not an easy task to know a priori the most appropriate fuzzy sets to cover the domains of the variables. In this paper, we propose FARLAT-LQD, a new fuzzy data-mining algorithm to obtain both suitable membership functions and useful fuzzy association rules from databases with a wide range of types of uncertain data.
To accomplish this, first we perform a genetic learning of the membership functions based on the 3-tuples linguistic representation model to reduce the search space and to learn the most adequate context for each fuzzy partition, maximizing the fuzzy supports and the interpretability measure GM3M in order to preserve the semantic interpretability of the obtained membership functions. Moreover, we propose a new algorithm based on the Fuzzy Frequent Pattern-growth algorithm, called FFP-growth-LQD, to efficiently mine the fuzzy association rules from inaccurate data considering the learned membership functions in the genetic process. The results obtained over 3 databases of different sizes and kinds of imprecisions demonstrate the effectiveness of the proposed algorithm.},
}

@article{FERNANDEZ2015109,
  author   = {Fernandez, Alberto and L{\'o}pez, Victoria and del Jesus, M. J. and Herrera, F.},
  title    = {Revisiting Evolutionary Fuzzy Systems: Taxonomy, applications, new trends and challenges},
  journal  = {Knowledge-Based Systems},
  volume   = {80},
  pages    = {109--121},
  year     = {2015},
  issn     = {0950-7051},
  doi      = {10.1016/j.knosys.2015.01.013},
  url      = {http://www.sciencedirect.com/science/article/pii/S0950705115000209},
  keywords = {Big Data, data mining, Evolutionary Fuzzy Systems, fuzzy rule based systems, Multi-Objective Evolutionary Fuzzy Systems, New trends, Scalability, Taxonomy},
  note     = {25th anniversary of Knowledge-Based Systems},
  abstract = {Evolutionary Fuzzy Systems are a successful hybridization between fuzzy systems and Evolutionary Algorithms. They integrate both the management of imprecision/uncertainty and inherent interpretability of Fuzzy Rule Based Systems, with the learning and adaptation capabilities of evolutionary optimization.
Over the years, many different approaches in Evolutionary Fuzzy Systems have been developed for improving the behavior of fuzzy systems, either acting on the Fuzzy Rule Base Systems{\textquoteright} elements, or by defining new approaches for the evolutionary components. All these efforts have enabled Evolutionary Fuzzy Systems to be successfully applied in several areas of Data Mining and engineering. In accordance with the former, a wide number of applications have been also taken advantage of these types of systems. However, with the new advances in computation, novel problems and challenges are raised every day. All these issues motivate researchers to make an effort in releasing new ways of addressing them with Evolutionary Fuzzy Systems. In this paper, we will review the progression of Evolutionary Fuzzy Systems by analyzing their taxonomy and components. We will also stress those problems and applications already tackled by this type of approach. We will present a discussion on the most recent and difficult Data Mining tasks to be addressed, and which are the latest trends in the development of Evolutionary Fuzzy Systems.},
}

@inproceedings{6622418,
  author    = {Palacios, A. M. and S{\'a}nchez, L. and Couso, I.},
  title     = {{CI-LQD}: A software tool for modeling and decision making with Low Quality Data},
  booktitle = {2013 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)},
  pages     = {1--8},
  year      = {2013},
  month     = jul,
  issn      = {1098-7584},
  doi       = {10.1109/FUZZ-IEEE.2013.6622418},
  keywords  = {Algorithm design and analysis, Artificial intelligence, Association rules, CI-LQD, Computational intelligence, data conditioning, data mining, data preprocessing technique, Databases, Decision making, evolutionary algorithm, Evolutionary algorithms, evolutionary computation, graphical technique, learning classifier, lightweight open source software, Low Quality Data, Probability distribution, Software algorithms, software tool, software tools, statistical analysis, statistical comparison},
  abstract  = {The software tool CI-LQD (Computational Intelligence for Low Quality Data) is introduced in this paper.
CI-LQD is an ongoing project that includes a lightweight open source software that has been designed with scientific and teaching purposes in mind. The main usefulness of the software is to automate the calculations involved in the statistical comparisons of different algorithms, with both numerical and graphical techniques, when the available information is interval-valued, fuzzy, incomplete or otherwise vague. A growing catalog of evolutionary algorithms for learning classifiers, models and association rules, along with their corresponding data conditioning and preprocessing techniques, is included. A demonstrative example of the tool is described that illustrates the capabilities of the software.},
}

@inproceedings{4626756,
  author    = {P{\'e}rez, P. and Fr{\'\i}as, M. P. and P{\'e}rez-Godoy, M. D. and Rivera-Rivas, A. J. and del Jesus, M. J. and Parras, M. and Torres, F. J.},
  title     = {An Study on Data Mining Methods for Short-Term Forecasting of the Extra Virgin Olive Oil Price in the {Spanish} Market},
  booktitle = {2008 Eighth International Conference on Hybrid Intelligent Systems},
  pages     = {943--946},
  year      = {2008},
  month     = sep,
  doi       = {10.1109/HIS.2008.132},
  keywords  = {Algorithm design and analysis, ARIMA, ARIMA models, Artificial neural networks, autoregressive moving average processes, CO2RBFN, conjugate gradient algorithm, conjugate gradient methods, data mining, data mining methods, evolutionary cooperative competitive, extra virgin olive oil price, Forecasting, fuzzy system, Fuzzy systems, GA-P algorithm, genetic algorithms, least mean squares methods, LMS algorithm, multilayer perceptron, multilayer perceptrons, Olive Oil Price, olive oil time series, Petroleum, pricing, radial basis function network, radial basis function networks, RBFN learning algorithm, short-term forecasting, Spanish market, time series, Time series analysis, time series forecasting, Training, vegetable oils},
  abstract  = {This paper presents the adaptation of an evolutionary cooperative competitive RBFN learning algorithm, CO2RBFN, for short-term forecasting of extra virgin olive oil price. The olive oil time series has been analyzed with a new evolutionary proposal for the design of RBFNs, CO2RBFN.
Results obtained has been compared with ARIMA models and other data mining methods such as a fuzzy system developed with a GA-P algorithm, a multilayer perceptron trained with a conjugate gradient algorithm and a radial basis function network trained with a LMS algorithm. The experimentation shows the high efficacy reached for the applied methods, specially for data mining methods which have slightly outperformed ARIMA methodology.},
}

@inproceedings{4295638,
  author    = {Aguilera, Jos{\'e} and Chica, M. and del Jesus, M. J. and Herrera, F.},
  title     = {Niching genetic feature selection algorithms applied to the design of fuzzy rule-based classification systems},
  booktitle = {2007 IEEE International Fuzzy Systems Conference},
  pages     = {1--6},
  year      = {2007},
  month     = jul,
  issn      = {1098-7584},
  doi       = {10.1109/FUZZY.2007.4295638},
  keywords  = {Algorithm design and analysis, classification, data mining, Databases, description capacity, Feature extraction, feature selection algorithms, Fuzzy reasoning, fuzzy rule-based classification systems, fuzzy set theory, Fuzzy sets, Fuzzy systems, genetic algorithms, induction process, Knowledge representation, multiclassifler system, niching genetic algorithms, prediction capacity, Process design, Proposals},
  abstract  = {In the design of fuzzy rule-based classification systems (FRBCSs) a feature selection process which determines the most relevant features is a crucial component in the majority of the classification problems. This simplification process increases the efficiency of the design process, improves the interpretability of the FRBCS obtained and its generalization capacity.
Most of the feature selection algorithms provide a set of variables which are adequate for the induction process according to different quality measures. Nevertheless it can be useful for the induction process to determine not only a set of variables but also different set of variables. These sets of variables can be used for the design of a set of FRBCSs which can be combined in a multiclassifler system, improving the prediction capacity increasing its description capacity. In this work, different proposals of niching genetic algorithms for the feature selection process are analyzed. The different sets of features provided by them are used in a multiclassifier system designed by means of a genetic proposal. The experimentation shows the adaptation of this type of genetic algorithms to the FRBCS design.},
}

@inproceedings{1681710,
  author    = {Casillas, J. and S{\'a}nchez, L.},
  title     = {Knowledge Extraction from Fuzzy Data for Estimating Consumer Behavior Models},
  booktitle = {2006 IEEE International Conference on Fuzzy Systems},
  pages     = {164--170},
  year      = {2006},
  month     = jul,
  issn      = {1098-7584},
  doi       = {10.1109/FUZZY.2006.1681710},
  keywords  = {Artificial intelligence, association fuzzy rule extraction, casual modeling, Computer science, Computer science education, Consumer behavior, consumer behavior model, consumer behaviour, data mining, fuzzy data, fuzzy logic, fuzzy number, fuzzy ranking, fuzzy set theory, Fuzzy sets, fuzzy statistics, genetic algorithms, knowledge extraction, learning (artificial intelligence), learning algorithm, marketing data processing, multicriteria genetic algorithm, Scattering, semantic interpretation, statistical analysis},
  abstract  = {For certain problems of casual modeling in marketing, the information is obtained by means of questionnaires. When these questionnaires include more than one item for each observable variable, the value of this variable can not be assigned a number, but a potentially scattered set of values.
In this paper, we propose to represent the information contained in this set of values by means of a fuzzy number. A novel fuzzy statistics-based interpretation of the semantic of a fuzzy set will be used for this purpose, as we will consider that this fuzzy number is a nested family of confidence intervals for a central tendency measure of the value of the variable. A genetic learning algorithm, able to extract association fuzzy rules from this data, is also proposed. The accuracy of the model will be expressed by means of a fuzzy-valued function. We propose to jointly minimize this function and the complexity of the rule based model with multicriteria genetic algorithms, that in turn will depend on a fuzzy ranking-based ordering of individuals.},
}