% Conference paper, FUZZ-IEEE 2007: mutual-information estimation over fuzzy
% partitions for feature selection and fuzzy discretization of vague data.
% Citation key is the numeric identifier from the original export; it is kept
% unchanged so existing citations continue to resolve.
@inproceedings{4295665,
  author    = {S{\'a}nchez, L. and Suarez, M. R. and Villar, J. R. and Couso, I.},
  title     = {Some Results about Mutual Information-based Feature Selection and Fuzzy Discretization of Vague Data},
  booktitle = {2007 {IEEE} International Fuzzy Systems Conference},
  year      = {2007},
  month     = jul,
  pages     = {1--6},
  issn      = {1098-7584},
  doi       = {10.1109/FUZZY.2007.4295665},
  keywords  = {codification, computational linguistics, Data preprocessing, Feature extraction, feature selection, fuzzification interface, fuzzy discretization, fuzzy random variables, fuzzy rule based systems, fuzzy set theory, Fuzzy sets, Fuzzy systems, Information filtering, Information filters, knowledge based systems, linguistic terms, mutual information, Partitioning algorithms, Random variables, Spatial databases, vague data},
  abstract  = {Algorithms for preprocessing databases with incomplete and imprecise data are seldom studied, partly because we lack numerical tools to quantify the interdependency between fuzzy random variables. In particular, many filter-type feature selection algorithms rely on crisp discretizations for estimating the mutual information between continuous variables, effectively preventing the use of vague data. Fuzzy rule based systems pass continuous input variables, in turn, through their own fuzzification interface. In the context of feature selection, should we rank the relevance of the inputs by means of their mutual information, it might happen that an apparently informative variable is useless after having been codified as a fuzzy subset of our catalog of linguistic terms. In this paper we propose to address both problems by estimating the mutual information with the same set of fuzzy partitions that will be used to codify the antecedents of the fuzzy rules. That is to say, we introduce a numerical algorithm for estimating the mutual information between two fuzzified continuous variables. This algorithm can be included in certain feature selection algorithms, and can also be used to obtain the most informative fuzzy partition for the data. The use of our definition will be exemplified with the help of some benchmark problems.},
}

% Conference paper, ISEFS 2006: multiobjective genetic fuzzy system whose
% fitness is an interval-valued function, applied to questionnaire data.
% Citation key is the numeric identifier from the original export; it is kept
% unchanged so existing citations continue to resolve.
@inproceedings{4016720,
  author    = {S{\'a}nchez, L. and Couso, I. and Casillas, J.},
  title     = {A Multiobjective Genetic Fuzzy System with Imprecise Probability Fitness for Vague Data},
  booktitle = {2006 International Symposium on Evolving Fuzzy Systems},
  year      = {2006},
  month     = sep,
  pages     = {131--136},
  doi       = {10.1109/ISEFS.2006.251156},
  keywords  = {Computer errors, Computer science, Consumer behavior, fuzzy random variable variance, fuzzy set, fuzzy set theory, Fuzzy sets, fuzzy statistics-based interpretation, Fuzzy systems, genetic algorithms, Genetics, imprecise probability fitness, interval-valued function, learning (artificial intelligence), multicriteria genetic learning algorithm, multiobjective genetic fuzzy system, probability, Random variables, Statistics, vague data},
  abstract  = {When questionnaires are designed, each factor under study can be assigned a set of different items. The answers to these questions must be merged in order to obtain the level of that input. Therefore, it is typical for data acquired from questionnaires that each of the inputs and outputs are not numbers, but sets of values. In this paper, we represent the information contained in such a set of values by means of a fuzzy number. A fuzzy statistics-based interpretation of the semantic of a fuzzy set is used for this purpose, as we consider that this fuzzy number is a nested family of confidence intervals for the value of the variable. The accuracy of the model is expressed by means of an interval-valued function, derived from a definition of the variance of a fuzzy random variable. A multicriteria genetic learning algorithm, able to optimize this interval-valued function, is proposed. As an example of the application of this algorithm, a practical problem of modeling in marketing is solved},
}