% Deb2003b: journal article in BioSystems 72(1--2), 2003, on gene-subset
% identification for cancer classification with a multiobjective EA (NSGA-II).
% Publisher is taken from the copyright line at the end of the abstract.
% The abstract text is quoted from the publication and is kept verbatim.
@article{Deb2003b,
  author    = {Deb, Kalyanmoy and {Raji Reddy}, A.},
  title     = {Reliable classification of two-class cancer data using evolutionary algorithms},
  journal   = {BioSystems},
  volume    = {72},
  number    = {1--2},
  pages     = {111--129},
  year      = {2003},
  publisher = {Elsevier},
  issn      = {0303-2647},
  doi       = {10.1016/S0303-2647(03)00138-2},
  pmid      = {14642662},
  url       = {https://pubmed.ncbi.nlm.nih.gov/14642662/},
  keywords  = {Classification of cancer data,DNA microarray,Evolutionary algorithms,Gene subset identification,Multiobjective optimization,Prediction strength},
  abstract  = {In the area of bioinformatics, the identification of gene subsets responsible for classifying available disease samples to two or more of its variants is an important task. Such problems have been solved in the past by means of unsupervised learning methods (hierarchical clustering, self-organizing maps, k-mean clustering, etc.) and supervised learning methods (weighted voting approach, k-nearest neighbor method, support vector machine method, etc.). Such problems can also be posed as optimization problems of minimizing gene subset size to achieve reliable and accurate classification. The main difficulties in solving the resulting optimization problem are the availability of only a few samples compared to the number of genes in the samples and the exorbitantly large search space of solutions. Although there exist a few applications of evolutionary algorithms (EAs) for this task, here we treat the problem as a multiobjective optimization problem of minimizing the gene subset size and minimizing the number of misclassified samples. Moreover, for a more reliable classification, we consider multiple training sets in evaluating a classifier. Contrary to the past studies, the use of a multiobjective EA (NSGA-II) has enabled us to discover a smaller gene subset size (such as four or five) to correctly classify 100{\%} or near 100{\%} samples for three cancer samples (Leukemia, Lymphoma, and Colon). We have also extended the NSGA-II to obtain multiple non-dominated solutions discovering as much as 352 different three-gene combinations providing a 100{\%} correct classification to the Leukemia data. In order to have further confidence in the identification task, we have also introduced a prediction strength threshold for determining a sample's belonging to one class or the other. All simulation results show consistent gene subset identifications on three disease samples and exhibit the flexibilities and efficacies in using a multiobjective EA for the gene subset identification task. {\textcopyright} 2003 Elsevier Ireland Ltd. All rights reserved.},
}