ITK  6.0.0
Insight Toolkit
Examples/Statistics/BayesianPluginClassifier.cxx
/*=========================================================================
*
* Copyright NumFOCUS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*=========================================================================*/
// Software Guide : BeginLatex
//
// \index{Statistics!Bayesian plugin classifier}
// \index{itk::Statistics::SampleClassifier}
// \index{itk::Statistics::GaussianMembershipFunction}
// \index{itk::Statistics::NormalVariateGenerator}
//
// In this example, we present a system that places measurement vectors into
// two Gaussian classes. The Figure~\ref{fig:BayesianPluginClassifier} shows
// all the components of the classifier system and the data flow. This system
// differs with the previous k-means clustering algorithms in several
// ways. The biggest difference is that this classifier uses the
// \subdoxygen{Statistics}{GaussianDensityFunction}s as membership functions
// instead of the
// \subdoxygen{Statistics}{DistanceToCentroidMembershipFunction}. Since the
// membership function is different, the membership function requires a
// different set of parameters, mean vectors and covariance matrices. We
// choose the \subdoxygen{Statistics}{CovarianceSampleFilter} (sample
// covariance) for the estimation algorithms of the two parameters. If we want
// a more robust estimation algorithm, we can replace this estimation
// algorithm with more alternatives without changing other components in the
// classifier system.
//
// It is a bad idea to use the same sample for test and training
// (parameter estimation) of the parameters. However, for simplicity, in
// this example, we use a sample for test and training.
//
// \begin{figure}
// \centering
// \includegraphics[width=0.9\textwidth]{BayesianPluginClassifier}
// \itkcaption[Bayesian plug-in classifier for two Gaussian
// classes]{Bayesian plug-in classifier for two Gaussian classes.}
// \protect\label{fig:BayesianPluginClassifier}
// \end{figure}
//
// We use the \subdoxygen{Statistics}{ListSample} as the sample (test
// and training). The \doxygen{Vector} is our measurement vector
// class. To store measurement vectors into two separate sample
// containers, we use the \subdoxygen{Statistics}{Subsample} objects.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
#include "itkVector.h"
#include "itkListSample.h"
#include "itkSubsample.h"
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// The following file provides us the parameter estimation algorithm.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// The following files define the components required by ITK statistical
// classification framework: the decision rule, the membership
// function, and the classifier.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// We will fill the sample with random variables from two normal
// distribution using the \subdoxygen{Statistics}{NormalVariateGenerator}.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
// Software Guide : EndCodeSnippet
int
main(int, char *[])
{
// Software Guide : BeginLatex
//
// Since the NormalVariateGenerator class only supports 1-D, we define our
// measurement vector type as a one component vector. We then, create a
// ListSample object for data inputs.
//
// We also create two Subsample objects that will store
// the measurement vectors in \code{sample} into two separate
// sample containers. Each Subsample object stores only the
// measurement vectors belonging to a single class. This class sample
// will be used by the parameter estimation algorithms.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
constexpr unsigned int measurementVectorLength = 1;
using MeasurementVectorType = itk::Vector<double, measurementVectorLength>;
auto sample = SampleType::New();
// length of measurement vectors in the sample.
sample->SetMeasurementVectorSize(measurementVectorLength);
using ClassSampleType = itk::Statistics::Subsample<SampleType>;
std::vector<ClassSampleType::Pointer> classSamples;
for (unsigned int i = 0; i < 2; ++i)
{
classSamples.push_back(ClassSampleType::New());
classSamples[i]->SetSample(sample);
}
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// The following code snippet creates a NormalVariateGenerator
// object. Since the random variable generator returns values according to
// the standard normal distribution (the mean is zero, and the standard
// deviation is one) before pushing random values into the \code{sample},
// we change the mean and standard deviation. We want two normal (Gaussian)
// distribution data. We have two for loops. Each for loop uses different
// mean and standard deviation. Before we fill the \code{sample} with the
// second distribution data, we call \code{Initialize(random seed)} method,
// to recreate the pool of random variables in the
// \code{normalGenerator}. In the second for loop, we fill the two class
// samples with measurement vectors using the \code{AddInstance()} method.
//
// To see the probability density plots from the two distributions,
// refer to Figure~\ref{fig:TwoNormalDensityFunctionPlot}.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
using NormalGeneratorType = itk::Statistics::NormalVariateGenerator;
auto normalGenerator = NormalGeneratorType::New();
normalGenerator->Initialize(101);
MeasurementVectorType mv;
double mean = 100;
double standardDeviation = 30;
SampleType::InstanceIdentifier id = 0UL;
for (unsigned int i = 0; i < 100; ++i)
{
mv.Fill((normalGenerator->GetVariate() * standardDeviation) + mean);
sample->PushBack(mv);
classSamples[0]->AddInstance(id);
++id;
}
normalGenerator->Initialize(3024);
mean = 200;
standardDeviation = 30;
for (unsigned int i = 0; i < 100; ++i)
{
mv.Fill((normalGenerator->GetVariate() * standardDeviation) + mean);
sample->PushBack(mv);
classSamples[1]->AddInstance(id);
++id;
}
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// In the following code snippet, notice that the template argument for the
// CovarianceCalculator is \code{ClassSampleType} (i.e., type of Subsample)
// instead of SampleType (i.e., type of ListSample). This is because the
// parameter estimation algorithms are applied to the class sample.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
using CovarianceEstimatorType =
std::vector<CovarianceEstimatorType::Pointer> covarianceEstimators;
for (unsigned int i = 0; i < 2; ++i)
{
covarianceEstimators.push_back(CovarianceEstimatorType::New());
covarianceEstimators[i]->SetInput(classSamples[i]);
covarianceEstimators[i]->Update();
}
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// We print out the estimated parameters.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
for (unsigned int i = 0; i < 2; ++i)
{
std::cout << "class[" << i << "] " << std::endl;
std::cout << " estimated mean : " << covarianceEstimators[i]->GetMean()
<< " covariance matrix : "
<< covarianceEstimators[i]->GetCovarianceMatrix() << std::endl;
}
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// After creating a SampleClassifier object and a
// MaximumRatioDecisionRule object, we plug in the
// \code{decisionRule} and the \code{sample} to the classifier. Then,
// we specify the number of classes that will be considered using
// the \code{SetNumberOfClasses()} method.
//
// The MaximumRatioDecisionRule requires a vector of \emph{a
// priori} probability values. Such \emph{a priori} probability will
// be the $P(\omega_{i})$ of the following variation of the Bayes
// decision rule:
// \begin{equation}
// \textrm{Decide } \omega_{i} \textrm{ if }
// \frac{p(\overrightarrow{x}|\omega_{i})}
// {p(\overrightarrow{x}|\omega_{j})}
// > \frac{P(\omega_{j})}{P(\omega_{i})} \textrm{ for all } j \not= i
// \label{eq:bayes2}
// \end{equation}
//
// The remainder of the code snippet shows how to use user-specified class
// labels. The classification result will be stored in a
// MembershipSample object, and for each measurement vector, its
// class label will be one of the two class labels, 100 and 200
// (\code{unsigned int}).
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
using MembershipFunctionType =
auto decisionRule = DecisionRuleType::New();
DecisionRuleType::PriorProbabilityVectorType aPrioris;
aPrioris.push_back(
static_cast<double>(classSamples[0]->GetTotalFrequency()) /
static_cast<double>(sample->GetTotalFrequency()));
aPrioris.push_back(
static_cast<double>(classSamples[1]->GetTotalFrequency()) /
static_cast<double>(sample->GetTotalFrequency()));
decisionRule->SetPriorProbabilities(aPrioris);
auto classifier = ClassifierType::New();
classifier->SetDecisionRule(decisionRule);
classifier->SetInput(sample);
classifier->SetNumberOfClasses(2);
using ClassLabelVectorObjectType =
ClassifierType::ClassLabelVectorObjectType;
using ClassLabelVectorType = ClassifierType::ClassLabelVectorType;
auto classLabelVectorObject = ClassLabelVectorObjectType::New();
ClassLabelVectorType classLabelVector = classLabelVectorObject->Get();
constexpr ClassifierType::ClassLabelType class1 = 100;
classLabelVector.push_back(class1);
constexpr ClassifierType::ClassLabelType class2 = 200;
classLabelVector.push_back(class2);
classLabelVectorObject->Set(classLabelVector);
classifier->SetClassLabels(classLabelVectorObject);
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// The \code{classifier} is almost ready to perform the classification
// except that it needs two membership functions that represent the two
// clusters.
//
// In this example, we can imagine that the two clusters are modeled by two
// Gaussian distribution functions. The distribution functions have two
// parameters, the mean, set by the \code{SetMean()} method, and the
// covariance, set by the \code{SetCovariance()} method. To plug-in two
// distribution functions, we create a new instance of
// \code{MembershipFunctionVectorObjectType} and populate its internal
// vector with new instances of \code{MembershipFunction} (i.e.
// GaussianMembershipFunction). This is done by calling the \code{Get()}
// method of \code{membershipFunctionVectorObject} to get the internal
// vector, populating this vector with two new membership functions and then
// calling
// \code{membershipFunctionVectorObject->Set( membershipFunctionVector )}.
// Finally, the invocation of the \code{Update()} method will perform the
// classification.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
using MembershipFunctionVectorObjectType =
ClassifierType::MembershipFunctionVectorObjectType;
using MembershipFunctionVectorType =
ClassifierType::MembershipFunctionVectorType;
auto membershipFunctionVectorObject =
MembershipFunctionVectorType membershipFunctionVector =
membershipFunctionVectorObject->Get();
for (unsigned int i = 0; i < 2; ++i)
{
auto membershipFunction = MembershipFunctionType::New();
membershipFunction->SetMean(covarianceEstimators[i]->GetMean());
membershipFunction->SetCovariance(
covarianceEstimators[i]->GetCovarianceMatrix());
membershipFunctionVector.emplace_back(membershipFunction);
}
membershipFunctionVectorObject->Set(membershipFunctionVector);
classifier->SetMembershipFunctions(membershipFunctionVectorObject);
classifier->Update();
// Software Guide : EndCodeSnippet
// Software Guide : BeginLatex
//
// The following code snippet prints out pairs of a measurement vector and
// its class label in the \code{sample}.
//
// Software Guide : EndLatex
// Software Guide : BeginCodeSnippet
const ClassifierType::MembershipSampleType * membershipSample =
classifier->GetOutput();
ClassifierType::MembershipSampleType::ConstIterator iter =
membershipSample->Begin();
while (iter != membershipSample->End())
{
std::cout << "measurement vector = " << iter.GetMeasurementVector()
<< " class label = " << iter.GetClassLabel() << std::endl;
++iter;
}
// Software Guide : EndCodeSnippet
return EXIT_SUCCESS;
}
Calculates the covariance matrix of the target sample data.
void SetInput(const SampleType *sample)
GaussianMembershipFunction models class membership through a multivariate Gaussian function.
This class is the native implementation of the a Sample with an STL container.
Definition: itkListSample.h:52
A decision rule that operates as a frequentest's approximation to Bayes rule.
This class stores a subset of instance identifiers from another sample object. You can create a subsa...
Definition: itkSubsample.h:43
void SetSample(const TSample *sample)
A templated class holding a n-Dimensional vector.
Definition: itkVector.h:63
static Pointer New()