#
# fcn -- function to be integrated.
# X -- array (N x n) of sample points from a corresponding
# distribution.
# K -- number of clusters.
<- function(fcn, X, K){
integrate_kmeans <- kmeans(X, K) # we use R function kmeans
km <- km$centers # centers of obtained clusters
Xc <- km$size/sum(km$size) # proportions of sample points in clusters
w
<- purrr::map_dbl(seq_len(K), ~ fcn(Xc[., ]))
F <- sum(w*F)
I
return(I)
}
k-means clustering approach to an integral evaluation
k-means clustering approache to calculating multi-dimensional integrals as an alternative to Monte Carlo.
Problem formulation
Let us consider an integral
where
Using Monte Carlo (MC) approach, the integral can be evaluates as
where
K-means clustering approach to integration
An alternative approach can be applied, which is base on the k-means clustering. The algorithm is the following:
- Sample
points from distribution. - Apply k-means clustering to the sample obtained. An output of the clustering procedure is:
– clusters’ centers. – proportions of sample points in cluster.
- Calculate an approximated value of the integral as
R code implementing k-means-based algorithm
R code implementing Monte Carlo-based algorithm
# fcn -- function to be integrated.
# X -- array (N x n) of sample points from a corresponding
# distribution.
<- function(fcn, X){
integrate_mc <- purrr::map_dbl(seq_len(K), ~ fcn(X[., ]))
F <- mean(F)
I
return(I)
}
An Example
Let us evaluate a two-dimensional integral
which can be rewritten as
where
and
is a probability density function of a normal distribution
<- function(x){
fcn <- 4*pi*ifelse(x[1]^2+x[2]^2 <= 4, 1, 0)
result return(result)
}
<- 2 # number of dimensions
n <- 10000 # sample size from normal distribution
N <- 8 # number of clusters for k-means clustering
K
# sampling from N(mu, Sigma)
<- c(0, 0)
mu <- diag(c(2, 2))
Sigma
set.seed(3141592)
<- MASS::mvrnorm(N, mu, Sigma)
X
# integration by using MC
<- integrate_mc(fcn, X)
mc_value
# integration by using k-means
<- integrate_kmeans(fcn, X, K)
kmeans_value
# true value
<- 4*pi*(1-exp(-1))
true_value
::tibble(
tibble
true_value, mc_value, kmeans_value )
# A tibble: 1 × 3
true_value mc_value kmeans_value
<dbl> <dbl> <dbl>
1 7.94 7.85 7.73