Vine copulas for discrete variables
Import the required libraries
[1]:
import math
import numpy as np
import pyvinecopulib as pv
Simulate dummy data and convert to pseudo-observations
[2]:
np.random.seed(1234) # seed for the random generator
n = 30
d = 5
x = np.random.normal(size=n).reshape(n, 1) * np.ones(
(n, d)
) + 0.5 * np.random.normal(size=(n, d))
# Convert to pseudo-observations
u = pv.to_pseudo_obs(x)
Fit a continuous model
[3]:
# Some fit controls
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])
# A continuous example
fit_cont = pv.Vinecop.from_data(u, controls=controls)
print(str(fit_cont))
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types family rotation parameters df tau
1 1 4, 1 c, c Gaussian 0 0.85 1.0 0.64
1 2 1, 3 c, c Gaussian 0 0.85 1.0 0.64
1 3 2, 5 c, c Gaussian 0 0.90 1.0 0.71
1 4 3, 5 c, c Gaussian 0 0.89 1.0 0.70
2 1 4, 3 1 c, c Gaussian 0 0.36 1.0 0.24
2 2 1, 5 3 c, c Gaussian 0 0.30 1.0 0.19
2 3 2, 3 5 c, c Gaussian 0 0.04 1.0 0.03
3 1 4, 5 3, 1 c, c Gaussian 0 0.11 1.0 0.07
3 2 1, 2 5, 3 c, c Gaussian 0 0.10 1.0 0.07
4 1 4, 2 5, 3, 1 c, c Gaussian 0 0.30 1.0 0.19
Model for discrete data : transform to Poisson margins
[4]:
# Percent Point Function (Inverse CDF, PPF)
@np.vectorize
def poisson_ppf(p, mu):
cumulative_prob = 0.0
k = 0
while cumulative_prob < p:
cumulative_prob += math.exp(-mu) * (mu**k) / math.factorial(k)
if cumulative_prob >= p:
return k
k += 1
return k # In case p is exactly 1
# Using Poisson(1) transformation
# Cumulative Distribution Function (CDF)
@np.vectorize
def poisson_cdf(k, mu):
return sum(
math.exp(-mu) * (mu**i) / math.factorial(i) for i in range(int(k) + 1)
)
x_poisson = poisson_ppf(u, 1)
u_disc = np.hstack(
(poisson_cdf(x_poisson, 1), poisson_cdf(x_poisson - 1, 1))
) # Discrete pseudo-observations
# Fit vine copula model for discrete data
fit_disc = pv.Vinecop.from_data(u_disc, var_types=["d"] * 5, controls=controls)
print(str(fit_disc))
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types family rotation parameters df tau
1 1 1, 5 d, d Gaussian 0 0.84 1.0 0.63
1 2 4, 3 d, d Gaussian 0 0.81 1.0 0.60
1 3 2, 5 d, d Gaussian 0 0.91 1.0 0.72
1 4 3, 5 d, d Gaussian 0 0.84 1.0 0.64
2 1 1, 3 5 d, d Gaussian 0 0.37 1.0 0.24
2 2 4, 5 3 d, d Gaussian 0 0.29 1.0 0.18
2 3 2, 3 5 d, d Gaussian 0 0.17 1.0 0.11
3 1 1, 4 3, 5 d, d Gaussian 0 0.49 1.0 0.32
3 2 4, 2 5, 3 d, d Gaussian 0 0.44 1.0 0.29
4 1 1, 2 4, 3, 5 d, d Gaussian 0 0.02 1.0 0.01
Model for mixed data: only the first variable is transformed to a Poisson margin
[5]:
# Transform first variable to Poisson margin
x_poisson_mixed = poisson_ppf(u[:, 0], 1)
u_disc_mixed = np.hstack(
(
poisson_cdf(x_poisson_mixed, 1).reshape(-1, 1),
u[:, 1:5],
poisson_cdf(x_poisson_mixed - 1, 1).reshape(-1, 1),
)
)
# Fit vine copula model for mixed data
fit_mixed = pv.Vinecop.from_data(
u_disc_mixed, var_types=["d"] + ["c"] * 4, controls=controls
)
print(str(fit_mixed))
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types family rotation parameters df tau
1 1 1, 5 d, c Gaussian 0 0.83 1.0 0.62
1 2 4, 3 c, c Gaussian 0 0.82 1.0 0.61
1 3 2, 5 c, c Gaussian 0 0.90 1.0 0.71
1 4 3, 5 c, c Gaussian 0 0.89 1.0 0.70
2 1 1, 3 5 d, c Gaussian 0 0.32 1.0 0.21
2 2 4, 5 3 c, c Gaussian 0 0.24 1.0 0.15
2 3 2, 3 5 c, c Gaussian 0 0.04 1.0 0.03
3 1 1, 4 3, 5 d, c Gaussian 0 0.33 1.0 0.22
3 2 4, 2 5, 3 c, c Gaussian 0 0.31 1.0 0.20
4 1 1, 2 4, 3, 5 d, c Gaussian 0 0.05 1.0 0.03