Vine copulas for discrete variables

Import the required libraries

[1]:
import math

import numpy as np

import pyvinecopulib as pv

Simulate dummy data and convert to pseudo-observations

[2]:
np.random.seed(1234)  # seed for the random generator
n = 30
d = 5
x = np.random.normal(size=n).reshape(n, 1) * np.ones(
  (n, d)
) + 0.5 * np.random.normal(size=(n, d))

# Convert to pseudo-observations
u = pv.to_pseudo_obs(x)

Fit a continuous model

[3]:
# Some fit controls
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])


# A continuous example
fit_cont = pv.Vinecop.from_data(u, controls=controls)
print(str(fit_cont))
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types   family rotation parameters  df  tau
   1    1                  4, 1                             c, c Gaussian        0       0.85 1.0 0.64
   1    2                  1, 3                             c, c Gaussian        0       0.85 1.0 0.64
   1    3                  2, 5                             c, c Gaussian        0       0.90 1.0 0.71
   1    4                  3, 5                             c, c Gaussian        0       0.89 1.0 0.70
   2    1                  4, 3                      1      c, c Gaussian        0       0.36 1.0 0.24
   2    2                  1, 5                      3      c, c Gaussian        0       0.30 1.0 0.19
   2    3                  2, 3                      5      c, c Gaussian        0       0.04 1.0 0.03
   3    1                  4, 5                   3, 1      c, c Gaussian        0       0.11 1.0 0.07
   3    2                  1, 2                   5, 3      c, c Gaussian        0       0.10 1.0 0.07
   4    1                  4, 2                5, 3, 1      c, c Gaussian        0       0.30 1.0 0.19

Model for discrete data : transform to Poisson margins

[4]:
# Percent Point Function (Inverse CDF, PPF)
@np.vectorize
def poisson_ppf(p, mu):
  cumulative_prob = 0.0
  k = 0
  while cumulative_prob < p:
    cumulative_prob += math.exp(-mu) * (mu**k) / math.factorial(k)
    if cumulative_prob >= p:
      return k
    k += 1
  return k  # In case p is exactly 1


# Using Poisson(1) transformation
# Cumulative Distribution Function (CDF)
@np.vectorize
def poisson_cdf(k, mu):
  return sum(
    math.exp(-mu) * (mu**i) / math.factorial(i) for i in range(int(k) + 1)
  )


x_poisson = poisson_ppf(u, 1)
u_disc = np.hstack(
  (poisson_cdf(x_poisson, 1), poisson_cdf(x_poisson - 1, 1))
)  # Discrete pseudo-observations

# Fit vine copula model for discrete data
fit_disc = pv.Vinecop.from_data(u_disc, var_types=["d"] * 5, controls=controls)
print(str(fit_disc))
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types   family rotation parameters  df  tau
   1    1                  1, 5                             d, d Gaussian        0       0.84 1.0 0.63
   1    2                  4, 3                             d, d Gaussian        0       0.81 1.0 0.60
   1    3                  2, 5                             d, d Gaussian        0       0.91 1.0 0.72
   1    4                  3, 5                             d, d Gaussian        0       0.84 1.0 0.64
   2    1                  1, 3                      5      d, d Gaussian        0       0.37 1.0 0.24
   2    2                  4, 5                      3      d, d Gaussian        0       0.29 1.0 0.18
   2    3                  2, 3                      5      d, d Gaussian        0       0.17 1.0 0.11
   3    1                  1, 4                   3, 5      d, d Gaussian        0       0.49 1.0 0.32
   3    2                  4, 2                   5, 3      d, d Gaussian        0       0.44 1.0 0.29
   4    1                  1, 2                4, 3, 5      d, d Gaussian        0       0.02 1.0 0.01

Model for mixed data: only the first variable is transformed to a Poisson margin

[5]:
# Transform first variable to Poisson margin
x_poisson_mixed = poisson_ppf(u[:, 0], 1)
u_disc_mixed = np.hstack(
  (
    poisson_cdf(x_poisson_mixed, 1).reshape(-1, 1),
    u[:, 1:5],
    poisson_cdf(x_poisson_mixed - 1, 1).reshape(-1, 1),
  )
)

# Fit vine copula model for mixed data
fit_mixed = pv.Vinecop.from_data(
  u_disc_mixed, var_types=["d"] + ["c"] * 4, controls=controls
)
print(str(fit_mixed))
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types   family rotation parameters  df  tau
   1    1                  1, 5                             d, c Gaussian        0       0.83 1.0 0.62
   1    2                  4, 3                             c, c Gaussian        0       0.82 1.0 0.61
   1    3                  2, 5                             c, c Gaussian        0       0.90 1.0 0.71
   1    4                  3, 5                             c, c Gaussian        0       0.89 1.0 0.70
   2    1                  1, 3                      5      d, c Gaussian        0       0.32 1.0 0.21
   2    2                  4, 5                      3      c, c Gaussian        0       0.24 1.0 0.15
   2    3                  2, 3                      5      c, c Gaussian        0       0.04 1.0 0.03
   3    1                  1, 4                   3, 5      d, c Gaussian        0       0.33 1.0 0.22
   3    2                  4, 2                   5, 3      c, c Gaussian        0       0.31 1.0 0.20
   4    1                  1, 2                4, 3, 5      d, c Gaussian        0       0.05 1.0 0.03