Fitting a vine copula on dataset and sampling from the model

Import the required libraries

[1]:
import numpy as np
import pyvinecopulib as pv

Simulate some data

[2]:
np.random.seed(1234)  # seed for the random generator
n = 1000  # number of observations
d = 5  # the dimension
mean = 1 + np.random.normal(size=d)  # mean vector
cov = np.random.normal(size=(d, d))  # covariance matrix
cov = np.dot(cov.transpose(), cov)  # make it non-negative definite
x = np.random.multivariate_normal(mean, cov, n)

Fit a model

[3]:
# Transform copula data using the empirical distribution
u = pv.to_pseudo_obs(x)


# Fit a Gaussian vine
# (i.e., properly specified since the data is multivariate normal)
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])
cop = pv.Vinecop.from_data(u, controls=controls)
print(cop)
<pyvinecopulib.Vinecop> Vinecop model with 5 variables
tree edge conditioned variables conditioning variables var_types   family rotation parameters  df   tau
   1    1                  3, 1                             c, c Gaussian        0       0.40 1.0  0.26
   1    2                  2, 1                             c, c Gaussian        0       0.67 1.0  0.47
   1    3                  4, 1                             c, c Gaussian        0      -0.61 1.0 -0.42
   1    4                  1, 5                             c, c Gaussian        0      -0.63 1.0 -0.44
   2    1                  3, 2                      1      c, c Gaussian        0      -0.74 1.0 -0.53
   2    2                  2, 4                      1      c, c Gaussian        0      -0.15 1.0 -0.09
   2    3                  4, 5                      1      c, c Gaussian        0      -0.80 1.0 -0.60
   3    1                  3, 4                   2, 1      c, c Gaussian        0      -0.05 1.0 -0.03
   3    2                  2, 5                   4, 1      c, c Gaussian        0      -0.49 1.0 -0.33
   4    1                  3, 5                4, 2, 1      c, c Gaussian        0      -0.06 1.0 -0.04

Sample from the model

[4]:
# Sample from the copula
n_sim = 1000
u_sim = cop.simulate(n_sim, seeds=[1, 2, 3, 4])

# Transform back simulations to the original scale
x_sim = np.asarray([np.quantile(x[:, i], u_sim[:, i]) for i in range(0, d)])

# Both the mean and covariance matrix look ok!
[mean, np.mean(x_sim, 1)]
[cov, np.cov(x_sim)]
[4]:
[array([[ 2.37095772,  1.72011592,  1.34581349, -2.33400987, -3.1412032 ],
        [ 1.72011592,  2.77391072, -0.8386675 , -1.94255384, -3.0356469 ],
        [ 1.34581349, -0.8386675 ,  4.73656299, -1.11520579, -1.00737741],
        [-2.33400987, -1.94255384, -1.11520579,  6.17099976, -0.86804342],
        [-3.1412032 , -3.0356469 , -1.00737741, -0.86804342, 10.39309954]]),
 array([[ 2.22422017,  1.5946634 ,  1.44467283, -2.24840034, -2.86432409],
        [ 1.5946634 ,  2.56490065, -0.64236225, -1.88381331, -2.55280137],
        [ 1.44467283, -0.64236225,  4.48378773, -1.21625326, -1.35825765],
        [-2.24840034, -1.88381331, -1.21625326,  6.11239488, -1.14967754],
        [-2.86432409, -2.55280137, -1.35825765, -1.14967754,  9.93512784]])]