Multi-output Gaussian Processes in PyMC [GSoC Week 10-12]
A personal note on the progress of incorporating Multi-output Gaussian Processes (MOGPs) into PyMC. Weeks 10-12 focus on implementing the Intrinsic Coregionalization Model (ICM) and the Linear Coregionalization Model (LCM) using Kronecker products.
- Set up training data: same X, three Y outputs
- Option 1: Implement ICM (one kernel) by using LatentKron with Coregion kernel
- Option 2.1: Implement ICM (one kernel) by using pm.gp.cov.Kron with pm.gp.Marginal
- Option 2.2: Implement LCM by using pm.gp.cov.Kron with pm.gp.Marginal
This work is supported by GSoC, NumFOCUS, and the PyMC team.
Given input data $x$ and output index $o$, the ICM kernel $K$ is the Kronecker product of a kernel over the inputs and a kernel over the outputs:

$$ K = K_1(x, x') \otimes K_2(o, o') $$

Note: this Kronecker product only works when all outputs share the same input data. In addition, the kernels for the input data need to be stationary.
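As a minimal NumPy sketch of this structure (an illustrative RBF kernel and hand-picked $W$ and $\kappa$, not values from the models below), the ICM covariance over all (input, output) pairs is the Kronecker product of the data kernel with the coregionalization matrix $B = W W^\top + \mathrm{diag}(\kappa)$:

import numpy as np

def rbf(x, ls=0.3):
    # squared-exponential kernel on a 1D grid (illustration only)
    d = x[:, None] - x[None, :]
    return np.exp(-0.5 * (d / ls) ** 2)

x_demo = np.linspace(0, 1, 5)              # 5 shared inputs
W_demo = np.array([[1.0], [0.5], [-0.3]])  # hypothetical 3x1 mixing weights
kappa_demo = np.array([0.1, 0.2, 0.1])     # hypothetical per-output diagonal terms
B_demo = W_demo @ W_demo.T + np.diag(kappa_demo)  # K_2(o, o'), shape (3, 3)
K_demo = np.kron(rbf(x_demo), B_demo)             # ICM covariance, shape (15, 15)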
import numpy as np
import pymc as pm
from pymc.gp.cov import Covariance
import arviz as az
import matplotlib.pyplot as plt
# set the seed
np.random.seed(1)
import math
%matplotlib inline
%load_ext autoreload
%autoreload 2
N = 50  # number of training points per output
train_x = np.linspace(0, 1, N)
# Three noisy outputs observed at the same inputs
train_y = np.stack([
    np.sin(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
    np.cos(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
    np.cos(train_x * (1 * math.pi)) + np.random.randn(len(train_x)) * 0.1,
], -1)
train_x.shape, train_y.shape
fig, ax = plt.subplots(1,1, figsize=(12,5))
ax.scatter(train_x, train_y[:,0])
ax.scatter(train_x, train_y[:,1])
ax.scatter(train_x, train_y[:,2])
plt.legend(["y1", "y2", "y3"])
# Flatten to columns; y is x-major: (x0, y1), (x0, y2), (x0, y3), (x1, y1), ...
x = train_x.reshape(-1, 1)
y = train_y.reshape(-1, 1)
x.shape, y.shape
task_i = np.linspace(0, 2, 3)[:, None]  # output (task) indices 0, 1, 2 as a column
Xs = [x, task_i]  # input factors for the Kronecker-structured GP (training)
Xs[0].shape, Xs[1].shape, x.shape
M = 100  # number of prediction points per output
xnew = np.linspace(-0.5, 1.5, M)
Xnew = pm.math.cartesian(xnew, task_i)  # all (x, task) pairs for prediction
Xnew.shape
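Ordering matters here: pm.math.cartesian loops earlier arrays more slowly, so Xnew lists all three task indices for each xnew value before moving to the next. That matches the row-major flattening of train_y into y above, and it is why boolean masks such as Xnew[:, 1] == k can pick out one output's predictions later on. A quick inspection:

Xnew[:4]  # rows (xnew[0], 0), (xnew[0], 1), (xnew[0], 2), (xnew[1], 0)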
Under the hood, the marginal likelihood and predictions solve the linear system $(K + \sigma^2 I)\,\alpha = y$ via the Cholesky factorization $K + \sigma^2 I = L L^\top$:

$$ L L^\top \alpha = y $$

Letting $B = L^\top \alpha$ gives $L B = y$, so two triangular solves recover $\alpha$:

$$ B = L \backslash y, \qquad \alpha = L^\top \backslash B = L^\top \backslash (L \backslash y) $$
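A minimal NumPy/SciPy sketch of those two triangular solves, on a toy positive-definite matrix rather than the model's kernel:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
K_noise = A @ A.T + 5 * np.eye(5)     # stand-in for K + sigma^2 * I
y_toy = rng.standard_normal(5)

L = cholesky(K_noise, lower=True)                 # K + sigma^2 I = L @ L.T
B = solve_triangular(L, y_toy, lower=True)        # B = L \ y
alpha = solve_triangular(L.T, B, lower=False)     # alpha = L.T \ B
assert np.allclose(alpha, np.linalg.solve(K_noise, y_toy))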
Option 1: implement ICM (one data kernel) by using LatentKron with a Coregion kernel.

with pm.Model() as model:
    # Kernel over the inputs: K_1(x, x')
    ell = pm.Gamma("ell", alpha=2, beta=0.5)
    eta = pm.Gamma("eta", alpha=3, beta=1)
    cov = eta**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ell)
    # Coregion B matrix over the outputs: K_2(o, o')
    W = pm.Normal("W", mu=0, sigma=3, shape=(3, 2), initval=np.random.randn(3, 2))
    kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
    coreg = pm.gp.cov.Coregion(input_dim=1, kappa=kappa, W=W)
    # Specify the GP. The default mean function is `Zero`.
    mogp = pm.gp.LatentKron(cov_funcs=[cov, coreg])
    sigma = pm.HalfNormal("sigma", sigma=3)
    # Place a GP prior over the function f.
    f = mogp.prior("f", Xs=Xs)
    y_ = pm.Normal("y_", mu=f, sigma=sigma, observed=y.squeeze())
coreg.full(task_i).eval()
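The Coregion covariance evaluates to the coregionalization matrix $B = W W^\top + \mathrm{diag}(\kappa)$, which the cell above inspects. A minimal NumPy sketch of the same formula (hand-picked values, only to show the structure):

W_np = np.random.randn(3, 2)   # hypothetical mixing weights, one row per output
kappa_np = np.full(3, 0.5)     # hypothetical per-output diagonal terms
B_np = W_np @ W_np.T + np.diag(kappa_np)  # (3, 3) covariance between the outputs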
pm.model_to_graphviz(model)
%%time
with model:
    gp_trace = pm.sample(500, chains=1)
%%time
with model:
    preds = mogp.conditional("preds", Xnew, jitter=1e-6)
    gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=["preds"])
pm.model_to_graphviz(model)
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
f_pred.shape
from pymc.gp.util import plot_gp_dist
fig, axes = plt.subplots(1, 1, figsize=(8, 4))
axes.plot(x, train_y[:, 0], 'ok', ms=3, alpha=0.5, label="Data 1")
# Mask on the task column to select output 0's predictions
plot_gp_dist(axes, f_pred[:, Xnew[:, 1] == 0], xnew)
plt.show()
fig, axes = plt.subplots(1, 1, figsize=(8, 4))
axes.plot(x, train_y[:, 1], 'ok', ms=3, alpha=0.5, label="Data 2")
# Mask on the task column to select output 1's predictions
plot_gp_dist(axes, f_pred[:, Xnew[:, 1] == 1], xnew)
plt.show()
Option 2.1: implement ICM (one data kernel) by using pm.gp.cov.Kron with pm.gp.Marginal. Here the training inputs are the full cartesian product of x and the task indices:

X = pm.math.cartesian(x, task_i)
x.shape, task_i.shape, X.shape
with pm.Model() as model:
    # Kernel over the inputs: K_1(x, x')
    ell = pm.Gamma("ell", alpha=2, beta=0.5)
    eta = pm.Gamma("eta", alpha=3, beta=1)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell)
    # Coregion B matrix over the outputs: K_2(o, o')
    W = pm.Normal("W", mu=0, sigma=3, shape=(3, 2), initval=np.random.randn(3, 2))
    kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
    coreg = pm.gp.cov.Coregion(input_dim=1, kappa=kappa, W=W)
    # Kronecker-structured covariance over the combined (x, task) input
    cov_func = pm.gp.cov.Kron([cov, coreg])
    sigma = pm.HalfNormal("sigma", sigma=3)
    gp = pm.gp.Marginal(cov_func=cov_func)
    y_ = gp.marginal_likelihood("f", X, y.squeeze(), noise=sigma)
cov(x).eval().shape, coreg(task_i).eval().shape, cov_func(X).eval().shape
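On a cartesian-product grid, pm.gp.cov.Kron agrees with the explicit Kronecker product of its factor matrices. A quick sanity check with fixed, hand-picked hyperparameters (evaluating the model's random-variable kernels twice would draw different values, so fixed kernels are used instead):

cov_fix = pm.gp.cov.ExpQuad(1, ls=0.2)
coreg_fix = pm.gp.cov.Coregion(input_dim=1, kappa=np.ones(3), W=np.random.randn(3, 2))
kron_fix = pm.gp.cov.Kron([cov_fix, coreg_fix])
# Equal because X stacks all tasks per x value (x varies slowest)
np.allclose(kron_fix(X).eval(), np.kron(cov_fix(x).eval(), coreg_fix(task_i).eval()))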
%%time
with model:
    gp_trace = pm.sample(500, chains=1)
%%time
with model:
    preds = gp.conditional("preds", Xnew, jitter=1e-6)
    gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=["preds"])
pm.model_to_graphviz(model)
Xnew.shape
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
f_pred.shape
fig, axes = plt.subplots(3, 1, figsize=(10, 10))
for idx in range(3):
    axes[idx].plot(x, train_y[:, idx], 'ok', ms=3, alpha=0.5, label=f"Data {idx + 1}")
    plot_gp_dist(axes[idx], f_pred[:, Xnew[:, 1] == idx], xnew,
                 fill_alpha=0.5, samples_alpha=0.1)
plt.show()
az.summary(gp_trace)
az.plot_trace(gp_trace);
plt.tight_layout()
Option 2.2: implement LCM by using pm.gp.cov.Kron with pm.gp.Marginal, summing two data kernels before taking the Kronecker product with the Coregion matrix:

X = pm.math.cartesian(x, task_i)
x.shape, task_i.shape, X.shape
with pm.Model() as model:
    # First data kernel: smooth ExpQuad component
    ell = pm.Gamma("ell", alpha=2, beta=0.5)
    eta = pm.Gamma("eta", alpha=3, beta=1)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell)
    # Second data kernel: rougher Matern32 component
    ell2 = pm.Gamma("ell2", alpha=2, beta=0.5)
    eta2 = pm.Gamma("eta2", alpha=3, beta=1)
    cov2 = eta2**2 * pm.gp.cov.Matern32(1, ls=ell2)
    # Coregion B matrix over the outputs
    W = pm.Normal("W", mu=0, sigma=3, shape=(3, 2), initval=np.random.randn(3, 2))
    kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
    coreg = pm.gp.cov.Coregion(input_dim=1, kappa=kappa, W=W)
    cov_func = pm.gp.cov.Kron([cov + cov2, coreg])
    sigma = pm.HalfNormal("sigma", sigma=3)
    gp = pm.gp.Marginal(cov_func=cov_func)
    y_ = gp.marginal_likelihood("f", X, y.squeeze(), noise=sigma)
cov(x).eval().shape, coreg(task_i).eval().shape, cov_func(X).eval().shape
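Note that with a single Coregion matrix the kernel above is $(K_1 + K_2) \otimes B$, which is still an ICM with a sum kernel. A full LCM gives each latent kernel its own coregionalization matrix, $K = K_1 \otimes B_1 + K_2 \otimes B_2$. A sketch of how that could look (hypothetical names, parallel to the model above; not sampled here):

with pm.Model() as lcm_model:
    cov1 = pm.gp.cov.ExpQuad(1, ls=pm.Gamma("ell_1", alpha=2, beta=0.5))
    cov2_ = pm.gp.cov.Matern32(1, ls=pm.Gamma("ell_2", alpha=2, beta=0.5))
    # One coregionalization matrix per latent kernel
    W1 = pm.Normal("W1", mu=0, sigma=3, shape=(3, 2))
    W2 = pm.Normal("W2", mu=0, sigma=3, shape=(3, 2))
    kappa1 = pm.Gamma("kappa1", alpha=1.5, beta=1, shape=3)
    kappa2 = pm.Gamma("kappa2", alpha=1.5, beta=1, shape=3)
    coreg1 = pm.gp.cov.Coregion(input_dim=1, kappa=kappa1, W=W1)
    coreg2 = pm.gp.cov.Coregion(input_dim=1, kappa=kappa2, W=W2)
    # K = K_1 (x) B_1 + K_2 (x) B_2
    cov_func_lcm = pm.gp.cov.Kron([cov1, coreg1]) + pm.gp.cov.Kron([cov2_, coreg2])
    gp_lcm = pm.gp.Marginal(cov_func=cov_func_lcm)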
%%time
with model:
    gp_trace = pm.sample(500, chains=1)
%%time
with model:
    preds = gp.conditional("preds", Xnew, jitter=1e-6)
    gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=["preds"])
pm.model_to_graphviz(model)
Xnew.shape
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
f_pred.shape
fig, axes = plt.subplots(3, 1, figsize=(10, 10))
for idx in range(3):
    axes[idx].plot(x, train_y[:, idx], 'ok', ms=3, alpha=0.5, label=f"Data {idx + 1}")
    plot_gp_dist(axes[idx], f_pred[:, Xnew[:, 1] == idx], xnew,
                 fill_alpha=0.5, samples_alpha=0.1)
plt.show()
az.summary(gp_trace)
%load_ext watermark
%watermark -n -u -v -iv -w