This work is supported by GSoC, NumFOCUS, and the PyMC team.
In the last 12 weeks, I focused on implementing the Intrinsic Coregionalization Model (ICM) and the Linear Coregionalization Model (LCM) in PyMC. All the experimental code is published in this GitHub repository.
Weeks 10-12 focused on implementing ICM and LCM using the Kronecker product. Note that the Kronecker product approach only works when all outputs share the same input data. In addition, the kernels for the input data need to be stationary.
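For reference, the general Linear Coregionalization Model covariance has the form below (standard notation from the multi-output GP literature, not symbols from the code in this post); ICM is the special case with a single term, $Q = 1$:

$$ K = \sum_{q=1}^{Q} B_q \otimes K_q(x, x'), \qquad B_q = W_q W_q^{\top} + \mathrm{diag}(\kappa_q) $$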
I created a PR on the pymc-experimental GitHub repo. This is a work-in-progress (WIP) PR, as I still need to try and test different API options for different kinds of input data. The PR adds a MultiOutputGP supporting both the Hadamard product and the Kronecker product. The project allowed me to learn more about Gaussian Processes (GPs), their advantages, and also their limitations. I think GPs have huge potential for spatial and temporal (time-series) data sets.
Besides, implementing GPs helped me further understand the Multivariate Normal distribution :) There is still a lot to learn and do; I'm especially interested in learning more about other methods for time-series data, and in comparing the performance of these models.
Finally, I would like to thank the PyMC dev team, especially my mentors Chris Fonnesbeck and Bill Engels, for their great guidance and support. I would definitely not have been able to carry out the project without their insightful suggestions. I would love to get involved and contribute more to the PyMC community after this project. Thank you also to NumFOCUS and the GSoC program for providing me the opportunity to work on the Multi-output Gaussian Processes in PyMC project.
This work is supported by GSoC, NumFOCUS, and the PyMC team.
Given input data $x$ and different outputs $o$, the ICM kernel $K$ is calculated by the Kronecker product:
$$ K = K_1(x, x') \otimes K_2(o, o') $$

NOTE: This Kronecker product approach only works when all outputs share the same input data. In addition, the kernels for the input data need to be stationary.
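As a minimal NumPy sketch of the shapes involved (illustrative values only; the kernel, W, and kappa below are made up and are not the PyMC model that follows):

import numpy as np

def rbf(x, ls=0.3):
    """Toy exponentiated quadratic kernel on a 1-D grid."""
    d = x[:, None] - x[None, :]
    return np.exp(-0.5 * (d / ls) ** 2)

x = np.linspace(0, 1, 50)            # shared inputs for all outputs (N = 50)
K1 = rbf(x)                          # input kernel K_1(x, x'), shape (50, 50)

W = np.random.randn(3, 2)            # 3 outputs, rank-2 mixing (made-up values)
kappa = np.random.rand(3)
K2 = W @ W.T + np.diag(kappa)        # output kernel K_2(o, o'), shape (3, 3)

K = np.kron(K1, K2)                  # ICM covariance over all outputs
print(K.shape)                       # (150, 150) = (N * n_outputs, N * n_outputs)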
import numpy as np
import pymc as pm
from pymc.gp.cov import Covariance
import arviz as az
import matplotlib.pyplot as plt
# set the seed
np.random.seed(1)
import math
%matplotlib inline
%load_ext autoreload
%reload_ext autoreload
%autoreload 2
N = 50
train_x = np.linspace(0, 1, N)
train_y = np.stack([
np.sin(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
np.cos(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
np.cos(train_x * (1 * math.pi)) + np.random.randn(len(train_x)) * 0.1,
], -1)
train_x.shape, train_y.shape
fig, ax = plt.subplots(1,1, figsize=(12,5))
ax.scatter(train_x, train_y[:,0])
ax.scatter(train_x, train_y[:,1])
ax.scatter(train_x, train_y[:,2])
plt.legend(["y1", "y2", "y3"])
train_x.shape, train_y.shape
x = train_x.reshape(-1,1)
y = train_y.reshape(-1,1)
x.shape, y.shape
task_i = np.linspace(0, 2, 3)[:, None]
Xs = [x, task_i] # For training
Xs[0].shape, Xs[1].shape, x.shape
M = 100
xnew = np.linspace(-0.5, 1.5, M)
Xnew = pm.math.cartesian(xnew, task_i) # For prediction
Xnew.shape
Xs[0].shape, Xs[1]
With the Gaussian noise likelihood, the posterior solve uses the Cholesky factorization $K + \sigma^2 I = L L^\top$:

$$ (K + \sigma^2 I)\,\alpha = L L^\top \alpha = y $$

Let $\beta = L^\top \alpha$. Then $L \beta = y$ gives $\beta = L \backslash y$, and $L^\top \alpha = \beta$ gives $\alpha = L^\top \backslash (L \backslash y)$, i.e. two triangular solves instead of forming an explicit inverse.
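A small NumPy/SciPy sketch of these two triangular solves on a toy 5x5 matrix (made-up values, not the model below), checked against a direct solve:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(0)
A = rng.normal(size=(5, 5))
K_y = A @ A.T + 0.1 * np.eye(5)                      # stands in for K + sigma^2 * I
y_toy = rng.normal(size=5)

L = cholesky(K_y, lower=True)                        # K_y = L @ L.T
beta = solve_triangular(L, y_toy, lower=True)        # beta = L \ y
alpha = solve_triangular(L.T, beta, lower=False)     # alpha = L.T \ beta

print(np.allclose(alpha, np.linalg.solve(K_y, y_toy)))   # True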
with pm.Model() as model:
# Kernel: K_1(x,x')
ell = pm.Gamma("ell", alpha=2, beta=0.5)
eta = pm.Gamma("eta", alpha=3, beta=1)
cov = eta**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ell)
# Coregion B matrix: K_2(o,o')
W = pm.Normal("W", mu=0, sigma=3, shape=(3,2), initval=np.random.randn(3,2))
kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
coreg = pm.gp.cov.Coregion(input_dim=1, kappa=kappa, W=W)
# Specify the GP. The default mean function is `Zero`.
mogp = pm.gp.LatentKron(cov_funcs=[cov, coreg])
sigma = pm.HalfNormal("sigma", sigma=3)
    # Place a GP prior over the function f.
f = mogp.prior("f", Xs=Xs)
y_ = pm.Normal("y_", mu=f, sigma=sigma, observed=y.squeeze())
coreg.full(task_i).eval()
pm.model_to_graphviz(model)
%%time
with model:
gp_trace = pm.sample(500, chains=1)
%%time
with model:
preds = mogp.conditional("preds", Xnew, jitter=1e-6)
gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=['preds'])
pm.model_to_graphviz(model)
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
f_pred.shape
from pymc.gp.util import plot_gp_dist
fig, axes = plt.subplots(1,1, figsize=(8,4))
plt.plot(x, train_y[:,0], 'ok', ms=3, alpha=0.5, label="Data 1");
plot_gp_dist(axes, f_pred[:, 0:N], x)
plot_gp_dist(axes, f_pred[:,Xnew[:,1] == 0], xnew)
plt.show()
from pymc.gp.util import plot_gp_dist
fig, axes = plt.subplots(1,1, figsize=(8,4))
plt.plot(x, train_y[:,1], 'ok', ms=3, alpha=0.5, label="Data 1");
plot_gp_dist(axes, f_pred[:, N:2*N], x)
plot_gp_dist(axes, f_pred[:,Xnew[:,1] == 1], xnew)
plt.show()
X = pm.math.cartesian(x, task_i)
x.shape, task_i.shape, X.shape
with pm.Model() as model:
ell = pm.Gamma("ell", alpha=2, beta=0.5)
eta = pm.Gamma("eta", alpha=3, beta=1)
cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell)
W = pm.Normal("W", mu=0, sigma=3, shape=(3,2), initval=np.random.randn(3,2))
kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
coreg = pm.gp.cov.Coregion(input_dim=1, kappa=kappa, W=W)
cov_func = pm.gp.cov.Kron([cov, coreg])
sigma = pm.HalfNormal("sigma", sigma=3)
gp = pm.gp.Marginal(cov_func=cov_func)
y_ = gp.marginal_likelihood("f", X, y.squeeze(), noise=sigma)
cov(x).eval().shape, coreg(task_i).eval().shape, cov_func(X).eval().shape
%%time
with model:
gp_trace = pm.sample(500, chains=1)
%%time
with model:
preds = gp.conditional("preds", Xnew, jitter=1e-6)
gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=['preds'])
pm.model_to_graphviz(model)
Xnew.shape
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
f_pred.shape
from pymc.gp.util import plot_gp_dist
fig, axes = plt.subplots(3,1, figsize=(10,10))
for idx in range(3):
axes[idx].plot(x, train_y[:,idx], 'ok', ms=3, alpha=0.5, label=f"Data {idx}");
plot_gp_dist(axes[idx], f_pred[:,Xnew[:,1] == idx], xnew,
fill_alpha=0.5, samples_alpha=0.1)
plt.show()
az.summary(gp_trace)
az.plot_trace(gp_trace);
plt.tight_layout()
X = pm.math.cartesian(x, task_i)
x.shape, task_i.shape, X.shape
with pm.Model() as model:
ell = pm.Gamma("ell", alpha=2, beta=0.5)
eta = pm.Gamma("eta", alpha=3, beta=1)
cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell)
ell2 = pm.Gamma("ell2", alpha=2, beta=0.5)
eta2 = pm.Gamma("eta2", alpha=3, beta=1)
cov2 = eta2**2 * pm.gp.cov.Matern32(1, ls=ell2)
W = pm.Normal("W", mu=0, sigma=3, shape=(3,2), initval=np.random.randn(3,2))
kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
coreg = pm.gp.cov.Coregion(input_dim=1, kappa=kappa, W=W)
cov_func = pm.gp.cov.Kron([cov+cov2, coreg])
sigma = pm.HalfNormal("sigma", sigma=3)
gp = pm.gp.Marginal(cov_func=cov_func)
y_ = gp.marginal_likelihood("f", X, y.squeeze(), noise=sigma)
cov(x).eval().shape, coreg(task_i).eval().shape, cov_func(X).eval().shape
%%time
with model:
gp_trace = pm.sample(500, chains=1)
%%time
with model:
preds = gp.conditional("preds", Xnew, jitter=1e-6)
gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=['preds'])
pm.model_to_graphviz(model)
Xnew.shape
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
f_pred.shape
from pymc.gp.util import plot_gp_dist
fig, axes = plt.subplots(3,1, figsize=(10,10))
for idx in range(3):
axes[idx].plot(x, train_y[:,idx], 'ok', ms=3, alpha=0.5, label=f"Data {idx}");
plot_gp_dist(axes[idx], f_pred[:,Xnew[:,1] == idx], xnew,
fill_alpha=0.5, samples_alpha=0.1)
plt.show()
az.summary(gp_trace)
%load_ext watermark
%watermark -n -u -v -iv -w
This work is supported by GSoC, NumFOCUS, and the PyMC team.
Given input data $x$ and different outputs $o$, the ICM kernel $K$ is calculated by the Hadamard (element-wise) product:

$$ K = K_1(x, x') * K_2(o, o') $$

where $K_2(o, o')$ is broadcast to the shape of $K_1(x, x')$ using the Coregion kernel.

NOTE: This Hadamard product works with either the same input data or different input data across outputs.
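A rough NumPy sketch of that broadcasting (assumed shapes only, not the PyMC internals): each row of the stacked input carries an output index, and the coregionalization matrix is indexed by those indices before the element-wise product.

import numpy as np

def rbf(x, ls=0.3):
    d = x[:, None] - x[None, :]
    return np.exp(-0.5 * (d / ls) ** 2)

# Stacked inputs: column 0 holds x, column 1 holds the output index
x1 = np.linspace(0, 1, 4)                       # inputs for output 0
x2 = np.linspace(0, 1, 6)                       # inputs for output 1 (a different size is fine)
X_stacked = np.vstack([
    np.column_stack([x1, np.zeros_like(x1)]),
    np.column_stack([x2, np.ones_like(x2)]),
])

W = np.random.randn(2, 1)                       # made-up coregionalization parameters
kappa = np.random.rand(2)
B = W @ W.T + np.diag(kappa)                    # output kernel K_2(o, o')

o = X_stacked[:, 1].astype(int)
K = rbf(X_stacked[:, 0]) * B[o[:, None], o[None, :]]   # Hadamard (element-wise) product
print(K.shape)                                          # (10, 10)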
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
# set the seed
np.random.seed(1)
from multi_ouputs import build_XY, ICM, LCM, MultiMarginal
from mo import MultiOutputMarginal
import math
%matplotlib inline
%load_ext autoreload
%reload_ext autoreload
%autoreload 2
N = 50
train_x = np.linspace(0, 1, N)
train_y = np.stack([
np.sin(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
np.cos(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
np.cos(train_x * (1 * math.pi)) + np.random.randn(len(train_x)) * 0.1,
], -1)
train_x.shape, train_y.shape
fig, ax = plt.subplots(1,1, figsize=(12,5))
ax.scatter(train_x, train_y[:,0])
ax.scatter(train_x, train_y[:,1])
ax.scatter(train_x, train_y[:,2])
plt.legend(["y1", "y2", "y3"])
train_x.shape, train_y.shape
np.vstack([train_y[:,0], train_y[:,1], train_y[:,2]]).shape
x = train_x.reshape(-1,1)
X, Y, _ = build_XY([x,x,x],
[train_y[:,0].reshape(-1,1),
train_y[:,1].reshape(-1,1),
train_y[:,2].reshape(-1,1)])
x.shape, X.shape, Y.shape
M = 100
x_new = np.linspace(-0.5, 1.5, M)[:, None]
X_new, _, _ = build_XY([x_new, x_new, x_new])
X_new.shape
import aesara.tensor as at
with pm.Model() as model:
ell = pm.Gamma("ell", alpha=2, beta=0.5)
eta = pm.Gamma("eta", alpha=3, beta=1)
cov = eta**2 * pm.gp.cov.ExpQuad(input_dim=2, ls=ell, active_dims=[0])
W = np.random.rand(3,2) # (n_outputs, w_rank)
kappa = np.random.rand(3)
B = pm.Deterministic('B', at.dot(W, W.T) + at.diag(kappa))
sigma = pm.HalfNormal("sigma", sigma=3)
mogp = MultiOutputMarginal(means=0, kernels=[cov], input_dim=2, active_dims=[1], num_outputs=3, B=B)
y_ = mogp.marginal_likelihood("f", X, Y.squeeze(), noise=sigma)
pm.model_to_graphviz(model)
%%time
with model:
gp_trace = pm.sample(500, chains=1)
%%time
with model:
preds = mogp.conditional("preds", X_new)
gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=['preds'], random_seed=42)
pm.model_to_graphviz(model)
from pymc.gp.util import plot_gp_dist
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
fig, axes = plt.subplots(3,1, figsize=(10,10))
n_points = M  # points per output in X_new (assumed; M = 100 above)
for idx in range(3):
plot_gp_dist(axes[idx], f_pred[:,n_points*idx:n_points*(idx+1)],
X_new[n_points*idx:n_points*(idx+1),0],
palette="Blues", fill_alpha=0.5, samples_alpha=0.1)
    axes[idx].plot(x, train_y[:,idx], 'ok', ms=3, alpha=0.5, label=f"Data {idx}");
az.summary(gp_trace)
with pm.Model() as model:
# Priors
ell = pm.Gamma("ell", alpha=2, beta=0.5, shape=2)
eta = pm.Gamma("eta", alpha=3, beta=1, shape=2)
kernels = [pm.gp.cov.ExpQuad, pm.gp.cov.Matern32]
sigma = pm.HalfNormal("sigma", sigma=3)
# Define a list of covariance functions
cov_list = [eta[idx] ** 2 * kernel(2,ls=ell[idx], active_dims=[0])
for idx, kernel in enumerate(kernels)]
# Define a Multi-output GP
mogp = MultiOutputMarginal(means=0, kernels=cov_list, input_dim=2, active_dims=[1], num_outputs=3)
y_ = mogp.marginal_likelihood("f", X, Y.squeeze(), noise=sigma)
pm.model_to_graphviz(model)
# x1, y1
# x2, y2
# x3, y3
%%time
with model:
gp_trace = pm.sample(500, chains=1)
%%time
with model:
preds = mogp.conditional("preds", X_new)
gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=['preds'], random_seed=42)
pm.model_to_graphviz(model)
from pymc.gp.util import plot_gp_dist
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
fig, axes = plt.subplots(3,1, figsize=(10,10))
n_points = M  # points per output in X_new (assumed; M = 100 above)
for idx in range(3):
plot_gp_dist(axes[idx], f_pred[:,n_points*idx:n_points*(idx+1)],
X_new[n_points*idx:n_points*(idx+1),0],
palette="Blues", fill_alpha=0.5, samples_alpha=0.1)
    axes[idx].plot(x, train_y[:,idx], 'ok', ms=3, alpha=0.5, label=f"Data {idx}");
az.summary(gp_trace)
az.plot_trace(gp_trace);
plt.tight_layout()
%load_ext watermark
%watermark -n -u -v -iv -w
This work is supported by GSoC, NumFOCUS, and the PyMC team.
import math
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
# set the seed
np.random.seed(1)
%matplotlib inline
%load_ext autoreload
%reload_ext autoreload
%autoreload 2
train_x = np.linspace(0, 1, 50)
train_y = np.stack([
np.sin(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
np.cos(train_x * (2 * math.pi)) + np.random.randn(len(train_x)) * 0.2,
np.cos(train_x * (1 * math.pi)) + np.random.randn(len(train_x)) * 0.1,
], -1)
train_x.shape, train_y.shape
fig, ax = plt.subplots(1,1, figsize=(12,5))
ax.scatter(train_x, train_y[:,0])
ax.scatter(train_x, train_y[:,1])
ax.scatter(train_x, train_y[:,2])
plt.legend(["sin", "cos"])
x = train_x
xx = np.concatenate((x, x, x), axis=0)[:,None]
n = len(x)
idx2 = np.ones(n) + 1
idx = np.concatenate((np.zeros(n), np.ones(n), idx2))[:,None]
X = np.concatenate((xx, idx), axis=1)
y = np.concatenate((train_y[:,0], train_y[:,1], train_y[:,2]))
x.shape, X.shape, y.shape
X.shape, y.shape
with pm.Model() as model:
ell = pm.Gamma("ell", alpha=2, beta=0.5)
eta = pm.Gamma("eta", alpha=2, beta=0.5)
cov = eta**2 * pm.gp.cov.ExpQuad(2, ls=ell, active_dims=[0])
ell2 = pm.Gamma("ell2", alpha=2, beta=0.5)
eta2 = pm.Gamma("eta2", alpha=2, beta=0.5)
    cov2 = eta2**2 * pm.gp.cov.Matern32(2, ls=ell2, active_dims=[0])
W = pm.Normal("W", mu=0, sigma=3, shape=(3,2), initval=np.random.randn(3,2))
kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=3)
coreg = pm.gp.cov.Coregion(input_dim=2, active_dims=[1], kappa=kappa, W=W)
W2 = pm.Normal("W2", mu=0, sigma=3, shape=(3,2), initval=np.random.randn(3,2))
kappa2 = pm.Gamma("kappa2", alpha=1.5, beta=1, shape=3)
coreg2 = pm.gp.cov.Coregion(input_dim=2, active_dims=[1], kappa=kappa2, W=W2)
cov_func1 = coreg * cov #pm.gp.cov.Prod([coreg, cov])
cov_func2 = coreg2 * cov2 #pm.gp.cov.Prod([coreg2, cov2])
cov_func = cov_func1 + cov_func2 #pm.gp.cov.Add([cov_func1, cov_func2])
sigma = pm.HalfNormal("sigma", sigma=3)
gp = pm.gp.Marginal(cov_func=cov_func)
y_ = gp.marginal_likelihood("f", X, y, noise=sigma)
%%time
with model:
gp_trace = pm.sample(500, chains=1)
x_new = np.linspace(-0.5, 1.5, 200)[:, None]
xx_new = np.concatenate((x_new, x_new, x_new), axis=0)
idx2 = np.ones(200) + 1
idx2 = np.concatenate((np.zeros(200), np.ones(200), idx2))[:, None]
X_new = np.concatenate((xx_new, idx2), axis=1)
X_new.shape
with model:
preds = gp.conditional("preds", X_new)
gp_samples = pm.sample_posterior_predictive(gp_trace, var_names=['preds'], random_seed=42)
from pymc.gp.util import plot_gp_dist
fig = plt.figure(figsize=(12,5))
ax = fig.gca()
f_pred = gp_samples.posterior_predictive["preds"].sel(chain=0)
plot_gp_dist(ax, f_pred[:,:200], X_new[:200,0], palette="Blues", fill_alpha=0.5, samples_alpha=0.1)
ax.plot(x, train_y[:,0], 'ok', ms=3, alpha=0.5, label="Data 1");
from pymc.gp.util import plot_gp_dist
fig = plt.figure(figsize=(12,5))
ax = fig.gca()
plot_gp_dist(ax, f_pred[:,200:400], X_new[200:400,0], palette="Blues", fill_alpha=0.9, samples_alpha=0.1)
ax.plot(x, train_y[:,1], 'ok', ms=3, alpha=0.5, label="Data 2");
ax.set_ylim([-4,4])
from pymc.gp.util import plot_gp_dist
fig = plt.figure(figsize=(12,5))
ax = fig.gca()
plot_gp_dist(ax, f_pred[:,400:], X_new[400:,0], palette="Blues", fill_alpha=0.9, samples_alpha=0.1)
ax.plot(x, train_y[:,2], 'ok', ms=3, alpha=0.5, label="Data 3");
ax.set_ylim([-4,4])
az.summary(gp_trace)
az.plot_trace(gp_trace);
%load_ext watermark
%watermark -n -u -v -iv -w
This work is supported by GSoC, NumFOCUS, and the PyMC team.
In the previous weeks, I focused on implementing the Intrinsic Coregionalization Model (ICM) in PyMC.
In the beginning, I started with a small goal: to run an Intrinsic Coregionalization Model (ICM) in PyMC. The main part of the code was already developed in PyMC v3 by Bill Engels (one of my mentors), so I just needed to convert the PyMC v3 notebook into a PyMC v4 notebook.
The next goal was replicating the Coregionalized Regression Model example notebook in GPy. The result of ICM for this dataset is in this notebook. In addition, the example from GPyTorch has also been translated into PyMC here, with 3-dimensional outputs.
What about two or more outputs with real datasets? Using the data sets here with 4 outputs (GOLD, OIL, NASDAQ, and USD), it seems to work alright in this notebook, but it still needs further improvement.
There are several issues that I faced along the way:
This seems to be a popular issue: `ValueError: Mass matrix contains zeros on the diagonal.` when the input y has shape [n, 1].
Should we use inputs and outputs as lists similar to GPy, i.e. [x1, x2, x3] and [y1, y2, y3]? The pro is that it can handle datasets of different sizes.
The output shape was also discussed on this pull request. I will need to look into it in detail.
with pm.Model() as model:
ell = pm.Gamma("ell", alpha=2, beta=0.5)
eta = pm.Gamma("eta", alpha=2, beta=0.5)
cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell, active_dims=[0])
W = pm.Normal("W", mu=0, sigma=3, shape=(2,2), testval=np.random.randn(2,2))
kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=2)
coreg = pm.gp.cov.Coregion(input_dim=2, active_dims=[1], kappa=kappa, W=W)
cov_func = coreg * cov
This coreg * cov does not seem to be a Kronecker product? (A quick numeric check is sketched below.)
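A quick NumPy check (my own sketch, independent of PyMC): when X is the full Cartesian product of the inputs and the output indices, ordered as pm.math.cartesian(x, task_i) produces, the element-wise product of the two kernels evaluated on the stacked X does coincide with the Kronecker product of the two small kernel matrices; for arbitrary stacked X it does not.

import numpy as np

def rbf(x, ls=0.3):
    d = x[:, None] - x[None, :]
    return np.exp(-0.5 * (d / ls) ** 2)

x = np.linspace(0, 1, 4)                        # N = 4 inputs shared by both outputs
W = np.random.randn(2, 1)
kappa = np.random.rand(2)
B = W @ W.T + np.diag(kappa)                    # 2 x 2 coregionalization matrix

# Cartesian product with the output index varying fastest
X_cart = np.array([[xi, oi] for xi in x for oi in range(2)])
o = X_cart[:, 1].astype(int)

K_hadamard = rbf(X_cart[:, 0]) * B[o[:, None], o[None, :]]
K_kron = np.kron(rbf(x), B)
print(np.allclose(K_hadamard, K_kron))          # True for this Cartesian ordering

So on a full Cartesian grid the two formulations agree (up to row ordering); the Hadamard form additionally covers outputs observed at different inputs.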
Several things that I plan to do:
I also found the PyMCon2020 talk My Journey in Learning and Relearning Bayesian Statistics by Ali Akbar Septiandri really helpful.
The lecture videos and notes from the Machine Learning for Intelligent Systems course at Cornell University are a great introduction to general kernels such as Linear, Polynomial, Radial Basis Function (RBF) (aka the Gaussian kernel), the Exponential kernel, and more:
https://www.cs.cornell.edu/courses/cs4780/2018fa/lectures/lecturenote14.html
Note that not every function $K(\cdot,\cdot) \to \mathbb{R}$ can be used as a kernel. The matrix $K(x_i, x_j)$ has to correspond to real inner products after some transformation $x \to \phi(x)$, and this is the case if and only if $K$ is positive semi-definite.
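For instance, a quick numerical check (my own sketch, not from the lecture notes) that an exponentiated-quadratic Gram matrix is positive semi-definite, i.e. its eigenvalues are non-negative up to floating-point error:

import numpy as np

x = np.random.randn(30, 1)
sq_dist = np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=-1)
K = np.exp(-0.5 * sq_dist)                      # exponentiated quadratic Gram matrix

eigvals = np.linalg.eigvalsh(K)
print(eigvals.min() >= -1e-8)                   # True: positive semi-definite up to round-off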
Later, to learn more, A Primer on Gaussian Processes for Regression Analysis by Chris Fonnesbeck (PyData NYC 2019, Youtube link; notebooks on Github link) is a great place to start learning about GPs. He starts with a simple regression problem, then moves on to a simple Gaussian Process model using PyMC.
To understand Gaussian Processes more deeply, I found this lecture on Gaussian Processes from Cornell University really helpful. Many thanks to Kilian Weinberger for uploading his notes as well as lecture videos publicly.
The Gaussian Process Summer Schools are a great place to learn about various topics in GPs. The materials and slides can be found on the gpschool GitHub, while the recordings were published on YouTube.
I would suggest starting with the 2017 Gaussian Process Summer School, as that year has a comprehensive introduction to GPs as well as other topics. However, if you want to check more up-to-date topics on GPs, just watch the recent workshops.
On kernels:
Deep dive into GPs by implementing them from scratch. Building GPs with numpy and scipy is a good way to deeply understand how GPs work. From that, I think it also gives more insight into Multivariate Normal distributions.
At the beginning, it is kind of difficult to understand and work with GPs; it needs resilience. I have watched and re-watched some videos and played with the notebooks several times.
Knowing GPs helps with understanding parametric Bayesian models and distributions, especially the Multivariate Normal distribution.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
import numpy as np
import scipy
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.gridspec as gridspec
import seaborn as sns
# Set matplotlib and seaborn plotting style
sns.set_style('darkgrid')
np.random.seed(42)
def exp_quadratic(xa, xb):
"""Exponentiated quadratic with σ=1"""
    # L2 distance (squared Euclidean)
sq_norm = -0.5 * scipy.spatial.distance.cdist(xa, xb, 'sqeuclidean')
return np.exp(sq_norm)
xlim = (-3, 3)  # input range for the grid (assumed value)
X = np.expand_dims(np.linspace(*xlim, 25), 1)
Σ = exp_quadratic(X, X)
plt.imshow(Σ, cmap=cm.YlGnBu);
zero = np.array([[0]])
Σ0 = exp_quadratic(X, zero)
plt.plot(X[:,0], Σ0[:,0]);
n_samples = 100
n_funcs = 8
X = np.expand_dims(np.linspace(-4,4, n_samples), 1)
Σ = exp_quadratic(X, X)
ys = np.random.multivariate_normal(mean=np.zeros(n_samples), cov=Σ, size=n_funcs)
for i in range(n_funcs):
plt.plot(X, ys[i], linestyle='-', marker='o', markersize=3)
plt.xlabel('$x$', fontsize=13)
plt.ylabel('$y = f(x)$', fontsize=13)
plt.title((
f'{n_funcs} different function realizations at {n_samples} points\n'
'sampled from a Gaussian process with exponentiated quadratic kernel'))
plt.xlim([-4, 4])
plt.show()
exponentiated_quadratic = exp_quadratic
A = np.array([[1,-2j],[2j,5]])
A, A.shape
L = np.linalg.cholesky(A)
np.dot(L, L.T.conj())
A = [[1,-2j],[2j,5]] # what happens if A is only array_like?
np.linalg.cholesky(A) # an ndarray object is returned
np.linalg.cholesky(np.matrix(A))
def GP(X1, y1, X2, kernel_func):
cov11 = kernel_func(X1, X1)
cov12 = kernel_func(X1, X2)
solved = scipy.linalg.solve(cov11, cov12, assume_a='pos').T
mu2 = solved @ y1
cov22 = kernel_func(X2, X2)
cov2 = cov22 - (solved @ cov12)
return mu2, cov2
def GP2(X1, y1, X2, kernel_func):
K11 = kernel_func(X1, X1)
K12 = kernel_func(X1, X2)
K22 = kernel_func(X2, X2)
#L = np.linalg.cholesky(K11)
mu2 = K12.T.dot(np.linalg.inv(K11)).dot(y1)
cov2 = K22 - K12.T.dot(np.linalg.inv(K11)).dot(K12)
return mu2, cov2
n1 = 40 # Train points
ny = 10 # Number of functions
domain = (-6, 6)
domain[0]+2, domain[1]-2, (n1, 1)
X1 = np.random.uniform(domain[0]+2, domain[1]-2, size=(n1, 1))
X1.shape
%%prun
f_sin = lambda x: (np.sin(x)).flatten()
n1 = 40 # Train points
n2 = 75 # Test points
ny = 5 # Number of functions
domain = (-6, 6)
X1 = np.random.uniform(domain[0]+2, domain[1]-2, size=(n1, 1))
y1 = f_sin(X1)
X2 = np.linspace(domain[0], domain[1], n2).reshape(-1, 1)
# mu2, cov2 = GP(X1, y1, X2, exp_quadratic)
mu2, cov2 = GP2(X1, y1, X2, exp_quadratic)
sigma2 = np.sqrt(np.diag(cov2))
y2 = np.random.multivariate_normal(mean=mu2, cov=cov2, size=ny)
fig, (ax1, ax2) = plt.subplots(
nrows=2, ncols=1, figsize=(6, 6))
# Plot the distribution of the function (mean, covariance)
ax1.plot(X2, f_sin(X2), 'b--', label='$sin(x)$')
ax1.fill_between(X2.flat, mu2-2*sigma2, mu2+2*sigma2, color='red',
alpha=0.15, label='$2 \sigma_{2|1}$')
ax1.plot(X2, mu2, 'r-', lw=2, label='$\mu_{2|1}$')
ax1.plot(X1, y1, 'ko', linewidth=2, label='$(x_1, y_1)$')
# Plot some samples from this function
ax2.plot(X2, y2.T, '-')
ax2.set_xlabel('$x$', fontsize=13)
ax2.set_ylabel('$y$', fontsize=13)
ax2.set_title('5 different function realizations from posterior')
ax1.axis([domain[0], domain[1], -3, 3])
ax2.set_xlim([-6, 6])
plt.tight_layout()
plt.show()
from transformers import pipeline
classifier = pipeline('sentiment-analysis')
classifier("Ihave waiting for a course my whole life.")
alist = ["Covid is good", "I love covid"]
classifier(alist)
classifier = pipeline("zero-shot-classification")
classifier("This is a sensitive topic on transport and libarary",
candidate_labels=["education", "math", "business"])
generator = pipeline("text-generation")
generator("In this notebook, we will")
gen_gpt2 = pipeline("text-generation", model="distilgpt2")
gen_gpt2("In this pandas notebook, we will", max_lenght=20)
unmasker = pipeline("fill-mask")
unmasker("This notebook will show <mask> direction", top_k=2)
ner = pipeline("ner", grouped_entities=True)
ner("I am Dan P who carry out research at Monash Uni in Melbourne City")
qa = pipeline("question-answering")
qa(question="Where to I work",
context="I am Dan P who carry out research at Monash Uni in Melbourne City")
summarizer = pipeline("summarization")
article = """
The ongoing discussions within the presidential palace in Kabul are “utterly extraordinary” after two decades of war, CNN International Security Editor Nick Paton Walsh reports.
“This has been a morning of stunning events and that looks like we are heading towards some sort of transitional government here,” Paton Walsh said. He said names are being floated around, though nothing is confirmed, and President Ashraf Ghani would need to agree to step aside to make way for a transitional administration.
Yesterday Ghani made a brief but sombre address to the nation in which he said he was consulting with elders and other leaders both inside and outside of the country. In the short speech, he told the Afghan people his "focus is to avoid further instability, aggression and displacement," but he did not resign.
As talks on Sunday continue, Paton Walsh said there hasn’t been evidence of Taliban fighters moving into the city. Earlier panic appeared to be a clash around a bank where people were trying to withdraw money.
“I've heard sporadic gunfire here but that seems to be traffic disputes. A quick drive around the city has shown traffic has dissipated until you get towards the airport, so utter chaos and panic here. The traffic in the skies we saw around the embassy appears to have quietened as well so perhaps that might suggest some of that operation is winding up,” he continued.
The apparently last-ditch diplomatic efforts would hopefully avoid the Taliban presumably moving to its next phase of slowly entering the city, which Paton Walsh said would “not be remotely pleasant for anybody living here.”
“There will be elements of resistance too so I think everybody would prefer to avoid that kind of situation,” he added.
"""
summarizer(article)
translator = pipeline("translation", model="t5-base")
translator("Hi, my name is Dan")
import math
import numpy as np
import torch
import torch.nn as nn
copus_a = ["one is one", "two is two", "three is three", "four is four", "five is five",
"six is six", "seven is seven", "eight is eight", "nine is nine"]
copus_b = ["1 = 1", "2 = 2", "3 = 3", "4 = 4", "5 = 5",
"6 = 6", "7 = 7", "8 = 8", "9 = 9"]
embed_a = {"one": [1.0,0,0,0,0,0,0,0,0,0,0,0],
"two": [0,1.0,0,0,0,0,0,0,0,0,0,0],
"three":[0,0,1.0,0,0,0,0,0,0,0,0,0],
"four": [0,0,0,1.0,0,0,0,0,0,0,0,0],
"five": [0,0,0,0,1.0,0,0,0,0,0,0,0],
"six": [0,0,0,0,0,1.0,0,0,0,0,0,0],
"seven":[0,0,0,0,0,0,1.0,0,0,0,0,0],
"eight":[0,0,0,0,0,0,0,1.0,0,0,0,0],
"nine": [0,0,0,0,0,0,0,0,1.0,0,0,0],
"is": [0,0,0,0,0,0,0,0,0,1.0,0,0],
"less": [0,0,0,0,0,0,0,0,0,0,1.0,0],
"more": [0,0,0,0,0,0,0,0,0,0,0,1.0]
}
embed_b = {"9": [1.0,0,0,0,0,0,0,0,0,0,0,0],
"8": [0,1.0,0,0,0,0,0,0,0,0,0,0],
"7": [0,0,1.0,0,0,0,0,0,0,0,0,0],
"6": [0,0,0,1.0,0,0,0,0,0,0,0,0],
"5": [0,0,0,0,1.0,0,0,0,0,0,0,0],
"4": [0,0,0,0,0,1.0,0,0,0,0,0,0],
"3": [0,0,0,0,0,0,1.0,0,0,0,0,0],
"2": [0,0,0,0,0,0,0,1.0,0,0,0,0],
"1": [0,0,0,0,0,0,0,0,1.0,0,0,0],
"=": [0,0,0,0,0,0,0,0,0,1.0,0,0],
"<": [0,0,0,0,0,0,0,0,0,1.0,0,0],
">": [0,0,0,0,0,0,0,0,0,1.0,0,0],
}
def sentence_embed(sentence, embed_dict):
"""Generate an embedding for a sentence"""
res = []
for word in sentence.split():
res.append(embed_dict[word])
return res
inp = sentence_embed("one is one", embed_a)
out = sentence_embed("1 = 1", embed_b)
inp = torch.tensor(inp, dtype=torch.float32)
out = torch.tensor(out, dtype=torch.float32)
inp.shape, out.shape
def dot_attention(q, k, v):
"""inp: input sentence, dk: keyword dimension"""
# Initiate weight matrix for Query, Key and Value
dk = k.size(-1)
logit = (q @ k.transpose(0, -1)) / math.sqrt(dk)
weights = torch.softmax(logit, dim=-1)
res = weights @ v
return res
q, k, v = inp, inp, inp
dot_attention(q, k, v)
class MultiHeadAttention(nn.Module):
def __init__(self, dm, nh):
"""
        dm: model dimension
nh: number of heads
"""
super().__init__()
self.dm, self.nh = dm, nh
self.dk = dm // nh
self.heads = [{"wq":nn.Linear(self.dm, self.dk),
"wk":nn.Linear(self.dm, self.dk),
"wv":nn.Linear(self.dm, self.dk)} for h in range(nh)
]
self.out = nn.Linear(dm, dm)
def forward(self, inp):
res = []
for head in self.heads:
q, k, v = head["wq"](inp), head["wk"](inp), head["wv"](inp)
print(q.shape, k.shape, v.shape)
res.append(dot_attention(q, k, v))
concat = torch.cat(res, 1)
res = self.out(concat)
print(concat.shape, res.shape)
return res
dm = 12
nh = 3
# dk = 12/3 = 4
mul_head = MultiHeadAttention(dm, nh)
mul_head(inp)
from numpy import asarray
from sklearn.preprocessing import OrdinalEncoder
# define data
data = asarray([['red'], ['green'], ['blue']])
print(data)
# define ordinal encoding
encoder = OrdinalEncoder()
# transform data
result = encoder.fit_transform(data)
print(result)
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
# define data
data = asarray([['red'], ['green'], ['blue']])
print(data)
# define one hot encoding
encoder = OneHotEncoder(sparse=False)
# transform data
onehot = encoder.fit_transform(data)
print(onehot)
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
# define data
data = asarray([['red'], ['green'], ['blue']])
print(data)
# define one hot encoding
encoder = OneHotEncoder(drop='first', sparse=False)
# transform data
onehot = encoder.fit_transform(data)
print(onehot)
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
# define the location of the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/breast-cancer.csv"
# load the dataset
dataset = read_csv(url, header=None)
# retrieve the array of data
data = dataset.values
# separate into input and output columns
X = data[:, :-1].astype(str)
y = data[:, -1].astype(str)
# summarize
print('Input', X.shape)
print('Output', y.shape)
dataset.head()
type(dataset), type(data), type(X), type(y)
y.shape, X.shape, data.shape, dataset.shape
ordinal_encoder = OrdinalEncoder()
X = ordinal_encoder.fit_transform(X)
# ordinal encode target variable
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# summarize the transformed data
print('Input', X.shape)
print(X[:5, :])
print('Output', y.shape)
print(y[:5])
from numpy import mean
from numpy import std
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import accuracy_score
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# ordinal encode input variables
ordinal_encoder = OrdinalEncoder()
ordinal_encoder.fit(X_train)
X_train = ordinal_encoder.transform(X_train)
X_test = ordinal_encoder.transform(X_test)
# ordinal encode target variable
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)
# define the model
model = LogisticRegression()
# fit on the training set
model.fit(X_train, y_train)
# predict on test set
yhat = model.predict(X_test)
# evaluate predictions
accuracy = accuracy_score(y_test, yhat)
print('Accuracy: %.2f' % (accuracy*100))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# one-hot encode input variables
onehot_encoder = OneHotEncoder()
onehot_encoder.fit(X_train)
X_train = onehot_encoder.transform(X_train)
X_test = onehot_encoder.transform(X_test)
# ordinal encode target variable
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)
# define the model
model = LogisticRegression()
# fit on the training set
model.fit(X_train, y_train)
# predict on test set
yhat = model.predict(X_test)
# evaluate predictions
accuracy = accuracy_score(y_test, yhat)
print('Accuracy: %.2f' % (accuracy*100))