Embeddings
Shapelet forest embedding with LR
This example shows how to compute a shapelet forest embedding for a univariate time series dataset, and how to use a logistic regression model to classify new samples.
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestEmbedding
# Single seed passed to both the embedding and the classifier below.
random_state = 1234
x, y = load_dataset("GunPoint")

# Stage 1: the shapelet forest embedding, asked for sparse output.
embedding = ShapeletForestEmbedding(
    n_shapelets=1,
    min_shapelet_size=0,
    max_shapelet_size=1,
    metric="scaled_euclidean",
    sparse_output=True,
    max_depth=5,
    random_state=random_state,
    n_jobs=-1,
)
# Stage 2: logistic regression fitted on the embedded samples.
classifier = LogisticRegression(solver="newton-cg", random_state=random_state)
pipe = make_pipeline(embedding, classifier)

# Score the complete pipeline with 5-fold cross-validation on accuracy.
cv = cross_validate(pipe, x, y, cv=5, scoring="accuracy", n_jobs=1)

# Draw one "+" marker per fold; the empty linestyle suppresses the
# connecting line between folds.
fold_scores = cv["test_score"]
plt.plot(fold_scores, linestyle="", marker="+")
plt.xlabel("CV fold")
plt.ylabel("CV accuracy")
# NOTE(review): this path is relative to the parent directory ("../fig"),
# while the PCA example on this page saves to "fig/" -- confirm which
# output directory is intended.
plt.savefig("../fig/sfe_lr.png")
Shapelet forest embedding with PCA
This example shows how PCA can be used to project the resulting embedding onto two components and plot it.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestEmbedding
# Single seed passed to both the embedding and the PCA step below.
random_state = 1234
x, y = load_dataset("CBF")

# Stage 1: the shapelet forest embedding with dense output
# (sparse_output=False).
embedding = ShapeletForestEmbedding(
    metric="scaled_euclidean",
    sparse_output=False,
    max_depth=5,
    random_state=random_state,
)
# Stage 2: keep two principal components so the result can be drawn in 2-D.
reducer = PCA(n_components=2, random_state=random_state)
pca = make_pipeline(embedding, reducer)

# Fit both stages and obtain the two-component projection of every sample.
p = pca.fit_transform(x)

# Read the explained-variance ratios from the pipeline's second step.
_, fitted_pca = pca.steps[1]
var = fitted_pca.explained_variance_ratio_

# `index` maps every sample to the position of its label in `labels`;
# one rainbow colour is then assigned per distinct label.
labels, index = np.unique(y, return_inverse=True)
colour_positions = np.linspace(0, 1, len(labels))
colors = plt.cm.rainbow(colour_positions)

first_component = p[:, 0]
second_component = p[:, 1]
plt.scatter(first_component, second_component, color=colors[index])
plt.xlabel("Component 1 (%.2f variance explained)" % var[0])
plt.ylabel("Component 2 (%.2f variance explained)" % var[1])
# NOTE(review): the LR example on this page saves to "../fig/" instead of
# "fig/" -- confirm which output directory is intended.
plt.savefig("fig/sfe_pca.png")