Example 1: Recovery of low-rank image corrupted by sparse noise

Contents

[1]:

import matplotlib.pyplot as plt
import numpy as np

from skpcp import PCP

[2]:

RNG = np.random.default_rng(42)

Example 1: Recovery of low-rank image corrupted by sparse noise#

[3]:

# Generate a synthetic image (low rank)
NROWS, NCOLS = 100, 100
NUM_OUTLIERS = 1000

X, Y = np.meshgrid(np.linspace(0, 1, NCOLS), np.linspace(0, 1, NROWS))
synthetic_image = np.sin(2 * np.pi * X) + np.cos(2 * np.pi * Y)

# Add sparse noise (random outliers)
sparse_noise = np.zeros_like(synthetic_image)
outlier_indices = RNG.choice(NROWS * NCOLS, size=NUM_OUTLIERS, replace=False)
sparse_noise.flat[outlier_indices] = RNG.choice([.5, -.5], size=NUM_OUTLIERS)

# Combine the low-rank image with the sparse noise
noisy_image = synthetic_image + sparse_noise
image_matrix = noisy_image.reshape(-1, noisy_image.shape[1])

# Apply principal component pursuit (PCP) to recover the low-rank and sparse components
pcp = PCP()
low_rank = pcp.fit_transform(image_matrix)
sparse = pcp.sparse_

low_rank_image = low_rank.reshape(noisy_image.shape)
sparse_image = sparse.reshape(noisy_image.shape)

# Plot the original, noisy, and recovered images
plt.figure(figsize=(16, 6))

title_list = [
    "Original Low-Rank Image",
    "Noisy Image",
    "Recovered Low-Rank Image",
    "Residuals=Original-Recovered",
    "Recovered Sparse Component (Noise)",
]
images_to_plot = [
    synthetic_image,
    noisy_image,
    low_rank_image,
    synthetic_image - low_rank_image,
    sparse_image,
]
for i, img in enumerate(images_to_plot, start=1):
    ax = plt.subplot(1, 5, i)
    im = ax.imshow(img, cmap="gray")
    plt.axis("off")
    title = title_list[i-1]
    plt.title(title)
    if title == "Residuals=Original-Recovered":
        plt.colorbar(im, ax=ax, orientation="horizontal", pad=0.1, fraction=0.05, format="%1.1e", spacing="proportional")
    else:
        plt.colorbar(im, ax=ax, orientation="horizontal", pad=0.1, fraction=0.05)
plt.axis("off")

plt.tight_layout()
plt.show()

../_images/examples_examples_3_0.png

Example 2: Recovery of low-rank image corrupted by sparse noise#

[4]:

NROWS, NCOLS = 100, 150
NUM_GROUPS = 6
P_MISSING = 0.1

data = []
for k in range(NUM_GROUPS):
    d = np.ones((NROWS, NCOLS // NUM_GROUPS)) * (k + 1) * 10
    data.append(d)
data = np.hstack(data)

# DECIMATE 20% of data
n1, n2 = data.shape
S = RNG.random((n1, n2))
data_corrupt = data.copy()
data_corrupt[S < P_MISSING] = 0

# Apply PCP to recover the low-rank and sparse components

pcp = PCP()
low_rank = pcp.fit_transform(data_corrupt)
sparse = pcp.sparse_

plt.figure(figsize=(16, 8))
images_to_plot = [
    data,
    data_corrupt,
    low_rank,
    data - low_rank,
    S,
]
title_list = [
    "Original Low-Rank Image",
    "Noisy Image",
    "Recovered Low-Rank Image",
    "Residuals=Original-Recovered",
    "Recovered Sparse Component (Noise)",
]
for i, img in enumerate(images_to_plot, start=1):
    ax = plt.subplot(1, 5, i)
    im = ax.imshow(img, cmap="gray")
    plt.axis("off")
    plt.title(title_list[i - 1])
    if title_list[i-1] == "Residuals=Original-Recovered":
        plt.colorbar(im, ax=ax, orientation="horizontal", pad=0.1, fraction=0.046, format="%.1e")
    else:
        plt.colorbar(im, ax=ax, orientation="horizontal", pad=0.1, fraction=0.046)

plt.axis("off")
plt.tight_layout()
plt.show()

../_images/examples_examples_5_0.png

Example 3: Recovery of time series data corrupted by random spikes#

[5]:

# Generate 100 time series of length 1000 from a normal distribution
num_series = 100
series_length = 1000
num_spikes = 50

phases = np.linspace(0, 2 * np.pi, num_series, endpoint=False)
time = np.arange(series_length)
time_series = np.array([
    np.sin(2 * np.pi * time / series_length + phase) + RNG.normal(0, 0.05, series_length)
    for phase in phases
])

# Add spikes (outliers) to each time series
corrupted_series = time_series.copy()
for i in range(num_series):
    spike_indices = RNG.choice(series_length, size=num_spikes, replace=False)
    corrupted_series[i, spike_indices] += RNG.choice([10, -10], size=num_spikes)

# Apply PCP to recover low-rank and sparse components
pcp = PCP()
low_rank = pcp.fit_transform(corrupted_series)
sparse = pcp.sparse_

# Select 5 time series to plot
selected_idx = np.linspace(0, num_series - 1, 5, dtype=int)

fig, axes = plt.subplots(5, 5, figsize=(20, 18))
panel_titles = [
    "Original Time Series",
    "Corrupted Time Series",
    "Recovered Low-Rank",
    "Residuals (Original - Recovered)",
    "Recovered Sparse Component (Spikes)",
]
time = np.arange(series_length)
for row, idx in enumerate(selected_idx):
    axes[0, row].plot(time, time_series[idx], label=f"Series {idx}")
    axes[0, row].set_title(panel_titles[0] + f" #{idx}")
    axes[0, row].legend()
    axes[0, row].set_xlabel("Time")
    axes[0, row].set_ylabel("Value")

    axes[1, row].plot(time, corrupted_series[idx], label=f"Series {idx}")
    axes[1, row].set_title(panel_titles[1] + f" #{idx}")
    axes[1, row].legend()
    axes[1, row].set_xlabel("Time")
    axes[1, row].set_ylabel("Value")

    axes[2, row].plot(time, low_rank[idx], label=f"Series {idx}")
    axes[2, row].set_title(panel_titles[2] + f" #{idx}")
    axes[2, row].legend()
    axes[2, row].set_xlabel("Time")
    axes[2, row].set_ylabel("Value")

    axes[3, row].plot(time, time_series[idx] - low_rank[idx], label=f"Series {idx}")
    axes[3, row].set_title(panel_titles[3] + f" #{idx}")
    axes[3, row].legend()
    axes[3, row].set_xlabel("Time")
    axes[3, row].set_ylabel("Value")

    axes[4, row].plot(time, sparse[idx], label=f"Series {idx}")
    axes[4, row].set_title(panel_titles[4] + f" #{idx}")
    axes[4, row].legend()
    axes[4, row].set_xlabel("Time")
    axes[4, row].set_ylabel("Value")

plt.tight_layout()
plt.show()

../_images/examples_examples_7_0.png