"""
Sketch-Map: Nonlinear dimensionality reduction
Sketch-Map is a nonlinear dimensionality reduction technique. Unlike linear methods
(PCA) or neighborhood-based methods (t-SNE, UMAP), Sketch-Map uses sigmoid functions to
selectively focus on "intermediate-range" pairwise distances while filtering out noise
from both extremes.
The key insight is that in high-dimensional data very short distances often represent
thermal fluctuations or noise that should not influence the global structure, very long
distances are unreliable due to the "curse of dimensionality" where all points appear
roughly equidistant in high dimensions. Intermediate distances contain the meaningful
structural information we want to preserve in the low-dimensional embedding.
The sigmoid transformation compresses both extremes toward 0 and 1, effectively creating
a "sketch" of the distance relationships that emphasizes the informative middle range.
References
----------
See Ceriotti et al. [Ceriotti2011]_, which introduces Sketch-Map, and the follow-up
[Ceriotti2013]_.
"""
import warnings
import numpy as np
from scipy import sparse
from scipy.optimize import basinhopping, minimize, curve_fit
from scipy.spatial.distance import cdist
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, validate_data
# Sigmoid transformation functions
def sigmoid_transform(distances, sigma, a, b):
r"""Apply the Sketch-Map sigmoid transformation to distances.
The sigmoid function maps pairwise distances to the range [0, 1], with
the switching point at :math:`r = \sigma` (where :math:`s(\sigma) = 0.5`).
The transformation effectively "compresses" both very small and very large
distances, focusing the optimization on intermediate-range structure.
Mathematical form:
.. math::
s(r) = 1 - \left(1 + A \cdot \left(\frac{r}{\sigma}\right)^a\right)^{-b/a}
where :math:`A = 2^{a/b} - 1` ensures :math:`s(\sigma) = 0.5`.
Parameters
----------
distances : ndarray
Pairwise distances to transform.
sigma : float
Switching distance where :math:`s(\sigma) = 0.5`. This is the
"characteristic scale" of the transformation.
a : float
Short-range exponent controlling how quickly :math:`s(r) \to 0` as
:math:`r \to 0`. Larger values make the sigmoid steeper for distances
below sigma.
b : float
Long-range exponent controlling how quickly :math:`s(r) \to 1` as
:math:`r \to \infty`. Larger values make the sigmoid steeper for
distances above sigma.
"""
with np.errstate(divide="ignore", invalid="ignore"):
# Normalize distances by the switching distance
r_normalized = distances / sigma
# Sigmoid coefficient ensuring s(sigma) = 0.5
A = 2 ** (a / b) - 1.0
# Compute sigmoid transformation
term = A * (r_normalized**a)
transformed = 1.0 - (1.0 + term) ** (-b / a)
# Handle edge case: zero distance should map to zero
transformed[distances <= 0.0] = 0.0
return transformed
def sigmoid_inverse(y, sigma, a, b):
r"""Compute the inverse of the sigmoid transformation.
Maps a transformed distance value back to the original distance. This function is
useful for interpreting transformed distances in terms of the original distance
scale.
Parameters
----------
y : ndarray
Transformed distances in [0, 1].
sigma : float
Switching distance where :math:`s(\sigma) = 0.5`.
a : float
Short-range steepness parameter.
b : float
Long-range steepness parameter.
"""
with np.errstate(divide="ignore", invalid="ignore"):
A = 2 ** (a / b) - 1.0
# Inverse formula derived by solving s(r) = y for r
one_minus_y = np.maximum(1.0 - y, 1e-12)
inner = np.power(one_minus_y, -a / b) - 1.0
distances = sigma * np.power(np.maximum(inner / A, 0.0), 1.0 / a)
# Handle boundary cases
distances[y <= 0.0] = 0.0
distances[y >= 1.0] = np.inf
return distances
def sigmoid_derivative(distances, sigma, a, b):
r"""Compute the derivative of the sigmoid with respect to distance.
This derivative is required for gradient-based optimization of the Sketch-Map
stress function.
Parameters
----------
distances : ndarray
Pairwise distances.
sigma : float
Switching distance where :math:`s(\sigma) = 0.5`.
a : float
Short-range steepness parameter.
b : float
Long-range steepness parameter.
Returns
-------
derivative : ndarray
The value :math:`\mathrm{d}s/\mathrm{d}r` at each distance point.
"""
A = 2 ** (a / b) - 1.0
derivative = np.zeros_like(distances, dtype=float)
# Only compute for positive distances (derivative at r=0 is 0)
positive_mask = distances > 0.0
if np.any(positive_mask):
r = distances[positive_mask]
u = A * (r / sigma) ** a
prefactor = b * A * (r ** (a - 1.0)) / (sigma**a)
derivative[positive_mask] = prefactor * (1.0 + u) ** (-b / a - 1.0)
return derivative
# Utility functions
def classical_mds(distances, n_components):
"""Compute a classical MDS embedding as initialization.
Classical MDS finds coordinates that best reproduce the given distance matrix using
eigendecomposition of the doubly-centered Gram matrix.
Parameters
----------
distances : ndarray of shape (n_samples, n_samples)
Symmetric pairwise distance matrix.
n_components : int
Number of dimensions for the embedding.
"""
# Double-centering to obtain the Gram matrix
gram_matrix = -0.5 * (distances**2)
gram_matrix -= gram_matrix.mean(axis=0, keepdims=True) # Center columns
gram_matrix -= gram_matrix.mean(axis=1, keepdims=True) # Center rows
# Eigendecomposition (eigh returns sorted in ascending order)
eigenvalues, eigenvectors = np.linalg.eigh(gram_matrix)
# Select top n_components (largest eigenvalues)
top_indices = np.argsort(eigenvalues)[::-1][:n_components]
top_eigenvalues = eigenvalues[top_indices]
top_eigenvectors = eigenvectors[:, top_indices]
# Compute coordinates: X = V @ sqrt(lambda)
coordinates = top_eigenvectors * np.sqrt(np.maximum(top_eigenvalues, 0.0))
# Apply consistent sign convention
for i in range(n_components):
col = coordinates[:, i]
if col[np.argmax(np.abs(col))] < 0:
coordinates[:, i] *= -1
return coordinates
def _gaussian(x, amplitude, center, std_dev):
"""Gaussian function for curve fitting in distance distribution analysis."""
return amplitude * np.exp(-((x - center) ** 2) / (2 * std_dev**2))
# Distance distribution analysis
def analyze_distance_distribution(distances, n_bins=200):
"""Analyze the distribution of pairwise distances for parameter estimation.
This function computes a histogram of the pairwise distances and identifies key
features that inform the choice of sigmoid parameters. 1. Peak distance (where the
bulk of pairwise distances lie, this represents the characteristic scale of the
data.) 2. Gaussian range (the regime dominated by short-range fluctuations,
distances below this should be compressed). 3. Uniform cutoff (where the
high-dimensional "curse of dimensionality" causes distances to become
uninformative).
Parameters
----------
distances : ndarray
Pairwise distance matrix (square symmetric) or flattened upper triangle.
n_bins : int, default=200
Number of histogram bins for the analysis.
"""
# Extract upper triangle if given a square matrix
if distances.ndim == 2:
d = distances[np.triu_indices_from(distances, k=1)]
else:
d = distances.copy()
# Filter invalid values
d = d[np.isfinite(d) & (d >= 0)]
if d.size == 0:
raise ValueError("Empty or invalid distances array")
# Build histogram
max_distance = np.percentile(d, 99.9)
bin_edges = np.linspace(0, max_distance, n_bins + 1)
prob_density, _ = np.histogram(d, bins=bin_edges, density=True)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
# Locate the peak of the distribution
peak_idx = np.argmax(prob_density)
peak_distance = bin_centers[peak_idx]
# Initialize results
analysis = {
"peak_distance": peak_distance,
"gaussian_std": None,
"gaussian_range": None,
"uniform_cutoff": None,
"bin_centers": bin_centers,
"prob_density": prob_density,
"max_distance": max_distance,
}
# Estimate Gaussian fluctuation range (left side of peak)
# Fit a Gaussian to the left half of the histogram to characterize the short-range
# noise regime
left_mask = bin_centers <= peak_distance
if np.sum(left_mask) > 3:
try:
initial_guess = [np.max(prob_density), peak_distance, 1.0]
optimal_params, _ = curve_fit(
_gaussian,
bin_centers[left_mask],
prob_density[left_mask],
p0=initial_guess,
maxfev=5000,
)
analysis["gaussian_std"] = abs(optimal_params[2])
# Gaussian range extends ~3 sigma from the peak
analysis["gaussian_range"] = peak_distance + 3 * analysis["gaussian_std"]
except (RuntimeError, ValueError):
# Fallback: use heuristic estimate
analysis["gaussian_std"] = peak_distance / 3.0
analysis["gaussian_range"] = peak_distance * 2.0
# Estimate high-dimensional cutoff (right side of peak)
# Find where density drops to 10% of peak value
right_mask = bin_centers > peak_distance
right_density = prob_density[right_mask]
if len(right_density) > 3:
peak_density = prob_density[peak_idx]
threshold = 0.1 * peak_density
below_threshold = right_density < threshold
if np.any(below_threshold):
first_below = np.argmax(below_threshold)
analysis["uniform_cutoff"] = bin_centers[right_mask][first_below]
else:
analysis["uniform_cutoff"] = np.percentile(d, 90)
# Ensure gaussian_range < uniform_cutoff
if (
analysis["gaussian_range"] is not None
and analysis["uniform_cutoff"] is not None
):
if analysis["gaussian_range"] >= analysis["uniform_cutoff"]:
# fallback to default spacing
analysis["gaussian_range"] = peak_distance + 0.2 * max_distance
analysis["uniform_cutoff"] = peak_distance + 0.6 * max_distance
return analysis
def suggest_sigmoid_params(distances, n_components, n_features, n_bins=200):
r"""Suggest sigmoid parameters based on distance distribution analysis.
This function implements heuristics from the Sketch-Map guidelines for automatic
parameter selection.
Parameter selection strategy:
- ``sigma``: placed just before the peak (90% of peak distance) to ensure the bulk
of distances fall in the sigmoid's sensitive region.
- ``a_high``, ``b_high`` (high-D sigmoid): control how aggressively short and long
distances are compressed in high-dimensional space.
- ``a_low``, ``b_low`` (low-D sigmoid): typically smaller values to account for the
"volume equalization" between high-D and low-D spaces. A rule of thumb is
:math:`a_{\text{low}} \cdot d \approx a_{\text{high}} \cdot D` where :math:`d` is
the target dimension and :math:`D` is the original dimension.
Parameters
----------
distances : ndarray
Pairwise distance matrix.
n_components : int
Target embedding dimensionality.
n_features : int
Original feature dimensionality (used for dimension ratio heuristics).
n_bins : int, default=200
Number of histogram bins for distance analysis.
Returns
-------
params : dict
Suggested parameters with keys: ``sigma``, ``a_high``, ``b_high``,
``a_low``, ``b_low``.
analysis : dict
Distance distribution analysis results from
:func:`analyze_distance_distribution`.
"""
analysis = analyze_distance_distribution(distances, n_bins=n_bins)
# Estimate sigma
sigma = 0.9 * analysis["peak_distance"]
# Estimate high-dimensional sigmoid parameters (a_high, b_high)
if (
analysis["gaussian_range"] is not None
and analysis["uniform_cutoff"] is not None
):
range_ratio = analysis["uniform_cutoff"] / max(
analysis["gaussian_range"], 1e-10
)
a_high = np.clip(2.0 + np.log(range_ratio), 2.0, 6.0)
b_high = np.clip(a_high * 2, 4.0, 12.0)
else:
a_high = 2.0
b_high = 6.0
# Estimate low-dimensional sigmoid parameters (a_low, b_low)
if n_features > 0:
a_low = np.clip(a_high * n_components / n_features, 1.0, 2.0)
else:
a_low = 2.0
b_low = np.clip(a_low, 1.0, 2.0)
params = {
"sigma": sigma,
"a_high": a_high,
"b_high": b_high,
"a_low": a_low,
"b_low": b_low,
}
return params, analysis
# Main class
[docs]
class SketchMap(TransformerMixin, BaseEstimator):
r"""
Sketch-Map is a nonlinear dimensionality reduction algorithm. Unlike methods that
try to preserve all pairwise distances (MDS) or only local neighborhoods (t-SNE),
Sketch-Map selectively focuses on intermediate-range distances using sigmoid
transformations: short distances (:math:`r \ll \sigma`) compressed toward 0 (these
often represent thermal noise or fluctuations that should not dominate the
embedding), long distances (:math:`r \gg \sigma`) compressed toward 1 (in high
dimensions, very long distances become unreliable due to the "curse of
dimensionality"), intermediate distances (:math:`r \approx \sigma`) are preserved
(these contain the meaningful structural information).
The sigmoid function has the form:
.. math::
s(r) = 1 - \left(1 + A \cdot \left(\frac{r}{\sigma}\right)^a\right)^{-b/a}
where:
- :math:`\sigma` is the "switching distance" where :math:`s(\sigma) = 0.5`
- :math:`a` controls the short-range exponent (how fast :math:`s \to 0`)
- :math:`b` controls the long-range exponent (how fast :math:`s \to 1`)
- :math:`A = 2^{a/b} - 1` ensures :math:`s(\sigma) = 0.5`
Separate sigmoids are applied to high-D distances (``a_high``, ``b_high``) and
low-D distances (``a_low``, ``b_low``), allowing different compression behaviors.
The fitting process proceeds in stages:
1. Initialization: classical MDS provides starting coordinates
2. MDS refinement (optional): optimize raw distance stress
3. Pre-optimization: initial sigmoid-transformed optimization
4. Main optimization: full optimization
5. Global optimization (optional): basin-hopping for better minima
Parameters
----------
n_components : int, default=2
Number of dimensions in the target embedding space.
params : dict or None, default=None
Sigmoid parameters dictionary. Keys are:
- ``sigma`` : float - Switching distance where :math:`s(\sigma) = 0.5`
- ``a_high`` : float - High-D sigmoid short-range steepness
- ``b_high`` : float - High-D sigmoid long-range steepness
- ``a_low`` : float - Low-D sigmoid short-range steepness
- ``b_low`` : float - Low-D sigmoid long-range steepness
If None, all parameters are estimated automatically from the data.
Partial dictionaries are allowed; missing keys use auto-estimated values.
mds_opt_steps : int, default=100
Number of MDS optimization steps (raw distances, no sigmoid) before
applying the sigmoid transformation. Set to 0 to skip this stage.
This helps refine the classical MDS initialization.
optimizer : str, default="L-BFGS-B"
Optimization algorithm. Options: ``"L-BFGS-B"`` or ``"CG"``.
preopt_steps : int, default=100
Number of pre-optimization steps with sigmoid transformation.
Uses the same settings as main optimization but fewer iterations.
max_iter : int, default=1000
Maximum iterations for the main optimization stage.
global_opt_steps : int or None, default=None
Number of basin-hopping iterations for global optimization.
Basin-hopping can escape local minima but is computationally expensive.
Set to None to disable.
mixing_ratio : float, default=0.0
Balance between raw distance stress and transformed distance stress:
- 0.0: Pure sigmoid-transformed stress
- 1.0: Pure raw distance stress
- Values in between: linear combination
center : bool, default=True
Whether to center the input data (subtract mean) before computing
pairwise distances.
init : array-like of shape (n_samples, n_components) or None, default=None
Initial embedding coordinates. If None, classical MDS is used.
random_state : int or None, default=None
Random seed for reproducibility (affects global optimization).
verbose : bool, default=False
If True, print progress information during fitting.
Attributes
----------
embedding_ : ndarray of shape (n_samples, n_components)
The fitted low-dimensional embedding coordinates.
stress_ : float
Final stress value (lower is better).
params_ : dict
The sigmoid parameters actually used (combination of user-provided
and auto-estimated values).
suggested_params_ : dict
Auto-suggested sigmoid parameters (``sigma``, ``a_high``, ``b_high``,
``a_low``, ``b_low``) based on distance distribution analysis.
Available after fitting, useful for understanding the data.
distance_analysis_ : dict
Results from the distance distribution analysis, including peak
distance, Gaussian range estimates, and histogram data.
n_samples_ : int
Number of samples in the training data.
n_features_ : int
Number of features in the training data.
See Also
--------
sklearn.manifold.MDS : Multidimensional scaling (linear).
sklearn.manifold.TSNE : t-SNE for visualization.
Examples
--------
Basic usage with automatic parameter estimation:
>>> from skmatter.decomposition import SketchMap
>>> import numpy as np
>>> X = np.random.randn(100, 50)
>>> sm = SketchMap(n_components=2, random_state=42)
>>> embedding = sm.fit_transform(X)
>>> print(embedding.shape)
(100, 2)
Using specific sigmoid parameters:
>>> params = {"sigma": 7.0, "a_high": 4.0, "b_high": 2.0,
... "a_low": 2.0, "b_low": 2.0}
>>> sm = SketchMap(n_components=2, params=params)
>>> embedding = sm.fit_transform(X)
>>> print(embedding.shape)
(100, 2)
"""
def __init__(
self,
n_components=2,
params=None,
mds_opt_steps=100,
optimizer="L-BFGS-B",
preopt_steps=100,
max_iter=1000,
global_opt_steps=None,
mixing_ratio=0.0,
center=True,
init=None,
random_state=None,
verbose=False,
):
self.n_components = n_components
self.params = params
self.mds_opt_steps = mds_opt_steps
self.optimizer = optimizer
self.preopt_steps = preopt_steps
self.max_iter = max_iter
self.global_opt_steps = global_opt_steps
self.mixing_ratio = mixing_ratio
self.center = center
self.init = init
self.random_state = random_state
self.verbose = verbose
# Stress computation
def _compute_stress(
self,
embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
mixing_ratio,
use_transform=True,
):
"""Compute the Sketch-Map stress function.
The stress measures the discrepancy between (transformed) high-D distances and
(transformed) low-D distances. Lower stress indicates a better embedding.
The stress formula is::
stress = sum_ij w_ij * [(1-m)*(s_hd(D_ij) - s_ld(d_ij))^2
+ m*(D_ij - d_ij)^2]
where:
- D_ij is the high-D distance between points i and j
- d_ij is the low-D distance
- s_hd and s_ld are the high-D and low-D sigmoid functions
- w_ij are pairwise weights
- m is the mixing_ratio
Parameters
----------
embedding : ndarray, shape (n_samples * n_components,) or (n, d)
Low-dimensional coordinates (flat or 2D).
hd_distances : ndarray of shape (n_samples, n_samples)
High-dimensional pairwise distances.
hd_transformed : ndarray of shape (n_samples, n_samples)
Sigmoid-transformed high-dimensional distances.
weights : ndarray of shape (n_samples, n_samples)
Pairwise weight matrix.
total_weight : float
Sum of upper-triangle weights (for normalization).
mixing_ratio : float
Balance between transformed (0) and raw (1) stress.
use_transform : bool, default=True
Whether to apply sigmoid to low-D distances.
Returns
-------
stress : float
Normalized stress value.
"""
# Reshape flat embedding to matrix form
if embedding.ndim == 1:
n_samples = hd_distances.shape[0]
embedding = embedding.reshape((n_samples, self.n_components))
# Compute low-dimensional pairwise distances
ld_distances = cdist(embedding, embedding, metric="euclidean")
# Apply sigmoid transformation to low-D distances
if use_transform:
# Use precomputed coefficient for speed
r_normalized = ld_distances / self.params_["sigma"]
term = self._A_low_ * np.power(r_normalized, self.params_["a_low"])
ld_transformed = 1.0 - np.power(
1.0 + term, -self.params_["b_low"] / self.params_["a_low"]
)
ld_transformed[ld_distances <= 0.0] = 0.0
else:
ld_transformed = ld_distances
# Compute stress only on upper triangle (use precomputed indices)
triu_idx = self._triu_idx_
diff_transformed = (hd_transformed[triu_idx] - ld_transformed[triu_idx]) ** 2
diff_direct = (hd_distances[triu_idx] - ld_distances[triu_idx]) ** 2
weights_triu = weights[triu_idx]
# Combine transformed and direct stress according to mixing_ratio
stress = np.sum(
weights_triu
* ((1.0 - mixing_ratio) * diff_transformed + mixing_ratio * diff_direct)
)
return stress / total_weight
# Gradient computation
def _compute_gradient(
self,
embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
mixing_ratio,
use_transform=True,
):
"""Compute the gradient of the stress function.
The gradient is used by optimization algorithms (L-BFGS-B, CG) to
find the embedding that minimizes stress.
Parameters
----------
embedding : ndarray
Low-dimensional coordinates (flat or 2D).
hd_distances : ndarray
High-dimensional pairwise distances.
hd_transformed : ndarray
Transformed high-dimensional distances.
weights : ndarray
Pairwise weight matrix.
total_weight : float
Total weight for normalization.
mixing_ratio : float
Balance between transformed and raw stress.
use_transform : bool, default=True
Whether sigmoid transformation is applied.
Returns
-------
gradient : ndarray of shape (n_samples * n_components,)
Flattened gradient vector.
"""
# Reshape flat embedding to matrix form
if embedding.ndim == 1:
n_samples = hd_distances.shape[0]
embedding = embedding.reshape((n_samples, self.n_components))
# Compute low-dimensional pairwise distances using optimized cdist
ld_distances = cdist(embedding, embedding, metric="euclidean")
# Compute sigmoid transformation and its derivative
if use_transform:
sigma = self.params_["sigma"]
a_low = self.params_["a_low"]
b_low = self.params_["b_low"]
A = self._A_low_
# Optimized sigmoid transform
r_normalized = ld_distances / sigma
r_pow_a = np.power(r_normalized, a_low)
term = A * r_pow_a
base = 1.0 + term
ld_transformed = 1.0 - np.power(base, -b_low / a_low)
ld_transformed[ld_distances <= 0.0] = 0.0
# Optimized derivative:
# ds/dr = b * A * r^(a-1) / sigma^a * (1+term)^(-b/a-1)
ld_derivative = np.zeros_like(ld_distances)
positive_mask = ld_distances > 0.0
if np.any(positive_mask):
r = ld_distances[positive_mask]
u = A * np.power(r / sigma, a_low)
prefactor = (
b_low * A * np.power(r, a_low - 1.0) / np.power(sigma, a_low)
)
ld_derivative[positive_mask] = prefactor * np.power(
1.0 + u, -b_low / a_low - 1.0
)
else:
ld_transformed = ld_distances
ld_derivative = np.ones_like(ld_distances)
# Compute gradient coefficients based on mixing_ratio
# The gradient comes from differentiating the stress w.r.t. coordinates
if mixing_ratio == 0.0:
# Pure transformed stress
coefficients = weights * (hd_transformed - ld_transformed) * ld_derivative
else:
# Mixed stress: includes both transformed and raw distance terms
eps = 1e-100
inv_distance = 1.0 / np.maximum(ld_distances, eps)
coefficients = (
weights
* (
(1.0 - mixing_ratio)
* (hd_transformed - ld_transformed)
* ld_derivative
+ mixing_ratio * (hd_distances - ld_distances)
)
* inv_distance
)
np.fill_diagonal(coefficients, 0.0)
# Compute gradient using efficient matrix operations
# d(stress)/d(x_i) = -2 * sum_j c_ij * (x_i - x_j)
row_sums = coefficients.sum(axis=1)
gradient = (row_sums[:, None] * embedding) - (coefficients @ embedding)
gradient = -2.0 * gradient / total_weight
return gradient.ravel()
# Optimization methods
def _optimize(
self,
initial_embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
mixing_ratio,
n_steps,
use_transform=True,
):
"""Run gradient-based optimization to minimize stress.
Uses scipy.optimize.minimize with the configured optimizer
(L-BFGS-B or CG).
Parameters
----------
initial_embedding : ndarray of shape (n_samples, n_components)
Starting coordinates.
hd_distances : ndarray
High-dimensional pairwise distances.
hd_transformed : ndarray
Transformed high-dimensional distances.
weights : ndarray
Pairwise weight matrix.
total_weight : float
Total weight for normalization.
mixing_ratio : float
Balance between transformed and raw stress.
n_steps : int
Maximum number of optimization iterations.
use_transform : bool, default=True
Whether to apply sigmoid transformation.
Returns
-------
optimized_embedding : ndarray of shape (n_samples, n_components)
Optimized coordinates.
final_stress : float
Final stress value.
"""
n_samples = initial_embedding.shape[0]
x0 = initial_embedding.ravel()
# Define objective and gradient functions
def objective(x):
return self._compute_stress(
x,
hd_distances,
hd_transformed,
weights,
total_weight,
mixing_ratio,
use_transform,
)
def gradient(x):
return self._compute_gradient(
x,
hd_distances,
hd_transformed,
weights,
total_weight,
mixing_ratio,
use_transform,
)
# Run optimization
result = minimize(
objective,
x0,
method=self.optimizer,
jac=gradient,
options={"maxiter": n_steps, "gtol": 1e-8},
)
if self.verbose:
print(f" Optimization finished: stress = {result.fun:.6f}")
optimized_embedding = result.x.reshape((n_samples, self.n_components))
return optimized_embedding, result.fun
def _global_optimize(
self,
embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
n_iterations,
):
"""Run global optimization using basin-hopping which combines local optimization
with random perturbations to escape local minima. This is computationally
expensive but can find better solutions for complex landscapes.
Parameters
----------
embedding : ndarray of shape (n_samples, n_components)
Current embedding (starting point).
hd_distances : ndarray
High-dimensional pairwise distances.
hd_transformed : ndarray
Transformed distances.
weights : ndarray
Pairwise weight matrix.
total_weight : float
Total weight for normalization.
n_iterations : int
Number of basin-hopping iterations.
Returns
-------
optimized_embedding : ndarray of shape (n_samples, n_components)
Globally optimized coordinates.
final_stress : float
Final stress value.
"""
n_samples = embedding.shape[0]
x0 = embedding.ravel()
def objective(x):
return self._compute_stress(
x,
hd_distances,
hd_transformed,
weights,
total_weight,
self.mixing_ratio,
use_transform=True,
)
def gradient(x):
return self._compute_gradient(
x,
hd_distances,
hd_transformed,
weights,
total_weight,
self.mixing_ratio,
use_transform=True,
)
minimizer_kwargs = {
"method": self.optimizer,
"jac": gradient,
"options": {"maxiter": 100},
}
if self.verbose:
print(f"\n=== Global optimization (basin hopping, {n_iterations} iter) ===")
# Custom random displacement for basin-hopping
rng = np.random.default_rng(self.random_state)
class RandomDisplacement:
"""Random step generator for basin-hopping."""
def __init__(self, stepsize, rng):
self.stepsize = stepsize
self.rng = rng
def __call__(self, x):
return x + self.rng.uniform(-self.stepsize, self.stepsize, x.shape)
# Step size proportional to embedding scale
scale = np.std(embedding)
take_step = RandomDisplacement(stepsize=scale * 0.5, rng=rng)
result = basinhopping(
objective,
x0,
niter=n_iterations,
minimizer_kwargs=minimizer_kwargs,
take_step=take_step,
seed=int(rng.integers(0, 2**31)) if self.random_state else None,
)
if self.verbose:
print(f" Basin hopping finished: stress = {result.fun:.6f}")
return result.x.reshape((n_samples, self.n_components)), result.fun
# Public API: fit, transform, fit_transform
[docs]
def fit(self, X, y=None, sample_weights=None):
"""Fit the Sketch-Map model to the training data.
This method computes the low-dimensional embedding by 1) computing pairwise
distances in the input space 2) estimating sigmoid parameters (if not provided)
3) transforming distances using the sigmoid function 4) optimizing embedding
coordinates to minimize stress.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data. Each row is a sample, each column is a feature.
y : Ignored
Not used, present for scikit-learn API compatibility.
sample_weights : array-like of shape (n_samples,), optional
Per-sample weights. Samples with higher weights have more
influence on the embedding. Default is uniform weights.
Returns
-------
self : SketchMap
Returns the fitted instance.
"""
X = validate_data(self, X, reset=True, dtype=np.float64)
self.X_ = X.copy()
if sparse.issparse(X):
raise ValueError("Sparse input is not supported")
if np.any(~np.isfinite(X)):
raise ValueError("Input contains NaN or infinity")
n_samples, n_features = X.shape
if n_samples < 2:
raise ValueError(
f"Found array with {n_samples} sample(s) while minimum of 2 required."
)
self.n_samples_ = n_samples
self.n_features_ = n_features
if self.verbose:
print(f"Fitting Sketch-Map: {n_samples} samples, {n_features} features")
# Compute pairwise distances
X_processed = X.copy()
if self.center:
X_processed = X_processed - X_processed.mean(axis=0, keepdims=True)
if self.verbose:
print("Data centered")
if self.verbose:
print("Computing pairwise distances...")
hd_distances = cdist(X_processed, X_processed, metric="euclidean")
# Determine sigmoid parameters
suggested, analysis = suggest_sigmoid_params(
hd_distances, self.n_components, n_features
)
self.suggested_params_ = suggested
self.distance_analysis_ = analysis
if self.params is None:
# Use all suggested parameters
self.params_ = suggested.copy()
if self.verbose:
print("Using auto-estimated sigmoid parameters:")
else:
# Start with suggested, override with user-provided values
self.params_ = suggested.copy()
for key in ["sigma", "a_high", "b_high", "a_low", "b_low"]:
if key in self.params and self.params[key] is not None:
self.params_[key] = self.params[key]
if self.verbose:
print("Using sigmoid parameters (user + auto-estimated):")
if self.verbose:
print(f"sigma = {self.params_['sigma']:.4f}")
print(
f"a_high = {self.params_['a_high']:.2f}, "
f"b_high = {self.params_['b_high']:.2f}"
)
print(
f"a_low = {self.params_['a_low']:.2f}, "
f"b_low = {self.params_['b_low']:.2f}"
)
print(f"(peak distance = {analysis['peak_distance']:.4f})")
# Apply sigmoid transformation to high-D distances
hd_transformed = sigmoid_transform(
hd_distances,
self.params_["sigma"],
self.params_["a_high"],
self.params_["b_high"],
)
# Precompute indices for upper triangle
self._triu_idx_ = np.triu_indices(n_samples, k=1)
# Precompute sigmoid coefficient for low-D transform
self._A_low_ = 2 ** (self.params_["a_low"] / self.params_["b_low"]) - 1.0
# Setup weight matrix
if sample_weights is not None:
w = np.asarray(sample_weights)
if w.shape[0] != n_samples:
raise ValueError(
f"sample_weights length {w.shape[0]} != n_samples {n_samples}"
)
weights = np.outer(w, w)
total_weight = np.sum(np.triu(weights, k=1))
if self.verbose:
print(f"Using sample weights (total_weight = {total_weight:.4f})")
else:
weights = np.ones_like(hd_distances)
total_weight = n_samples * (n_samples - 1) / 2.0
if self.verbose:
print(f"Using uniform weights (total_weight = {total_weight:.0f})")
# Initialize embedding
if self.init is not None:
embedding = np.asarray(self.init).copy()
if self.verbose:
print(f"Using provided initialization, shape = {embedding.shape}")
else:
if self.verbose:
print("Initializing with classical MDS...")
embedding = classical_mds(hd_distances, self.n_components)
# MDS pre-optimization
if self.mds_opt_steps > 0 and self.init is None:
if self.verbose:
print(
f"\n=== Stage 0: MDS optimization ({self.mds_opt_steps} steps) ==="
)
embedding, mds_stress = self._optimize(
embedding,
hd_distances,
hd_distances, # target = raw distances (no transform)
weights,
total_weight,
mixing_ratio=1.0, # pure raw distance stress
n_steps=self.mds_opt_steps,
use_transform=False,
)
if self.verbose:
print(f"MDS stress: {mds_stress:.6f}")
# Pre-optimization with sigmoid transformation
stress = 0.0
if self.preopt_steps > 0:
if self.verbose:
print(
f"\n=== Stage 1: Pre-optimization ({self.preopt_steps} steps) ==="
)
embedding, stress = self._optimize(
embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
self.mixing_ratio,
self.preopt_steps,
use_transform=True,
)
if self.verbose:
print(f"Pre-optimization stress: {stress:.6f}")
# Main optimization
if self.max_iter > 0:
if self.verbose:
print(f"\n=== Stage 2: Main optimization ({self.max_iter} steps) ===")
embedding, stress = self._optimize(
embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
self.mixing_ratio,
self.max_iter,
use_transform=True,
)
if self.verbose:
print(f"Final stress: {stress:.6f}")
# Global optimization
if self.global_opt_steps is not None:
if not isinstance(self.global_opt_steps, int) or self.global_opt_steps < 1:
raise ValueError(
"global_opt_steps must be a positive int, got "
f"{self.global_opt_steps}"
)
embedding, stress = self._global_optimize(
embedding,
hd_distances,
hd_transformed,
weights,
total_weight,
self.global_opt_steps,
)
# Store results
self.embedding_ = embedding
self.stress_ = stress
# Track total iterations for sklearn compatibility
n_iter = self.mds_opt_steps + self.preopt_steps + self.max_iter
if self.global_opt_steps is not None:
n_iter += self.global_opt_steps
self.n_iter_ = n_iter
if self.verbose:
print("\nSketch-Map fitting complete!")
return self
def predict(self, X):
"""Alias for :meth:`transform` (for API compatibility)."""
return self.transform(X)
[docs]
def score(self, X, y=None):
"""Return negative stress as a score.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data. Used only for feature validation.
y : array-like
Ignored, present for scikit-learn API compatibility.
Returns
-------
score : float
Negative of the final stress value.
"""
check_is_fitted(self, ["stress_"])
validate_data(self, X, reset=False)
return -self.stress_