Skip to content

Commit

Permalink
Check that random projections actually reduce the dimension of the data.
Browse files Browse the repository at this point in the history
  • Loading branch information
GBathie committed Mar 1, 2024
1 parent 3569c10 commit a5f1ad3
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 30 deletions.
2 changes: 2 additions & 0 deletions algorithms/linfa-reduction/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,6 @@ pub enum ReductionError {
InvalidPrecision,
#[error("Target dimension of the projection must be positive")]
NonPositiveEmbeddingSize,
#[error("Target dimension {0} is larger than the number of features {1}. ")]
DimensionIncrease(usize, usize),
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@ where

let n_dims = match &self.params {
GaussianRandomProjectionParamsInner::Dimension { target_dim } => *target_dim,
GaussianRandomProjectionParamsInner::Precision { precision } => {
johnson_lindenstrauss_min_dim(n_samples, *precision)
GaussianRandomProjectionParamsInner::Epsilon { eps } => {
johnson_lindenstrauss_min_dim(n_samples, *eps)
}
};

if n_dims > n_features {
return Err(ReductionError::DimensionIncrease(n_dims, n_features));
}

let std_dev = F::cast(n_features).sqrt().recip();
let gaussian = Normal::new(F::zero(), std_dev)?;

Expand All @@ -49,22 +53,22 @@ where

impl<F: Float> GaussianRandomProjection<F> {
/// Create new parameters for a [`GaussianRandomProjection`] with default value
/// `precision = 0.1` and a [`Xoshiro256Plus`] RNG.
/// `eps = 0.1` and a [`Xoshiro256Plus`] RNG.
pub fn params() -> GaussianRandomProjectionParams<Xoshiro256Plus> {
GaussianRandomProjectionParams(GaussianRandomProjectionValidParams {
params: GaussianRandomProjectionParamsInner::Precision { precision: 0.1 },
params: GaussianRandomProjectionParamsInner::Epsilon { eps: 0.1 },
rng: Xoshiro256Plus::seed_from_u64(42),
})
}

/// Create new parameters for a [`GaussianRandomProjection`] with default values
/// `precision = 0.1` and the provided [`Rng`].
/// `eps = 0.1` and the provided [`Rng`].
pub fn params_with_rng<R>(rng: R) -> GaussianRandomProjectionParams<R>
where
R: Rng + Clone,
{
GaussianRandomProjectionParams(GaussianRandomProjectionValidParams {
params: GaussianRandomProjectionParamsInner::Precision { precision: 0.1 },
params: GaussianRandomProjectionParamsInner::Epsilon { eps: 0.1 },
rng,
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ impl<R: Rng + Clone> GaussianRandomProjectionParams<R> {
self
}

/// Set the precision (distortion, `eps`) of the embedding.
/// Set the precision parameter (distortion, `eps`) of the embedding.
///
/// Setting the precision with this function
/// Setting `eps` with this function
/// discards the target dimension parameter if it had been set previously.
pub fn precision(mut self, eps: f64) -> Self {
self.0.params = GaussianRandomProjectionParamsInner::Precision { precision: eps };
pub fn eps(mut self, eps: f64) -> Self {
self.0.params = GaussianRandomProjectionParamsInner::Epsilon { eps };

self
}
Expand Down Expand Up @@ -75,23 +75,23 @@ pub struct GaussianRandomProjectionValidParams<R: Rng + Clone> {
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum GaussianRandomProjectionParamsInner {
Dimension { target_dim: usize },
Precision { precision: f64 },
Epsilon { eps: f64 },
}

impl GaussianRandomProjectionParamsInner {
fn target_dim(&self) -> Option<usize> {
use GaussianRandomProjectionParamsInner::*;
match self {
Dimension { target_dim } => Some(*target_dim),
Precision { .. } => None,
Epsilon { .. } => None,
}
}

fn eps(&self) -> Option<f64> {
use GaussianRandomProjectionParamsInner::*;
match self {
Dimension { .. } => None,
Precision { precision } => Some(*precision),
Epsilon { eps } => Some(*eps),
}
}
}
Expand All @@ -101,7 +101,7 @@ impl<R: Rng + Clone> GaussianRandomProjectionValidParams<R> {
self.params.target_dim()
}

pub fn precision(&self) -> Option<f64> {
pub fn eps(&self) -> Option<f64> {
self.params.eps()
}

Expand All @@ -121,8 +121,8 @@ impl<R: Rng + Clone> ParamGuard for GaussianRandomProjectionParams<R> {
return Err(ReductionError::NonPositiveEmbeddingSize);
}
}
GaussianRandomProjectionParamsInner::Precision { precision } => {
if precision <= 0. || precision >= 1. {
GaussianRandomProjectionParamsInner::Epsilon { eps } => {
if eps <= 0. || eps >= 1. {
return Err(ReductionError::InvalidPrecision);
}
}
Expand Down
21 changes: 21 additions & 0 deletions algorithms/linfa-reduction/src/random_projection/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,25 @@ mod tests {
has_autotraits::<SparseRandomProjectionValidParams<Xoshiro256Plus>>();
has_autotraits::<SparseRandomProjectionParams<Xoshiro256Plus>>();
}

use linfa::{traits::Fit, Dataset};

#[test]
fn gaussian_dim_increase_error() {
    // Four samples with only two features each: the target dimension derived
    // from `eps` is expected to be larger than the number of features, which
    // `fit` must reject instead of "reducing" to a higher dimension.
    let records = array![[10., 10.], [1., 12.], [20., 30.], [-20., 30.],];
    let dataset = Dataset::from(records);
    let res = GaussianRandomProjection::<f32>::params()
        .eps(0.1)
        .fit(&dataset);
    // Check the specific variant rather than `is_err()`, so that an unrelated
    // failure (e.g. parameter validation) cannot make this test pass.
    assert!(matches!(
        res,
        Err(crate::error::ReductionError::DimensionIncrease(_, _))
    ));
}
#[test]
fn sparse_dim_increase_error() {
    // Four samples with only two features each: the target dimension derived
    // from `eps` is expected to be larger than the number of features, which
    // `fit` must reject instead of "reducing" to a higher dimension.
    let records = array![[10., 10.], [1., 12.], [20., 30.], [-20., 30.],];
    let dataset = Dataset::from(records);
    let res = SparseRandomProjection::<f32>::params()
        .eps(0.1)
        .fit(&dataset);
    // Check the specific variant rather than `is_err()`, so that an unrelated
    // failure (e.g. parameter validation) cannot make this test pass.
    assert!(matches!(
        res,
        Err(crate::error::ReductionError::DimensionIncrease(_, _))
    ));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,15 @@ where

let n_dims = match &self.params {
SparseRandomProjectionParamsInner::Dimension { target_dim } => *target_dim,
SparseRandomProjectionParamsInner::Precision { precision } => {
johnson_lindenstrauss_min_dim(n_samples, *precision)
SparseRandomProjectionParamsInner::Epsilon { eps } => {
johnson_lindenstrauss_min_dim(n_samples, *eps)
}
};

if n_dims > n_features {
return Err(ReductionError::DimensionIncrease(n_dims, n_features));
}

let scale = (n_features as f64).sqrt();
let p = 1f64 / scale;
let dist = SparseDistribution::new(F::cast(scale), p);
Expand Down Expand Up @@ -96,7 +100,7 @@ impl<F: Float> SparseRandomProjection<F> {
/// `eps = 0.1` and a [`Xoshiro256Plus`] RNG.
pub fn params() -> SparseRandomProjectionParams<Xoshiro256Plus> {
SparseRandomProjectionParams(SparseRandomProjectionValidParams {
params: SparseRandomProjectionParamsInner::Precision { precision: 0.1 },
params: SparseRandomProjectionParamsInner::Epsilon { eps: 0.1 },
rng: Xoshiro256Plus::seed_from_u64(42),
})
}
Expand All @@ -108,10 +112,11 @@ impl<F: Float> SparseRandomProjection<F> {
R: Rng + Clone,
{
SparseRandomProjectionParams(SparseRandomProjectionValidParams {
params: SparseRandomProjectionParamsInner::Precision { precision: 0.1 },
params: SparseRandomProjectionParamsInner::Epsilon { eps: 0.1 },
rng,
})
}
}

impl_proj! {SparseRandomProjection<F>}

Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ impl<R: Rng + Clone> SparseRandomProjectionParams<R> {
self
}

/// Set the precision (distortion, `eps`) of the embedding.
/// Set the distortion parameter (`eps`) of the embedding.
///
/// Setting the precision with this function
/// Setting `eps` with this function
/// discards the target dimension parameter if it had been set previously.
pub fn precision(mut self, eps: f64) -> Self {
self.0.params = SparseRandomProjectionParamsInner::Precision { precision: eps };
pub fn eps(mut self, eps: f64) -> Self {
self.0.params = SparseRandomProjectionParamsInner::Epsilon { eps };

self
}
Expand Down Expand Up @@ -69,23 +69,23 @@ pub struct SparseRandomProjectionValidParams<R> {
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum SparseRandomProjectionParamsInner {
Dimension { target_dim: usize },
Precision { precision: f64 },
Epsilon { eps: f64 },
}

impl SparseRandomProjectionParamsInner {
fn target_dim(&self) -> Option<usize> {
use SparseRandomProjectionParamsInner::*;
match self {
Dimension { target_dim } => Some(*target_dim),
Precision { .. } => None,
Epsilon { .. } => None,
}
}

fn eps(&self) -> Option<f64> {
use SparseRandomProjectionParamsInner::*;
match self {
Dimension { .. } => None,
Precision { precision } => Some(*precision),
Epsilon { eps } => Some(*eps),
}
}
}
Expand All @@ -95,7 +95,7 @@ impl<R: Rng + Clone> SparseRandomProjectionValidParams<R> {
self.params.target_dim()
}

pub fn precision(&self) -> Option<f64> {
pub fn eps(&self) -> Option<f64> {
self.params.eps()
}

Expand All @@ -115,8 +115,8 @@ impl<R: Rng + Clone> ParamGuard for SparseRandomProjectionParams<R> {
return Err(ReductionError::NonPositiveEmbeddingSize);
}
}
SparseRandomProjectionParamsInner::Precision { precision } => {
if precision <= 0. || precision >= 1. {
SparseRandomProjectionParamsInner::Epsilon { eps } => {
if eps <= 0. || eps >= 1. {
return Err(ReductionError::InvalidPrecision);
}
}
Expand Down

0 comments on commit a5f1ad3

Please sign in to comment.