From efa3cd6fec018f7dd1549a4ed7ec01256a91573f Mon Sep 17 00:00:00 2001 From: thequackdaddy Date: Thu, 3 Nov 2016 19:54:06 -0500 Subject: [PATCH] Removing other polytypes, standardize procedure. --- patsy/contrasts.py | 2 +- patsy/polynomials.py | 84 ++++++++------------------------------------ 2 files changed, 15 insertions(+), 71 deletions(-) diff --git a/patsy/contrasts.py b/patsy/contrasts.py index 469aac0..010afb3 100644 --- a/patsy/contrasts.py +++ b/patsy/contrasts.py @@ -264,7 +264,7 @@ def _code_either(self, intercept, levels): # quadratic, etc., functions of the raw scores, and then use 'qr' to # orthogonalize each column against those to its left. scores -= scores.mean() - raw_poly = Polynomial.vander(scores, n - 1, 'poly') + raw_poly = Polynomial.vander(scores, n - 1) alpha, norm, beta = Polynomial.gen_qr(raw_poly, n - 1) q = Polynomial.apply_qr(raw_poly, n - 1, alpha, norm, beta) q[:, 0] = 1 diff --git a/patsy/polynomials.py b/patsy/polynomials.py index 56ca9b5..9137864 100644 --- a/patsy/polynomials.py +++ b/patsy/polynomials.py @@ -5,18 +5,19 @@ # R-compatible poly function # These are made available in the patsy.* namespace -__all__ = ["poly"] - import numpy as np from patsy.util import have_pandas, no_pickling, assert_no_pickling from patsy.state import stateful_transform +__all__ = ["poly"] + if have_pandas: import pandas + class Poly(object): - """poly(x, degree=3, polytype='poly', raw=False, scaler=None) + """poly(x, degree=3, raw=False) Generates an orthogonal polynomial transformation of x of degree. Generic usage is something along the lines of:: @@ -26,29 +27,17 @@ class Poly(object): to fit ``y`` as a function of ``x``, with a 4th degree polynomial. :arg degree: The number of degrees for the polynomial expansion. - :arg polytype: Either poly (the default), legendre, laguerre, hermite, or - hermanite_e. :arg raw: When raw is False (the default), will return orthogonal polynomials. - :arg scaler: Choice of 'qr' (default when raw=False) for QR- - decomposition or 'standardize'. .. versionadded:: 0.4.1 """ def __init__(self): self._tmp = {} - def memorize_chunk(self, x, degree=3, polytype='poly', raw=False, - scaler=None): - if not raw and (scaler is None): - scaler = 'qr' - if scaler not in ('qr', 'standardize', None): - raise ValueError('input to \'scaler\' %s is not a valid ' - 'scaling technique' % scaler) + def memorize_chunk(self, x, degree=3, raw=False): args = {"degree": degree, - "raw": raw, - "scaler": scaler, - 'polytype': polytype + "raw": raw } self._tmp["args"] = args # XX: check whether we need x values before saving them @@ -80,19 +69,12 @@ def memorize_finish(self): n = args['degree'] self.degree = n - self.scaler = args['scaler'] self.raw = args['raw'] - self.polytype = args['polytype'] - - if self.scaler is not None: - raw_poly = self.vander(scores, n, self.polytype) - if self.scaler == 'qr': + if not self.raw: + raw_poly = self.vander(scores, n) self.alpha, self.norm, self.beta = self.gen_qr(raw_poly, n) - if self.scaler == 'standardize': - self.mean, self.var = self.gen_standardize(raw_poly) - def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None): if have_pandas: if isinstance(x, (pandas.Series, pandas.DataFrame)): @@ -120,23 +102,17 @@ def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None): return p @staticmethod - def vander(x, n, polytype): - v_func = {'poly': np.polynomial.polynomial.polyvander, - 'cheb': np.polynomial.chebyshev.chebvander, - 'legendre': np.polynomial.legendre.legvander, - 'laguerre': np.polynomial.laguerre.lagvander, - 'hermite': np.polynomial.hermite.hermvander, - 'hermite_e': np.polynomial.hermite_e.hermevander} - raw_poly = v_func[polytype](x, n) + def vander(x, n): + raw_poly = np.polynomial.polynomial.polyvander(x, n) return raw_poly @staticmethod def gen_qr(raw_poly, n): + x = raw_poly[:, 1] + q, r = np.linalg.qr(raw_poly) # Q is now orthognoal of degree n. To match what R is doing, we # need to use the three-term recurrence technique to calculate # new alpha, beta, and norm. - x = raw_poly[:, 1] - q, r = np.linalg.qr(raw_poly) alpha = (np.sum(x.reshape((-1, 1)) * q[:, :n] ** 2, axis=0) / np.sum(q[:, :n] ** 2, axis=0)) @@ -147,10 +123,6 @@ def gen_qr(raw_poly, n): beta = (norm[1:] / norm[:n]) ** 2 return alpha, norm, beta - @staticmethod - def gen_standardize(raw_poly): - return raw_poly.mean(axis=0), raw_poly.var(axis=0) - @staticmethod def apply_qr(x, n, alpha, norm, beta): # This is where the three-term recurrence is unwound for the QR @@ -166,13 +138,6 @@ def apply_qr(x, n, alpha, norm, beta): p[:, i + 1] = (p[:, i + 1] - (beta[i - 1] * p[:, i - 1])) p /= norm return p - - @staticmethod - def apply_standardize(x, mean, var): - x[:, 1:] = ((x[:, 1:] - mean[1:]) / (var[1:] ** 0.5)) - return x - - __getstate__ = no_pickling poly = stateful_transform(Poly) @@ -215,24 +180,6 @@ def test_poly_compat(): start_idx = stop_idx + 1 assert tests_ran == R_poly_num_tests -def test_poly_smoke(): - # Test that standardized values match. - x = np.arange(27) - vanders = ['poly', 'cheb', 'legendre', 'laguerre', 'hermite', 'hermite_e'] - scalers = ['raw', 'qr', 'standardize'] - for v in vanders: - p1 = poly(x, polytype=v, scaler='standardize') - p2 = poly(x, polytype=v, raw=True) - p2 = (p2 - p2.mean(axis=0)) / p2.std(axis=0) - np.testing.assert_allclose(p1, p2) - - # Don't have tests for all this... so just make sure it works. - for v in vanders: - for s in scalers: - if s == 'raw': - poly(x, raw=True, polytype=v) - else: - poly(x, scaler=s, polytype=v) def test_poly_errors(): from nose.tools import assert_raises @@ -245,8 +192,5 @@ def test_poly_errors(): assert_raises(ValueError, poly, x, degree=0) assert_raises(ValueError, poly, x, degree=3.5) - #Invalid Poly Type - assert_raises(KeyError, poly, x, polytype='foo') - - #Invalid scaling type - assert_raises(ValueError, poly, x, scaler='bar') + p = poly(np.arange(1, 10), degree=3) + assert_no_pickling(p)