SAGA API  v9.6
mat_regression.cpp
Go to the documentation of this file.
1 
3 // //
4 // SAGA //
5 // //
6 // System for Automated Geoscientific Analyses //
7 // //
8 // Application Programming Interface //
9 // //
10 // Library: SAGA_API //
11 // //
12 //-------------------------------------------------------//
13 // //
14 // mat_regression.cpp //
15 // //
16 // Copyright (C) 2005 by Olaf Conrad //
17 // //
18 //-------------------------------------------------------//
19 // //
20 // This file is part of 'SAGA - System for Automated //
21 // Geoscientific Analyses'. //
22 // //
23 // This library is free software; you can redistribute //
24 // it and/or modify it under the terms of the GNU Lesser //
25 // General Public License as published by the Free //
26 // Software Foundation, either version 2.1 of the //
27 // License, or (at your option) any later version. //
28 // //
29 // This library is distributed in the hope that it will //
30 // be useful, but WITHOUT ANY WARRANTY; without even the //
31 // implied warranty of MERCHANTABILITY or FITNESS FOR A //
32 // PARTICULAR PURPOSE. See the GNU Lesser General Public //
33 // License for more details. //
34 // //
35 // You should have received a copy of the GNU Lesser //
36 // General Public License along with this program; if //
37 // not, see <http://www.gnu.org/licenses/>. //
38 // //
39 //-------------------------------------------------------//
40 // //
41 // contact: Olaf Conrad //
42 // Institute of Geography //
43 // University of Goettingen //
44 // Goldschmidtstr. 5 //
45 // 37077 Goettingen //
46 // Germany //
47 // //
48 // e-mail: oconrad@saga-gis.org //
49 // //
51 
52 //---------------------------------------------------------
53 #include "mat_tools.h"
54 
55 
57 // //
58 // //
59 // //
61 
62 //---------------------------------------------------------
63 double SG_Regression_Get_Adjusted_R2(double r2, int n, int p, TSG_Regression_Correction Correction)
64 {
65  double r = 1. - r2;
66 
67  switch( Correction )
68  {
69  case REGRESSION_CORR_None: default:
70  return( r2 );
71 
73  r2 = 1. - ((n ) / (n - p )) * r;
74  break;
75 
77  r2 = 1. - ((n - 1.) / (n - p - 1.)) * r;
78  break;
79 
81  r2 = 1. - ((n - 1.) / (n - p )) * r;
82  break;
83 
85  // r2 = 1. - ((n - 3.) / (n - p - 2.)) * (r + (2. / (n - p)) * r*r);
86  r2 = 1. - ((n - 3.) * r / (n - p - 1.)) * (1. + (2. * r) / (n - p + 1.));
87  break;
88 
90  r2 = 1. - ((n - 3.) * r / (n - p - 1.)) * (1. + (2. * r) / (n - p - 2.3));
91  break;
92 
94  r2 = 1. - ((n - 4.) * r / (n - p - 1.)) * (1. + (2. * r) / (n - p + 1.));
95  break;
96  }
97 
98  return( r2 < 0. ? 0. : r2 > 1. ? 1. : r2 );
99 }
100 
101 
103 // //
104 // //
105 // //
107 
108 //---------------------------------------------------------
110 {
111  Destroy();
112 }
113 
114 //---------------------------------------------------------
116 {
117  Destroy();
118 }
119 
120 
122 // //
124 
125 //---------------------------------------------------------
127 {
128  m_R2 = -1.; // mark as invalid (or unprocessed)
129 
131 
132  m_x.Destroy();
133  m_y.Destroy();
134 }
135 
136 
138 // //
140 
141 //---------------------------------------------------------
142 bool CSG_Regression::Add_Values(double x, double y)
143 {
144  return( m_y.Add_Row(y) && m_x.Add_Row(x) );
145 }
146 
147 //---------------------------------------------------------
148 bool CSG_Regression::Set_Values(int nValues, double *x, double *y)
149 {
150  Destroy();
151 
152  return( m_y.Create(nValues, y) && m_x.Create(nValues, x) );
153 }
154 
155 
157 // //
159 
160 //---------------------------------------------------------
162 {
163  static CSG_String s; s.Clear();
164 
165  s += CSG_String::Format("\nNumber of observations = %d\n", Get_Count());
166  s += CSG_String::Format("\nPredictor variable (x):\n min. = %.6f, max. = %.6f, mean = %.6f, stddev. = %.6f\n", m_xMin, m_xMax, m_xMean, sqrt(m_xVar));
167  s += CSG_String::Format("\nDependent variable (y):\n min. = %.6f, max. = %.6f, mean = %.6f, stddev. = %.6f\n", m_yMin, m_yMax, m_yMean, sqrt(m_yVar));
168  s += CSG_String::Format(SG_T("\nRegression (r = %.4f, r² = %.4f):\n\n y = "), m_R, m_R2);
169 
170  switch( m_Type )
171  {
172  case REGRESSION_Linear: s += CSG_String::Format("%.6f %+.6f * x" , m_RConst, m_RCoeff); break; // Y = a + b * X
173  case REGRESSION_Rez_X : s += CSG_String::Format("%.6f %+.6f / x" , m_RConst, m_RCoeff); break; // Y = a + b / X
174  case REGRESSION_Rez_Y : s += CSG_String::Format("%.6f / (%.6f - x)" , m_RConst, m_RCoeff); break; // Y = a / (b - X)
175  case REGRESSION_Pow : s += CSG_String::Format("%.6f * x^%.6f" , m_RConst, m_RCoeff); break; // Y = a * X^b
176  case REGRESSION_Exp : s += CSG_String::Format("%.6f * e^(%.6f * x)", m_RConst, m_RCoeff); break; // Y = a * e^(b * X)
177  case REGRESSION_Log : s += CSG_String::Format("%.6f %+.6f * ln(x)" , m_RConst, m_RCoeff); break; // Y = a + b * ln(X)
178  }
179 
180  s += "\n";
181 
182  return( s );
183 }
184 
185 
187 // //
189 
190 //---------------------------------------------------------
191 double CSG_Regression::Get_x(double y) const
192 {
193  if( m_R2 >= 0. )
194  {
195  switch( m_Type )
196  {
197  case REGRESSION_Linear: // Y = a + b * X -> X = (Y - a) / b
198  if( m_RCoeff != 0. )
199  return( (m_RConst * y) / m_RCoeff );
200  break;
201 
202  case REGRESSION_Rez_X: // Y = a + b / X -> X = b / (Y - a)
203  if( (y = y - m_RConst) != 0. )
204  return( m_RCoeff / y );
205  break;
206 
207  case REGRESSION_Rez_Y: // Y = a / (b - X) -> X = b - a / Y
208  if( y != 0. )
209  return( m_RCoeff - m_RConst / y );
210  break;
211 
212  case REGRESSION_Pow: // Y = a * X^b -> X = (Y / a)^(1 / b)
213  if( m_RConst != 0. && m_RCoeff != 0. )
214  return( pow(y / m_RConst, 1. / m_RCoeff) );
215  break;
216 
217  case REGRESSION_Exp: // Y = a * e^(b * X) -> X = ln(Y / a) / b
218  if( m_RConst != 0. && (y = y / m_RConst) > 0. && m_RCoeff != 0. )
219  return( log(y) / m_RCoeff );
220  break;
221 
222  case REGRESSION_Log: // Y = a + b * ln(X) -> X = e^((Y - a) / b)
223  if( m_RCoeff != 0. )
224  return( exp((y - m_RConst) / m_RCoeff) );
225  break;
226  }
227  }
228 
229  return( sqrt(-1.) ); // NaN
230 }
231 
232 //---------------------------------------------------------
233 double CSG_Regression::Get_y(double x) const
234 {
235  if( m_R2 >= 0. )
236  {
237  switch( m_Type )
238  {
239  case REGRESSION_Linear: // Y = a + b * X
240  return( m_RConst + m_RCoeff * x );
241 
242  case REGRESSION_Rez_X: // Y = a + b / X
243  if( x != 0. )
244  return( m_RConst + m_RCoeff / x );
245  break;
246 
247  case REGRESSION_Rez_Y: // Y = a / (b - X)
248  if( (x = m_RCoeff - x) != 0. )
249  return( m_RConst / x );
250  break;
251 
252  case REGRESSION_Pow: // Y = a * X^b
253  return( m_RConst * pow(x, m_RCoeff) );
254 
255  case REGRESSION_Exp: // Y = a e^(b * X)
256  return( m_RConst * exp(m_RCoeff * x) );
257 
258  case REGRESSION_Log: // Y = a + b * ln(X)
259  if( x > 0. )
260  return( m_RConst + m_RCoeff * log(x) );
261  break;
262  }
263  }
264 
265  return( sqrt(-1.) );
266 }
267 
268 
270 // //
272 
273 //---------------------------------------------------------
274 inline double CSG_Regression::_Y_Transform(double y)
275 {
276  switch( m_Type )
277  {
278  default:
279  return( y );
280 
281  case REGRESSION_Rez_Y:
282  if( y == 0. ) { y = M_FLT_EPSILON; }
283  return( 1. / y );
284 
285  case REGRESSION_Pow:
286  case REGRESSION_Exp:
287  if( y <= 0. ) { y = M_FLT_EPSILON; }
288  return( log(y) );
289  }
290 }
291 
292 //---------------------------------------------------------
293 inline double CSG_Regression::_X_Transform(double x)
294 {
295  switch( m_Type )
296  {
297  default:
298  return( x );
299 
300  case REGRESSION_Rez_X:
301  if( x == 0. ) { x = M_FLT_EPSILON; }
302  return( 1. / x );
303 
304  case REGRESSION_Pow:
305  case REGRESSION_Log:
306  if( x <= 0. ) { x = M_FLT_EPSILON; }
307  return( log(x) );
308  }
309 }
310 
311 
313 // //
315 
316 //---------------------------------------------------------
318 {
319  m_R2 = -1.; // mark as invalid (unprocessed)
320  m_Type = Type;
321 
322  if( Get_Count() < 2 )
323  {
324  return( false );
325  }
326 
327  CSG_Simple_Statistics sx, sy;
328 
329  for(int i=0; i<Get_Count(); i++)
330  {
331  sx += _X_Transform(m_x[i]);
332  sy += _Y_Transform(m_y[i]);
333  }
334 
335  m_xMin = sx.Get_Minimum(); m_xMax = sx.Get_Maximum(); m_xMean = sx.Get_Mean(); m_xVar = sx.Get_Variance();
336  m_yMin = sy.Get_Minimum(); m_yMax = sy.Get_Maximum(); m_yMean = sy.Get_Mean(); m_yVar = sy.Get_Variance();
337 
338  if( m_xMin >= m_xMax || m_yMin >= m_yMax )
339  {
340  return( false );
341  }
342 
343  //-----------------------------------------------------
344  double s_x = 0., s_y = 0., s_xx = 0., s_xy = 0., s_dx2 = 0., s_dy2 = 0., s_dxdy = 0.;
345 
346  for(int i=0; i<Get_Count(); i++)
347  {
348  double x = _X_Transform(m_x[i]);
349  double y = _Y_Transform(m_y[i]);
350 
351  s_x += x;
352  s_y += y;
353  s_xx += x * x;
354  s_xy += x * y;
355 
356  x -= m_xMean;
357  y -= m_yMean;
358 
359  s_dx2 += x * x;
360  s_dy2 += y * y;
361  s_dxdy += x * y;
362  }
363 
364  //-----------------------------------------------------
365  m_RCoeff = s_dxdy / s_dx2;
366  m_RConst = (s_xx * s_y - s_x * s_xy) / (Get_Count() * s_xx - s_x * s_x);
367  m_R = s_dxdy / sqrt(s_dx2 * s_dy2);
368  m_R2 = m_R*m_R;
371 
372  //-----------------------------------------------------
373  if( !bStdError )
374  {
375  m_SE = -1.;
376  }
377  else
378  {
379  m_SE = 0.;
380 
381  for(int i=0; i<Get_Count(); i++)
382  {
383  double d = fabs(m_y[i] - Get_y(m_x[i]));
384 
385  m_SE += d;
386  }
387 
388  m_SE /= Get_Count();
389  }
390 
391  //-----------------------------------------------------
392  switch( m_Type )
393  {
394  default: // case REGRESSION_Linear:
395  return( true );
396 
397  case REGRESSION_Rez_X: {
398  m_xVar = 1. / m_xVar;
399  break; }
400 
401  case REGRESSION_Rez_Y: {
402  double d = m_RConst;
403  m_RConst = 1. / m_RCoeff;
404  m_RCoeff = d * m_RCoeff;
405  m_yVar = 1. / m_yVar;
406  break; }
407 
408  case REGRESSION_Pow: {
409  m_RConst = exp(m_RConst);
410  m_xVar = exp(m_xVar);
411  m_yVar = exp(m_yVar);
412  break; }
413 
414  case REGRESSION_Exp: {
415  m_RConst = exp(m_RConst);
416  m_yVar = exp(m_yVar);
417  break; }
418 
419  case REGRESSION_Log: {
420  m_xVar = exp(m_xVar);
421  break; }
422  }
423 
425 
426  if( !s.Create(m_x) ) { return( false ); } m_xMin = s.Get_Minimum(); m_xMean = s.Get_Mean(); m_xMax = s.Get_Maximum(); m_xVar = s.Get_Variance();
427  if( !s.Create(m_y) ) { return( false ); } m_yMin = s.Get_Minimum(); m_yMean = s.Get_Mean(); m_yMax = s.Get_Maximum(); m_yVar = s.Get_Variance();
428 
429  return( true );
430 }
431 
432 //---------------------------------------------------------
433 bool CSG_Regression::Calculate(int nValues, double *x, double *y, TSG_Regression_Type Type, bool bStdError)
434 {
435  return( Set_Values(nValues, x, y) && Calculate(Type, bStdError) );
436 }
437 
438 
440 // //
441 // //
442 // //
444 
445 //---------------------------------------------------------
REGRESSION_CORR_Pratt
@ REGRESSION_CORR_Pratt
Definition: mat_tools.h:1573
SG_T
#define SG_T(s)
Definition: api_core.h:537
CSG_Regression::Get_x
double Get_x(double y) const
Definition: mat_regression.cpp:191
REGRESSION_CORR_Olkin_Pratt
@ REGRESSION_CORR_Olkin_Pratt
Definition: mat_tools.h:1572
CSG_Regression::CSG_Regression
CSG_Regression(void)
Definition: mat_regression.cpp:109
CSG_Regression::m_yMax
double m_yMax
Definition: mat_tools.h:1659
CSG_Simple_Statistics::Get_Variance
double Get_Variance(void)
Definition: mat_tools.h:752
CSG_Regression::Get_Count
int Get_Count(void) const
Definition: mat_tools.h:1610
CSG_Regression::m_yVar
double m_yVar
Definition: mat_tools.h:1659
CSG_Regression::m_xVar
double m_xVar
Definition: mat_tools.h:1658
CSG_Regression::m_xMin
double m_xMin
Definition: mat_tools.h:1658
CSG_Regression::Destroy
void Destroy(void)
Definition: mat_regression.cpp:126
CSG_Regression::m_RConst
double m_RConst
Definition: mat_tools.h:1657
CSG_Vector::Add_Row
bool Add_Row(double Value=0.)
Definition: mat_matrix.cpp:188
CSG_Regression::m_yMin
double m_yMin
Definition: mat_tools.h:1659
REGRESSION_CORR_Wherry_1
@ REGRESSION_CORR_Wherry_1
Definition: mat_tools.h:1570
REGRESSION_Log
@ REGRESSION_Log
Definition: mat_tools.h:1594
CSG_Regression::m_SE
double m_SE
Definition: mat_tools.h:1657
CSG_Regression::m_R
double m_R
Definition: mat_tools.h:1657
CSG_Regression::Get_y
double Get_y(double x) const
Definition: mat_regression.cpp:233
mat_tools.h
REGRESSION_Linear
@ REGRESSION_Linear
Definition: mat_tools.h:1589
CSG_Simple_Statistics::Get_Maximum
double Get_Maximum(void)
Definition: mat_tools.h:747
REGRESSION_CORR_Claudy_3
@ REGRESSION_CORR_Claudy_3
Definition: mat_tools.h:1574
CSG_Regression::Set_Values
bool Set_Values(int nValues, double *x, double *y)
Definition: mat_regression.cpp:148
REGRESSION_CORR_Wherry_2
@ REGRESSION_CORR_Wherry_2
Definition: mat_tools.h:1571
CSG_Vector::Create
bool Create(const CSG_Vector &Vector)
Definition: mat_matrix.cpp:87
CSG_Simple_Statistics::Get_Minimum
double Get_Minimum(void)
Definition: mat_tools.h:746
CSG_Regression::m_x
CSG_Vector m_x
Definition: mat_tools.h:1661
CSG_Regression::m_xMean
double m_xMean
Definition: mat_tools.h:1658
CSG_Regression::m_R2
double m_R2
Definition: mat_tools.h:1657
CSG_Regression::_X_Transform
double _X_Transform(double y)
Definition: mat_regression.cpp:293
CSG_Regression::m_P
double m_P
Definition: mat_tools.h:1657
CSG_Regression::Calculate
bool Calculate(TSG_Regression_Type Type=REGRESSION_Linear, bool bStdError=false)
Definition: mat_regression.cpp:317
CSG_Regression::m_Type
TSG_Regression_Type m_Type
Definition: mat_tools.h:1663
REGRESSION_Rez_X
@ REGRESSION_Rez_X
Definition: mat_tools.h:1590
REGRESSION_Rez_Y
@ REGRESSION_Rez_Y
Definition: mat_tools.h:1591
M_FLT_EPSILON
#define M_FLT_EPSILON
Definition: mat_tools.h:121
CSG_Simple_Statistics::Get_Mean
double Get_Mean(void)
Definition: mat_tools.h:751
REGRESSION_Pow
@ REGRESSION_Pow
Definition: mat_tools.h:1592
CSG_String::Format
static CSG_String Format(const char *Format,...)
Definition: api_string.cpp:270
TSG_Regression_Type
TSG_Regression_Type
Definition: mat_tools.h:1588
TSG_Regression_Correction
TSG_Regression_Correction
Definition: mat_tools.h:1567
CSG_String::Clear
void Clear(void)
Definition: api_string.cpp:259
CSG_Vector::Destroy
bool Destroy(void)
Definition: mat_matrix.cpp:130
SG_Char
#define SG_Char
Definition: api_core.h:536
CSG_String
Definition: api_core.h:563
CSG_Regression::Add_Values
bool Add_Values(double x, double y)
Definition: mat_regression.cpp:142
CSG_Regression::m_y
CSG_Vector m_y
Definition: mat_tools.h:1661
CSG_Regression::m_yMean
double m_yMean
Definition: mat_tools.h:1659
CSG_Regression::m_R2_Adj
double m_R2_Adj
Definition: mat_tools.h:1657
CSG_Regression::~CSG_Regression
virtual ~CSG_Regression(void)
Definition: mat_regression.cpp:115
CSG_Simple_Statistics::Create
bool Create(bool bHoldValues=false)
Definition: mat_tools.cpp:350
CSG_Regression::m_RCoeff
double m_RCoeff
Definition: mat_tools.h:1657
CSG_Regression::asString
const SG_Char * asString(void)
Definition: mat_regression.cpp:161
CSG_Test_Distribution::Get_F_Tail_from_R2
static double Get_F_Tail_from_R2(double R2, int nPredictors, int nSamples, TSG_Test_Distribution_Type Type=TESTDIST_TYPE_Right)
Definition: mat_tools.cpp:3271
SG_Regression_Get_Adjusted_R2
double SG_Regression_Get_Adjusted_R2(double r2, int n, int p, TSG_Regression_Correction Correction)
Definition: mat_regression.cpp:63
REGRESSION_Exp
@ REGRESSION_Exp
Definition: mat_tools.h:1593
REGRESSION_CORR_Smith
@ REGRESSION_CORR_Smith
Definition: mat_tools.h:1569
CSG_Simple_Statistics
Definition: mat_tools.h:723
REGRESSION_CORR_None
@ REGRESSION_CORR_None
Definition: mat_tools.h:1568
CSG_Regression::m_xMax
double m_xMax
Definition: mat_tools.h:1658
CSG_Regression::_Y_Transform
double _Y_Transform(double x)
Definition: mat_regression.cpp:274