SAGA API v9.5
mat_regression_weighted.cpp

///////////////////////////////////////////////////////////
// //
// SAGA //
// //
// System for Automated Geoscientific Analyses //
// //
// Application Programming Interface //
// //
// Library: SAGA_API //
// //
//-------------------------------------------------------//
// //
// mat_regression_weighted.cpp //
// //
// Copyright (C) 2005 by Olaf Conrad //
// //
//-------------------------------------------------------//
// //
// This file is part of 'SAGA - System for Automated //
// Geoscientific Analyses'. //
// //
// This library is free software; you can redistribute //
// it and/or modify it under the terms of the GNU Lesser //
// General Public License as published by the Free //
// Software Foundation, either version 2.1 of the //
// License, or (at your option) any later version. //
// //
// This library is distributed in the hope that it will //
// be useful, but WITHOUT ANY WARRANTY; without even the //
// implied warranty of MERCHANTABILITY or FITNESS FOR A //
// PARTICULAR PURPOSE. See the GNU Lesser General Public //
// License for more details. //
// //
// You should have received a copy of the GNU Lesser //
// General Public License along with this program; if //
// not, see <http://www.gnu.org/licenses/>. //
// //
//-------------------------------------------------------//
// //
// contact: Olaf Conrad //
// Institute of Geography //
// University of Goettingen //
// Goldschmidtstr. 5 //
// 37077 Goettingen //
// Germany //
// //
// e-mail: oconrad@saga-gis.org //
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------
#include "mat_tools.h"


///////////////////////////////////////////////////////////
// //
// //
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------
CSG_Regression_Weighted::CSG_Regression_Weighted(void)
{
    m_r2 = -1.0;

    m_Log_maxIter    = 30;
    m_Log_Epsilon    = 0.001;
    m_Log_Difference = 1000.;
}

//---------------------------------------------------------
CSG_Regression_Weighted::~CSG_Regression_Weighted(void)
{
    Destroy();
}

//---------------------------------------------------------
bool CSG_Regression_Weighted::Destroy(void)
{
    m_r2 = -1.0;

    m_y.Destroy();
    m_w.Destroy();
    m_X.Destroy();
    m_b.Destroy();

    return( true );
}

///////////////////////////////////////////////////////////
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------
bool CSG_Regression_Weighted::Add_Sample(double Weight, double Dependent, const CSG_Vector &Predictors)
{
    if( m_X.Get_NRows() == 0 )
    {
        m_X.Create(Predictors.Get_N() + 1, 1);
    }
    else if( m_X.Get_NCols() == Predictors.Get_N() + 1 )
    {
        m_X.Add_Row();
    }
    else
    {
        return( false );
    }

    m_w.Add_Row(Weight   );
    m_y.Add_Row(Dependent);

    double *y = m_X[m_X.Get_NRows() - 1];

    y[0] = 1.0;

    for(int i=0; i<Predictors.Get_N(); i++)
    {
        y[i + 1] = Predictors[i];
    }

    return( true );
}
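
//---------------------------------------------------------
// Usage sketch (illustrative only, not part of the library sources).
// Samples are pushed one by one; Add_Sample() prepends a constant
// 1.0 to every predictor row, so the first coefficient is the
// intercept. Accessor names such as Get_R2() are assumptions to be
// checked against the class declaration in mat_tools.h.
//
//   CSG_Regression_Weighted Model;
//
//   CSG_Vector Predictors(2);
//
//   Predictors[0] = 1.7; Predictors[1] = 0.3;
//
//   Model.Add_Sample(0.5, 2.1, Predictors); // weight, dependent, predictors
//   // ...add the remaining samples...
//
//   if( Model.Calculate(false) )            // weighted least squares fit
//   {
//       double r2 = Model.Get_R2();         // assumed accessor
//   }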

///////////////////////////////////////////////////////////
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------
bool CSG_Regression_Weighted::Calculate(const CSG_Vector &Weights, const CSG_Vector &Dependents, const CSG_Matrix &Predictors, bool bLogistic)
{
    Destroy();

    if( Weights.Get_N() == Dependents.Get_N() && Weights.Get_N() == Predictors.Get_NRows() )
    {
        for(int i=0; i<Weights.Get_N(); i++)
        {
            Add_Sample(Weights[i], Dependents[i], Predictors.Get_Row(i));
        }

        return( Calculate(bLogistic) );
    }

    return( false );
}

//---------------------------------------------------------
bool CSG_Regression_Weighted::Calculate(bool bLogistic)
{
    sLong nSamples = m_w.Get_Size(), nPredictors = m_X.Get_NCols() - 1;

    if( nSamples <= nPredictors || nSamples < 2 )
    {
        return( false );
    }

    //-----------------------------------------------------
    if( bLogistic )
    {
        m_b = _Log_Get_Beta(m_X, m_y, m_w);

        if( m_b.Get_N() == 0 )
        {
            return( false );
        }
    }
    else
    {
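        // Weighted least squares: assemble X'W (each predictor row
        // scaled by its sample weight) and solve the normal equations
        // b = (X'W X)^-1 (X'W y).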
        CSG_Matrix YtW(nSamples, 1 + nPredictors);

        for(sLong i=0; i<nSamples; i++)
        {
            YtW[0][i] = m_w[i];

            for(sLong j=1; j<=nPredictors; j++)
            {
                YtW[j][i] = m_w[i] * m_X[i][j];
            }
        }

        m_b = (YtW * m_X).Get_Inverse() * (YtW * m_y);
    }

    //-----------------------------------------------------
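    // Goodness of fit: the weighted residual (rss) and total (tss)
    // sums of squares yield the weighted coefficient of
    // determination, m_r2 = 1 - rss / tss.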
    CSG_Simple_Statistics yStats(m_y);

    double rss = 0., tss = 0.;

    for(sLong i=0; i<nSamples; i++)
    {
        double yr = m_b[0];

        for(sLong j=1; j<=nPredictors; j++)
        {
            yr += m_b[j] * m_X[i][j];
        }

        if( bLogistic )
        {
            yr = 1. / (1. + exp(-yr));
        }

        rss += m_w[i] * SG_Get_Square(m_y[i] - yr);
        tss += m_w[i] * SG_Get_Square(m_y[i] - yStats.Get_Mean());
    }

    //-----------------------------------------------------
    if( tss > 0.0 && tss >= rss )
    {
        m_r2 = fabs(tss - rss) / tss;

        return( true );
    }

    m_r2 = -1.;

    return( false );
}

///////////////////////////////////////////////////////////
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------
// Zhang, D., Ren, N., and Hou, X. (2018):
// An improved logistic regression model based on a spatially weighted technique
// (ILRBSWT v1.0) and its application to mineral prospectivity mapping.
// Geosci. Model Dev., 11, 2525-2539, https://doi.org/10.5194/gmd-11-2525-2018.

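//---------------------------------------------------------
// The logistic coefficients are fitted by iteratively reweighted
// least squares: each iteration computes p = 1 / (1 + exp(-X*b))
// and updates
//   b <- b + (X'WVX)^-1 * X'W * (y - p),  with V = diag(p*(1 - p)),
// until _Log_NoChange() reports convergence, or the iteration is
// stopped by _Log_OutOfControl() or m_Log_maxIter.
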
//---------------------------------------------------------
bool CSG_Regression_Weighted::Set_Log_maxIter(int maxIter)
{
    if( maxIter > 0 )
    {
        m_Log_maxIter = maxIter;

        return( true );
    }

    return( false );
}

//---------------------------------------------------------
bool CSG_Regression_Weighted::Set_Log_Epsilon(double Epsilon)
{
    if( Epsilon >= 0. )
    {
        m_Log_Epsilon = Epsilon;

        return( true );
    }

    return( false );
}

//---------------------------------------------------------
bool CSG_Regression_Weighted::Set_Log_Difference(double Difference)
{
    if( Difference > 0. )
    {
        m_Log_Difference = Difference;

        return( true );
    }

    return( false );
}

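//---------------------------------------------------------
// Illustrative sketch (not part of the library sources): the three
// setters above tune the iteration before a logistic fit.
//
//   CSG_Regression_Weighted Model;
//
//   Model.Set_Log_maxIter   (200);    // allow more iterations
//   Model.Set_Log_Epsilon   (1.e-4);  // stricter convergence threshold
//   Model.Set_Log_Difference(100.);   // tighter divergence guard
//
//   // ...Add_Sample() calls..., then:
//   Model.Calculate(true);            // bLogistic = true
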
//---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Beta(const CSG_Matrix &X, const CSG_Vector &y, const CSG_Vector &w)
{
    CSG_Vector b(X.Get_NCols()), b_best;

    CSG_Vector p = _Log_Get_Props(X, b);

    for(int i=0; i<m_Log_maxIter; ++i)
    {
        CSG_Vector b_new = _Log_Get_Beta(b, X, y, w, p);

        if( b_new.Get_N() == 0 )
        {
            return( b_best );
        }

        for(int j=0; j<b_new.Get_N(); ++j)
        {
            if( SG_is_NaN(b_new[j]) )
            {
                return( b_best );
            }
        }

        if( _Log_NoChange(b, b_new) )
        {
            return( b_new );
        }

        if( _Log_OutOfControl(b, b_new) )
        {
            return( b_best );
        }

        p      = _Log_Get_Props(X, b_new);
        b      = b_new;
        b_best = b;
    }

    return( b_best );
}

//---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Beta(const CSG_Vector &b, const CSG_Matrix &X, const CSG_Vector &y, const CSG_Vector &w, const CSG_Vector &p)
{
    CSG_Matrix Xt = X.Get_Transpose();             // X'
    CSG_Matrix M  = Xt * _Log_Get_Xwp(p, X, w);    // X' * W * V(t-1) * X
    CSG_Matrix N  = M.Get_Inverse() * Xt;          // inv(X' * W * V(t-1) * X) * X'
    CSG_Vector v  = N * _Log_Get_Ywp(p, y, w);     // inv(X' * W * V(t-1) * X) * X' * W * (y - p(t-1))

    if( v.Get_N() == b.Get_N() )
    {
        return( b + v );
    }

    return( CSG_Vector() );
}

//---------------------------------------------------------
CSG_Matrix CSG_Regression_Weighted::_Log_Get_Xwp(const CSG_Vector &p, const CSG_Matrix &X, const CSG_Vector &w)
{
    CSG_Matrix Xwp;

    if( p.Get_N() == X.Get_NRows() && Xwp.Create(X.Get_NCols(), X.Get_NRows()) )
    {
        for(int i=0; i<X.Get_NRows(); ++i)
        {
            for(int j=0; j<X.Get_NCols(); ++j)
            {
                Xwp[i][j] = w[i] * p[i] * (1. - p[i]) * X[i][j]; // compute W(u) * V(u) * X
            }
        }
    }

    return( Xwp );
}

//---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Ywp(const CSG_Vector &p, const CSG_Vector &y, const CSG_Vector &w)
{
    CSG_Vector Ywp(y.Get_N());

    if( y.Get_N() == p.Get_N() && Ywp.Create(y.Get_N()) )
    {
        for(int i=0; i<Ywp.Get_N(); i++)
        {
            Ywp[i] = w[i] * (y[i] - p[i]); // compute W(u) * (y - p)
        }
    }

    return( Ywp );
}

//---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Props(const CSG_Matrix &X, const CSG_Vector &b)
{
    CSG_Vector p(X.Get_NRows());

    for(int i=0; i<X.Get_NRows(); ++i)
    {
        double z = 0.;

        for(int j=0; j<X.Get_NCols(); ++j)
        {
            z += X[i][j] * b[j];
        }

        p[i] = 1. / (1. + exp(-z));
    }

    return( p );
}

//---------------------------------------------------------
bool CSG_Regression_Weighted::_Log_NoChange(const CSG_Vector &b_old, const CSG_Vector &b_new)
{
    for(int i=0; i<b_old.Get_N(); ++i)
    {
        if( fabs(b_old[i] - b_new[i]) > m_Log_Epsilon )
        {
            return( false );
        }
    }

    return( true ); // the new beta equals the old one within the given accuracy (m_Log_Epsilon)
}

//---------------------------------------------------------
bool CSG_Regression_Weighted::_Log_OutOfControl(const CSG_Vector &b_old, const CSG_Vector &b_new)
{
    for(int i=0; i<b_old.Get_N(); ++i)
    {
        if( b_old[i] == 0.0 )
        {
            return( false );
        }

        if( fabs(b_old[i] - b_new[i]) / fabs(b_old[i]) > m_Log_Difference ) // the new beta differs markedly from the old one
        {
            return( true );
        }
    }

    return( false );
}


///////////////////////////////////////////////////////////
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------
bool CSG_Regression_Weighted::Get_CrossValidation(int nSubSamples)
{
/*  if( Get_Predictor_Count() <= 1 )
    {
        return( false );
    }

    //-----------------------------------------------------
    CSG_Regression_Weighted Model;
    CSG_Simple_Statistics   Stats, SR, SE;

    int i, nModels = 0;

    for(i=0; i<m_Samples_Model.Get_NRows(); i++)
    {
        Stats += m_Samples_Model[i][0];
    }

    //-----------------------------------------------------
    // leave-one-out cross validation (LOOCV)
    if( nSubSamples <= 1 || nSubSamples > m_Samples_Model.Get_NRows() / 2 )
    {
        for(i=0; i<m_Samples_Model.Get_NRows() && SG_UI_Process_Get_Okay(); i++)
        {
            CSG_Vector w(m_w); w.Del_Row(i);
            CSG_Vector x(m_y); x.Del_Row(i);
            CSG_Matrix Y(m_X); Y.Del_Row(i);

            if( Model.Calculate(w, x, Y) )
            {
                nModels++;

                double dObsrv = m_Samples_Model[i][0];
                double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, m_Samples_Model[i] + 1));

                SE += SG_Get_Square(dModel - dObsrv);
                SR += SG_Get_Square(dModel - (Stats.Get_Sum() - dObsrv) / Samples.Get_NRows());
            }
        }
    }

    //-----------------------------------------------------
    // k-fold cross validation
    else
    {
        int *SubSet = new int[m_Samples_Model.Get_NRows()];

        for(i=0; i<m_Samples_Model.Get_NRows(); i++)
        {
            SubSet[i] = i % nSubSamples;
        }

        //-------------------------------------------------
        for(int iSubSet=0; iSubSet<nSubSamples && SG_UI_Process_Get_Okay(); iSubSet++)
        {
            CSG_Simple_Statistics Samples_Stats;
            CSG_Matrix Samples(m_Samples_Model), Validation;

            for(i=Samples.Get_NRows()-1; i>=0; i--)
            {
                if( SubSet[i] == iSubSet )
                {
                    Validation.Add_Row(Samples.Get_Row(i));
                    Samples   .Del_Row(i);
                }
                else
                {
                    Samples_Stats += Samples[i][0];
                }
            }

            //---------------------------------------------
            if( Model.Get_Model(Samples) )
            {
                nModels++;

                for(i=0; i<Validation.Get_NRows(); i++)
                {
                    double dObsrv = Validation[i][0];
                    double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, Validation[i] + 1));

                    SE += SG_Get_Square(dModel - dObsrv);
                    SR += SG_Get_Square(dModel - Samples_Stats.Get_Mean());
                }
            }
        }

        delete[](SubSet);
    }

    //-----------------------------------------------------
    m_CV_RMSE     = sqrt(SE.Get_Mean());
    m_CV_NRMSE    = sqrt(SE.Get_Mean()) / Stats.Get_Range();
    m_CV_R2       = SR.Get_Sum() / (SR.Get_Sum() + SE.Get_Sum());
    m_CV_nSamples = nModels;
*/
    //-----------------------------------------------------
    return( true );
}
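
//---------------------------------------------------------
// Note: the cross-validation body above is commented out; as written
// it would derive leave-one-out (nSubSamples <= 1) or k-fold
// validation statistics (m_CV_RMSE, m_CV_NRMSE, m_CV_R2) from
// repeatedly refitted models, but the function currently always
// returns true.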


///////////////////////////////////////////////////////////
// //
// //
// //
///////////////////////////////////////////////////////////

//---------------------------------------------------------