SAGA API v9.10
Loading...
Searching...
No Matches
mat_regression_weighted.cpp
Go to the documentation of this file.
1
3// //
4// SAGA //
5// //
6// System for Automated Geoscientific Analyses //
7// //
8// Application Programming Interface //
9// //
10// Library: SAGA_API //
11// //
12//-------------------------------------------------------//
13// //
14//               mat_regression_weighted.cpp             //
15// //
16// Copyright (C) 2005 by Olaf Conrad //
17// //
18//-------------------------------------------------------//
19// //
20// This file is part of 'SAGA - System for Automated //
21// Geoscientific Analyses'. //
22// //
23// This library is free software; you can redistribute //
24// it and/or modify it under the terms of the GNU Lesser //
25// General Public License as published by the Free //
26// Software Foundation, either version 2.1 of the //
27// License, or (at your option) any later version. //
28// //
29// This library is distributed in the hope that it will //
30// be useful, but WITHOUT ANY WARRANTY; without even the //
31// implied warranty of MERCHANTABILITY or FITNESS FOR A //
32// PARTICULAR PURPOSE. See the GNU Lesser General Public //
33// License for more details. //
34// //
35// You should have received a copy of the GNU Lesser //
36// General Public License along with this program; if //
37// not, see <http://www.gnu.org/licenses/>. //
38// //
39//-------------------------------------------------------//
40// //
41// contact: Olaf Conrad //
42// Institute of Geography //
43// University of Goettingen //
44// Goldschmidtstr. 5 //
45//                37077 Goettingen                       //
46// Germany //
47// //
48// e-mail: oconrad@saga-gis.org //
49// //
51
52//---------------------------------------------------------
53#include "mat_tools.h"
54
55
57// //
58// //
59// //
61
62//---------------------------------------------------------
64{
65 m_r2 = -1.0;
66
67 m_Log_maxIter = 30;
68 m_Log_Epsilon = 0.001;
69 m_Log_Difference = 1000.;
70}
71
72//---------------------------------------------------------
77
78//---------------------------------------------------------
80{
81 m_r2 = -1.0;
82
83 m_y.Destroy();
84 m_w.Destroy();
85 m_X.Destroy();
86 m_b.Destroy();
87
88 return( true );
89}
90
91
93// //
95
96//---------------------------------------------------------
97bool CSG_Regression_Weighted::Add_Sample(double Weight, double Dependent, const CSG_Vector &Predictors)
98{
99 if( m_X.Get_NRows() == 0 )
100 {
101 m_X.Create(Predictors.Get_N() + 1, 1);
102 }
103 else if( m_X.Get_NCols() == Predictors.Get_N() + 1 )
104 {
105 m_X.Add_Row();
106 }
107 else
108 {
109 return( false );
110 }
111
112 m_w.Add_Row(Weight );
113 m_y.Add_Row(Dependent);
114
115 double *y = m_X[m_X.Get_NRows() - 1];
116
117 y[0] = 1.0;
118
119 for(int i=0; i<Predictors.Get_N(); i++)
120 {
121 y[i + 1] = Predictors[i];
122 }
123
124 return( true );
125}
126
127
129// //
131
132//---------------------------------------------------------
133bool CSG_Regression_Weighted::Calculate(const CSG_Vector &Weights, const CSG_Vector &Dependents, const CSG_Matrix &Predictors, bool bLogistic)
134{
135 Destroy();
136
137 if( Weights.Get_N() == Dependents.Get_N() && Weights.Get_N() == Predictors.Get_NRows() )
138 {
139 for(int i=0; i<Weights.Get_N(); i++)
140 {
141 Add_Sample(Weights[i], Dependents[i], Predictors.Get_Row(i));
142
143 return( Calculate(bLogistic) );
144 }
145 }
146
147 return( false );
148}
149
150//---------------------------------------------------------
152{
153 sLong nSamples = m_w.Get_Size(), nPredictors = m_X.Get_NCols() - 1;
154
155 if( nSamples <= nPredictors || nSamples < 2 )
156 {
157 return( false );
158 }
159
160 //-----------------------------------------------------
161 if( bLogistic )
162 {
163 m_b = _Log_Get_Beta(m_X, m_y, m_w);
164
165 if( m_b.Get_N() == 0 )
166 {
167 return( false );
168 }
169 }
170 else
171 {
172 CSG_Matrix YtW(nSamples, 1 + nPredictors);
173
174 for(sLong i=0; i<nSamples; i++)
175 {
176 YtW[0][i] = m_w[i];
177
178 for(sLong j=1; j<=nPredictors; j++)
179 {
180 YtW[j][i] = m_w[i] * m_X[i][j];
181 }
182 }
183
184 m_b = (YtW * m_X).Get_Inverse() * (YtW * m_y);
185 }
186
187 //-----------------------------------------------------
188 CSG_Simple_Statistics yStats(m_y);
189
190 double rss = 0., tss = 0.;
191
192 for(sLong i=0; i<nSamples; i++)
193 {
194 double yr = m_b[0];
195
196 for(sLong j=1; j<=nPredictors; j++)
197 {
198 yr += m_b[j] * m_X[i][j];
199 }
200
201 if( bLogistic )
202 {
203 yr = 1. / (1. + exp(-yr));
204 }
205
206 rss += m_w[i] * SG_Get_Square(m_y[i] - yr);
207 tss += m_w[i] * SG_Get_Square(m_y[i] - yStats.Get_Mean());
208 }
209
210 //-----------------------------------------------------
211 if( tss > 0.0 && tss >= rss )
212 {
213 m_r2 = fabs(tss - rss) / tss;
214
215 return( true );
216 }
217
218 m_r2 = -1.;
219
220 return( false );
221}
222
223
225// //
227
228//---------------------------------------------------------
229// Zhang, D., Ren, N., and Hou, X. (2018):
230// An improved logistic regression model based on a spatially weighted technique
231// (ILRBSWT v1.0) and its application to mineral prospectivity mapping.
232// Geosci. Model Dev., 11, 2525-2539, https://doi.org/10.5194/gmd-11-2525-2018.
233
234//---------------------------------------------------------
236{
237 if( maxIter > 0 )
238 {
239 m_Log_maxIter = maxIter;
240
241 return( true );
242 }
243
244 return( false );
245}
246
247//---------------------------------------------------------
249{
250 if( Epsilon >= 0. )
251 {
252 m_Log_Epsilon = Epsilon;
253
254 return( true );
255 }
256
257 return( false );
258}
259
260//---------------------------------------------------------
262{
263 if( Difference > 0. )
264 {
265 m_Log_Difference = Difference;
266
267 return( true );
268 }
269
270 return( false );
271}
272
273//---------------------------------------------------------
274CSG_Vector CSG_Regression_Weighted::_Log_Get_Beta(const CSG_Matrix &X, const CSG_Vector &y, const CSG_Vector &w)
275{
276 CSG_Vector b(X.Get_NCols()), b_best;
277
278 CSG_Vector p = _Log_Get_Props(X, b);
279
280 for(int i=0; i<m_Log_maxIter; ++i)
281 {
282 CSG_Vector b_new = _Log_Get_Beta(b, X, y, w, p);
283
284 if( b_new.Get_N() == 0 )
285 {
286 return( b_best );
287 }
288
289 for(int j=0; j<b_new.Get_N(); ++j)
290 {
291 if( SG_is_NaN(b_new[j]) )
292 {
293 return( b_best );
294 }
295 }
296
297 if( _Log_NoChange(b, b_new) )
298 {
299 return( b_new );
300 }
301
302 if( _Log_OutOfControl(b, b_new) )
303 {
304 return( b_best );
305 }
306
307 p = _Log_Get_Props(X, b_new);
308 b = b_new;
309 b_best = b;
310 }
311
312 return( b_best );
313}
314
315//---------------------------------------------------------
316CSG_Vector CSG_Regression_Weighted::_Log_Get_Beta(const CSG_Vector &b, const CSG_Matrix &X, const CSG_Vector &y, const CSG_Vector &w, const CSG_Vector &p)
317{
318 CSG_Matrix Xt = X.Get_Transpose() ; // X'
319 CSG_Matrix M = Xt * _Log_Get_Xwp(p, X, w); // X'* w * V(t-1) * X
320 CSG_Matrix N = M.Get_Inverse() * Xt ; // inv(X'* w * V(t-1) * X) * X'
321 CSG_Vector v = N * _Log_Get_Ywp(p, y, w); // inv(X'* w * V(t-1) * X) * X' * w * (y - p(t-1))
322
323 if( v.Get_N() == b.Get_N() )
324 {
325 return( b + v );
326 }
327
328 return( CSG_Vector() );
329}
330
331//---------------------------------------------------------
332CSG_Matrix CSG_Regression_Weighted::_Log_Get_Xwp(const CSG_Vector &p, const CSG_Matrix &X, const CSG_Vector &w)
333{
334 CSG_Matrix Xwp;
335
336 if( p.Get_N() == X.Get_NRows() && Xwp.Create(X.Get_NCols(), X.Get_NRows()) )
337 {
338 for(int i=0; i<X.Get_NRows(); ++i)
339 {
340 for(int j=0; j<X.Get_NCols(); ++j)
341 {
342 Xwp[i][j] = w[i] * p[i] * (1. - p[i]) * X[i][j]; // compute W(u) * V(u) * X
343 }
344 }
345 }
346
347 return( Xwp );
348}
349
350//---------------------------------------------------------
351CSG_Vector CSG_Regression_Weighted::_Log_Get_Ywp(const CSG_Vector &p, const CSG_Vector &y, const CSG_Vector &w)
352{
353 CSG_Vector Ywp(y.Get_N());
354
355 if( y.Get_N() == p.Get_N() && Ywp.Create(y.Get_N()) )
356 {
357 for(int i=0; i<Ywp.Get_N(); i++)
358 {
359 Ywp[i] = w[i] * (y[i] - p[i]); // compute W(u) * (y - p)
360 }
361 }
362
363 return( Ywp );
364}
365
366//---------------------------------------------------------
367CSG_Vector CSG_Regression_Weighted::_Log_Get_Props(const CSG_Matrix &X, const CSG_Vector &b)
368{
369 CSG_Vector p(X.Get_NRows());
370
371 for(int i=0; i<X.Get_NRows(); ++i)
372 {
373 double z = 0.;
374
375 for(int j=0; j<X.Get_NCols(); ++j)
376 {
377 z += X[i][j] * b[j];
378 }
379
380 p[i] = 1. / (1. + exp(-z));
381 }
382
383 return( p );
384}
385
386//---------------------------------------------------------
387bool CSG_Regression_Weighted::_Log_NoChange(const CSG_Vector &b_old, const CSG_Vector &b_new)
388{
389 for(int i=0; i<b_old.Get_N(); ++i)
390 {
391 if( fabs(b_old[i] - b_new[i]) > m_Log_Epsilon )
392 {
393 return( false );
394 }
395 }
396
397 return( true ); // if the new beta is equal to the old one under certain accuracy
398}
399
400//---------------------------------------------------------
401bool CSG_Regression_Weighted::_Log_OutOfControl(const CSG_Vector &b_old, const CSG_Vector &b_new)
402{
403 for(int i=0; i<b_old.Get_N(); ++i)
404 {
405 if( b_old[i] == 0.0 )
406 {
407 return( false );
408 }
409
410 if( fabs(b_old[i] - b_new[i]) / fabs(b_old[i]) > m_Log_Difference )
411 {
412 return( true );
413 }
414 }
415
416 return( false ); // if the new beta has obvious difference with the old one
417}
418
419
421// //
423
424//---------------------------------------------------------
426{
427/* if( Get_Predictor_Count() <= 1 )
428 {
429 return( false );
430 }
431
432 //-----------------------------------------------------
433 CSG_Regression_Weighted Model;
434 CSG_Simple_Statistics Stats, SR, SE;
435
436 int i, nModels = 0;
437
438 for(i=0; i<m_Samples_Model.Get_NRows(); i++)
439 {
440 Stats += m_Samples_Model[i][0];
441 }
442
443 //-----------------------------------------------------
444 // leave-one-out cross validation (LOOCV)
445 if( nSubSamples <= 1 || nSubSamples > m_Samples_Model.Get_NRows() / 2 )
446 {
447 for(i=0; i<m_Samples_Model.Get_NRows() && SG_UI_Process_Get_Okay(); i++)
448 {
449 CSG_Vector w(m_w); w.Del_Row(i); Model
450 CSG_Vector x(m_y); x.Del_Row(i);
451 CSG_Matrix Y(m_X); Y.Del_Row(i);
452
453 if( Model.Calculate(w, x, Y) )
454 {
455 nModels++;
456
457 double dObsrv = m_Samples_Model[i][0];
458 double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, m_Samples_Model[i] + 1));
459
460 SE += SG_Get_Square(dModel - dObsrv);
461 SR += SG_Get_Square(dModel - (Stats.Get_Sum() - dObsrv) / Samples.Get_NRows());
462 }
463 }
464 }
465
466 //-----------------------------------------------------
467 // k-fold cross validation
468 else
469 {
470 int *SubSet = new int[m_Samples_Model.Get_NRows()];
471
472 for(i=0; i<m_Samples_Model.Get_NRows(); i++)
473 {
474 SubSet[i] = i % nSubSamples;
475 }
476
477 //-------------------------------------------------
478 for(int iSubSet=0; iSubSet<nSubSamples && SG_UI_Process_Get_Okay(); iSubSet++)
479 {
480 CSG_Simple_Statistics Samples_Stats;
481 CSG_Matrix Samples(m_Samples_Model), Validation;
482
483 for(i=Samples.Get_NRows()-1; i>=0; i--)
484 {
485 if( SubSet[i] == iSubSet )
486 {
487 Validation.Add_Row(Samples.Get_Row(i));
488 Samples .Del_Row(i);
489 }
490 else
491 {
492 Samples_Stats += Samples[i][0];
493 }
494 }
495
496 //---------------------------------------------
497 if( Model.Get_Model(Samples) )
498 {
499 nModels++;
500
501 for(i=0; i<Validation.Get_NRows(); i++)
502 {
503 double dObsrv = Validation[i][0];
504 double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, Validation[i] + 1));
505
506 SE += SG_Get_Square(dModel - dObsrv);
507 SR += SG_Get_Square(dModel - Samples_Stats.Get_Mean());
508 }
509 }
510 }
511
512 delete[](SubSet);
513 }
514
515 //-----------------------------------------------------
516 m_CV_RMSE = sqrt(SE.Get_Mean());
517 m_CV_NRMSE = sqrt(SE.Get_Mean()) / Stats.Get_Range();
518 m_CV_R2 = SR.Get_Sum() / (SR.Get_Sum() + SE.Get_Sum());
519 m_CV_nSamples = nModels;
520*/
521 //-----------------------------------------------------
522 return( true );
523}
524
525
527// //
528// //
529// //
531
532//---------------------------------------------------------
signed long long sLong
Definition api_core.h:158
#define SG_is_NaN(x)
Definition api_core.h:168
sLong Get_NRows(void) const
Definition mat_tools.h:525
CSG_Matrix Get_Transpose(void) const
sLong Get_NCols(void) const
Definition mat_tools.h:524
bool Create(const CSG_Matrix &Matrix)
CSG_Matrix Get_Inverse(bool bSilent=true, int nSubSquare=0) const
CSG_Vector Get_Row(sLong Row) const
bool Get_CrossValidation(int nSubSamples=0)
bool Set_Log_Difference(double Difference)
bool Calculate(const CSG_Vector &Weights, const CSG_Vector &Dependents, const CSG_Matrix &Predictors, bool bLogistic=false)
bool Add_Sample(double Weight, double Dependent, const CSG_Vector &Predictors)
double Get_Mean(void)
Definition mat_tools.h:753
int Get_N(void) const
Definition mat_tools.h:384
SAGA_API_DLL_EXPORT double SG_Get_Square(double Value)
Definition mat_tools.cpp:70