SAGA API v9.10
Loading...
Searching...
No Matches
mat_regression_multiple.cpp
Go to the documentation of this file.
1
3// //
4// SAGA //
5// //
6// System for Automated Geoscientific Analyses //
7// //
8// Application Programming Interface //
9// //
10// Library: SAGA_API //
11// //
12//-------------------------------------------------------//
13// //
14// mat_regression_multiple.cpp //
15// //
16// Copyright (C) 2005 by Olaf Conrad //
17// //
18//-------------------------------------------------------//
19// //
20// This file is part of 'SAGA - System for Automated //
21// Geoscientific Analyses'. //
22// //
23// This library is free software; you can redistribute //
24// it and/or modify it under the terms of the GNU Lesser //
25// General Public License as published by the Free //
26// Software Foundation, either version 2.1 of the //
27// License, or (at your option) any later version. //
28// //
29// This library is distributed in the hope that it will //
30// be useful, but WITHOUT ANY WARRANTY; without even the //
31// implied warranty of MERCHANTABILITY or FITNESS FOR A //
32// PARTICULAR PURPOSE. See the GNU Lesser General Public //
33// License for more details. //
34// //
35// You should have received a copy of the GNU Lesser //
36// General Public License along with this program; if //
37// not, see <http://www.gnu.org/licenses/>. //
38// //
39//-------------------------------------------------------//
40// //
41// contact: Olaf Conrad //
42// Institute of Geography //
43// University of Goettingen //
44// Goldschmidtstr. 5 //
45// 37077 Goettingen //
46// Germany //
47// //
48// e-mail: oconrad@saga-gis.org //
49// //
51
52//---------------------------------------------------------
53#include "mat_tools.h"
54#include "table.h"
55
56
58// //
59// //
60// //
62
63//---------------------------------------------------------
84
85//---------------------------------------------------------
105
106
108// //
109// //
110// //
112
113//---------------------------------------------------------
115{
117
118 m_pRegression ->Add_Field("VAR_ID" , SG_DATATYPE_Int); // MLR_VAR_ID
119 m_pRegression ->Add_Field("VAR_NAME" , SG_DATATYPE_String); // MLR_VAR_NAME
120 m_pRegression ->Add_Field("REGCOEFF" , SG_DATATYPE_Double); // MLR_VAR_RCOEFF
121 m_pRegression ->Add_Field("R" , SG_DATATYPE_Double); // MLR_VAR_R
122 m_pRegression ->Add_Field("R2" , SG_DATATYPE_Double); // MLR_VAR_R2
123 m_pRegression ->Add_Field("R2_ADJ" , SG_DATATYPE_Double); // MLR_VAR_R2_ADJ
124 m_pRegression ->Add_Field("STD_ERROR" , SG_DATATYPE_Double); // MLR_VAR_SE
125 m_pRegression ->Add_Field("T" , SG_DATATYPE_Double); // MLR_VAR_T
126 m_pRegression ->Add_Field("SIG" , SG_DATATYPE_Double); // MLR_VAR_SIG
127 m_pRegression ->Add_Field("P" , SG_DATATYPE_Double); // MLR_VAR_P
128
129 //-----------------------------------------------------
130 m_pSteps = new CSG_Table;
131
132 m_pSteps ->Add_Field("MODEL" , SG_DATATYPE_Int); // MLR_STEP_NR
133 m_pSteps ->Add_Field("R" , SG_DATATYPE_Double); // MLR_STEP_R
134 m_pSteps ->Add_Field("R2" , SG_DATATYPE_Double); // MLR_STEP_R2
135 m_pSteps ->Add_Field("R2_ADJ" , SG_DATATYPE_Double); // MLR_STEP_R2_ADJ
136 m_pSteps ->Add_Field("STD_ERROR" , SG_DATATYPE_Double); // MLR_STEP_SE
137 m_pSteps ->Add_Field("SSR" , SG_DATATYPE_Double); // MLR_STEP_SSR
138 m_pSteps ->Add_Field("SSE" , SG_DATATYPE_Double); // MLR_STEP_SSE
139 m_pSteps ->Add_Field("MSR" , SG_DATATYPE_Double); // MLR_STEP_MSR
140 m_pSteps ->Add_Field("MSE" , SG_DATATYPE_Double); // MLR_STEP_MSE
141 m_pSteps ->Add_Field("DF" , SG_DATATYPE_Double); // MLR_STEP_DF
142 m_pSteps ->Add_Field("F" , SG_DATATYPE_Double); // MLR_STEP_F
143 m_pSteps ->Add_Field("SIG" , SG_DATATYPE_Double); // MLR_STEP_SIG
144 m_pSteps ->Add_Field("VAR_F" , SG_DATATYPE_Double); // MLR_STEP_VAR_F
145 m_pSteps ->Add_Field("VAR_SIG" , SG_DATATYPE_Double); // MLR_STEP_VAR_SIG
146 m_pSteps ->Add_Field("DIR" , SG_DATATYPE_String); // MLR_STEP_DIR
147 m_pSteps ->Add_Field("VARIABLE" , SG_DATATYPE_String); // MLR_STEP_VAR
148
149 //-----------------------------------------------------
150 m_pModel = new CSG_Table;
151
152 m_pModel ->Add_Field("PARAMETER" , SG_DATATYPE_String);
153 m_pModel ->Add_Field("VALUE" , SG_DATATYPE_Double);
154
155 m_pModel ->Add_Record()->Set_Value(0, SG_T("R2" )); // MLR_MODEL_R2
156 m_pModel ->Add_Record()->Set_Value(0, SG_T("R2_ADJ" )); // MLR_MODEL_R2_ADJ
157 m_pModel ->Add_Record()->Set_Value(0, SG_T("STD_ERROR" )); // MLR_MODEL_SE
158 m_pModel ->Add_Record()->Set_Value(0, SG_T("SSR" )); // MLR_MODEL_SSR
159 m_pModel ->Add_Record()->Set_Value(0, SG_T("SSE" )); // MLR_MODEL_SSE
160 m_pModel ->Add_Record()->Set_Value(0, SG_T("SST" )); // MLR_MODEL_SST
161 m_pModel ->Add_Record()->Set_Value(0, SG_T("MSR" )); // MLR_MODEL_MSR
162 m_pModel ->Add_Record()->Set_Value(0, SG_T("MSE" )); // MLR_MODEL_MSE
163 m_pModel ->Add_Record()->Set_Value(0, SG_T("F" )); // MLR_MODEL_F
164 m_pModel ->Add_Record()->Set_Value(0, SG_T("SIG" )); // MLR_MODEL_SIG
165 m_pModel ->Add_Record()->Set_Value(0, SG_T("PREDICTORS" )); // MLR_MODEL_NPREDICT
166 m_pModel ->Add_Record()->Set_Value(0, SG_T("SAMPLES" )); // MLR_MODEL_NSAMPLES
167 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_MSE" )); // MLR_MODEL_CV_MSE
168 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_RMSE" )); // MLR_MODEL_CV_RMSE
169 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_NRMSE" )); // MLR_MODEL_CV_RMSE
170 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_R2" )); // MLR_MODEL_CV_R2
171 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_SAMPLES" )); // MLR_MODEL_CV_NSAMPLES
172
173 //-----------------------------------------------------
174 m_Predictor = NULL;
175 m_nPredictors = 0;
176
177 m_bIntercept = bIntercept;
178}
179
180//---------------------------------------------------------
182{
183 Destroy();
184
185 delete(m_pRegression);
186 delete(m_pModel);
187 delete(m_pSteps);
188}
189
190
192// //
194
195//---------------------------------------------------------
197{
198 m_Names .Clear ();
199 m_Samples .Destroy();
200 m_Samples_Model.Destroy();
201
202 m_pRegression->Del_Records();
203 m_pSteps ->Del_Records();
204
205 for(int i=0; i<m_pModel->Get_Count(); i++)
206 {
207 m_pModel->Get_Record(i)->Set_NoData(1);
208 }
209
210 if( m_Predictor )
211 {
212 delete[](m_bIncluded);
213 delete[](m_Predictor);
214
215 m_Predictor = NULL;
216 m_nPredictors = 0;
217 }
218}
219
220
222// //
224
225//---------------------------------------------------------
227{
228 Destroy();
229
230 int nPredictors = Samples.Get_NX() - 1;
231
232 if( nPredictors < 1 || Samples.Get_NY() <= nPredictors )
233 {
234 return( false );
235 }
236
237 //-------------------------------------------------
238 for(int i=0; i<=nPredictors; i++)
239 {
240 m_Names += pNames && pNames->Get_Count() == Samples.Get_NCols() ? pNames->Get_String(i) : i == 0
241 ? CSG_String::Format( "%s", _TL("Dependent"))
242 : CSG_String::Format("%d. %s", i + 1, _TL("Predictor"));
243 }
244
245 m_Samples = Samples;
246
247 m_bIncluded = new int[nPredictors];
248 m_Predictor = new int[nPredictors];
249
250 //-------------------------------------------------
251 return( true );
252}
253
254//---------------------------------------------------------
256{
257 int nPredictors = m_Samples.Get_NX() - 1;
258
259 if( nPredictors < 1 || m_Samples.Get_NY() <= nPredictors )
260 {
261 return( false );
262 }
263
264 //-------------------------------------------------
265 if( bInclude )
266 {
267 m_nPredictors = nPredictors;
269 }
270 else
271 {
272 m_nPredictors = 0;
273 m_Samples_Model.Set_Col(m_Samples.Get_Col(0));
274 }
275
276 for(int i=0; i<nPredictors; i++)
277 {
278 m_Predictor[i] = i;
279 m_bIncluded[i] = bInclude;
280 }
281
282 for(int i=0; i<m_pModel->Get_Count(); i++)
283 {
284 m_pModel->Get_Record(i)->Set_NoData(1);
285 }
286
287 //-------------------------------------------------
288 return( true );
289}
290
291
293// //
295
296//---------------------------------------------------------
298{
299 return( Set_Data(Samples, pNames) && Get_Model() );
300}
301
302bool CSG_Regression_Multiple::Get_Model_Forward(const CSG_Matrix &Samples, double P_in, CSG_Strings *pNames)
303{
304 return( Set_Data(Samples, pNames) && Get_Model_Forward(P_in) );
305}
306
307bool CSG_Regression_Multiple::Get_Model_Backward(const CSG_Matrix &Samples, double P_out, CSG_Strings *pNames)
308{
309 return( Set_Data(Samples, pNames) && Get_Model_Backward(P_out) );
310}
311
312bool CSG_Regression_Multiple::Get_Model_Stepwise(const CSG_Matrix &Samples, double P_in, double P_out, CSG_Strings *pNames)
313{
314 return( Set_Data(Samples, pNames) && Get_Model_Stepwise(P_in, P_out) );
315}
316
317//---------------------------------------------------------
319{
320 return( _Initialize(true) && _Get_Regression(m_Samples) );
321}
322
323//---------------------------------------------------------
325{
326 if( _Initialize(false) )
327 {
328 double R2 = 0.;
329
330 while( _Get_Step_In(m_Samples_Model, P_in, R2, m_Samples) >= 0 );
331
333 }
334
335 return( false );
336}
337
338//---------------------------------------------------------
340{
341 if( _Initialize(true) )
342 {
343 double R2 = 0.;
344
345 while( _Get_Step_Out(m_Samples_Model, P_out, R2) >= 0 );
346
348 }
349
350 return( false );
351}
352
353//---------------------------------------------------------
354bool CSG_Regression_Multiple::Get_Model_Stepwise(double P_in, double P_out)
355{
356 if( _Initialize(false) )
357 {
358 double R2 = 0.;
359
360 if( P_out <= P_in )
361 {
362 P_out = P_in + 0.001;
363 }
364
365 while( _Get_Step_In(m_Samples_Model, P_in, R2, m_Samples) >= 0 && SG_UI_Process_Get_Okay() )
366 {
367 if( m_nPredictors > 1 )
368 {
369 _Get_Step_Out(m_Samples_Model, P_out, R2);
370 }
371 }
372
374 }
375
376 return( false );
377}
378
379
381// //
383
384//---------------------------------------------------------
386{
387 if( m_Samples_Model.Get_NCols() <= 1 )
388 {
389 return( false );
390 }
391
392 //-----------------------------------------------------
394 CSG_Simple_Statistics Stats, SR, SE;
395
396 int i, nModels = 0;
397
398 for(i=0; i<m_Samples_Model.Get_NRows(); i++)
399 {
400 Stats += m_Samples_Model[i][0];
401 }
402
403 //-----------------------------------------------------
404 // leave-one-out cross validation (LOOCV)
405 if( nSubSamples <= 1 || nSubSamples > m_Samples_Model.Get_NRows() / 2 )
406 {
407 for(i=0; i<m_Samples_Model.Get_NRows() && SG_UI_Process_Get_Okay(); i++)
408 {
410 Samples.Del_Row(i);
411
412 if( Model.Get_Model(Samples) )
413 {
414 nModels++;
415
416 double dObsrv = m_Samples_Model[i][0];
417 double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, m_Samples_Model[i] + 1));
418
419 SE += SG_Get_Square(dModel - dObsrv);
420 SR += SG_Get_Square(dModel - (Stats.Get_Sum() - dObsrv) / Samples.Get_NRows());
421 }
422 }
423 }
424
425 //-----------------------------------------------------
426 // k-fold cross validation
427 else
428 {
429 int *SubSet = new int[m_Samples_Model.Get_NRows()];
430
431 for(i=0; i<m_Samples_Model.Get_NRows(); i++)
432 {
433 SubSet[i] = i % nSubSamples;
434 }
435
436 //-------------------------------------------------
437 for(int iSubSet=0; iSubSet<nSubSamples && SG_UI_Process_Get_Okay(); iSubSet++)
438 {
439 CSG_Simple_Statistics Samples_Stats;
440 CSG_Matrix Samples(m_Samples_Model), Validation;
441
442 for(i=Samples.Get_NY()-1; i>=0; i--)
443 {
444 if( SubSet[i] == iSubSet )
445 {
446 Validation.Add_Row(Samples.Get_Row(i));
447 Samples .Del_Row(i);
448 }
449 else
450 {
451 Samples_Stats += Samples[i][0];
452 }
453 }
454
455 //---------------------------------------------
456 if( Model.Get_Model(Samples) )
457 {
458 nModels++;
459
460 for(i=0; i<Validation.Get_NRows(); i++)
461 {
462 double dObsrv = Validation[i][0];
463 double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, Validation[i] + 1));
464
465 SE += SG_Get_Square(dModel - dObsrv);
466 SR += SG_Get_Square(dModel - Samples_Stats.Get_Mean());
467 }
468 }
469 }
470
471 delete[](SubSet);
472 }
473
474 //-----------------------------------------------------
475 m_pModel->Get_Record(MLR_MODEL_CV_MSE )->Set_Value(1, SE.Get_Mean());
476 m_pModel->Get_Record(MLR_MODEL_CV_RMSE )->Set_Value(1, sqrt(SE.Get_Mean()));
477 m_pModel->Get_Record(MLR_MODEL_CV_NRMSE )->Set_Value(1, sqrt(SE.Get_Mean()) / Stats.Get_Range());
478 m_pModel->Get_Record(MLR_MODEL_CV_R2 )->Set_Value(1, SR.Get_Sum() / (SR.Get_Sum() + SE.Get_Sum()));
479 m_pModel->Get_Record(MLR_MODEL_CV_NSAMPLES)->Set_Value(1, nModels);
480
481 //-----------------------------------------------------
482 return( true );
483}
484
485
487// //
488// //
489// //
491
492//---------------------------------------------------------
493double CSG_Regression_Multiple::Get_Value(const CSG_Vector &Predictors) const
494{
495 double Value; Get_Value(Predictors, Value); return( Value );
496}
497
498bool CSG_Regression_Multiple::Get_Value(const CSG_Vector &Predictors, double &Value) const
499{
500 if( m_nPredictors == Predictors.Get_N() )
501 {
502 Value = Get_RConst();
503
504 for(int i=0; i<m_nPredictors; i++)
505 {
506 Value += Get_RCoeff(i) * Predictors(i);
507 }
508
509 return( true );
510 }
511
512 Value = 0.;
513
514 return( false );
515}
516
517//---------------------------------------------------------
519{
520 double Value; Get_Residual(iSample, Value); return( Value );
521}
522
523bool CSG_Regression_Multiple::Get_Residual(int iSample, double &Residual) const
524{
525 if( iSample >= 0 && iSample < m_Samples_Model.Get_NRows() )
526 {
527 Residual = Get_RConst();
528
529 for(int i=0; i<m_nPredictors; i++)
530 {
531 Residual += Get_RCoeff(i) * m_Samples_Model[iSample][1 + i];
532 }
533
534 Residual -= m_Samples_Model[iSample][0];
535
536 return( true );
537 }
538
539 Residual = 0.;
540
541 return( false );
542}
543
544//---------------------------------------------------------
546{
547 Residuals.Create(m_Samples_Model.Get_NRows());
548
549 for(int i=0; i<Residuals.Get_N(); i++)
550 {
551 Get_Residual(i, Residuals[i]);
552 }
553
554 return( Residuals.Get_N() > 0 );
555}
556
557
559// //
561
562//---------------------------------------------------------
563inline double CSG_Regression_Multiple::_Get_F(int nPredictors, int nSamples, double r2_full, double r2_reduced)
564{
565 return( (nSamples - nPredictors - 1) * (r2_full - r2_reduced) / (1. - r2_full) );
566}
567
568//---------------------------------------------------------
569inline double CSG_Regression_Multiple::_Get_P(int nPredictors, int nSamples, double r2_full, double r2_reduced)
570{
571 double f = _Get_F(nPredictors, nSamples, r2_full, r2_reduced);
572
573 return( CSG_Test_Distribution::Get_F_Tail(f, nPredictors, nSamples - nPredictors - 1, TESTDIST_TYPE_Right) );
574}
575
576
578// //
580
581//---------------------------------------------------------
583{
584 int nPredictors = Samples.Get_NX() - 1;
585 int nSamples = Samples.Get_NY();
586
587 //-----------------------------------------------------
588 int i, j;
589 double Ym, SSR, SSE, SST, MSR, MSE, SE, R2, F;
590 CSG_Vector Y, Yr, B;
591 CSG_Matrix X, Xt, C;
592
593 Y.Create(nSamples);
594 X.Create(nPredictors + (m_bIntercept ? 1 : 0), nSamples);
595
596 //-----------------------------------------------------
597 for(i=0, Ym=0.; i<nSamples; i++)
598 {
599 Ym += Y[i] = Samples[i][0];
600
601 if( m_bIntercept )
602 {
603 X [i][0] = 1.;
604
605 for(j=1; j<=nPredictors; j++)
606 {
607 X[i][j] = Samples[i][j];
608 }
609 }
610 else
611 {
612 for(j=0; j<nPredictors; j++)
613 {
614 X[i][j] = Samples[i][j + 1];
615 }
616 }
617 }
618
619 Ym /= nSamples;
620
621 //-----------------------------------------------------
622 Xt = X.Get_Transpose();
623
624 C = (Xt * X).Get_Inverse();
625
626 B = C * (Xt * Y);
627
628 //-----------------------------------------------------
629 Yr = X * B;
630
631 for(i=0, SSE=0., SSR=0., SST=0.; i<nSamples; i++)
632 {
633 SSE += SG_Get_Square(Yr[i] - Y[i]);
634 SSR += SG_Get_Square(Yr[i] - Ym);
635 // SST += SG_Get_Square(Y [i] - Ym);
636 }
637
638// SSE = SST - SSR;
639 SST = SSR + SSE;
640 MSR = SSR / nPredictors;
641 MSE = SSE / (nSamples - nPredictors - 1);
642 SE = sqrt(SSE / (nSamples - nPredictors));
643 R2 = SSR / SST;
644 F = MSR / MSE; // = (nSamples - nPredictors - 1) * (R2 / nPredictors) / (1. - R2);
645
646 //-----------------------------------------------------
647 m_pModel->Get_Record(MLR_MODEL_R2 )->Set_Value(1, R2);
648 m_pModel->Get_Record(MLR_MODEL_R2_ADJ )->Set_Value(1, SG_Regression_Get_Adjusted_R2(R2, nSamples, nPredictors));
649 m_pModel->Get_Record(MLR_MODEL_SE )->Set_Value(1, SE);
650 m_pModel->Get_Record(MLR_MODEL_SSR )->Set_Value(1, SSR);
651 m_pModel->Get_Record(MLR_MODEL_SSE )->Set_Value(1, SSE);
652 m_pModel->Get_Record(MLR_MODEL_SST )->Set_Value(1, SST);
653 m_pModel->Get_Record(MLR_MODEL_MSR )->Set_Value(1, MSR);
654 m_pModel->Get_Record(MLR_MODEL_MSE )->Set_Value(1, MSE);
655 m_pModel->Get_Record(MLR_MODEL_F )->Set_Value(1, F);
656 m_pModel->Get_Record(MLR_MODEL_SIG )->Set_Value(1, CSG_Test_Distribution::Get_F_Tail_from_R2(R2, nPredictors, nSamples));
657 m_pModel->Get_Record(MLR_MODEL_NPREDICT)->Set_Value(1, nPredictors);
658 m_pModel->Get_Record(MLR_MODEL_NSAMPLES)->Set_Value(1, nSamples);
659
660 //-----------------------------------------------------
661 CSG_Matrix P = SG_Get_Correlation_Matrix(Samples, true).Get_Inverse(); // get partial correlation
662
663 if( !m_bIntercept )
664 {
665 m_pRegression->Add_Record()->Set_Value(MLR_VAR_NAME, m_Names[0]);
666 }
667
668 for(j=0; j<B.Get_N(); j++)
669 {
670 double se = SE * sqrt(fabs(C[j][j]));
671 double b = B[j];
672 double t = b / se;
673 int k = (m_bIntercept ? j : j + 1);
674 double r = -P[k][0] / sqrt(P[k][k] * P[0][0]);
675
676 CSG_Table_Record *pRecord = m_pRegression->Add_Record();
677
678 pRecord->Set_Value(MLR_VAR_ID , m_bIntercept ? j - 1 : j);
679 pRecord->Set_Value(MLR_VAR_NAME , m_Names[k]);
680 pRecord->Set_Value(MLR_VAR_RCOEFF , b);
681 pRecord->Set_Value(MLR_VAR_R , r);
682 pRecord->Set_Value(MLR_VAR_R2 , r*r);
683 pRecord->Set_Value(MLR_VAR_R2_ADJ , SG_Regression_Get_Adjusted_R2(r*r, nSamples, nPredictors));
684 pRecord->Set_Value(MLR_VAR_SE , se);
685 pRecord->Set_Value(MLR_VAR_T , t);
687 }
688
689 //-----------------------------------------------------
690 return( true );
691}
692
693
695// //
696// //
697// //
699
700//---------------------------------------------------------
701int CSG_Regression_Multiple::_Get_Step_In(CSG_Matrix &X, double P_in, double &R2, const CSG_Matrix &Samples)
702{
703 int iBest, iPredictor; double rBest;
704
706
707 X.Add_Cols(1);
708
709 //-----------------------------------------------------
710 for(iPredictor=0, iBest=-1, rBest=0.; iPredictor<Samples.Get_NX()-1; iPredictor++)
711 {
712 if( !m_bIncluded[iPredictor] )
713 {
714 X.Set_Col(1 + m_nPredictors, Samples.Get_Col(1 + iPredictor));
715
716 if( R.Get_Model(X) && (iBest < 0 || rBest < R.Get_R2()) )
717 {
718 iBest = iPredictor;
719 rBest = R.Get_R2();
720 }
721 }
722 }
723
724 //-----------------------------------------------------
725 if( iBest >= 0 && _Get_P(1, Samples.Get_NY() - m_nPredictors, rBest, R2) <= P_in )
726 {
727 m_bIncluded[iBest] = true;
728 m_Predictor[m_nPredictors] = iBest;
729
731
732 X.Set_Col(m_nPredictors, Samples.Get_Col(1 + iBest));
733 _Set_Step_Info(X, R2, iBest, true);
734 R2 = rBest;
735
736 return( iBest );
737 }
738
739 X.Del_Col(X.Get_NX() - 1);
740
741 return( -1 );
742}
743
744//---------------------------------------------------------
745int CSG_Regression_Multiple::_Get_Step_Out(CSG_Matrix &X, double P_out, double &R2)
746{
747 int iBest, iPredictor; double rBest;
748
750
751 if( R2 <= 0. )
752 {
753 R.Get_Model(X);
754
755 R2 = R.Get_R2();
756 }
757
758 //-----------------------------------------------------
759 for(iPredictor=0, iBest=-1, rBest=0.; iPredictor<m_nPredictors; iPredictor++)
760 {
761 CSG_Matrix X_reduced(X);
762
763 X_reduced.Del_Col(1 + iPredictor);
764
765 if( R.Get_Model(X_reduced) && (iBest < 0 || rBest < R.Get_R2()) )
766 {
767 iBest = iPredictor;
768 rBest = R.Get_R2();
769 }
770 }
771
772 //-----------------------------------------------------
773 if( iBest >= 0 && _Get_P(1, X.Get_NY() - (m_nPredictors - 1), R2, rBest) > P_out )
774 {
776
777 X.Del_Col(1 + iBest);
778 _Set_Step_Info(X, R2, m_Predictor[iBest], false);
779 R2 = rBest;
780
781 m_bIncluded[m_Predictor[iBest]] = false;
782
783 for(iPredictor=iBest; iPredictor<m_nPredictors; iPredictor++)
784 {
785 m_Predictor[iPredictor] = m_Predictor[iPredictor + 1];
786 }
787
788 return( iBest );
789 }
790
791 return( -1 );
792}
793
794
796// //
797// //
798// //
800
801//---------------------------------------------------------
803{
805
806 if( m_nPredictors > 0 && R.Get_Model(X) )
807 {
808 m_pModel ->Assign(R.m_pModel);
809 m_pRegression ->Assign(R.m_pRegression);
810
811 m_pRegression->Get_Record(0)->Set_Value(MLR_VAR_NAME, m_Names[0]);
812
813 for(int i=0; i<m_nPredictors; i++)
814 {
815 CSG_Table_Record *pRecord = m_pRegression->Get_Record(1 + i);
816
817 pRecord->Set_Value(MLR_VAR_ID , m_Predictor[i]);
818 pRecord->Set_Value(MLR_VAR_NAME, m_Names[1 + m_Predictor[i]]);
819 }
820
821 return( true );
822 }
823
824 return( false );
825}
826
827//---------------------------------------------------------
828bool CSG_Regression_Multiple::_Set_Step_Info(const CSG_Matrix &X, double R2_prev, int iVariable, bool bIn)
829{
831
832 R.Get_Model(X);
833
834 CSG_Table_Record *pRecord = m_pSteps->Add_Record();
835
836 pRecord->Set_Value(MLR_STEP_NR , (int)m_pSteps->Get_Count());
837 pRecord->Set_Value(MLR_STEP_R , sqrt(R.Get_R2()));
838 pRecord->Set_Value(MLR_STEP_R2 , R.Get_R2());
839 pRecord->Set_Value(MLR_STEP_R2_ADJ , R.Get_R2_Adj());
840 pRecord->Set_Value(MLR_STEP_SE , R.Get_StdError());
845 pRecord->Set_Value(MLR_STEP_DF , X.Get_NRows() - m_nPredictors - 1);
848 pRecord->Set_Value(MLR_STEP_VAR_F , _Get_F(1, X.Get_NY() - (m_nPredictors - 1), bIn ? R.Get_R2() : R2_prev, bIn ? R2_prev : R.Get_R2()));
849 pRecord->Set_Value(MLR_STEP_VAR_SIG , _Get_P(1, X.Get_NY() - (m_nPredictors - 1), bIn ? R.Get_R2() : R2_prev, bIn ? R2_prev : R.Get_R2()));
850 pRecord->Set_Value(MLR_STEP_DIR , bIn ? SG_T(">>") : SG_T("<<"));
851 pRecord->Set_Value(MLR_STEP_VAR , m_Names[1 + iVariable]);
852
853 return( true );
854}
855
856
858// //
859// //
860// //
862
863//---------------------------------------------------------
864double CSG_Regression_Multiple::Get_R2 (void) const { return( m_pModel->Get_Record(MLR_MODEL_R2 )->asDouble(1) ); }
865double CSG_Regression_Multiple::Get_R2_Adj (void) const { return( m_pModel->Get_Record(MLR_MODEL_R2_ADJ )->asDouble(1) ); }
866double CSG_Regression_Multiple::Get_StdError (void) const { return( m_pModel->Get_Record(MLR_MODEL_SE )->asDouble(1) ); }
867double CSG_Regression_Multiple::Get_F (void) const { return( m_pModel->Get_Record(MLR_MODEL_F )->asDouble(1) ); }
868double CSG_Regression_Multiple::Get_P (void) const { return( m_pModel->Get_Record(MLR_MODEL_SIG )->asDouble(1) ); }
869double CSG_Regression_Multiple::Get_CV_RMSE (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_RMSE )->asDouble(1) ); }
870double CSG_Regression_Multiple::Get_CV_NRMSE (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_NRMSE )->asDouble(1) ); }
871double CSG_Regression_Multiple::Get_CV_R2 (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_R2 )->asDouble(1) ); }
872int CSG_Regression_Multiple::Get_CV_nSamples (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_NSAMPLES)->asInt (1) ); }
873int CSG_Regression_Multiple::Get_nPredictors (void) const { return( m_pModel->Get_Record(MLR_MODEL_NPREDICT )->asInt (1) ); }
874int CSG_Regression_Multiple::Get_nSamples (void) const { return( m_pModel->Get_Record(MLR_MODEL_NSAMPLES )->asInt (1) ); }
876
877//---------------------------------------------------------
878const SG_Char * CSG_Regression_Multiple::Get_Name(int iVariable) const
879{
880 if( iVariable >= 0 && iVariable < m_pRegression->Get_Count() - 1 )
881 {
882 return( m_pRegression->Get_Record(1 + iVariable)->asString(MLR_VAR_NAME) );
883 }
884
885 return( SG_T("") );
886}
887
888//---------------------------------------------------------
890{
891 if( m_pRegression->Get_Count() > 0 )
892 {
893 return( m_pRegression->Get_Record(0)->asDouble(MLR_VAR_RCOEFF) );
894 }
895
896 return( 0. );
897}
898
899//---------------------------------------------------------
900double CSG_Regression_Multiple::Get_Parameter(int iVariable, int Parameter) const
901{
902 if( iVariable >= 0 && iVariable < m_pRegression->Get_Count() - 1 && Parameter >= 0 && Parameter <= MLR_VAR_P )
903 {
904 return( m_pRegression->Get_Record(1 + iVariable)->asDouble(Parameter) );
905 }
906
907 return( 0. );
908}
909
910
912// //
914
915//---------------------------------------------------------
917{
918 CSG_String s;
919
920 if( Get_nPredictors() < 1 )
921 {
922 return( s );
923 }
924
925 //-----------------------------------------------------
926 if( m_pSteps->Get_Count() > 0 )
927 {
928 s += CSG_String::Format("\n%s:\n\n", _TL("Steps"));
929 s += CSG_String::Format("No. \tR \tR2 \tR2 adj\tStdErr\tF \tP \tF step\tP step\tVariable\n");
930 s += CSG_String::Format("------\t------\t------\t------\t------\t------\t------\t------\t------\t------\n");
931
932 for(int i=0; i<m_pSteps->Get_Count(); i++)
933 {
934 CSG_Table_Record *pRecord = m_pSteps->Get_Record(i);
935
936 s += CSG_String::Format("%d.\t%.2f\t%.2f\t%.2f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%s %s\n",
937 pRecord->asInt (MLR_STEP_NR ),
938 pRecord->asDouble(MLR_STEP_R ),
939 pRecord->asDouble(MLR_STEP_R2 ) * 100.,
940 pRecord->asDouble(MLR_STEP_R2_ADJ ) * 100.,
941 pRecord->asDouble(MLR_STEP_SE ),
942 pRecord->asDouble(MLR_STEP_F ),
943 pRecord->asDouble(MLR_STEP_SIG ) * 100.,
944 pRecord->asDouble(MLR_STEP_VAR_F ),
945 pRecord->asDouble(MLR_STEP_VAR_SIG) * 100.,
946 pRecord->asString(MLR_STEP_DIR ),
947 pRecord->asString(MLR_STEP_VAR )
948 );
949 }
950 }
951
952 //-----------------------------------------------------
953 s += CSG_String::Format("\n%s:\n\n", _TL("Correlation"));
954 s += CSG_String::Format("No. \tR \tR2 \tR2 adj\tStdErr\tt \tSig. \tb \t\tVariable\n");
955 s += CSG_String::Format("------\t------\t------\t------\t------\t------\t---------\t---------\t\t---------\n");
956
957 for(int i=0; i<m_pRegression->Get_Count(); i++)
958 {
959 CSG_Table_Record *pRecord = m_pRegression->Get_Record(i);
960
961 s += CSG_String::Format("%d.\t%.2f\t%.2f\t%.2f\t%.3f\t%.3f\t%.6f\t%.6f\t%s\n",
962 i,
963 pRecord->asDouble(MLR_VAR_R ),
964 pRecord->asDouble(MLR_VAR_R2 ) * 100.,
965 pRecord->asDouble(MLR_VAR_R2_ADJ) * 100.,
966 pRecord->asDouble(MLR_VAR_SE ),
967 pRecord->asDouble(MLR_VAR_T ),
968 pRecord->asDouble(MLR_VAR_SIG ) * 100.,
969 pRecord->asDouble(MLR_VAR_RCOEFF),
970 pRecord->asString(MLR_VAR_NAME )
971 );
972 }
973
974 //-----------------------------------------------------
975 s += CSG_String::Format("\n%s: %g", _TL("Formula"), Get_RConst());
976
977 for(int i=0; i<Get_nPredictors(); i++)
978 {
979 double b = Get_RCoeff(i);
980
981 s += CSG_String::Format(" %c %g * X%d", b < 0. ? '-' : '+', fabs(b), i + 1);
982 }
983
984 //-----------------------------------------------------
985 s += "\n\n";
986
987 s += CSG_String::Format("%s: %f (%s: %d)\n", _TL("Residual standard error"), Get_StdError(), _TL("degrees of freedom"), Get_DegFreedom());
988 s += CSG_String::Format("%s: %f (%s: %f)\n", _TL("Multiple R-squared"), 100. * Get_R2(), _TL("adjusted"), 100. * Get_R2_Adj());
989 s += CSG_String::Format("%s: %f (%d/%d DF), %s: %g\n", _TL("F-statistic"), Get_F(), Get_nPredictors(), Get_DegFreedom(), _TL("p-value"), Get_P());
990
991 //-----------------------------------------------------
992 return( s );
993}
994
995
997// //
998// //
999// //
1001
1002//---------------------------------------------------------
bool SG_UI_Process_Get_Okay(bool bBlink)
#define SG_T(s)
Definition api_core.h:537
@ SG_DATATYPE_Double
Definition api_core.h:1008
@ SG_DATATYPE_Int
Definition api_core.h:1004
@ SG_DATATYPE_String
Definition api_core.h:1009
#define SG_Char
Definition api_core.h:536
#define _TL(s)
Definition api_core.h:1568
sLong Get_NRows(void) const
Definition mat_tools.h:525
CSG_Matrix Get_Transpose(void) const
bool Add_Row(const double *Data=NULL)
int Get_NX(void) const
Definition mat_tools.h:522
sLong Get_NCols(void) const
Definition mat_tools.h:524
bool Create(const CSG_Matrix &Matrix)
bool Del_Row(sLong Row)
CSG_Vector Get_Col(sLong Col) const
int Get_NY(void) const
Definition mat_tools.h:523
bool Set_Col(sLong Col, const double *Data)
bool Add_Cols(sLong nCols)
CSG_Matrix Get_Inverse(bool bSilent=true, int nSubSquare=0) const
bool Del_Col(sLong Col)
CSG_Regression_Multiple(bool bIntercept=true)
class CSG_Table * m_pModel
Definition mat_tools.h:1783
double _Get_P(int nPredictors, int nSamples, double r2_full, double r2_reduced)
double Get_Parameter(int iVariable, int Parameter) const
bool Get_Model(const CSG_Matrix &Samples, CSG_Strings *pNames=NULL)
bool Get_Residuals(CSG_Vector &Residuals) const
bool _Set_Step_Info(const CSG_Matrix &X)
int _Get_Step_Out(CSG_Matrix &X, double P_out, double &R2)
double Get_RCoeff(int iVariable) const
Definition mat_tools.h:1755
bool Set_Data(const CSG_Matrix &Samples, CSG_Strings *pNames=NULL)
bool Get_Model_Forward(const CSG_Matrix &Samples, double P_in, CSG_Strings *pNames=NULL)
int _Get_Step_In(CSG_Matrix &X, double P_in, double &R2, const CSG_Matrix &Samples)
bool Get_CrossValidation(int nSubSamples=0)
bool Get_Model_Stepwise(const CSG_Matrix &Samples, double P_in, double P_out, CSG_Strings *pNames=NULL)
double Get_Value(const CSG_Vector &Predictors) const
bool _Get_Regression(const class CSG_Matrix &Samples)
bool Get_Model_Backward(const CSG_Matrix &Samples, double P_out, CSG_Strings *pNames=NULL)
double Get_Residual(int iSample) const
class CSG_Table * m_pSteps
Definition mat_tools.h:1783
double _Get_F(int nPredictors, int nSamples, double r2_full, double r2_reduced)
const SG_Char * Get_Name(int iVariable) const
class CSG_Table * m_pRegression
Definition mat_tools.h:1783
double Get_Mean(void)
Definition mat_tools.h:753
double Get_Sum(void)
Definition mat_tools.h:751
double Get_Range(void)
Definition mat_tools.h:750
static CSG_String Format(const char *Format,...)
CSG_String & Get_String(int Index) const
Definition api_core.h:724
int Get_Count(void) const
Definition api_core.h:714
bool Set_Value(int Field, const CSG_String &Value)
double asDouble(int Field) const
int asInt(int Field) const
const SG_Char * asString(int Field, int Decimals=-99) const
virtual CSG_Table_Record * Get_Record(sLong Index) const
Definition table.h:402
static double Get_F_Tail_from_R2(double R2, int nPredictors, int nSamples, TSG_Test_Distribution_Type Type=TESTDIST_TYPE_Right)
static double Get_T_Tail(double T, int df, TSG_Test_Distribution_Type Type=TESTDIST_TYPE_Right)
static double Get_F_Tail(double F, int dfn, int dfd, TSG_Test_Distribution_Type Type=TESTDIST_TYPE_Right)
bool Create(const CSG_Vector &Vector)
int Get_N(void) const
Definition mat_tools.h:384
#define B
#define C
ESG_Multiple_Regression_Info_Model
@ MLR_MODEL_CV_NSAMPLES
ESG_Multiple_Regression_Info_Steps
@ MLR_VAR_R2
Definition mat_tools.h:1699
@ MLR_VAR_P
Definition mat_tools.h:1704
@ MLR_VAR_R2_ADJ
Definition mat_tools.h:1700
@ MLR_VAR_ID
Definition mat_tools.h:1695
@ MLR_VAR_RCOEFF
Definition mat_tools.h:1697
@ MLR_VAR_T
Definition mat_tools.h:1702
@ MLR_VAR_SE
Definition mat_tools.h:1701
@ MLR_VAR_R
Definition mat_tools.h:1698
@ MLR_VAR_NAME
Definition mat_tools.h:1696
@ MLR_VAR_SIG
Definition mat_tools.h:1703
@ TESTDIST_TYPE_Right
Definition mat_tools.h:1527
@ TESTDIST_TYPE_TwoTail
Definition mat_tools.h:1529
SAGA_API_DLL_EXPORT CSG_Matrix SG_Get_Correlation_Matrix(const CSG_Matrix &Values, bool bCovariances=false)
SAGA_API_DLL_EXPORT double SG_Regression_Get_Adjusted_R2(double R2, int nSamples, int nPredictors, TSG_Regression_Correction Correction=REGRESSION_CORR_Wherry_1)
SAGA_API_DLL_EXPORT double SG_Get_Square(double Value)
Definition mat_tools.cpp:70