SAGA API v9.10
Loading...
Searching...
No Matches
mat_mRMR.cpp
Go to the documentation of this file.
1
3// //
4// SAGA //
5// //
6// System for Automated Geoscientific Analyses //
7// //
8// Application Programming Interface //
9// //
10// Library: SAGA_API //
11// //
12//-------------------------------------------------------//
13// //
14// mRMR.cpp //
15// //
16// Copyright (C) 2014 by //
17// Olaf Conrad //
18// //
19//-------------------------------------------------------//
20// //
21// This file is part of 'SAGA - System for Automated //
22// Geoscientific Analyses'. //
23// //
24// This library is free software; you can redistribute //
25// it and/or modify it under the terms of the GNU Lesser //
26// General Public License as published by the Free //
27// Software Foundation, either version 2.1 of the //
28// License, or (at your option) any later version. //
29// //
30// This library is distributed in the hope that it will //
31// be useful, but WITHOUT ANY WARRANTY; without even the //
32// implied warranty of MERCHANTABILITY or FITNESS FOR A //
33// PARTICULAR PURPOSE. See the GNU Lesser General Public //
34// License for more details. //
35// //
36// You should have received a copy of the GNU Lesser //
37// General Public License along with this program; if //
38// not, see <http://www.gnu.org/licenses/>. //
39// //
40//-------------------------------------------------------//
41// //
42// e-mail: oconrad@saga-gis.org //
43// //
44// contact: Olaf Conrad //
45// Institute of Geography //
46// University of Hamburg //
47// Germany //
48// //
50
51//---------------------------------------------------------
52#ifdef WITH_MRMR
53
54//---------------------------------------------------------
55// A C++ program to implement the mRMR selection using mutual information
56// written by Hanchuan Peng.
57//
58// Disclaimer: The author of program is Hanchuan Peng
59// at <penghanchuan@yahoo.com>.
60//
61// The CopyRight is reserved by the author.
62//
63// Last modification: Jan/25/2007
64//
65// by Hanchuan Peng
66// 2005-10-24: finalize the computing parts of the whole program
67// 2005-10-25: add non-discretization for the classification variable. Also slightly change some output info for the web application
68// 2005-11-01: add control to the user-defined max variable number and sample number
69// 2006-01-26: add gnu_getline.c to convert the code to be compilable under Max OS.
70// 2007-01-25: change the address info
71
72//---------------------------------------------------------
73#include "mat_tools.h"
74#include "parameters.h"
75#include "table.h"
76
77
79// //
80// //
81// //
83
84//---------------------------------------------------------
85#define DELETE_ARRAY(p) if( p ) { delete[]p; p = NULL; }
86
87//---------------------------------------------------------
88enum ESG_mRMR_Selection
89{
90 SG_mRMR_SELECTION_RANK = 0,
91 SG_mRMR_SELECTION_INDEX,
92 SG_mRMR_SELECTION_NAME,
93 SG_mRMR_SELECTION_SCORE
94};
95
96
98// //
100
101//---------------------------------------------------------
102CSG_mRMR::CSG_mRMR(void)
103{
104 m_Samples = NULL;
105 m_nSamples = 0;
106 m_nVars = 0;
107 m_bDiscretized = false; // initialize the data as continuous
108 m_bVerbose = false;
109
110 m_pSelection = new CSG_Table;
111
112 m_pSelection->Add_Field("RANK" , SG_DATATYPE_Int ); // mRMR_SELFLD_RANK
113 m_pSelection->Add_Field("INDEX", SG_DATATYPE_Int ); // mRMR_SELFLD_INDEX
114 m_pSelection->Add_Field("NAME" , SG_DATATYPE_String); // mRMR_SELFLD_NAME
115 m_pSelection->Add_Field("SCORE", SG_DATATYPE_Double); // mRMR_SELFLD_SCORE
116}
117
118//---------------------------------------------------------
119CSG_mRMR::~CSG_mRMR(void)
120{
121 Destroy();
122
123 delete(m_pSelection);
124}
125
126//---------------------------------------------------------
127void CSG_mRMR::Destroy(void)
128{
129 if( m_Samples )
130 {
131 DELETE_ARRAY(m_Samples[0]);
132 DELETE_ARRAY(m_Samples);
133 }
134
135 m_VarNames .Clear();
136 m_nSamples = 0;
137 m_nVars = 0;
138 m_bDiscretized = false; // initialize the data as continuous
139
140 m_pSelection ->Del_Records();
141}
142
143
145// //
147
148//---------------------------------------------------------
149CSG_String CSG_mRMR::Get_Description(void)
150{
151 return( _TW(
152 "The minimum Redundancy Maximum Relevance (mRMR) feature selection "
153 "algorithm has been developed by Hanchuan Peng <hanchuan.peng@gmail.com>.\n"
154 "\n"
155 "References:\n"
156 "Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy. "
157 "Hanchuan Peng, Fuhui Long, and Chris Ding, "
158 "IEEE Transactions on Pattern Analysis and Machine Intelligence, "
159 "Vol. 27, No. 8, pp.1226-1238, 2005.\n"
160 "\n"
161 "Minimum redundancy feature selection from microarray gene expression data,\n"
162 "Chris Ding, and Hanchuan Peng, "
163 "Journal of Bioinformatics and Computational Biology, "
164 "Vol. 3, No. 2, pp.185-205, 2005.\n"
165 "\n"
166 "Hanchuan Peng's mRMR Homepage at "
167 "<a target=\"_blank\" href=\"http://penglab.janelia.org/proj/mRMR/\">"
168 "http://penglab.janelia.org/proj/mRMR/</a>\n"
169 ));
170}
171
172
174// //
176
177//---------------------------------------------------------
178bool CSG_mRMR::Parameters_Add(CSG_Parameters *pParameters, CSG_Parameter *pNode)
179{
180 CSG_String ParentID(pNode ? pNode->Get_Identifier() : SG_T(""));
181
182 pParameters->Add_Int(
183 ParentID, "mRMR_NFEATURES" , _TL("Number of Features"),
184 _TL(""),
185 50, 1, true
186 );
187
188 pParameters->Add_Bool(
189 ParentID, "mRMR_DISCRETIZE" , _TL("Discretization"),
190 _TL("uncheck this means no discretizaton (i.e. data is already integer)"),
191 true
192 );
193
194 pParameters->Add_Double(
195 ParentID, "mRMR_THRESHOLD" , _TL("Discretization Threshold"),
196 _TL("a double number of the discretization threshold; set to 0 to make binarization"),
197 1.0, 0.0, true
198 );
199
200 pParameters->Add_Choice(
201 ParentID, "mRMR_METHOD" , _TL("Selection Method"),
202 _TL(""),
203 CSG_String::Format("%s|%s|",
204 _TL("Mutual Information Difference (MID)"),
205 _TL("Mutual Information Quotient (MIQ)")
206 ), 0
207 );
208
209 return( true );
210}
211
212//---------------------------------------------------------
213int CSG_mRMR::Parameters_Enable(CSG_Parameters *pParameters, CSG_Parameter *pParameter)
214{
215 if( pParameter->Cmp_Identifier("mRMR_DISCRETIZE") )
216 {
217 pParameters->Set_Enabled("mRMR_THRESHOLD", pParameter->asBool());
218 }
219
220 return( 1 );
221}
222
223//---------------------------------------------------------
224bool CSG_mRMR::Set_Data(CSG_Table &Data, int ClassField, CSG_Parameters *pParameters)
225{
226 bool bDiscretize = (*pParameters)("mRMR_DISCRETIZE") ? (*pParameters)("mRMR_DISCRETIZE")->asBool () : true;
227 double Threshold = (*pParameters)("mRMR_THRESHOLD" ) ? (*pParameters)("mRMR_THRESHOLD" )->asDouble() : 1.0;
228
229 return( Set_Data(Data, ClassField, bDiscretize ? Threshold : -1.0) );
230}
231
232bool CSG_mRMR::Set_Data(CSG_Matrix &Data, int ClassField, CSG_Parameters *pParameters)
233{
234 bool bDiscretize = (*pParameters)("mRMR_DISCRETIZE") ? (*pParameters)("mRMR_DISCRETIZE")->asBool () : true;
235 double Threshold = (*pParameters)("mRMR_THRESHOLD" ) ? (*pParameters)("mRMR_THRESHOLD" )->asDouble() : 1.0;
236
237 return( Set_Data(Data, ClassField, bDiscretize ? Threshold : -1.0) );
238}
239
240//---------------------------------------------------------
241bool CSG_mRMR::Get_Selection(CSG_Parameters *pParameters)
242{
243 int nFeatures = (*pParameters)("mRMR_NFEATURES") ? (*pParameters)("mRMR_NFEATURES")->asInt() : 50;
244 int Method = (*pParameters)("mRMR_METHOD" ) ? (*pParameters)("mRMR_METHOD" )->asInt() : 0;
245
246 return( Get_Selection(nFeatures, Method) );
247}
248
249
251// //
253
254//---------------------------------------------------------
255int CSG_mRMR::Get_Count (void) const
256{
257 return( (int)m_pSelection->Get_Count() );
258}
259
260int CSG_mRMR::Get_Index(int i) const
261{
262 return( m_pSelection->Get_Record(i)->asInt (SG_mRMR_SELECTION_INDEX) );
263}
264
265CSG_String CSG_mRMR::Get_Name(int i) const
266{
267 return( m_pSelection->Get_Record(i)->asString(SG_mRMR_SELECTION_NAME ) );
268}
269
270double CSG_mRMR::Get_Score(int i) const
271{
272 return( m_pSelection->Get_Record(i)->asDouble(SG_mRMR_SELECTION_SCORE) );
273}
274
275
277// //
279
280//---------------------------------------------------------
281#define ADD_MESSAGE(Message) if( m_bVerbose ) SG_UI_Msg_Add_Execution(CSG_String(Message) + "\n", false);
282#define ADD_WARNING(Message) if( m_bVerbose ) SG_UI_Msg_Add_Execution(_TL("Warning") + CSG_String(": ") + CSG_String(Message) + "\n", false, SG_UI_MSG_STYLE_FAILURE);
283
284
286// //
288
289//---------------------------------------------------------
290bool CSG_mRMR::Get_Memory(int nVars, int nSamples)
291{
292 Destroy();
293
294 //-----------------------------------------------------
295 m_nVars = nVars;
296
297 if( m_nVars <= 0 )
298 {
299 SG_UI_Msg_Add_Error("no features");
300
301 return( false );
302 }
303
304 m_nSamples = nSamples;
305
306 if( m_nSamples <= 0 )
307 {
308 SG_UI_Msg_Add_Error("no samples");
309
310 return( false );
311 }
312
313 //-----------------------------------------------------
314 m_Samples = new double *[m_nSamples];
315
316 if( !m_Samples )
317 {
318 SG_UI_Msg_Add_Error("failed to allocate memory.");
319
320 return( false );
321 }
322
323 m_Samples[0] = new double[m_nSamples * m_nVars];
324
325 if( !m_Samples[0] )
326 {
327 SG_UI_Msg_Add_Error("failed to allocate memory.");
328
329 return( false );
330 }
331
332 return( true );
333}
334
335//---------------------------------------------------------
336bool CSG_mRMR::Set_Data(CSG_Table &Data, int ClassField, double Threshold)
337{
338 if( !Get_Memory(Data.Get_Field_Count(), (int)Data.Get_Count()) )
339 {
340 return( false );
341 }
342
343 //-----------------------------------------------------
344 if( ClassField < 0 || ClassField >= m_nVars )
345 {
346 ClassField = 0;
347 }
348
349 Data.Set_Index(ClassField, TABLE_INDEX_Ascending);
350
351 CSG_String s;
352
353 for(int iSample=0, Class=0; iSample<m_nSamples; iSample++)
354 {
355 double *pData = m_Samples[iSample] = m_Samples[0] + iSample * m_nVars;
356
357 if( s.Cmp(Data[iSample].asString(ClassField)) )
358 {
359 s = Data[iSample].asString(ClassField);
360
361 Class++;
362 }
363
364 *pData++ = Class;
365
366 for(int iVar=0; iVar<m_nVars; iVar++)
367 {
368 if( iVar != ClassField )
369 {
370 *pData++ = Data[iSample].asDouble(iVar);
371 }
372 }
373 }
374
375 Data.Del_Index();
376
377 m_VarNames += Data.Get_Field_Name(ClassField);
378
379 for(int iVar=0; iVar<m_nVars; iVar++)
380 {
381 if( iVar != ClassField )
382 {
383 m_VarNames += Data.Get_Field_Name(iVar);
384 }
385 }
386
387 //-----------------------------------------------------
388 if( Threshold >= 0.0 ) // discretization
389 {
390 Discretize(Threshold);
391 }
392
393 return( true );
394}
395
396//---------------------------------------------------------
397bool CSG_mRMR::Set_Data(CSG_Matrix &Data, int ClassField, double Threshold)
398{
399 if( !Get_Memory(Data.Get_NX(), Data.Get_NY()) )
400 {
401 return( false );
402 }
403
404 //-----------------------------------------------------
405 if( ClassField < 0 || ClassField >= m_nVars )
406 {
407 ClassField = 0;
408 }
409
410 for(int iSample=0; iSample<m_nSamples; iSample++)
411 {
412 double *pData = m_Samples[iSample] = m_Samples[0] + iSample * m_nVars;
413
414 *pData++ = Data[iSample][ClassField];
415
416 for(int iVar=0; iVar<m_nVars; iVar++)
417 {
418 if( iVar != ClassField )
419 {
420 *pData++ = Data[iSample][iVar];
421 }
422 }
423 }
424
425 m_VarNames += "CLASS";
426
427 for(int iVar=0; iVar<m_nVars; iVar++)
428 {
429 if( iVar != ClassField )
430 {
431 m_VarNames += CSG_String::Format(SG_T("FEATURE_%02d"), iVar);
432 }
433 }
434
435 //-----------------------------------------------------
436 if( Threshold >= 0.0 ) // discretization
437 {
438 Discretize(Threshold);
439 }
440
441 return( true );
442}
443
444
446// //
448
449//---------------------------------------------------------
450bool CSG_mRMR::Discretize(double Threshold)
451{
452 if( !m_Samples[0] || Threshold < 0.0 || m_bDiscretized )
453 {
454 return( false );
455 }
456
457 long i, j; // exclude the first column
458
459 //-----------------------------------------------------
460 for(j=1; j<m_nVars; j++) // z-score
461 {
462 double cursum = 0;
463 double curmean = 0;
464 double curstd = 0;
465
466 for(i=0; i<m_nSamples; i++)
467 {
468 cursum += m_Samples[i][j];
469 }
470
471 curmean = cursum / m_nSamples;
472 cursum = 0;
473
474 double tmpf;
475
476 for(i=0; i<m_nSamples; i++)
477 {
478 tmpf = m_Samples[i][j] - curmean;
479 cursum += tmpf * tmpf;
480 }
481
482 curstd = (m_nSamples == 1) ? 0 : sqrt(cursum / (m_nSamples - 1)); // m_nSamples -1 is an unbiased version for Gaussian
483
484 for(i=0; i<m_nSamples; i++)
485 {
486 m_Samples[i][j] = (m_Samples[i][j] - curmean) / curstd;
487 }
488 }
489
490 //-----------------------------------------------------
491 for(j=1; j<m_nVars; j++)
492 {
493 double tmpf;
494
495 for(i=0; i<m_nSamples; i++)
496 {
497 tmpf = m_Samples[i][j];
498
499 if( tmpf > Threshold )
500 {
501 tmpf = 1;
502 }
503 else if( tmpf < -Threshold )
504 {
505 tmpf = -1;
506 }
507 else
508 {
509 tmpf = 0;
510 }
511
512 m_Samples[i][j] = tmpf;
513 }
514 }
515
516 m_bDiscretized = true;
517
518 return( true );
519}
520
521
523// //
525
526//---------------------------------------------------------
527#define ADD_FEATURE(rank, score) {\
528 CSG_Table_Record *pFeature = m_pSelection->Add_Record();\
529 \
530 pFeature->Set_Value(SG_mRMR_SELECTION_RANK , rank + 1);\
531 pFeature->Set_Value(SG_mRMR_SELECTION_INDEX, feaInd[rank]);\
532 pFeature->Set_Value(SG_mRMR_SELECTION_NAME , m_VarNames[(size_t)feaInd[rank]]);\
533 pFeature->Set_Value(SG_mRMR_SELECTION_SCORE, score);\
534 \
535 ADD_MESSAGE(CSG_String::Format(SG_T("%d \t %d \t %s \t %5.3f"),\
536 rank + 1, feaInd[rank], m_VarNames[(size_t)feaInd[rank]].c_str(), score)\
537 );\
538}
539
540//---------------------------------------------------------
541int CSG_mRMR::Pool_Compare(const void *a, const void *b)
542{
543 if( ((TPool *)a)->mival < ((TPool *)b)->mival )
544 return( -1 );
545
546 if( ((TPool *)a)->mival > ((TPool *)b)->mival )
547 return( 1 );
548
549 return( 0 );
550}
551
552//---------------------------------------------------------
553bool CSG_mRMR::Get_Selection(int nFeatures, int Method)
554{
555 m_pSelection->Del_Records();
556
557 if( !m_Samples[0] )
558 {
559 SG_UI_Msg_Add_Error("The input data is NULL.");
560
561 return( false );
562 }
563
564 if( nFeatures < 0 )
565 {
566 SG_UI_Msg_Add_Error("The input number of features is negative.");
567
568 return( false );
569 }
570
571 long poolUseFeaLen = 500;
572 if( poolUseFeaLen > m_nVars - 1 ) // there is a target variable (the first one), that is why must remove one
573 poolUseFeaLen = m_nVars - 1;
574
575 if( nFeatures > poolUseFeaLen )
576 nFeatures = poolUseFeaLen;
577
578 long *feaInd = new long[nFeatures];
579
580 if( !feaInd )
581 {
582 SG_UI_Msg_Add_Error("Fail to allocate memory.");
583
584 return( false );
585 }
586
587 //-----------------------------------------------------
588 // determine the pool
589
590 TPool *Pool = (TPool *)SG_Malloc(m_nVars * sizeof(TPool));
591
592 if( !Pool )
593 {
594 SG_UI_Msg_Add_Error("Fail to allocate memory.");
595
596 return( false );
597 }
598
599 //-----------------------------------------------------
600 long i, j, k;
601
602 for(i=0; i<m_nVars; i++) // the Pool[0].mival is the entropy of target classification variable
603 {
604 Pool[i].mival = -Get_MutualInfo(0, i); // set as negative for sorting purpose
605 Pool[i].Index = i;
606 Pool[i].Mask = 1; // initialized to be everything in pool would be considered
607 }
608
609 qsort(Pool + 1, m_nVars - 1, sizeof(TPool), Pool_Compare);
610
611 Pool[0].mival = -Pool[0].mival;
612
613 ADD_MESSAGE(CSG_String::Format(
614 SG_T("\nTarget classification variable (#%d column in the input data) has name=%s \tentropy score=%5.3f"),
615 0 + 1, m_VarNames[0].c_str(), Pool[0].mival
616 ));
617
618 ADD_MESSAGE("\n*** MaxRel features ***");
619 ADD_MESSAGE("Order\tFea\tName\tScore");
620
621 for(i=1; i<m_nVars-1; i++)
622 {
623 Pool[i].mival = -Pool[i].mival;
624
625 if( i <= nFeatures )
626 {
627 ADD_MESSAGE(CSG_String::Format(SG_T("%d \t %d \t %s \t %5.3f"),
628 i, (int)Pool[i].Index, m_VarNames[(int)Pool[i].Index].c_str(), Pool[i].mival
629 ));
630 }
631 }
632
633 //-----------------------------------------------------
634 // mRMR selection
635
636 long poolFeaIndMin = 1;
637 long poolFeaIndMax = poolFeaIndMin + poolUseFeaLen - 1;
638
639 feaInd[0] = Pool[1].Index;
640 Pool[feaInd[0]].Mask = 0; // after selection, no longer consider this feature
641 Pool[0 ].Mask = 0; // of course the first one, which corresponds to the classification variable, should not be considered. Just set the mask as 0 for safety.
642
643 ADD_MESSAGE("\n*** mRMR features ***");
644 ADD_MESSAGE("Order\tFea\tName\tScore");
645
646 ADD_FEATURE(0, Pool[1].mival);
647
648 for(k=1; k<nFeatures; k++) //the first one, feaInd[0] has been determined already
649 {
650 double relevanceVal, redundancyVal, tmpscore, selectscore;
651 long selectind;
652 int b_firstSelected = 0;
653
654 for(i=poolFeaIndMin; i<=poolFeaIndMax; i++)
655 {
656 if( Pool[Pool[i].Index].Mask == 0 )
657 {
658 continue; // skip this feature as it was selected already
659 }
660
661 relevanceVal = Get_MutualInfo(0, Pool[i].Index); // actually no necessary to re-compute it, this value can be retrieved from Pool[].mival vector
662 redundancyVal = 0;
663
664 for(j=0; j<k; j++)
665 {
666 redundancyVal += Get_MutualInfo(feaInd[j], Pool[i].Index);
667 }
668
669 redundancyVal /= k;
670
671 switch( Method )
672 {
673 default: // fprintf(stderr, "Undefined selection method=%d. Use MID instead.\n", mymethod);
674 case SG_mRMR_Method_MID: tmpscore = relevanceVal - redundancyVal; break;
675 case SG_mRMR_Method_MIQ: tmpscore = relevanceVal / (redundancyVal + 0.0001); break;
676 }
677
678 if( b_firstSelected == 0 )
679 {
680 selectscore = tmpscore;
681 selectind = Pool[i].Index;
682 b_firstSelected = 1;
683 }
684 else if( tmpscore > selectscore )
685 { //update the best feature found and the score
686 selectscore = tmpscore;
687 selectind = Pool[i].Index;
688 }
689 }
690
691 feaInd[k] = selectind;
692 Pool[selectind].Mask = 0;
693
694 ADD_FEATURE(k, selectscore);
695 }
696
697 //-----------------------------------------------------
698 return( true );
699}
700
701
703// //
705
706//---------------------------------------------------------
707double CSG_mRMR::Get_MutualInfo(long v1, long v2)
708{
709 double mi = -1; // initialized as an illegal value
710
711 if( !m_Samples[0] )
712 {
713 SG_UI_Msg_Add_Error("The input data is NULL.");
714
715 return mi;
716 }
717
718 if( v1 >= m_nVars || v2 >= m_nVars || v1 < 0 || v2 < 0 )
719 {
720 SG_UI_Msg_Add_Error("The input variable indexes are invalid (out of range).");
721
722 return mi;
723 }
724
725 //-----------------------------------------------------
726 // copy data
727
728 int *v1data = new int[m_nSamples];
729 int *v2data = new int[m_nSamples];
730
731 if( !v1data || !v2data )
732 {
733 SG_UI_Msg_Add_Error("Fail to allocate memory.");
734
735 return mi;
736 }
737
738 for(long i=0; i<m_nSamples; i++)
739 {
740 v1data[i] = (int)m_Samples[i][v1]; // the double already been discretized, should be safe now
741 v2data[i] = (int)m_Samples[i][v2];
742 }
743
744 //-----------------------------------------------------
745 // compute mutual info
746
747 long nstate = 3; // always true for DataTable, which was discretized as three states
748
749 int nstate1 = 0, nstate2 = 0;
750
751 double *pab = Get_JointProb(v1data, v2data, m_nSamples, nstate, nstate1, nstate2);
752
753 mi = Get_MutualInfo(pab, nstate1, nstate2);
754
755 //-----------------------------------------------------
756 // free memory and return
757
758 DELETE_ARRAY(v1data);
759 DELETE_ARRAY(v2data);
760 DELETE_ARRAY(pab );
761
762 return mi;
763}
764
765//---------------------------------------------------------
766double CSG_mRMR::Get_MutualInfo(double *pab, long pabhei, long pabwid)
767{
768 //-----------------------------------------------------
769 // check if parameters are correct
770
771 if( !pab )
772 {
773 SG_UI_Msg_Add_Error("Got illeagal parameter in compute_mutualinfo().");
774
775 return( -1.0 );
776 }
777
778 //-----------------------------------------------------
779 long i, j;
780
781 double **pab2d = new double *[pabwid];
782
783 for(j=0; j<pabwid; j++)
784 {
785 pab2d[j] = pab + (long)j * pabhei;
786 }
787
788 double *p1 = 0, *p2 = 0;
789 long p1len = 0, p2len = 0;
790 int b_findmarginalprob = 1;
791
792 //-----------------------------------------------------
793 //generate marginal probability arrays
794
795 if( b_findmarginalprob != 0 )
796 {
797 p1len = pabhei;
798 p2len = pabwid;
799 p1 = new double[p1len];
800 p2 = new double[p2len];
801
802 for (i = 0; i < p1len; i++) p1[i] = 0;
803 for (j = 0; j < p2len; j++) p2[j] = 0;
804
805 for (i = 0; i < p1len; i++) //p1len = pabhei
806 {
807 for (j = 0; j < p2len; j++) //p2len = panwid
808 {
809 // printf("%f ",pab2d[j][i]);
810 p1[i] += pab2d[j][i];
811 p2[j] += pab2d[j][i];
812 }
813 }
814 }
815
816 //-----------------------------------------------------
817 // calculate the mutual information
818
819 double muInf = 0;
820
821 muInf = 0.0;
822
823 for (j = 0; j < pabwid; j++) // should for p2
824 {
825 for (i = 0; i < pabhei; i++) // should for p1
826 {
827 if (pab2d[j][i] != 0 && p1[i] != 0 && p2[j] != 0)
828 {
829 muInf += pab2d[j][i] * log (pab2d[j][i] / p1[i] / p2[j]);
830 // printf("%f %fab %fa %fb\n",muInf,pab2d[j][i]/p1[i]/p2[j],p1[i],p2[j]);
831 }
832 }
833 }
834
835 muInf /= log(2.0);
836
837 //-----------------------------------------------------
838 // free memory
839
840 DELETE_ARRAY(pab2d);
841
842 if( b_findmarginalprob != 0 )
843 {
844 DELETE_ARRAY(p1);
845 DELETE_ARRAY(p2);
846 }
847
848 return muInf;
849}
850
851
853// //
855
856//---------------------------------------------------------
857template <class T> double * CSG_mRMR::Get_JointProb(T * img1, T * img2, long len, long maxstatenum, int &nstate1, int &nstate2)
858{
859 long i, j;
860
861 //-----------------------------------------------------
862 // get and check size information
863
864 if (!img1 || !img2 || len < 0)
865 {
866 SG_UI_Msg_Add_Error("At least one of the input vectors is invalid.");
867
868 return( NULL );
869 }
870
871// int b_findstatenum = 1; // int nstate1 = 0, nstate2 = 0;
872 int b_returnprob = 1;
873
874 //-----------------------------------------------------
875 // copy data into new INT type array (hence quantization) and then reange them begin from 0 (i.e. state1)
876
877 int *vec1 = new int[len];
878 int *vec2 = new int[len];
879
880 if( !vec1 || !vec2 )
881 {
882 SG_UI_Msg_Add_Error("Fail to allocate memory.\n");
883
884 return( NULL );
885 }
886
887 int nrealstate1 = 0, nrealstate2 = 0;
888
889 Copy_Vector(img1, len, vec1, nrealstate1);
890 Copy_Vector(img2, len, vec2, nrealstate2);
891
892 //update the #state when necessary
893 nstate1 = (nstate1 < nrealstate1) ? nrealstate1 : nstate1;
894 //printf("First vector #state = %i\n",nrealstate1);
895 nstate2 = (nstate2 < nrealstate2) ? nrealstate2 : nstate2;
896 //printf("Second vector #state = %i\n",nrealstate2);
897
898 // if (nstate1!=maxstatenum || nstate2!=maxstatenum)
899 // printf("find nstate1=%d nstate2=%d\n", nstate1, nstate2);
900
901 //-----------------------------------------------------
902 // generate the joint-distribution table
903
904 double *hab = new double[nstate1 * nstate2];
905 double **hab2d = new double *[nstate2];
906
907 if( !hab || !hab2d )
908 {
909 SG_UI_Msg_Add_Error("Fail to allocate memory.");
910
911 return( NULL );
912 }
913
914 for(j=0; j<nstate2; j++)
915 {
916 hab2d[j] = hab + (long)j * nstate1;
917 }
918
919 for(i=0; i<nstate1; i++)
920 {
921 for(j=0; j<nstate2; j++)
922 {
923 hab2d[j][i] = 0;
924 }
925 }
926
927 for(i=0; i<len; i++)
928 {
929 // old method -- slow
930 // indx = (long)(vec2[i]) * nstate1 + vec1[i];
931 // hab[indx] += 1;
932
933 // new method -- fast
934 hab2d[vec2[i]][vec1[i]] += 1;
935 //printf("vec2[%d]=%d vec1[%d]=%d\n", i, vec2[i], i, vec1[i]);
936 }
937
938 //-----------------------------------------------------
939 // return the probabilities, otherwise return count numbers
940
941 if( b_returnprob )
942 {
943 for(i=0; i<nstate1; i++)
944 {
945 for(j=0; j<nstate2; j++)
946 {
947 hab2d[j][i] /= len;
948 }
949 }
950 }
951
952 //-----------------------------------------------------
953 // finish
954
955 DELETE_ARRAY(hab2d);
956 DELETE_ARRAY(vec1);
957 DELETE_ARRAY(vec2);
958
959 return hab;
960}
961
962//---------------------------------------------------------
963template <class T> void CSG_mRMR::Copy_Vector(T *srcdata, long len, int *desdata, int &nstate)
964{
965 if (!srcdata || !desdata)
966 {
967 SG_UI_Msg_Add_Error("no points in Copy_Vector()!");
968
969 return;
970 }
971
972 // note: originally I added 0.5 before rounding, however seems the negative numbers and
973 // positive numbers are all rounded towarded 0; hence int(-1+0.5)=0 and int(1+0.5)=1;
974 // This is unwanted because I need the above to be -1 and 1.
975 // for this reason I just round with 0.5 adjustment for positive and negative differently
976
977 // copy data
978 int minn, maxx;
979 if (srcdata[0] > 0)
980 {
981 maxx = minn = int (srcdata[0] + 0.5);
982 }
983 else
984 {
985 maxx = minn = int (srcdata[0] - 0.5);
986 }
987
988 long i;
989 int tmp;
990 double tmp1;
991
992 for (i = 0; i < len; i++)
993 {
994 tmp1 = double (srcdata[i]);
995 tmp = (tmp1 > 0) ? (int) (tmp1 + 0.5) : (int) (tmp1 - 0.5); //round to integers
996 minn = (minn < tmp) ? minn : tmp;
997 maxx = (maxx > tmp) ? maxx : tmp;
998 desdata[i] = tmp;
999 // printf("%i ",desdata[i]);
1000 }
1001
1002 // make the vector data begin from 0 (i.e. 1st state)
1003 for (i = 0; i < len; i++)
1004 {
1005 desdata[i] -= minn;
1006 }
1007
1008 // return the #state
1009 nstate = (maxx - minn + 1);
1010
1011 return;
1012}
1013
1014
1016// //
1017// //
1018// //
1020
1021//---------------------------------------------------------
1022#endif // WITH_MRMR
void SG_UI_Msg_Add_Error(const char *Message)
#define _TW(s)
Definition api_core.h:1569
SAGA_API_DLL_EXPORT void * SG_Malloc(size_t size)
#define SG_T(s)
Definition api_core.h:537
@ SG_DATATYPE_Double
Definition api_core.h:1008
@ SG_DATATYPE_Int
Definition api_core.h:1004
@ SG_DATATYPE_String
Definition api_core.h:1009
#define _TL(s)
Definition api_core.h:1568
int Get_NX(void) const
Definition mat_tools.h:522
int Get_NY(void) const
Definition mat_tools.h:523
const SG_Char * Get_Identifier(void) const
bool asBool(void) const
Definition parameters.h:286
bool Cmp_Identifier(const CSG_String &Identifier) const
CSG_Parameter * Add_Choice(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, const CSG_String &Items, int Default=0)
void Set_Enabled(bool bEnabled=true)
CSG_Parameter * Add_Double(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, double Value=0.0, double Minimum=0.0, bool bMinimum=false, double Maximum=0.0, bool bMaximum=false)
CSG_Parameter * Add_Int(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, int Value=0, int Minimum=0, bool bMinimum=false, int Maximum=0, bool bMaximum=false)
CSG_Parameter * Add_Bool(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, bool Value=false)
int Cmp(const CSG_String &String) const
static CSG_String Format(const char *Format,...)
const SG_Char * Get_Field_Name(int Field) const
Definition table.h:362
sLong Get_Count(void) const
Definition table.h:400
bool Set_Index(CSG_Index &Index, int Field, bool bAscending=true) const
Definition table.cpp:1508
int Get_Field_Count(void) const
Definition table.h:361
virtual bool Add_Field(const CSG_String &Name, TSG_Data_Type Type, int Position=-1)
Definition table.cpp:479
bool Del_Index(void)
Definition table.cpp:1419
@ TABLE_INDEX_Ascending
Definition table.h:105