SAGA API  v9.5
mat_mRMR.cpp
Go to the documentation of this file.
1 
3 // //
4 // SAGA //
5 // //
6 // System for Automated Geoscientific Analyses //
7 // //
8 // Application Programming Interface //
9 // //
10 // Library: SAGA_API //
11 // //
12 //-------------------------------------------------------//
13 // //
14 // mRMR.cpp //
15 // //
16 // Copyright (C) 2014 by //
17 // Olaf Conrad //
18 // //
19 //-------------------------------------------------------//
20 // //
21 // This file is part of 'SAGA - System for Automated //
22 // Geoscientific Analyses'. //
23 // //
24 // This library is free software; you can redistribute //
25 // it and/or modify it under the terms of the GNU Lesser //
26 // General Public License as published by the Free //
27 // Software Foundation, either version 2.1 of the //
28 // License, or (at your option) any later version. //
29 // //
30 // This library is distributed in the hope that it will //
31 // be useful, but WITHOUT ANY WARRANTY; without even the //
32 // implied warranty of MERCHANTABILITY or FITNESS FOR A //
33 // PARTICULAR PURPOSE. See the GNU Lesser General Public //
34 // License for more details. //
35 // //
36 // You should have received a copy of the GNU Lesser //
37 // General Public License along with this program; if //
38 // not, see <http://www.gnu.org/licenses/>. //
39 // //
40 //-------------------------------------------------------//
41 // //
42 // e-mail: oconrad@saga-gis.org //
43 // //
44 // contact: Olaf Conrad //
45 // Institute of Geography //
46 // University of Hamburg //
47 // Germany //
48 // //
50 
51 //---------------------------------------------------------
52 #ifdef WITH_MRMR
53 
54 //---------------------------------------------------------
55 // A C++ program to implement the mRMR selection using mutual information
56 // written by Hanchuan Peng.
57 //
58 // Disclaimer: The author of program is Hanchuan Peng
59 // at <penghanchuan@yahoo.com>.
60 //
61 // The CopyRight is reserved by the author.
62 //
63 // Last modification: Jan/25/2007
64 //
65 // by Hanchuan Peng
66 // 2005-10-24: finalize the computing parts of the whole program
67 // 2005-10-25: add non-discretization for the classification variable. Also slightly change some output info for the web application
68 // 2005-11-01: add control to the user-defined max variable number and sample number
69 // 2006-01-26: add gnu_getline.c to convert the code to be compilable under Max OS.
70 // 2007-01-25: change the address info
71 
72 //---------------------------------------------------------
73 #include "mat_tools.h"
74 #include "parameters.h"
75 #include "table.h"
76 
77 
79 // //
80 // //
81 // //
83 
84 //---------------------------------------------------------
85 #define DELETE_ARRAY(p) if( p ) { delete[]p; p = NULL; }
86 
87 //---------------------------------------------------------
88 enum ESG_mRMR_Selection
89 {
90  SG_mRMR_SELECTION_RANK = 0,
91  SG_mRMR_SELECTION_INDEX,
92  SG_mRMR_SELECTION_NAME,
93  SG_mRMR_SELECTION_SCORE
94 };
95 
96 
98 // //
100 
101 //---------------------------------------------------------
102 CSG_mRMR::CSG_mRMR(void)
103 {
104  m_Samples = NULL;
105  m_nSamples = 0;
106  m_nVars = 0;
107  m_bDiscretized = false; // initialize the data as continous
108  m_bVerbose = false;
109 
110  m_pSelection = new CSG_Table;
111 
112  m_pSelection->Add_Field("RANK" , SG_DATATYPE_Int ); // mRMR_SELFLD_RANK
113  m_pSelection->Add_Field("INDEX", SG_DATATYPE_Int ); // mRMR_SELFLD_INDEX
114  m_pSelection->Add_Field("NAME" , SG_DATATYPE_String); // mRMR_SELFLD_NAME
115  m_pSelection->Add_Field("SCORE", SG_DATATYPE_Double); // mRMR_SELFLD_SCORE
116 }
117 
118 //---------------------------------------------------------
119 CSG_mRMR::~CSG_mRMR(void)
120 {
121  Destroy();
122 
123  delete(m_pSelection);
124 }
125 
126 //---------------------------------------------------------
127 void CSG_mRMR::Destroy(void)
128 {
129  if( m_Samples )
130  {
131  DELETE_ARRAY(m_Samples[0]);
132  DELETE_ARRAY(m_Samples);
133  }
134 
135  m_VarNames .Clear();
136  m_nSamples = 0;
137  m_nVars = 0;
138  m_bDiscretized = false; // initialize the data as continous
139 
140  m_pSelection ->Del_Records();
141 }
142 
143 
145 // //
147 
148 //---------------------------------------------------------
149 CSG_String CSG_mRMR::Get_Description(void)
150 {
151  return( _TW(
152  "The minimum Redundancy Maximum Relevance (mRMR) feature selection "
153  "algorithm has been developed by Hanchuan Peng <hanchuan.peng@gmail.com>.\n"
154  "\n"
155  "References:\n"
156  "Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy. "
157  "Hanchuan Peng, Fuhui Long, and Chris Ding, "
158  "IEEE Transactions on Pattern Analysis and Machine Intelligence, "
159  "Vol. 27, No. 8, pp.1226-1238, 2005.\n"
160  "\n"
161  "Minimum redundancy feature selection from microarray gene expression data,\n"
162  "Chris Ding, and Hanchuan Peng, "
163  "Journal of Bioinformatics and Computational Biology, "
164  "Vol. 3, No. 2, pp.185-205, 2005.\n"
165  "\n"
166  "Hanchuan Peng's mRMR Homepage at "
167  "<a target=\"_blank\" href=\"http://penglab.janelia.org/proj/mRMR/\">"
168  "http://penglab.janelia.org/proj/mRMR/</a>\n"
169  ));
170 }
171 
172 
174 // //
176 
177 //---------------------------------------------------------
178 bool CSG_mRMR::Parameters_Add(CSG_Parameters *pParameters, CSG_Parameter *pNode)
179 {
180  CSG_String ParentID(pNode ? pNode->Get_Identifier() : SG_T(""));
181 
182  pParameters->Add_Int(
183  ParentID, "mRMR_NFEATURES" , _TL("Number of Features"),
184  _TL(""),
185  50, 1, true
186  );
187 
188  pParameters->Add_Bool(
189  ParentID, "mRMR_DISCRETIZE" , _TL("Discretization"),
190  _TL("uncheck this means no discretizaton (i.e. data is already integer)"),
191  true
192  );
193 
194  pParameters->Add_Double(
195  ParentID, "mRMR_THRESHOLD" , _TL("Discretization Threshold"),
196  _TL("a double number of the discretization threshold; set to 0 to make binarization"),
197  1.0, 0.0, true
198  );
199 
200  pParameters->Add_Choice(
201  ParentID, "mRMR_METHOD" , _TL("Selection Method"),
202  _TL(""),
203  CSG_String::Format("%s|%s|",
204  _TL("Mutual Information Difference (MID)"),
205  _TL("Mutual Information Quotient (MIQ)")
206  ), 0
207  );
208 
209  return( true );
210 }
211 
212 //---------------------------------------------------------
213 int CSG_mRMR::Parameters_Enable(CSG_Parameters *pParameters, CSG_Parameter *pParameter)
214 {
215  if( pParameter->Cmp_Identifier("mRMR_DISCRETIZE") )
216  {
217  pParameters->Set_Enabled("mRMR_THRESHOLD", pParameter->asBool());
218  }
219 
220  return( 1 );
221 }
222 
223 //---------------------------------------------------------
224 bool CSG_mRMR::Set_Data(CSG_Table &Data, int ClassField, CSG_Parameters *pParameters)
225 {
226  bool bDiscretize = (*pParameters)("mRMR_DISCRETIZE") ? (*pParameters)("mRMR_DISCRETIZE")->asBool () : true;
227  double Threshold = (*pParameters)("mRMR_THRESHOLD" ) ? (*pParameters)("mRMR_THRESHOLD" )->asDouble() : 1.0;
228 
229  return( Set_Data(Data, ClassField, bDiscretize ? Threshold : -1.0) );
230 }
231 
232 bool CSG_mRMR::Set_Data(CSG_Matrix &Data, int ClassField, CSG_Parameters *pParameters)
233 {
234  bool bDiscretize = (*pParameters)("mRMR_DISCRETIZE") ? (*pParameters)("mRMR_DISCRETIZE")->asBool () : true;
235  double Threshold = (*pParameters)("mRMR_THRESHOLD" ) ? (*pParameters)("mRMR_THRESHOLD" )->asDouble() : 1.0;
236 
237  return( Set_Data(Data, ClassField, bDiscretize ? Threshold : -1.0) );
238 }
239 
240 //---------------------------------------------------------
241 bool CSG_mRMR::Get_Selection(CSG_Parameters *pParameters)
242 {
243  int nFeatures = (*pParameters)("mRMR_NFEATURES") ? (*pParameters)("mRMR_NFEATURES")->asInt() : 50;
244  int Method = (*pParameters)("mRMR_METHOD" ) ? (*pParameters)("mRMR_METHOD" )->asInt() : 0;
245 
246  return( Get_Selection(nFeatures, Method) );
247 }
248 
249 
251 // //
253 
254 //---------------------------------------------------------
255 int CSG_mRMR::Get_Count (void) const
256 {
257  return( (int)m_pSelection->Get_Count() );
258 }
259 
260 int CSG_mRMR::Get_Index(int i) const
261 {
262  return( m_pSelection->Get_Record(i)->asInt (SG_mRMR_SELECTION_INDEX) );
263 }
264 
265 CSG_String CSG_mRMR::Get_Name(int i) const
266 {
267  return( m_pSelection->Get_Record(i)->asString(SG_mRMR_SELECTION_NAME ) );
268 }
269 
270 double CSG_mRMR::Get_Score(int i) const
271 {
272  return( m_pSelection->Get_Record(i)->asDouble(SG_mRMR_SELECTION_SCORE) );
273 }
274 
275 
277 // //
279 
280 //---------------------------------------------------------
281 #define ADD_MESSAGE(Message) if( m_bVerbose ) SG_UI_Msg_Add_Execution(CSG_String(Message) + "\n", false);
282 #define ADD_WARNING(Message) if( m_bVerbose ) SG_UI_Msg_Add_Execution(_TL("Warning") + CSG_String(": ") + CSG_String(Message) + "\n", false, SG_UI_MSG_STYLE_FAILURE);
283 
284 
286 // //
288 
289 //---------------------------------------------------------
290 bool CSG_mRMR::Get_Memory(int nVars, int nSamples)
291 {
292  Destroy();
293 
294  //-----------------------------------------------------
295  m_nVars = nVars;
296 
297  if( m_nVars <= 0 )
298  {
299  SG_UI_Msg_Add_Error("no features");
300 
301  return( false );
302  }
303 
304  m_nSamples = nSamples;
305 
306  if( m_nSamples <= 0 )
307  {
308  SG_UI_Msg_Add_Error("no samples");
309 
310  return( false );
311  }
312 
313  //-----------------------------------------------------
314  m_Samples = new double *[m_nSamples];
315 
316  if( !m_Samples )
317  {
318  SG_UI_Msg_Add_Error("failed to allocate memory.");
319 
320  return( false );
321  }
322 
323  m_Samples[0] = new double[m_nSamples * m_nVars];
324 
325  if( !m_Samples[0] )
326  {
327  SG_UI_Msg_Add_Error("failed to allocate memory.");
328 
329  return( false );
330  }
331 
332  return( true );
333 }
334 
335 //---------------------------------------------------------
336 bool CSG_mRMR::Set_Data(CSG_Table &Data, int ClassField, double Threshold)
337 {
338  if( !Get_Memory(Data.Get_Field_Count(), (int)Data.Get_Count()) )
339  {
340  return( false );
341  }
342 
343  //-----------------------------------------------------
344  if( ClassField < 0 || ClassField >= m_nVars )
345  {
346  ClassField = 0;
347  }
348 
349  Data.Set_Index(ClassField, TABLE_INDEX_Ascending);
350 
351  CSG_String s;
352 
353  for(int iSample=0, Class=0; iSample<m_nSamples; iSample++)
354  {
355  double *pData = m_Samples[iSample] = m_Samples[0] + iSample * m_nVars;
356 
357  if( s.Cmp(Data[iSample].asString(ClassField)) )
358  {
359  s = Data[iSample].asString(ClassField);
360 
361  Class++;
362  }
363 
364  *pData++ = Class;
365 
366  for(int iVar=0; iVar<m_nVars; iVar++)
367  {
368  if( iVar != ClassField )
369  {
370  *pData++ = Data[iSample].asDouble(iVar);
371  }
372  }
373  }
374 
375  Data.Del_Index();
376 
377  m_VarNames += Data.Get_Field_Name(ClassField);
378 
379  for(int iVar=0; iVar<m_nVars; iVar++)
380  {
381  if( iVar != ClassField )
382  {
383  m_VarNames += Data.Get_Field_Name(iVar);
384  }
385  }
386 
387  //-----------------------------------------------------
388  if( Threshold >= 0.0 ) // discretization
389  {
390  Discretize(Threshold);
391  }
392 
393  return( true );
394 }
395 
396 //---------------------------------------------------------
397 bool CSG_mRMR::Set_Data(CSG_Matrix &Data, int ClassField, double Threshold)
398 {
399  if( !Get_Memory(Data.Get_NX(), Data.Get_NY()) )
400  {
401  return( false );
402  }
403 
404  //-----------------------------------------------------
405  if( ClassField < 0 || ClassField >= m_nVars )
406  {
407  ClassField = 0;
408  }
409 
410  for(int iSample=0; iSample<m_nSamples; iSample++)
411  {
412  double *pData = m_Samples[iSample] = m_Samples[0] + iSample * m_nVars;
413 
414  *pData++ = Data[iSample][ClassField];
415 
416  for(int iVar=0; iVar<m_nVars; iVar++)
417  {
418  if( iVar != ClassField )
419  {
420  *pData++ = Data[iSample][iVar];
421  }
422  }
423  }
424 
425  m_VarNames += "CLASS";
426 
427  for(int iVar=0; iVar<m_nVars; iVar++)
428  {
429  if( iVar != ClassField )
430  {
431  m_VarNames += CSG_String::Format(SG_T("FEATURE_%02d"), iVar);
432  }
433  }
434 
435  //-----------------------------------------------------
436  if( Threshold >= 0.0 ) // discretization
437  {
438  Discretize(Threshold);
439  }
440 
441  return( true );
442 }
443 
444 
446 // //
448 
449 //---------------------------------------------------------
450 bool CSG_mRMR::Discretize(double Threshold)
451 {
452  if( !m_Samples[0] || Threshold < 0.0 || m_bDiscretized )
453  {
454  return( false );
455  }
456 
457  long i, j; // exclude the first column
458 
459  //-----------------------------------------------------
460  for(j=1; j<m_nVars; j++) // z-score
461  {
462  double cursum = 0;
463  double curmean = 0;
464  double curstd = 0;
465 
466  for(i=0; i<m_nSamples; i++)
467  {
468  cursum += m_Samples[i][j];
469  }
470 
471  curmean = cursum / m_nSamples;
472  cursum = 0;
473 
474  double tmpf;
475 
476  for(i=0; i<m_nSamples; i++)
477  {
478  tmpf = m_Samples[i][j] - curmean;
479  cursum += tmpf * tmpf;
480  }
481 
482  curstd = (m_nSamples == 1) ? 0 : sqrt(cursum / (m_nSamples - 1)); // m_nSamples -1 is an unbiased version for Gaussian
483 
484  for(i=0; i<m_nSamples; i++)
485  {
486  m_Samples[i][j] = (m_Samples[i][j] - curmean) / curstd;
487  }
488  }
489 
490  //-----------------------------------------------------
491  for(j=1; j<m_nVars; j++)
492  {
493  double tmpf;
494 
495  for(i=0; i<m_nSamples; i++)
496  {
497  tmpf = m_Samples[i][j];
498 
499  if( tmpf > Threshold )
500  {
501  tmpf = 1;
502  }
503  else if( tmpf < -Threshold )
504  {
505  tmpf = -1;
506  }
507  else
508  {
509  tmpf = 0;
510  }
511 
512  m_Samples[i][j] = tmpf;
513  }
514  }
515 
516  m_bDiscretized = true;
517 
518  return( true );
519 }
520 
521 
523 // //
525 
526 //---------------------------------------------------------
527 #define ADD_FEATURE(rank, score) {\
528  CSG_Table_Record *pFeature = m_pSelection->Add_Record();\
529  \
530  pFeature->Set_Value(SG_mRMR_SELECTION_RANK , rank + 1);\
531  pFeature->Set_Value(SG_mRMR_SELECTION_INDEX, feaInd[rank]);\
532  pFeature->Set_Value(SG_mRMR_SELECTION_NAME , m_VarNames[(size_t)feaInd[rank]]);\
533  pFeature->Set_Value(SG_mRMR_SELECTION_SCORE, score);\
534  \
535  ADD_MESSAGE(CSG_String::Format(SG_T("%d \t %d \t %s \t %5.3f"),\
536  rank + 1, feaInd[rank], m_VarNames[(size_t)feaInd[rank]].c_str(), score)\
537  );\
538 }
539 
540 //---------------------------------------------------------
541 int CSG_mRMR::Pool_Compare(const void *a, const void *b)
542 {
543  if( ((TPool *)a)->mival < ((TPool *)b)->mival )
544  return( -1 );
545 
546  if( ((TPool *)a)->mival > ((TPool *)b)->mival )
547  return( 1 );
548 
549  return( 0 );
550 }
551 
552 //---------------------------------------------------------
553 bool CSG_mRMR::Get_Selection(int nFeatures, int Method)
554 {
555  m_pSelection->Del_Records();
556 
557  if( !m_Samples[0] )
558  {
559  SG_UI_Msg_Add_Error("The input data is NULL.");
560 
561  return( false );
562  }
563 
564  if( nFeatures < 0 )
565  {
566  SG_UI_Msg_Add_Error("The input number of features is negative.");
567 
568  return( false );
569  }
570 
571  long poolUseFeaLen = 500;
572  if( poolUseFeaLen > m_nVars - 1 ) // there is a target variable (the first one), that is why must remove one
573  poolUseFeaLen = m_nVars - 1;
574 
575  if( nFeatures > poolUseFeaLen )
576  nFeatures = poolUseFeaLen;
577 
578  long *feaInd = new long[nFeatures];
579 
580  if( !feaInd )
581  {
582  SG_UI_Msg_Add_Error("Fail to allocate memory.");
583 
584  return( false );
585  }
586 
587  //-----------------------------------------------------
588  // determine the pool
589 
590  TPool *Pool = (TPool *)SG_Malloc(m_nVars * sizeof(TPool));
591 
592  if( !Pool )
593  {
594  SG_UI_Msg_Add_Error("Fail to allocate memory.");
595 
596  return( false );
597  }
598 
599  //-----------------------------------------------------
600  long i, j, k;
601 
602  for(i=0; i<m_nVars; i++) // the Pool[0].mival is the entropy of target classification variable
603  {
604  Pool[i].mival = -Get_MutualInfo(0, i); // set as negative for sorting purpose
605  Pool[i].Index = i;
606  Pool[i].Mask = 1; // initialized to be everything in pool would be considered
607  }
608 
609  qsort(Pool + 1, m_nVars - 1, sizeof(TPool), Pool_Compare);
610 
611  Pool[0].mival = -Pool[0].mival;
612 
613  ADD_MESSAGE(CSG_String::Format(
614  SG_T("\nTarget classification variable (#%d column in the input data) has name=%s \tentropy score=%5.3f"),
615  0 + 1, m_VarNames[0].c_str(), Pool[0].mival
616  ));
617 
618  ADD_MESSAGE("\n*** MaxRel features ***");
619  ADD_MESSAGE("Order\tFea\tName\tScore");
620 
621  for(i=1; i<m_nVars-1; i++)
622  {
623  Pool[i].mival = -Pool[i].mival;
624 
625  if( i <= nFeatures )
626  {
627  ADD_MESSAGE(CSG_String::Format(SG_T("%d \t %d \t %s \t %5.3f"),
628  i, (int)Pool[i].Index, m_VarNames[(int)Pool[i].Index].c_str(), Pool[i].mival
629  ));
630  }
631  }
632 
633  //-----------------------------------------------------
634  // mRMR selection
635 
636  long poolFeaIndMin = 1;
637  long poolFeaIndMax = poolFeaIndMin + poolUseFeaLen - 1;
638 
639  feaInd[0] = Pool[1].Index;
640  Pool[feaInd[0]].Mask = 0; // after selection, no longer consider this feature
641  Pool[0 ].Mask = 0; // of course the first one, which corresponds to the classification variable, should not be considered. Just set the mask as 0 for safety.
642 
643  ADD_MESSAGE("\n*** mRMR features ***");
644  ADD_MESSAGE("Order\tFea\tName\tScore");
645 
646  ADD_FEATURE(0, Pool[1].mival);
647 
648  for(k=1; k<nFeatures; k++) //the first one, feaInd[0] has been determined already
649  {
650  double relevanceVal, redundancyVal, tmpscore, selectscore;
651  long selectind;
652  int b_firstSelected = 0;
653 
654  for(i=poolFeaIndMin; i<=poolFeaIndMax; i++)
655  {
656  if( Pool[Pool[i].Index].Mask == 0 )
657  {
658  continue; // skip this feature as it was selected already
659  }
660 
661  relevanceVal = Get_MutualInfo(0, Pool[i].Index); // actually no necessary to re-compute it, this value can be retrieved from Pool[].mival vector
662  redundancyVal = 0;
663 
664  for(j=0; j<k; j++)
665  {
666  redundancyVal += Get_MutualInfo(feaInd[j], Pool[i].Index);
667  }
668 
669  redundancyVal /= k;
670 
671  switch( Method )
672  {
673  default: // fprintf(stderr, "Undefined selection method=%d. Use MID instead.\n", mymethod);
674  case SG_mRMR_Method_MID: tmpscore = relevanceVal - redundancyVal; break;
675  case SG_mRMR_Method_MIQ: tmpscore = relevanceVal / (redundancyVal + 0.0001); break;
676  }
677 
678  if( b_firstSelected == 0 )
679  {
680  selectscore = tmpscore;
681  selectind = Pool[i].Index;
682  b_firstSelected = 1;
683  }
684  else if( tmpscore > selectscore )
685  { //update the best feature found and the score
686  selectscore = tmpscore;
687  selectind = Pool[i].Index;
688  }
689  }
690 
691  feaInd[k] = selectind;
692  Pool[selectind].Mask = 0;
693 
694  ADD_FEATURE(k, selectscore);
695  }
696 
697  //-----------------------------------------------------
698  return( true );
699 }
700 
701 
703 // //
705 
706 //---------------------------------------------------------
707 double CSG_mRMR::Get_MutualInfo(long v1, long v2)
708 {
709  double mi = -1; // initialized as an illegal value
710 
711  if( !m_Samples[0] )
712  {
713  SG_UI_Msg_Add_Error("The input data is NULL.");
714 
715  return mi;
716  }
717 
718  if( v1 >= m_nVars || v2 >= m_nVars || v1 < 0 || v2 < 0 )
719  {
720  SG_UI_Msg_Add_Error("The input variable indexes are invalid (out of range).");
721 
722  return mi;
723  }
724 
725  //-----------------------------------------------------
726  // copy data
727 
728  int *v1data = new int[m_nSamples];
729  int *v2data = new int[m_nSamples];
730 
731  if( !v1data || !v2data )
732  {
733  SG_UI_Msg_Add_Error("Fail to allocate memory.");
734 
735  return mi;
736  }
737 
738  for(long i=0; i<m_nSamples; i++)
739  {
740  v1data[i] = (int)m_Samples[i][v1]; // the double already been discretized, should be safe now
741  v2data[i] = (int)m_Samples[i][v2];
742  }
743 
744  //-----------------------------------------------------
745  // compute mutual info
746 
747  long nstate = 3; // always true for DataTable, which was discretized as three states
748 
749  int nstate1 = 0, nstate2 = 0;
750 
751  double *pab = Get_JointProb(v1data, v2data, m_nSamples, nstate, nstate1, nstate2);
752 
753  mi = Get_MutualInfo(pab, nstate1, nstate2);
754 
755  //-----------------------------------------------------
756  // free memory and return
757 
758  DELETE_ARRAY(v1data);
759  DELETE_ARRAY(v2data);
760  DELETE_ARRAY(pab );
761 
762  return mi;
763 }
764 
765 //---------------------------------------------------------
766 double CSG_mRMR::Get_MutualInfo(double *pab, long pabhei, long pabwid)
767 {
768  //-----------------------------------------------------
769  // check if parameters are correct
770 
771  if( !pab )
772  {
773  SG_UI_Msg_Add_Error("Got illeagal parameter in compute_mutualinfo().");
774 
775  return( -1.0 );
776  }
777 
778  //-----------------------------------------------------
779  long i, j;
780 
781  double **pab2d = new double *[pabwid];
782 
783  for(j=0; j<pabwid; j++)
784  {
785  pab2d[j] = pab + (long)j * pabhei;
786  }
787 
788  double *p1 = 0, *p2 = 0;
789  long p1len = 0, p2len = 0;
790  int b_findmarginalprob = 1;
791 
792  //-----------------------------------------------------
793  //generate marginal probability arrays
794 
795  if( b_findmarginalprob != 0 )
796  {
797  p1len = pabhei;
798  p2len = pabwid;
799  p1 = new double[p1len];
800  p2 = new double[p2len];
801 
802  for (i = 0; i < p1len; i++) p1[i] = 0;
803  for (j = 0; j < p2len; j++) p2[j] = 0;
804 
805  for (i = 0; i < p1len; i++) //p1len = pabhei
806  {
807  for (j = 0; j < p2len; j++) //p2len = panwid
808  {
809  // printf("%f ",pab2d[j][i]);
810  p1[i] += pab2d[j][i];
811  p2[j] += pab2d[j][i];
812  }
813  }
814  }
815 
816  //-----------------------------------------------------
817  // calculate the mutual information
818 
819  double muInf = 0;
820 
821  muInf = 0.0;
822 
823  for (j = 0; j < pabwid; j++) // should for p2
824  {
825  for (i = 0; i < pabhei; i++) // should for p1
826  {
827  if (pab2d[j][i] != 0 && p1[i] != 0 && p2[j] != 0)
828  {
829  muInf += pab2d[j][i] * log (pab2d[j][i] / p1[i] / p2[j]);
830  // printf("%f %fab %fa %fb\n",muInf,pab2d[j][i]/p1[i]/p2[j],p1[i],p2[j]);
831  }
832  }
833  }
834 
835  muInf /= log(2.0);
836 
837  //-----------------------------------------------------
838  // free memory
839 
840  DELETE_ARRAY(pab2d);
841 
842  if( b_findmarginalprob != 0 )
843  {
844  DELETE_ARRAY(p1);
845  DELETE_ARRAY(p2);
846  }
847 
848  return muInf;
849 }
850 
851 
853 // //
855 
856 //---------------------------------------------------------
857 template <class T> double * CSG_mRMR::Get_JointProb(T * img1, T * img2, long len, long maxstatenum, int &nstate1, int &nstate2)
858 {
859  long i, j;
860 
861  //-----------------------------------------------------
862  // get and check size information
863 
864  if (!img1 || !img2 || len < 0)
865  {
866  SG_UI_Msg_Add_Error("At least one of the input vectors is invalid.");
867 
868  return( NULL );
869  }
870 
871 // int b_findstatenum = 1; // int nstate1 = 0, nstate2 = 0;
872  int b_returnprob = 1;
873 
874  //-----------------------------------------------------
875  // copy data into new INT type array (hence quantization) and then reange them begin from 0 (i.e. state1)
876 
877  int *vec1 = new int[len];
878  int *vec2 = new int[len];
879 
880  if( !vec1 || !vec2 )
881  {
882  SG_UI_Msg_Add_Error("Fail to allocate memory.\n");
883 
884  return( NULL );
885  }
886 
887  int nrealstate1 = 0, nrealstate2 = 0;
888 
889  Copy_Vector(img1, len, vec1, nrealstate1);
890  Copy_Vector(img2, len, vec2, nrealstate2);
891 
892  //update the #state when necessary
893  nstate1 = (nstate1 < nrealstate1) ? nrealstate1 : nstate1;
894  //printf("First vector #state = %i\n",nrealstate1);
895  nstate2 = (nstate2 < nrealstate2) ? nrealstate2 : nstate2;
896  //printf("Second vector #state = %i\n",nrealstate2);
897 
898  // if (nstate1!=maxstatenum || nstate2!=maxstatenum)
899  // printf("find nstate1=%d nstate2=%d\n", nstate1, nstate2);
900 
901  //-----------------------------------------------------
902  // generate the joint-distribution table
903 
904  double *hab = new double[nstate1 * nstate2];
905  double **hab2d = new double *[nstate2];
906 
907  if( !hab || !hab2d )
908  {
909  SG_UI_Msg_Add_Error("Fail to allocate memory.");
910 
911  return( NULL );
912  }
913 
914  for(j=0; j<nstate2; j++)
915  {
916  hab2d[j] = hab + (long)j * nstate1;
917  }
918 
919  for(i=0; i<nstate1; i++)
920  {
921  for(j=0; j<nstate2; j++)
922  {
923  hab2d[j][i] = 0;
924  }
925  }
926 
927  for(i=0; i<len; i++)
928  {
929  // old method -- slow
930  // indx = (long)(vec2[i]) * nstate1 + vec1[i];
931  // hab[indx] += 1;
932 
933  // new method -- fast
934  hab2d[vec2[i]][vec1[i]] += 1;
935  //printf("vec2[%d]=%d vec1[%d]=%d\n", i, vec2[i], i, vec1[i]);
936  }
937 
938  //-----------------------------------------------------
939  // return the probabilities, otherwise return count numbers
940 
941  if( b_returnprob )
942  {
943  for(i=0; i<nstate1; i++)
944  {
945  for(j=0; j<nstate2; j++)
946  {
947  hab2d[j][i] /= len;
948  }
949  }
950  }
951 
952  //-----------------------------------------------------
953  // finish
954 
955  DELETE_ARRAY(hab2d);
956  DELETE_ARRAY(vec1);
957  DELETE_ARRAY(vec2);
958 
959  return hab;
960 }
961 
962 //---------------------------------------------------------
963 template <class T> void CSG_mRMR::Copy_Vector(T *srcdata, long len, int *desdata, int &nstate)
964 {
965  if (!srcdata || !desdata)
966  {
967  SG_UI_Msg_Add_Error("no points in Copy_Vector()!");
968 
969  return;
970  }
971 
972  // note: originally I added 0.5 before rounding, however seems the negative numbers and
973  // positive numbers are all rounded towarded 0; hence int(-1+0.5)=0 and int(1+0.5)=1;
974  // This is unwanted because I need the above to be -1 and 1.
975  // for this reason I just round with 0.5 adjustment for positive and negative differently
976 
977  // copy data
978  int minn, maxx;
979  if (srcdata[0] > 0)
980  {
981  maxx = minn = int (srcdata[0] + 0.5);
982  }
983  else
984  {
985  maxx = minn = int (srcdata[0] - 0.5);
986  }
987 
988  long i;
989  int tmp;
990  double tmp1;
991 
992  for (i = 0; i < len; i++)
993  {
994  tmp1 = double (srcdata[i]);
995  tmp = (tmp1 > 0) ? (int) (tmp1 + 0.5) : (int) (tmp1 - 0.5); //round to integers
996  minn = (minn < tmp) ? minn : tmp;
997  maxx = (maxx > tmp) ? maxx : tmp;
998  desdata[i] = tmp;
999  // printf("%i ",desdata[i]);
1000  }
1001 
1002  // make the vector data begin from 0 (i.e. 1st state)
1003  for (i = 0; i < len; i++)
1004  {
1005  desdata[i] -= minn;
1006  }
1007 
1008  // return the #state
1009  nstate = (maxx - minn + 1);
1010 
1011  return;
1012 }
1013 
1014 
1016 // //
1017 // //
1018 // //
1020 
1021 //---------------------------------------------------------
1022 #endif // WITH_MRMR
SG_DATATYPE_Int
@ SG_DATATYPE_Int
Definition: api_core.h:1000
SG_T
#define SG_T(s)
Definition: api_core.h:537
SG_DATATYPE_String
@ SG_DATATYPE_String
Definition: api_core.h:1005
_TL
#define _TL(s)
Definition: api_core.h:1489
_TW
#define _TW(s)
Definition: api_core.h:1490
CSG_Parameter::Get_Identifier
const SG_Char * Get_Identifier(void) const
Definition: parameter.cpp:547
SG_Malloc
SAGA_API_DLL_EXPORT void * SG_Malloc(size_t size)
Definition: api_memory.cpp:65
CSG_Parameters::Add_Double
CSG_Parameter * Add_Double(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, double Value=0.0, double Minimum=0.0, bool bMinimum=false, double Maximum=0.0, bool bMaximum=false)
Definition: parameters.cpp:475
CSG_Table::Get_Field_Count
int Get_Field_Count(void) const
Definition: table.h:356
CSG_String::Cmp
int Cmp(const CSG_String &String) const
Definition: api_string.cpp:515
mat_tools.h
CSG_Parameter
Definition: parameters.h:207
CSG_Table::Get_Field_Name
const SG_Char * Get_Field_Name(int iField) const
Definition: table.h:357
CSG_Parameters::Set_Enabled
void Set_Enabled(bool bEnabled=true)
Definition: parameters.cpp:405
CSG_Table::Get_Count
sLong Get_Count(void) const
Definition: table.h:392
CSG_Parameters::Add_Int
CSG_Parameter * Add_Int(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, int Value=0, int Minimum=0, bool bMinimum=false, int Maximum=0, bool bMaximum=false)
Definition: parameters.cpp:470
CSG_Table::Del_Index
bool Del_Index(void)
Definition: table.cpp:1337
parameters.h
CSG_String::Format
static CSG_String Format(const char *Format,...)
Definition: api_string.cpp:270
CSG_Table::Add_Field
virtual bool Add_Field(const CSG_String &Name, TSG_Data_Type Type, int Position=-1)
Definition: table.cpp:481
CSG_Table
Definition: table.h:283
CSG_String
Definition: api_core.h:563
TABLE_INDEX_Ascending
@ TABLE_INDEX_Ascending
Definition: table.h:105
CSG_Parameters
Definition: parameters.h:1690
CSG_Table::Set_Index
bool Set_Index(CSG_Index &Index, int Field, bool bAscending=true) const
Definition: table.cpp:1426
SG_UI_Msg_Add_Error
void SG_UI_Msg_Add_Error(const char *Message)
Definition: api_callback.cpp:557
CSG_Parameter::Cmp_Identifier
bool Cmp_Identifier(const CSG_String &Identifier) const
Definition: parameter.cpp:553
CSG_Matrix
Definition: mat_tools.h:478
table.h
CSG_Matrix::Get_NY
int Get_NY(void) const
Definition: mat_tools.h:521
CSG_Parameters::Add_Bool
CSG_Parameter * Add_Bool(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, bool Value=false)
Definition: parameters.cpp:465
CSG_Parameter::asBool
bool asBool(void) const
Definition: parameters.h:281
CSG_Matrix::Get_NX
int Get_NX(void) const
Definition: mat_tools.h:520
CSG_Parameters::Add_Choice
CSG_Parameter * Add_Choice(const CSG_String &ParentID, const CSG_String &ID, const CSG_String &Name, const CSG_String &Description, const CSG_String &Items, int Default=0)
Definition: parameters.cpp:533
SG_DATATYPE_Double
@ SG_DATATYPE_Double
Definition: api_core.h:1004