#b-navbar { height:0px; display:none; visibility:hidden; }

ページ

2014年6月22日日曜日

PHPで相関係数(行列)を計算する

参考:http://lib.stat.cmu.edu/multi/pca.c


/** 
* Creates a correlation matrix from a given data matrix.  
* 
* @see http://lib.stat.cmu.edu/multi/pca.c 
* 
* @author   Paul Meagher 
* @version  0.2 
* @modified Mar 23, 2008 
*/ 

   
     
class CorrelationMatrix { 
     
    private $eps    = 0.005; 

    public $nrows   = 0; 
    public $ncols   = 0;     
     
    public $means   = array(); 
    public $stddevs = array();     
    public $cormat  = array();     
     
  function CorrelationMatrix($data) { 
                 
    // num rows 
    $this->nrows = count($data);    
     
    // num cols 
    $this->ncols = count($data[0]);  
     
    // Determine mean of column vectors of input data matrix  
    for ($j=0; $j < $this->ncols; $j++) { 
      $this->means[$j] = 0.0; 
      for($i=0; $i < $this->nrows; $i++)  
        $this->means[$j] += $data[$i][$j]; 
      $this->means[$j] /= $this->nrows; 
    } 
           
    // Determine standard deviations of column vectors of data matrix.  
    for ($j=0; $j < $this->ncols; $j++) { 
      $this->stddevs[$j] = 0.0; 
      for ($i=0; $i < $this->nrows; $i++)  
        $this->stddevs[$j] += (($data[$i][$j]-$this->means[$j])*($data[$i][$j]-$this->means[$j])); 
      $this->stddevs[$j] /= $this->nrows; 
      $this->stddevs[$j] = sqrt($this->stddevs[$j]); 
      // The following in an inelegant but usual way to handle 
      // near-zero stddev values, which would cause a zero- 
      // divide error.  
      if ($this->stddevs[$j] <= $this->eps)  
        $this->stddevs[$j] = 1.0; 
    } 
     
    // Center and reduce the column vectors.  
    for ($i=0; $i < $this->nrows; $i++) { 
      for ($j=0; $j < $this->ncols; $j++) { 
        $data[$i][$j] -= $this->means[$j]; 
        $x = sqrt($this->nrows); 
        $x *= $this->stddevs[$j]; 
        $data[$i][$j] /= $x; 
      } 
    } 
   
    // Calculate the m * m correlation matrix.  
    for ($j1=0; $j1 < $this->ncols-1; $j1++) { 
      $this->cormat[$j1][$j1] = 1.0; 
      for ($j2=$j1+1; $j2 < $this->ncols; $j2++) { 
        $this->cormat[$j1][$j2] = 0.0; 
        for ($i=0; $i < $this->nrows; $i++) 
          $this->cormat[$j1][$j2] += ( $data[$i][$j1] * $data[$i][$j2]); 
        $this->cormat[$j2][$j1] = $this->cormat[$j1][$j2]; 
      } 
    } 
     
    $this->cormat[$this->ncols-1][$this->ncols-1] = 1.0; 
     
  } 
   
  function printMeans() {   
    printf("Column Means: 
"); 
    for ($j=0; $j < $this->ncols; $j++)   
      printf("%7.2f", $this->means[$j]);   
    printf("
");   
  } 

  function printStdDevs() { 
    printf("Column Standard Deviations: 
"); 
    for ($j=0; $j < $this->ncols; $j++)  
      printf("%7.2f", $this->stddevs[$j]);  
    printf("
"); 
  } 

  function printCorMat() { 
    printf("Correlation Matrix: 
"); 
    for ($i=0; $i < $this->ncols; $i++) {      
      for ($j=0; $j < $this->ncols; $j++)  
        //echo $this->cormat[$i][$j]." "; 
        printf("%7.4f", $this->cormat[$i][$j]);  
      printf("
"); 
    } 
  } 
       
} 


動作デモ

include "CorrelationMatrix.php";
 
$data[0] = array(1, 2, 3);
$data[1] = array(2, 3, 4);
$data[2] = array(3, 4, 5);
$data[3] = array(4, 6, 8);
$data[4] = array(5, 8, 10);
 
$cm = new CorrelationMatrix($data);
print "
";
$cm->printMeans();
print "
"; $cm->printStdDevs(); print "
"; $cm->printCorMat(); print "
"; print_r ( $cm->cormat ) ; print "
";

デモ出力


Column Means: 
   3.00   4.60   6.00

Column Standard Deviations: 
   1.41   2.15   2.61
Correlation Matrix: 
 1.0000 0.9848 0.9762
 0.9848 1.0000 0.9970
 0.9762 0.9970 1.0000

Array
(
    [0] => Array
        (
            [0] => 1
            [1] => 0.98479824644792
            [2] => 0.97618706018395
        )

    [1] => Array
        (
            [0] => 0.98479824644792
            [1] => 1
            [2] => 0.9969527608178
        )

    [2] => Array
        (
            [0] => 0.97618706018395
            [1] => 0.9969527608178
            [2] => 1
        )

)


10行目の、$cm = new CorrelationMatrix($data); の配列に
結果数値が入って帰ってくる。

0 件のコメント:

コメントを投稿