Class for computing multiple linear regression of the form
http://mnshankar.wordpress.com/2011/05/01/regression-analysis-using-php/
Copyright (c) 2011 Shankar Manamalkav <nshankarあっとまーくufl.edu>
------------------------------------------------------------
ソースはこちら:classライブラリです。includeして使ってください。
・class Lib_Regression
・class Lib_Matrix の2つ
後半に
・テストデータ
・サンプルコード
・サンプルからの出力を掲載。
・エクセルとの比較検証もしました。
------------------------------------------------------------
class Lib_Regression
{
protected $SSEScalar; //sum of squares due to error
protected $SSRScalar; //sum of squares due to regression
protected $SSTOScalar; //Total sum of squares
protected $RSquare; //R square
protected $RSquarePValue; //R square p falue (significant f)
protected $F; //F statistic
protected $coefficients; //regression coefficients array
protected $stderrors; //standard errror array
protected $tstats; //t statistics array
protected $pvalues; //p values array
protected $x = array();
protected $y = array();
//implement singleton
/**
* An instance of this class (singleton)
* @var Lib_Regression
*/
public function setX($x)
{
$this->x = $x;
}
public function setY($y)
{
$this->y = $y;
}
public function getSSE()
{
return $this->SSEScalar;
}
public function getSSR()
{
return $this->SSRScalar;
}
public function getSSTO()
{
return $this->SSTOScalar;
}
public function getRSQUARE()
{
return $this->RSquare;
}
public function getRSQUAREPValue()
{
return $this->RSquarePValue;
}
public function getF()
{
return $this->F;
}
public function getCoefficients()
{
return $this->coefficients;
}
public function getStandardError()
{
return $this->stderrors;
}
public function getTStats()
{
return $this->tstats;
}
public function getPValues()
{
return $this->pvalues;
}
/**
* @example $reg->loadCSV('abc.csv',array(0), array(1,2,3));
* @param type $file
* @param array $xcolnumbers
* @param type $ycolnumber
*/
public function LoadCSV($file, array $ycol, array $xcol, $hasHeader=true)
{
$xarray = array();
$yarray = array();
$handle = fopen($file, "r");
//if first row has headers.. ignore
if ($hasHeader)
$data = fgetcsv($handle);
//get the data into array
while (($data = fgetcsv($handle)) !== FALSE)
{
$rawData[] = array($data);
}
$sampleSize = count($rawData);
$r = 0;
while ($r < $sampleSize)
{
$xarray[] = $this->GetArray($rawData, $xcol, $r, true);
$yarray[] = $this->GetArray($rawData, $ycol, $r); //y always has 1 col!
$r++;
}
$this->x = $xarray;
$this->y = $yarray;
}
private function GetArray($rawData, $cols, $r, $incIntercept=false)
{
$arrIdx = "";
$z = 0;
$arr = array();
if ($incIntercept)
//prepend an all 1's column for the intercept
$arr[]=1;
foreach ($cols as $key => $val)
{
$arrIdx = '$rawData[' . $r . '][0][' . $val . '];';
eval("\$z = $arrIdx");
$arr[] = $z;
}
return $arr;
}
//if true is passed to this method, compute normalized coefficients
public function Compute($computeNormalized=false)
{
if ((count($this->x)==0)||(count($this->y)==0))
throw new Exception ('Please supply valid X and Y arrays');
$mx = new Lib_Matrix($this->x);
$my = new Lib_Matrix($this->y);
//coefficient(b) = (X'X)-1X'Y
$xTx = $mx->Transpose()->Multiply($mx)->Inverse();
$xTy = $mx->Transpose()->Multiply($my);
$coeff = $xTx->Multiply($xTy);
$num_independent = $mx->NumColumns(); //note: intercept is included
$sample_size = $mx->NumRows();
$dfTotal = $sample_size - 1;
$dfModel = $num_independent - 1;
$dfResidual = $dfTotal - $dfModel;
//create unit vector..
for ($ctr = 0; $ctr < $sample_size; $ctr++)
$u[] = array(1);
$um = new Lib_Matrix($u);
//SSR = b(t)X(t)Y - (Y(t)UU(T)Y)/n
//MSE = SSE/(df)
$SSR = $coeff->Transpose()->Multiply($mx->Transpose())->Multiply($my)
->Subtract(
($my->Transpose()
->Multiply($um)
->Multiply($um->Transpose())
->Multiply($my)
->ScalarDivide($sample_size))
);
$SSE = $my->Transpose()->Multiply($my)->Subtract(
$coeff->Transpose()
->Multiply($mx->Transpose())
->Multiply($my)
);
$SSTO = $SSR->Add($SSE);
$this->SSEScalar = $SSE->GetElementAt(0, 0);
$this->SSRScalar = $SSR->GetElementAt(0, 0);
$this->SSTOScalar = $SSTO->GetElementAt(0, 0);
$this->RSquare = $this->SSRScalar / $this->SSTOScalar;
$this->F = (($this->SSRScalar / ($dfModel)) / ($this->SSEScalar / ($dfResidual)));
$this->RSquarePValue= $this->FishersF($this->F, $dfModel, $dfResidual);
$MSE = $SSE->ScalarDivide($dfResidual);
//this is a scalar.. get element
$e = ($MSE->GetElementAt(0, 0));
$stdErr = $xTx->ScalarMultiply($e);
for ($i = 0; $i < $num_independent; $i++)
{
//get the diagonal elements
$searray[] = array(sqrt($stdErr->GetElementAt($i, $i)));
//compute the t-statistic
$tstat[] = array($coeff->GetElementAt($i, 0) / $searray[$i][0]);
//compute the student p-value from the t-stat
$pvalue[] = array($this->Student_PValue($tstat[$i][0], $dfResidual));
}
//convert into 1-d vectors and store
for ($ctr = 0; $ctr < $num_independent; $ctr++)
{
$this->coefficients[] = $coeff->GetElementAt($ctr, 0);
$this->stderrors[] = $searray[$ctr][0];
$this->tstats[] = $tstat[$ctr][0];
$this->pvalues[] = $pvalue[$ctr][0];
}
/* multiplying the unstandardized coefficient by the ratio of the standard deviations
* of the independent variable (x) and dependent variable(y) gives us NORMALIZED coefft.
*/
if ($computeNormalized)
{
//if normalized betas are desired, compute them
$this->coefficients = $this->ComputeNormalizedBeta($this->coefficients,
$mx->GetInnerArray(), $my->GetInnerArray());
}
}
private function ComputeNormalizedBeta($beta, $x, $y)
{
$betaNormalized=array();
$cols = count($x[0]);
for ($i = 1; $i < $cols; $i++)
{
${'xvar' . $i} = $this->ComputeStdev($x, $i);
}
$yvar1 = $this->ComputeStdev($y, 0);
$betaNormalized[0] = 0; //Normalized coeff. intercept is not defined
for ($i = 1; $i < count($beta); $i++)
{
$betaNormalized[] = $beta[$i] * ${'xvar' . $i} / $yvar1;
}
return $betaNormalized;
}
private function ComputeStdev($arr, $column)
{
//$arr is a 2d matrix
//compute stdev of a particular column of a 2d array
//standardized x and y.
$rows = count($arr);
for ($i = 0; $i < $rows; $i++)
{
$x[] = $arr[$i][$column];
}
$stDevX = $this->StdDeviation($x);
return $stDevX;
}
//compute the standard deviation of 1-d array that is passed in
private function StdDeviation($sample)
{
//compute mean
$mean = array_sum($sample) / sizeof($sample);
//loop through all and find square of difference betn each and mean
foreach ($sample as $num)
{
$devs[] = pow(($num - $mean), 2);
}
//finally, take square root of average of those..
$std = sqrt(array_sum($devs) / count($devs));
return $std;
}
/**
* @link http://home.ubalt.edu/ntsbarsh/Business-stat/otherapplets/pvalues.htm#rtdist
* @param float $t_stat
* @param float $deg_F
* @return float
*/
private function Student_PValue($t_stat, $deg_F)
{
$t_stat = abs($t_stat);
$mw = $t_stat / sqrt($deg_F);
$th = atan2($mw, 1);
if ($deg_F == 1)
return 1.0 - $th / (M_PI / 2.0);
$sth = sin($th);
$cth = cos($th);
if ($deg_F % 2 == 1)
return 1.0 - ($th + $sth * $cth * $this->statcom($cth * $cth, 2, $deg_F - 3, -1)) / (M_PI / 2.0);
else
return 1.0 - ($sth * $this->statcom($cth * $cth, 1, $deg_F - 3, -1));
}
/**
* @link http://home.ubalt.edu/ntsbarsh/Business-stat/otherapplets/pvalues.htm#rtdist
* @param float $q
* @param float $i
* @param float $j
* @param float $b
* @return float
*/
private function statcom($q, $i, $j, $b)
{
$zz = 1;
$z = $zz;
$k = $i;
while ($k <= $j)
{
$zz = $zz * $q * $k / ( $k - $b);
$z = $z + $zz;
$k = $k + 2;
}
return $z;
}
/**
*Formula to return Fishers F (P Value of RSquare) (Significance F)
* @param type $f - F statistic computed from regression analysis
* @param float $n1 - Degrees of freedom1 (regression)
* @param int $n2 - Degrees of freedom2 (residual)
* @return float
*/
private function FishersF($f,$n1, $n2)
{
$x = $n2/($n1*$f+$n2);
if ($n1 % 2 == 0)
return ($this->statcom(1-$x,$n2,$n1+$n2-4,$n2-2)*pow($x,$n2/2));
if ($n2 % 2 == 0)
return (1 - $this->statcom($x,$n1,$n1+$n2-4,$n1-2)*pow(1-$x,$n1/2));
$th = atan(sqrt($n1*$f/$n2));
$a = $th/(M_PI / 2.0);
$sth = sin($th);
$cth = cos($th);
if ($n2>1)
$a+=$sth*$cth*($this->statcom($cth*$cth, 2, $n2-3, -1)/(M_PI / 2.0));
if ($n1==1)
return 1-$a;
$c = 4*$this->statcom($sth*$sth, $n2+1, $n1+$n2-4, $n2-2)*$sth*pow($cth,$n2)/M_PI;
if ($n2==1)
return (1-$a+$c/2);
$k=2;
while ($k<=($n2-1)/2)
{
$c = $c*$k/($k-.5);
$k++;
}
return (1-$a+$c);
}
}
class Lib_Matrix
{
//global vars
protected $rows;
protected $columns;
protected $MainMatrix = array();
/**
* Matrix Constructor
*
* Initialize the Matrix object. Throw an exception if jagged array is passed.
*
* @param array $matrix - The array
*/
function __construct($matrix)
{
for ($i = 0; $i < count($matrix); $i++)
{
for ($j = 0; $j < count($matrix[$i]); $j++)
$this->MainMatrix[$i][$j] = $matrix[$i][$j];
}
$this->rows = count($this->MainMatrix);
$this->columns = count($this->MainMatrix[0]);
if (!$this->isValidMatrix())
{
throw new Exception("Invalid matrix");
}
}
/**
* Is it a valid matrix?
*
* Returns 'False' if it is not a rectangular matrix
*
* @return bool
*/
private function isValidMatrix()
{
for ($i = 0; $i < $this->rows; $i++)
{
$numCol = count($this->MainMatrix [$i]);
if ($this->columns != $numCol)
return false;
}
return true;
}
/**
* Display the matrix
* Formatted display of matrix for debugging.
*/
public function DisplayMatrix()
{
$rows = $this->rows;
$cols = $this->columns;
echo "Order of the matrix is ($rows rows X $cols columns)\n";
for ($r = 0; $r < $rows; $r++)
{
for ($c = 0; $c < $cols; $c++)
{
echo $this->MainMatrix[$r][$c];
}
echo "\n";
}
}
/**
* Get the inner array stored in matrix object
*
* @return array
*/
public function GetInnerArray()
{
return $this->MainMatrix;
}
/**
* Number of rows in the matrix
* @return integer
*/
public function NumRows()
{
return count($this->MainMatrix);
}
/**
* Number of columns in the matrix
* @return integer
*/
public function NumColumns()
{
return count($this->MainMatrix[0]);
}
/**
* Return element found at location $row, $col.
*
* @param int $row
* @param int $col
* @return object(depends on input)
*/
public function GetElementAt($row, $col)
{
return $this->MainMatrix[$row][$col];
}
/**
* Is this a square matrix?
*
* Determinants and inverses only exist for square matrices!
*
* @return bool
*/
public function isSquareMatrix()
{
if ($this->rows == $this->columns)
return true;
return false;
}
/**
* Subtract matrix2 from matrix object on which this method is called
* @param Lib_Matrix $matrix2
* @return Lib_Matrix Note that original matrix is left unchanged
*/
public function Subtract(Lib_Matrix $matrix2)
{
$rows1 = $this->rows;
$columns1 = $this->columns;
$rows2 = $matrix2->NumRows();
$columns2 = $matrix2->NumColumns();
if (($rows1 != $rows2) || ($columns1 != $columns2))
throw new Exception('Matrices are not the same size!');
for ($i = 0; $i < $rows1; $i++)
{
for ($j = 0; $j < $columns1; $j++)
{
$newMatrix[$i][$j] = $this->MainMatrix[$i][$j] -
$matrix2->GetElementAt($i, $j);
}
}
return new Lib_Matrix($newMatrix);
}
/**
* Add matrix2 to matrix object that calls this method.
* @param Model_Matrix $matrix2
* @return Lib_Matrix Note that original matrix is left unchanged
*/
function Add(Lib_Matrix $matrix2)
{
$rows1 = $this->rows;
$rows2 = $matrix2->NumRows();
$columns1 = $this->columns;
$columns2 = $matrix2->NumColumns();
if (($rows1 != $rows2) || ($columns1 != $columns2))
throw new Exception('Matrices are not the same size!');
for ($i = 0; $i < $rows1; $i++)
{
for ($j = 0; $j < $columns1; $j++)
{
$newMatrix[$i][$j] = $this->MainMatrix[$i][$j] +
$matrix2->GetElementAt($i, $j);
}
}
return new Lib_Matrix($newMatrix);
}
/**
* Multiply matrix2 into matrix object that calls this method
* @param Model_Matrix $matrix2
* @return Lib_Matrix Note that original matrix is left unaltered
*/
function Multiply(Lib_Matrix $matrix2)
{
$sum = 0;
$rows1 = $this->rows;
$columns1 = $this->columns;
$columns2 = $matrix2->NumColumns();
$rows2 = $matrix2->NumRows();
if ($columns1 != $rows2)
throw new Exception('Incompatible matrix types supplied');
for ($i = 0; $i < $rows1; $i++)
{
for ($j = 0; $j < $columns2; $j++)
{
$newMatrix[$i][$j] = 0;
for ($ctr = 0; $ctr < $columns1; $ctr++)
{
$newMatrix[$i][$j] += $this->MainMatrix[$i][$ctr] *
$matrix2->GetElementAt($ctr, $j);
}
}
}
return new Lib_Matrix($newMatrix);
}
/**
* Multiply every element of matrix on which this method is called by the scalar
* @param object $scalar
* @return Lib_Matrix
*/
public function ScalarMultiply($scalar)
{
$rows = $this->rows;
$columns = $this->columns;
$newMatrix = array();
for ($i = 0; $i < $rows; $i++)
{
for ($j = 0; $j < $columns; $j++)
{
$newMatrix[$i][$j] = $this->MainMatrix[$i][$j] * $scalar;
}
}
return new Lib_Matrix($newMatrix);
}
/**
* Divide every element of matrix on which this method is called by the scalar
* @param object $scalar
* @return Lib_Matrix
*/
public function ScalarDivide($scalar)
{
$rows = $this->rows;
$columns = $this->columns;
$newMatrix = array();
for ($i = 0; $i < $rows; $i++)
{
for ($j = 0; $j < $columns; $j++)
{
$newMatrix[$i][$j] = $this->MainMatrix[$i][$j] / $scalar;
}
}
return new Lib_Matrix($newMatrix);
}
/**
* Return the sub-matrix after crossing out the $crossx and $crossy row and column respectively
* Part of determinant expansion by minors method
* @param int $crossX
* @param int $crossY
* @return Lib_Matrix
*/
public function GetSubMatrix($crossX, $crossY)
{
$rows = $this->rows;
$columns = $this->columns;
$newMatrix = array();
$p = 0; // submatrix row counter
for ($i = 0; $i < $rows; $i++)
{
$q = 0; // submatrix col counter
if ($crossX != $i)
{
for ($j = 0; $j < $columns; $j++)
{
if ($crossY != $j)
{
$newMatrix[$p][$q] = $this->GetElementAt($i, $j);
//$matrix[$i][$j];
$q++;
}
}
$p++;
}
}
return new Lib_Matrix($newMatrix);
}
/**
* Compute the determinant of the square matrix on which this method is called
* @link http://mathworld.wolfram.com/DeterminantExpansionbyMinors.html
* @return object(depends on input)
*/
public function Determinant()
{
if (!$this->isSquareMatrix())
throw new Exception("Not a square matrix!");
$rows = $this->rows;
$columns = $this->columns;
$determinant = 0;
if ($rows == 1 && $columns == 1)
{
return $this->MainMatrix[0][0];
}
if ($rows == 2 && $columns == 2)
{
$determinant = $this->MainMatrix[0][0] * $this->MainMatrix[1][1] -
$this->MainMatrix[0][1] * $this->MainMatrix[1][0];
} else
{
for ($j = 0; $j < $columns; $j++)
{
$subMatrix = $this->GetSubMatrix(0, $j);
if (fmod($j, 2) == 0)
{
$determinant += $this->MainMatrix[0][$j] * $subMatrix->Determinant();
} else
{
$determinant -= $this->MainMatrix[0][$j] * $subMatrix->Determinant();
}
}
}
return $determinant;
}
/**
* Compute the transpose of matrix on which this method is called (invert rows and columns)
* @return Lib_Matrix
*/
public function Transpose()
{
$rows = $this->rows;
$columns = $this->columns;
$newArray = array();
for ($i = 0; $i < $rows; $i++)
{
for ($j = 0; $j < $columns; $j++)
{
$newArray[$j][$i] = $this->MainMatrix[$i][$j];
}
}
return new Lib_Matrix($newArray);
}
/**
* Compute the inverse of the matrix on which this method is found (A*A(-1)=I)
* (cofactor(a))T/(det a)
* @link http://www.mathwords.com/i/inverse_of_a_matrix.htm
* @return Lib_Matrix
*/
function Inverse()
{
if (!$this->isSquareMatrix())
throw new Exception("Not a square matrix!");
$rows = $this->rows;
$columns = $this->columns;
$newMatrix = array();
for ($i = 0; $i < $rows; $i++)
{
for ($j = 0; $j < $columns; $j++)
{
$subMatrix = $this->GetSubMatrix($i, $j);
if (fmod($i + $j, 2) == 0)
{
$newMatrix[$i][$j] = ($subMatrix->Determinant());
} else
{
$newMatrix[$i][$j] = -($subMatrix->Determinant());
}
}
}
$cofactorMatrix = new Lib_Matrix($newMatrix);
return $cofactorMatrix->Transpose()
->ScalarDivide($this->Determinant());
}
}
----------------------------------------------------------------------
テストデータ
| y | x0 | x1 | x2 |
| 4.5 | 1 | 8 | 2 |
| 22.5 | 1 | 40.5 | 24.5 |
| 2 | 1 | 4.5 | 0.5 |
| 0.5 | 1 | 0.5 | 2 |
| 18 | 1 | 4.5 | 4.5 |
| 2 | 1 | 7 | 8 |
| 32 | 1 | 24.5 | 40.5 |
| 4.5 | 1 | 4.5 | 2 |
| 40.5 | 1 | 32 | 24.5 |
| 2 | 1 | 0.5 | 4.5 |
----------------------------------------------------------------------
利用サンプル
Y目的変数、X配列:説明変数
//independent vars.. note 1 has been added to first column for computing
//intercept
$x = array(
array(1, 8, 2,2),
array(1, 40.5, 24.5,3),
array(1, 4.5, .5,4),
array(1, .5, 2,5),
array(1, 4.5, 4.5,6),
array(1, 7, 8,7),
array(1, 24.5, 40.5,8),
array(1, 4.5, 2,9),
array(1, 32, 24.5,11),
array(1, .5, 4.5,22),
);
//dependent matrix..note it is a 2d array
$y = array(array(4.5),
array(22.5),
array(2),
array(.5),
array(18),
array(2),
array(32),
array(4.5),
array(40.5),
array(2)
);
$reg = new Lib_Regression();
$reg->setX($x);
$reg->setY($y);
//NOTE: passing true to the compute method generates standardized coefficients
$reg->Compute(); //go!
var_dump($reg->getSSE());
var_dump($reg->getSSR());
var_dump($reg->getSSTO());
var_dump($reg->getRSQUARE());
var_dump($reg->getF());
var_dump($reg->getRSQUAREPValue());
var_dump($reg->getCoefficients());
var_dump($reg->getStandardError());
var_dump($reg->getTStats());
var_dump($reg->getPValues());
----------------------------------------------------------------------
サンプルからの出力
|
--------------------------------------------------------------------
エクセルとの結果比較:OK
エクセルとの結果比較:OK
| 回帰統計 | ||||
| 重相関 R | 0.873966 | |||
| 重決定 R2 | 0.763817 | |||
| 補正 R2 | 0.553479 | |||
| 標準誤差 | 7.998289 | |||
| 観測数 | 10 | |||
| 分散分析表 | ||||
| 自由度 | 変動 | 分散 | 観測された分散比 | |
| 回帰 | 3 | 1448.217 | 482.7389 | 11.31904 |
| 残差 | 7 | 447.8083 | 63.97262 | |
| 合計 | 10 | 1896.025 | ||
| 係数 | 標準誤差 | t | P-値 | |
| 切片 | 1.564758 | 3.490056 | 0.448348 | 0.667451 |
| x0 | 0 | 0 | 65535 | #NUM! |
| x1 | 0.378702 | 0.327975 | 1.154669 | 0.286117 |
| x2 | 0.574748 | 0.343039 | 1.675462 | 0.137751 |
--------------------------------------------------------------------
コメント:
従属変数(X)で、冒頭1列が余計に必要な点に注意がいるが(1で埋めるだけ)、
あとは、スムーズに使える。
変数の数は、適宜増やせる。
処理も速い。
エクセルだと、従属変数が16個までなので、これを使えば、突破できる。
R使ってもいいのだろうが、ウェブシステムに簡易的に組み込むならこっちも検討する価値はある。
/**
* Copyright (c) 2011 Shankar Manamalkav <nshankarあっとまーくufl.edu>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* Class for computing multiple linear regression of the form
* y=a+b1x1+b2x2+b3x3...
*
* @author shankar<nshankarあっとまーくufl.edu>
*
*
*/


go to my blog dildo,realistic dildo,vibrators,horse dildo,horse dildo,cheap sex toys,silicone sex doll,real dolls,realistic dildo view it now
返信削除a705l2wroqs443 dog dildo,dildos,realistic vibrators,dog dildo,japanese sex dolls,dog dildo,dildo,g-spot dildos,adult sex toys w367d7pbraq238
返信削除t134u1nnagk128 vibrating dildos,sex toys,dildos,love dolls,wolf dildo,women sex toys,double dildos,horse dildo,Panty Vibrators d160j8bolzz276
返信削除