;;########################################################################
;; statfunc.lsp
;; contains new statistical functions and modifications of Tierney functions.
;; Copyright (c) 1991-97 by Forrest W. Young
;;########################################################################

(defun size (matrix-a) (array-dimensions matrix-a))

(defun nscores (x)
"Args: (x)
Returns the normal scores of the elements of x."
  (let ((ranks (rank x))
        (n (length x)))
    (normal-quant (/ (1+ ranks) (1+ n)))))

(defun ssq (x) 
"Args: (x)
Returns the sum of squares of the elements of x."
  (sum (^ x 2)))
    
(defun standard-deviation (x)
"Args: (x)
Returns the standard deviation of the elements x. Vector reducing.
Issues warning on zero standard deviation. Return nil when one datum."
  (let* ((n (count-elements x))
         (r (- x (mean x)))
         (sd nil))
    (when (> n 1) (setf sd (sqrt (* (mean (* r r)) (/ n (- n 1)))))
          (when (= sd 0) (error-message "A variable has no variance.")))
    sd))

(defun variance (x)
  (let ((result (standard-deviation x)))
    (when result (setf result (^ result 2)))
    result))

(defun range (x)
  (let ((up-low (select (fivnum x) (list 4 0)))
        )
    (- (select up-low 0) (select up-low 1))))

(defun mid-range (x)
    (mean (select (fivnum x) (list 0 4))))

;;the following two formulas taken from SAS elementary stats procs p 11
;;they give the same results shown for SAS PROC UNIVARIATE

(defun skewness (x)
  (let ((std-cubed (^ (standard-deviation x) 3))
        (dev-cubed (^ (- x (mean x)) 3))
        (n (length x))
        (skew nil))
    (if (= 0 std-cubed) (error-message "Skewness Undefined.")
        (when (> n 2)
          (setf skew 
                (* (/ n (* (- n 1) (- n 2))) (sum (/ dev-cubed std-cubed))))))
    ))

(defun kurtosis (x)
  (let ((std-fourth (^ (standard-deviation x) 4))
        (dev-fourth (^ (- x (mean x)) 4))
        (n (length x))
        (kurtosis nil))
    (if (= 0 std-fourth) (error-message "Kurtosis Undefined.")
        (when (> n 3)
          (setf kurtosis (- (* (/ (* n (+ n 1)) (* (- n 1) (- n 2) (- n 3))) 
                         (sum (/ dev-fourth std-fourth)))
                         (/ (* 3 (- n 1) (- n 1)) (* (- n 2) (- n 3)))))))
    ))

(defun correlation-matrix (matrix &key types)
"Args: (matrix &key types)
 Takes a nxm multivariate matrix and returns a correlation matrix. Types is 
an m-vector of variable type strings (category, ordinal, numeric). Uses all n 
of the input matrix's v_ariables if :types not specified, only the numeric variables if :types is specifed."
  (let ((it matrix)
        )
    (when types 
          (setf it (select matrix (iseq (select (size matrix) 0)) 
                           ($position '("numeric") types))))
    (setf it (normalize (center it) 1))
  (/ (matmult (transpose it) it) (- (select (array-dimensions it) 0) 1))))

(defun sv-decomp2 (matrix)
  (let* (
         (rc (array-dimensions matrix) )
         (r (select rc 0) )
         (c (select rc 1) )
        )
    (if (< r c)
        (let (
              (svd (sv-decomp (transpose matrix)) )
             )
          (list (select svd 2) (select svd 1) (select svd 0) (select svd 3)))
        (sv-decomp matrix) ) ) )

(defun pca (data &key (corr t) (left-alpha 1))
"Args: DATA &key (CORR t)
Performs a principal components analysis of DATA. Computes Covariances if CORR is nil. Returns a list of three things: A matrix of scores, a vector of eigenvalues & a matrix of coefficients."
  (let* (
         (prepped-data (if corr 
                           (/ (normalize (center data) 1) 
                              (sqrt (1- (select (array-dimensions data) 0))) )
                           (/ (center data) 
                              (sqrt (1- (select (array-dimensions data) 0))) )
                           ) )
         (svd (sv-decomp2 prepped-data) )
         )
    (when (< (sum (col (select svd 2) 0)) 0)
        (setf (select svd 0) (* (- 1) (select svd 0)) )
        (setf (select svd 2) (* (- 1) (select svd 2)) ) )
    (list (%* (select  svd 0) (diagonal (^ (select svd 1) left-alpha)))
        (^ (select svd 1) 2)
        (%* (select  svd 2) (diagonal (^ (select svd 1) (1- left-alpha)))))))

(defun center (data)
"Args: DATA
Centers the columns of the matrix DATA. Returns a matrix."
  (apply #'bind-columns (mapcar #'(lambda (x) (- x (mean x))) (column-list data)) ) )

(defun normalize (data &optional (std-dev 1))
"Args: DATA-MATRIX &OPTIONAL STD-DEV
Normalize the scores on each variable to a mean of 0 and a standard deviation of 1 or any other desired value.  (Actually, SQRT(N-1) times DESIRED-STDEV.)"
  (apply #'bind-columns (mapcar #'(lambda (x) (* std-dev (/ x (standard-deviation x)))) (column-list (center data))) ) )

(defun orthogonal-procrustean-rotation (matrix target)
"Finds the orthogonal rotation matrix, which, when applied to MATRIX, will rotate MATRIX into as close a congruence with TARGET as possible."
  (let (
        (svd (sv-decomp2 (%* (transpose matrix) target)) )
        )
    (%* (select svd 0) (transpose (select svd 2))) ) )

;;Generalized Singular Value Decomposition

(defun gsv-decomp (a w q)
"Args: (A W Q)
A is a real matrix, W and Q are positive definite diagonal matrices.
Computes the generalized singular value decomposition of A such that A = NUM'  
where N'WN = M'QM = I. Returns a four-element list of the form (N U M FLAG) 
where N and M are matrices whose columns are the left and right generalized 
singular vectors of A and U is the sequence of generalized singular values of 
A. FLAG is T if the algorithm converged, NIL otherwise."
  (cond 
    ((>= (select (size a) 0) (select (size a) 1))    ;nrows >= ncols
     (let* ((b (matmult (sqrt w) a (sqrt q)))
            (svd (sv-decomp b))
            )
       (list (matmult (sqrt (inverse w)) (select svd 0))
             (select svd 1)
             (matmult (sqrt (inverse q)) (select svd 2))
             (select svd 3))))
    (t                                               ;nrows < ncols
     (let* ((b (matmult (sqrt w) a (sqrt q)))
            (svd (sv-decomp (transpose b)))
            )
       (list (matmult (sqrt (inverse w)) (select svd 2))
             (select svd 1)
             (matmult (sqrt (inverse q)) (select svd 0))
             (select svd 3))))))

(defun distance-matrix (matrix)
"Args: (matrix)
Takes a nxm multivariate matrix and returns a euclidean distance matrix."
  (let* ((cpd (matmult matrix (transpose matrix)))
         (ssq (diagonal cpd))
         (n (length ssq))
         ) 
    (sqrt (+ (outer-product ssq (repeat 1 n))
             (* -2 cpd)
             (outer-product (repeat 1 n) ssq)))))

(defun gs-orthog (matrix  
                  &key (column-order (iseq 
                                      (select (array-dimensions matrix) 1)))
                       (unpermute 'nil) 
                       (norm 'nil))
"Method Args: (MATRIX &key (COLUMN-ORDER (iseq num-columns-in-matrix)) (UNPERMUTE 'nil) (NORM 'nil))
Performs a Gram-Schmidt Orthogonalization on the columns of MATRIX.  
XT = X* ;   T = (inverse R) {from QDR decomp};   (X*'X*) = D or I depending on whether orthog. or orthonorm. was performed. Returns a list of lists containing: COLUMN-ORDER (order in which columns of X were orthogonalized), X*, T. If the keyword argument :COLUMN-ORDER is provided and the keyword argument :UNPERMUTE is given a 'nil value (the default), then the GS-Orthogonalization will be performed on the matrix whose columns are appropriately permuted. (XP)T = (X*P). In this case, the result list will contain the results from the !!!permuted!!! form of MATRIX: [COLUMN-ORDER, (X*P), & T]. If, in this same case, the keyword argument :UNPERMUTE is supplied a value of 't, the solution will be determined as above, but the final results will un-permute the X and X* matrices and appropriately permute the transformation such that the GS equations hold as: X [inverse(TP') P'] = X*. Here, the result list contains the original matrix X, the transformation matrix which orthogonalizes the columns in the proper order
[inverse(TP') P'], and the un-permuted transformed data matrix, X* (which contains columns orthogonalized in COLUMN-ORDER but left in their original matrix positions). If the keyword argument  :NORM T  is provided, then a GS-OrthoNORMALization will be performed instead of an orthogonalization."

  (if (/= (length (remove-duplicates column-order))
          (select (array-dimensions matrix) 1))
      (error "Insufficient number of elements specified in :COLUMN-ORDER sequence."))  
  
  (let* (
         (permuted-matrix (permute-matrix matrix column-order))
         (sol (if norm
                  (qr-decomp2 permuted-matrix :pivot nil)
                  (qdr-decomp permuted-matrix :pivot nil)) )
         )
    (if unpermute

        (let (
              (unpermutation (mapcar #'(lambda (x) (position x column-order))
                                   (iseq (length column-order))))
              )
          (if norm
              (list column-order 
                    (permute-matrix (select sol 0) unpermutation)
                    (permute-matrix (inverse (permute-matrix 
                                              (select sol 1) 
                                              unpermutation))
                                    unpermutation) )
              (list column-order
                    (permute-matrix (%* (select sol 0) 
                                        (diagonal (select sol 1)))
                                    unpermutation)
                    (permute-matrix (inverse (permute-matrix 
                                              (select sol 2) 
                                              unpermutation))
                                    unpermutation) ) ))
        (if norm
            (list column-order 
                  (select sol 0) 
                  (inverse (select sol 1)))
            (list column-order
                  (%* (select sol 0) (diagonal (select sol 1)))
                  (inverse (select sol 2)))) ) ))

(defun permute-matrix (matrix new-order &key (col t) (row nil))
"Args: (MATRIX NEW-ORDER &key (COL t) (ROW nil))
Permutes the columns (or rows) of MATRIX into the order specified by NEW-ORDER.  By default the columns are permuted.  If the keyword argument COL is 'nil or ROW is 't, the rows of the MATRIX will be permuted."
  (let (
        (ad (array-dimensions matrix))
        (new-order (remove-duplicates new-order))
        )
    
    (if (or row (null col))
        (if (/= (length new-order) (select ad 0))
            (error "Incorrect length of new order list.")
            (bind-rows2 (select (row-list2 matrix :list t) new-order)))
        
        (if (/= (length new-order) (select ad 1))
            (error "Incorrect length of new order list.")
            (bind-columns2 (select (column-list matrix) new-order))) ) ))


(defun qdr-decomp (matrix &key (pivot nil))
"Args: (MATRIX &key (PIVOT 'nil)
A modification to QR-DECOMP2 which performs the factorization:X = QDR; Q'Q = I where D = (diagonal of original R); R = original R with its diagonal removed = (inverse D) %* (original R). Returns a list of lists containing: Q D R & (if PIVOT, then column-order). MATRIX is a matrix of real numbers with at least as many rows as columns. Computes the QR factorization of A and returns the result in a list of the form (Q D R).  If PIVOT is true the columns of X are first permuted to insure that the absolute values of the diagonal elements of R are nonincreasing.  In this case the result includes a third element, a list of the indices of the columns in the order in which they were used.  Also (from QR-DECOMP2) the diagonal values of R are all now guaranteed to be positive and the parameter PIVOT has been changed to a keyword parameter."
  (let* (
         (qr (qr-decomp2 matrix :pivot pivot) )
         (d (diagonal (select qr 1)) )
         (r* (%* (diagonal (/ 1 d)) (select qr 1)) )
         )
    (list (select qr 0) d r* (if pivot (select qr 2))) ))

(defun qr-decomp2 (matrix &key (pivot nil))
"Args: (MATRIX &key (PIVOT 'nil)
A slight modification to QR-DECOMP which insures that the diagonal values of R are all positive.  Also the parameter, PIVOT, has been changed from an optional parameter (in the original QR-DECOMP function) to a keyword parameter. MATRIX is a matrix of real numbers with at least as many rows as columns. Computes the QR factorization of A and returns the result in a list of the form (Q R).  If PIVOT is true the columns of X are first permuted to insure that the absolute values of the diagonal elements of R are nonincreasing.  In this case the result includes a third element, a list of the indices of the columns in the order in which they were used."
  (let* (
         (qr (qr-decomp matrix pivot) )
         (d (diagonal (select qr 1)) )
         (ones (repeat 1 (select (array-dimensions matrix) 1)) )
         (p (mapcar #'(lambda (i) (setf (select ones i) -1))
                    (which (minusp d))) )
         )
    (list (%* (select qr 0) (diagonal ones))
          (%* (diagonal ones) (select qr 1))
          (if pivot (select qr 2))) ))

(provide "statfunc")