📄 chemometrics.py

📁 PyChem是用Python语言编写的多元变量分析软件。它包括一个前端图形界面用于管理和保存试验数据
💻 PY
📖 第 1 页 / 共 2 页
字号:
12 下一页
"""Chemometrics toolbox for Python

Includes:
    -Partial least squares regression (PLS1 and PLS2)
    -Principal components analysis
    -Discriminant function analysis (DFA)
    
$Id: chemometrics.py Copyright (C) 2005  Roger Jarvis

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""

import string, scipy, copy
from process import meancent
from process import autoscale

def __fdot__(a,b):
    """Dot product for large arrays, faster than numarrays dot
    depending on the spec of the computer being used"""
    product = scipy.zeros((a.shape[0],b.shape[1]),'d')
    r = 0
    while r < a.shape[0]:
        product[r,:] = scipy.sum(scipy.reshape(a[r,:],(a.shape[1],1))*b,0)
        r = r+1
    return product


def __mean__(a, axis=0):
    """Find the mean of 2D array along axis = 0 or 1
    default axis is 0
    """
    return scipy.sum(a,axis)/a.shape[axis]


def __std__(a):
    """Find the standard deviation of 2D array
    along axis = 0
    """
    m = __mean__(a,0)
    m = scipy.resize(m,(a.shape[0],a.shape[1]))
    return scipy.sqrt(scipy.sum((a-m)**2,0)/(a.shape[0]-1))


def __diag__(a):
    """Transform vector to diagonal matrix
    """
    d = scipy.zeros((len(a),len(a)),'d')
    for i in range(len(a)):
        d[i,i] = a[i]
    return d


def __flip__(a, axis=0):
    """Reverse order of array elements along axis 0 or 1
    """
    if axis == 0:
        axa,axb = 0,1
    elif axis == 1:
        axa,axb = 1,0

    ind = []    
    for x in range(0,a.shape[axa],1):
        if x == 0:
            ind.append(a.shape[axa]-1)
        else:
            ind.append(a.shape[axa]-1-x)
            
    b = scipy.zeros((a.shape),'d')
    for x in range(a.shape[axa]):
        if axis == 0:
            b[x] = a[ind[x]]
        elif axis == 1:
            b[:,x] = a[:,ind[x]]
    return b

        
def __rms__(pred,act):
    """Calculate the root mean squared error of prediction
    """
    return scipy.reshape(scipy.sqrt(scipy.sum((act-pred)**2)/act.shape[0]),())

    
def __min__(x, axis=0):
    """find min of 2d array x along axis 0 or 1
    """
    s = scipy.sort(x,axis)
    return scipy.reshape(s[0],())


def __max__(x, axis=0):
    """find min of 2d array x along axis 0 or 1
    """
    s = scipy.sort(x,axis)
    return scipy.reshape(s[x.shape[0]-1],())


def __slice__(x,index,axis=0):
    """for slicing arrays
    """
    if axis == 0:
        slice = scipy.reshape(x[:,int(index[0])],(x.shape[0],1))
        for n in range(1,len(index),1):
            slice = scipy.concatenate((slice,scipy.reshape(x[:,int(index[n])],(x.shape[0],1))),1)
    elif axis == 1:    
        slice = scipy.reshape(x[int(index[0]),:],(1,x.shape[1]))
        for n in range(1,len(index),1):
            slice = scipy.concatenate((slice,scipy.reshape(x[int(index[n]),:],(1,x.shape[1]))),0)
    return slice

    
def __split__(xdata, ydata, names, mask):
    """Splits x and y inputs into training, cross validation (and
    independent test groups) for use with modelling algorithms.
    If max(mask)==2 return x1,x2,x3,y1,y2,y3,n1,n2,n3 else if max(mask)==1
    return x1,x2,y1,y2,n1,n2
    """
    x1 = scipy.take(xdata,_index(mask,0),0)
    x2 = scipy.take(xdata,_index(mask,1),0)
    y1 = scipy.take(ydata,_index(mask,0),0)
    y2 = scipy.take(ydata,_index(mask,1),0)
    n1,n2 = [],[]
    for i in range(len(names)):
        if mask[i] == 0:
            n1.append(names[i])
        elif mask[i] == 1:
            n2.append(names[i])
    
    if max(mask) == 1:
        return x1,x2,y1,y2,n1,n2
    elif max(mask) == 2:
        x3 = scipy.take(xdata,_index(mask,2),0)
        y3 = scipy.take(ydata,_index(mask,2),0)
        n3 = []
        for i in range(len(names)):
            if mask[i] == 2:
                n3.append(names[i])
        return x1,x2,x3,y1,y2,y3,n1,n2,n3
    
def __TW__(X,group):
    """Generate T and W matrices for DFA
    """
    T,W = scipy.zeros((X.shape[1],X.shape[1]),'d'),scipy.zeros((X.shape[1],X.shape[1]),'d')
    mx = scipy.stats.mean(X,0)[scipy.NewAxis,:]
##    group = scipy.reshape(group,(group.shape[0],))
    for x in range(1,max(group)+1):
        idx = _index(group,int(x))
        meani = scipy.stats.mean(scipy.take(X,idx,0),0)[scipy.NewAxis,:]
        meani = scipy.resize(meani,(len(idx),X.shape[1]))
        mxi = scipy.resize(mx,(len(idx),X.shape[1]))
        A = scipy.take(X,idx,0) - mxi
        C = scipy.take(X,idx,0) - meani
        
        T = T + scipy.dot(scipy.transpose(A),A)
        W = W + scipy.dot(scipy.transpose(C),C)
    return T,W


def __adj__(a):
    """Adjoint of a"""
    div,mod = divmod(a.shape[1], 2)
    adj = scipy.zeros((a.shape),'d')
    for p in range(0,a.shape[0],1):
        for q in range(0,a.shape[1],1):
            if mod == 0:
                if p < a.shape[1]/2:
                    if q < a.shape[1]/2:
                        adj[p,q] = (a[p+1,q+1]*a[p+2,q+2])-(a[p+1,q+2]*a[p+2,q+1])
                    if q >= a.shape[1]/2:
                        adj[p,q] = (a[p+1,q-2]*a[p+2,q-1])-(a[p+1,q-1]*a[p+2,q-2])
                if p >= a.shape[1]/2:
                    if q < a.shape[1]/2:
                        adj[p,q] = (a[p-2,q+1]*a[p-1,q+2])-(a[p-2,q+2]*a[p-1,q+1])
                    if q >= a.shape[1]/2:
                        adj[p,q] = (a[p-2,q-2]*a[p-1,q-1])-(a[p-2,q-1]*a[p-1,q-2])
            if mod != 0:
                if p < a.shape[1]/2:
                    if q < a.shape[1]/2:
                        adj[p,q] = (a[p+1,q+1]*a[p+2,q+2])-(a[p+1,q+2]*a[p+2,q+1])
                    if q > a.shape[1]/2:
                        adj[p,q] = (a[p+1,q-2]*a[p+2,q-1])-(a[p+1,q-1]*a[p+2,q-2])
                    if q == a.shape[1]/2:
                        adj[p,q] = (a[p+1,q-1]*a[p+2,q+1])-(a[p+1,q+1]*a[p+2,q-1])
                if p > a.shape[1]/2:
                    if q < a.shape[1]/2:
                        adj[p,q] = (a[p-2,q+1]*a[p-1,q+2])-(a[p-2,q+2]*a[p-1,q+1])
                    if q > a.shape[1]/2:
                        adj[p,q] = (a[p-2,q-2]*a[p-1,q-1])-(a[p-2,q-1]*a[p-1,q-2])
                    if q == a.shape[1]/2:
                        adj[p,q] = (a[p-2,q-1]*a[p-1,q+1])-(a[p-2,q+1]*a[p-1,q-1])
                if p == a.shape[1]/2:        
                    if q < a.shape[1]/2:
                        adj[p,q] = (a[p-1,q+1]*a[p+1,q+2])-(a[p-1,q+2]*a[p+1,q+1])
                    if q > a.shape[1]/2:
                        adj[p,q] = (a[p-1,q-2]*a[p+1,q-1])-(a[p-1,q-1]*a[p+1,q-2])
                    if q == a.shape[1]/2:
                        adj[p,q] = (a[p-1,q-1]*a[p+1,q+1])-(a[p-1,q+1]*a[p+1,q-1])
                        
    for m in range(1,adj.shape[0]+1,1):
        for n in range(1,adj.shape[1]+1,1): 
            if divmod(m,2)[1] != 0:
                if divmod(n,2)[1] == 0:
                    adj[m-1,n-1] = adj[m-1,n-1]*-1
            elif divmod(m,2)[1] == 0:
                if divmod(n,2)[1] != 0:
                    adj[m-1,n-1] = adj[m-1,n-1]*-1
                    
    return scipy.transpose(adj)                    

def __inverse__(a):
    """Inverse of a"""
    d = scipy.linalg.det(a)
    if d == 0:
        d = 0.001
    return __adj__(a)/d

def _index(y,num):
    """use this to get tuple index for take"""
    idx = []
    for i in range(y.shape[0]):
        if y[i] == num:
            idx.append(i)
    return tuple(idx)

def _put(a,ind,v):
    """a pvt put function"""
    c = 0
    for each in ind:
        a[each,:] = v[c,:]
        c += 1
    return a

def _sample(x,N):
    """randomly select N samples from x"""
    select = []
    while len(select) < N:
        a = int(scipy.rand(1)*float(x))
        if a not in select:
            select.append(a) 
    return select
    
def PCA_SVD(myarray,type='covar'):
    """Run principal components analysis (PCA) by singular
    value decomposition (SVD)
    
    Formerly SP_PCA_SVD
    
    >>> a = scipy.array([[1,2,3],[0,1,1.5],[-1,-6,34],[8,15,2]])
    >>> a
    array([[  1. ,   2. ,   3. ],
           [  0. ,   1. ,   1.5],
           [ -1. ,  -6. ,  34. ],
           [  8. ,  15. ,   2. ]])
    >>> # There are four samples, with three variables each
    >>> tt,pp,pr,eigs = SP_PCA_SVD(a)
    >>> tt 
    array([[  5.86463567e+00,  -4.28370443e+00,   1.46798845e-01],
           [  6.65979784e+00,  -6.16620433e+00,  -1.25067331e-01],
           [ -2.56257861e+01,   1.82610701e+00,  -6.62877855e-03],
           [  1.31013526e+01,   8.62380175e+00,  -1.51027354e-02]])
    >>> pp
    array([[ 0.15026487,  0.40643255, -0.90123973],
           [ 0.46898935,  0.77318935,  0.4268808 ],
           [ 0.87032721, -0.48681703, -0.07442934]])
    >>> # This is the 'rotation matrix' - you can imagine colm names
    >>> # of PC1, PC2, PC3 and row names of variable1, variable2, variable3.
    >>> pr
    array([[ 97.1073744 ],
           [ 98.88788958],
           [ 99.98141011]])
    >>> eigs
    array([[ 30.11765617],
           [ 11.57915467],
           [  0.1935556 ]])
    >>> a
    array([[  1. ,   2. ,   3. ],
           [  0. ,   1. ,   1.5],
           [ -1. ,  -6. ,  34. ],
           [  8. ,  15. ,   2. ]])
    """
    newarray = copy.deepcopy(myarray)
    if type=='covar':
        newarray = meancent(newarray)
    elif type=='corr':
        newarray = autoscale(newarray)
    else:
        raise KeyError, "'type' must be one of 'covar or 'corr'"

    # I think this may run faster if myarray is converted to a matrix first.
    # (This should be tested - anyone got a large dataset?)
    # mymat = scipy.mat(myarray)
    u,s,v = scipy.linalg.svd(newarray)
    tt = scipy.dot(newarray,scipy.transpose(v))
    pp = v
    pr = (1-(s/scipy.sum(scipy.sum(newarray**2))))*100
    pr = scipy.reshape(pr,(1,len(pr)))
    pr = scipy.concatenate((scipy.array([[0.0]]),pr),1)
    pr = scipy.reshape(pr,(pr.shape[1],))
    eigs = s

    return tt,pp,pr[:,scipy.NewAxis],eigs[:,scipy.NewAxis]
    

def PCA_NIPALS(myarray,comps,type='covar',stb=None):
    """Run principal components analysis (PCA) using NIPALS
12 下一页
💿 文件大小 96 K
👤 上传用户 dxhh
📂 所属分类数据结构
🏷️ 相关标签

#PyChem #Python #语言 #编写
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -