📄 nist_spblas.cc
字号:
mult_diag(alpha, x, incx, y, incy);
if (is_symmetric())
nondiag_mult_vec_transpose(alpha, x, incx, y, incy);
}
void mult_vec_transpose(const T& alpha, const T* x, int incx, T* y,
int incy) const
{
nondiag_mult_vec_transpose(alpha, x, incx, y, incy);
if (is_triangular() || is_symmetric())
mult_diag(alpha, x, incx, y, incy);
if (is_symmetric())
nondiag_mult_vec(alpha, x, incx, y, incy);
}
void mult_vec_conj_transpose(const T& alpha, const T* x, int incx, T* y,
int incy) const
{
nondiag_mult_vec_conj_transpose(alpha, x, incx, y, incy);
if (is_triangular() || is_symmetric())
mult_conj_diag(alpha, x, incx, y, incy);
if (is_symmetric())
nondiag_mult_vec_conj(alpha, x, incx, y, incy);
}
int triangular_solve(T alpha, T* x, int incx ) const
{
if (alpha == (T) 0.0)
ERROR_RETURN(1);
if ( ! is_triangular() )
ERROR_RETURN(1);
int N = num_rows();
if (is_lower_triangular())
{
for (int i=0, ii=0; i<N; i++, ii += incx)
{
x[ii] = (x[ii] - sp_dot_product(S[i], x, incx)) / diag[i];
}
if (alpha != (T) 1.0)
{
for (int i=0, ii=0; i<N; i++, ii += incx)
x[ii] /= alpha;
}
}
else if (is_upper_triangular())
{
for (int i=N-1, ii=(N-1)*incx ; 0<=i ; i--, ii-=incx)
{
x[ii] = (x[ii] - sp_dot_product(S[i],x, incx)) / diag[i];
}
if (alpha != (T) 1.0)
{
for (int i=N-1, ii=(N-1)*incx ; 0<=i ; i--, ii-=incx)
x[ii] /= alpha;
}
}
else
ERROR_RETURN(1);
return 0;
}
int transpose_triangular_solve(T alpha, T* x, int incx) const
{
if ( ! is_triangular())
return -1;
int N = num_rows();
if (is_lower_triangular())
{
for (int j=N-1, jj=(N-1)*incx; 0<=j; j--, jj -= incx)
{
x[jj] /= diag[j] ;
sp_axpy( -x[jj], S[j], x, incx);
}
if (alpha != (T) 1.0)
{
for (int jj=(N-1)*incx; 0<=jj; jj -=incx)
x[jj] /= alpha;
}
}
else if (is_upper_triangular())
{
for (int j=0, jj=0; j<N; j++, jj += incx)
{
x[jj] /= diag[j];
sp_axpy(- x[jj], S[j], x, incx);
}
if (alpha != (T) 1.0)
{
for (int jj=(N-1)*incx; 0<=jj; jj -=incx)
x[jj] /= alpha;
}
}
else
ERROR_RETURN(1);
return 0;
}
int transpose_triangular_conj_solve(T alpha, T* x, int incx) const
{
if ( ! is_triangular())
return -1;
int N = num_rows();
if (is_lower_triangular())
{
for (int j=N-1, jj=(N-1)*incx; 0<=j; j--, jj -= incx)
{
x[jj] /= conj(diag[j]) ;
sp_conj_axpy( -x[jj], S[j], x, incx);
}
if (alpha != (T) 1.0)
{
for (int jj=(N-1)*incx; 0<=jj; jj -=incx)
x[jj] /= alpha;
}
}
else if (is_upper_triangular())
{
for (int j=0, jj=0; j<N; j++, jj += incx)
{
x[jj] /= conj(diag[j]);
sp_conj_axpy(- x[jj], S[j], x, incx);
}
if (alpha != (T) 1.0)
{
for (int jj=(N-1)*incx; 0<=jj; jj -=incx)
x[jj] /= alpha;
}
}
else
ERROR_RETURN(1);
return 0;
}
public:
inline T& val(pair<T, int> &VP) { return VP.first; }
inline int& col_index(pair<T,int> &VP) { return VP.second; }
inline const T& val(pair<T, int> const &VP) const { return VP.first; }
inline int col_index(pair<T,int> const &VP) const { return VP.second; }
TSp_mat( int M, int N) : Sp_mat(M,N), S(M), diag() {}
void destroy()
{
// set vector sizes to zero
(vector<T>(0)).swap(diag);
(vector< vector< pair<T, int> > > (0) ).swap(S);
}
/**
This function is the entry point for all of the insert routines in
this implementation. It fills the sparse matrix, one entry at a time.
If matrix is declared unit_diagonal, then inserting any diagonal
values is ignored. If it is symmetric (upper/lower) or triangular
(upper/lower) inconsistent values are not caught. (That is, entries
into the upper region of a lower triangular matrix is not reported.)
[NOTE: the base is determined at the creation phase, and can be determined
by testing whether BLAS_usgp(A, blas_one_base) returns 1. If it returns 0,
then offsets are zero based.]
@param val the numeric value of entry A(i,j)
@param i the row index of A(i,j)
@param j the column index of A(i,j)
@return 0 if succesful, 1 otherwise
*/
int insert_entry(T val, int i, int j)
{
if (is_one_base())
{
i--;
j--;
}
/* make sure the indices are in range */
ASSERT_RETURN(i >= 0, 1);
ASSERT_RETURN(i < num_rows(), 1);
ASSERT_RETURN(j >= 0, 1);
ASSERT_RETURN(j < num_cols(), 1);
/* allocate space for the diagonal, if this is the first time
* trying to insert values.
*/
if (is_new())
{
set_open();
if (is_triangular() || is_symmetric())
{
diag.resize(num_rows());
if (is_unit_diag())
{
for (unsigned int ii=0; ii< diag.size(); ii++)
diag[ii] = T(1.0);
}
else
{
for (unsigned int ii=0; ii< diag.size(); ii++)
diag[ii] = (T) 0.0;
}
}
}
if (is_open())
{
if (i==j && (is_triangular() || is_symmetric() || is_hermitian()) )
{
if (!is_unit_diag())
{
diag[i] += val;
}
else /* if unit diagonal */
{
if (val != (T) 1)
ERROR_RETURN(0); /* tries to insert non-unit diagonal */
}
if (is_upper_storage() && i > j)
ERROR_RETURN(0); /* tries to fill lower-triangular region */
else
if (is_lower_storage() && i < j)
ERROR_RETURN(0); /* tries to fill upper-triangular region */
}
else
{
S[i].push_back( make_pair(val, j) );
}
num_nonzeros() ++;
}
return 0;
}
int insert_entries( int nz, const T* Val, const int *I, const int *J)
{
for (int i=0; i<nz; i++)
{
insert_entry(Val[i], I[i], J[i]) ;
}
return 0;
}
int insert_row(int k, int nz, const T* Val, const int *J)
{
for (int i=0; i<nz; i++)
insert_entry(Val[i], k, J[i]);
return 0;
}
int insert_col(int k, int nz, const T* Val, const int *I)
{
for (int i=0; i<nz; i++)
insert_entry(Val[i], I[i], k);
return 0;
}
int insert_block(const T* Val, int row_stride,
int col_stride, int bi, int bj)
{
/* translate from block index to global indices */
int Iend = K(bi+1);
int Jend = L(bj+1);
for (int i=K(bi), r=0; i<Iend; i++, r += row_stride)
for (int j=L(bi); j<Jend; j++, r += col_stride)
insert_entry( Val[r], i, j );
return 0;
}
int end_construction()
{
return Sp_mat::end_construction();
}
int usmv(enum blas_trans_type transa, const T& alpha, const T* x , int incx,
T* y, int incy) const
{
ASSERT_RETURN(is_valid(), -1);
if (transa == blas_no_trans)
mult_vec(alpha, x, incx, y, incy);
else
if (transa == blas_conj_trans)
mult_vec_conj_transpose(alpha, x, incx, y, incy);
else
if ( transa == blas_trans)
mult_vec_transpose(alpha, x, incx, y, incy);
else
ERROR_RETURN(1);
return 0;
}
int usmm(enum blas_order_type ordera, enum blas_trans_type transa,
int nrhs, const T& alpha, const T* b, int ldb, T* C, int ldC) const
{
if (ordera == blas_rowmajor)
{
/* for each column of C, perform a mat_vec */
for (int i=0; i<nrhs; i++)
{
usmv( transa, alpha, &b[i], ldb, &C[i], ldC );
}
return 0;
}
else
if (ordera == blas_colmajor)
{
/* for each column of C, perform a mat_vec */
for (int i=0; i<nrhs; i++)
{
usmv( transa, alpha, &b[i*ldb], 1, &C[i*ldC], 1 );
}
return 0;
}
else
ERROR_RETURN(1);
}
int ussv( enum blas_trans_type transa, const T& alpha, T* x, int incx) const
{
if (transa == blas_trans)
return transpose_triangular_solve(alpha, x, incx);
else
if (transa == blas_conj_trans)
return transpose_triangular_conj_solve(alpha, x, incx);
else
if (transa == blas_no_trans)
return triangular_solve(alpha, x, incx);
else
ERROR_RETURN(1);
}
int ussm( enum blas_order_type ordera, enum blas_trans_type transa, int nrhs,
const T& alpha, T* C, int ldC) const
{
if (ordera == blas_rowmajor)
{
/* for each column of C, perform a usmv */
for (int i=0; i<nrhs; i++)
{
ussv(
transa, alpha, &C[i], ldC );
}
return 0;
}
else
if (ordera == blas_colmajor)
{
/* for each column of C, perform a mat_vec */
for (int i=0; i<nrhs; i++)
{
ussv( transa, alpha, &C[i*ldC], 1 );
}
return 0;
}
else
ERROR_RETURN(1);
}
void print() const
{
Sp_mat::print(); /* print matrix header info */
/* if there is actual data, print out contents */
for (int i=0; i<num_rows(); i++)
for (unsigned int j=0; j< S[i].size(); j++)
cout << i << " " << col_index(S[i][j]) <<
" " << val(S[i][j]) << "\n";
/* if matrix is triangular, print out diagonals */
if (is_upper_triangular() || is_lower_triangular())
{
for (unsigned int i=0; i< diag.size(); i++)
cout << i << " " << i << " " << diag[i] << "\n";
}
}
};
typedef TSp_mat<float> FSp_mat;
typedef TSp_mat<double> DSp_mat;
typedef TSp_mat<complex<float> > CSp_mat;
typedef TSp_mat<complex<double> > ZSp_mat;
void table_print();
void print(int A);
}
/* namespace */
namespace NIST_SPBLAS
{
static vector<Sp_mat *> Table;
static unsigned int Table_active_matrices = 0;
int Table_insert(Sp_mat* S);
int Table_remove(unsigned int i);
/*
finds an empty slot in global sparse marix table, and fills it with
given entry. Returns -1 if no spot found, or table is corrupt.
*/
int Table_insert(Sp_mat* S)
{
if (Table_active_matrices <= Table.size())
{
Table.push_back(S);
Table_active_matrices++;
return Table.size() - 1;
}
else
{
/* there is an available slot; find it. */
for (unsigned int i=0; i<Table.size(); i++)
{
if (Table[i] == NULL)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -