📄 pagerank.m

📁 基于matlab的pagerank代码
💻 M
📖 第 1 页 / 共 2 页
字号:
12 下一页
function [x flag hist dt] = pagerank(A,optionsu)
% PAGERANK Compute the PageRank for a directed graph.
%
% [p flag hist dt] = pagerank(A)
%
%   Compute the pagerank vector p for the directed graph A, with
%   teleportation probability (1-c).  
%
%   flag is 1 if the method converged; hist returns the convergence history
%   and dt is the total time spent solving the system
%
%   The matrix A should have the outlinks represented in the rows.
%
%   This driver can compute PageRank using 4 different algorithms, 
%   the default algorithm is the Arnoldi iteration for PageRank due to
%   Grief and Golub.  Other algorithms include gauss-seidel iterations,
%   power iterations, a linear system formulation, or an approximate
%   PageRank formulation.
%
%   The output p satisfies p = c A'*D^{+} p + c d'*p v + (1-c) v and
%   norm(p,1) = 1.
%
%   The power method solves the eigensystem x = P''^T x.
%   The linear system solves the system (I-cP^T)x = (1-c)v.
%   The dense method uses "\" on I-cP^T which the LU factorization.
%   
%   To specify a different solver for the linear system, use an anonymous
%   function wrapper around one of Matlab's solver calls.  To use GMRES,
%   call pagerank(..., struct('linsolver', ... 
%             @(f,v,tol,its) gmres(f,v,[],tol, its)))
%
%   Note 1: the 'approx' algorithm is the PageRank approximate personalized
%   PageRank algorithm due to Gleich and Polito.  It creates a set of
%   active pages and runs until either norm(p(boundary),1) < options.bp or
%   norm(p(boundary),inf) < options.bp, where the boundary is defined as
%   the set of pages that have a non-zero personalized PageRank but are not
%   in the set of active pages.  As options.bp -> 0, both of these
%   approximations compute the actual personalized PageRank vector.
%
%   Note 2: the 'eval' algorithm evaluates five algorithms to compute the
%   PageRank vector and summarizes the results in a report.  The return
%   from the algorithm are a set of cell arrays where 
%   p = cell(5,1), flag = cell(5,1), hist = cell(5,1), dt = cell(5,1)
%   and each cell contains the result from one algorithm.  
%   p{1} is the vector computed from the 'power' algorithm
%   p{2} is the vector computed from the 'gs' algorithm
%   p{3} is the vector computed from the 'arnoldi' algorithm
%   p{4} is the vector computed from the 'linsys' algorithm with bicgstab
%   p{5} is the vector comptued from the 'linsys' algorithm with gmres
%   the other outputs all match these indices.
%
%   pagerank(A,options) specifies optional parameters
%   options.c: the teleportation coefficient [double | {0.85}]
%   options.tol: the stopping tolerance [double | {1e-7}]
%   options.v: the personalization vector [vector | {uniform: 1/n}]
%   options.maxiter maximum number of iterations [integer | {500}]
%   options.verbose: extra output information [{0} | 1]
%   options.x0: the initial vector [vector | {options.v}]
%   options.alg: force the algorithm type 
%     ['gs' | 'power' | 'linsys' | 'dense' | {'arnoldi'} | ...
%      'approx' | 'eval']
%
%   options.linsys_solver: a function handle for the linear solver used
%     with the linsys option [fh | {@(f,v,tol,its) bicgstab(f,v,tol,its)}]
%   options.arnoldi_k: use a k dimensional arnoldi basis [intger | {8}]
%   options.approx_bp: boundary probability to expand [float | 1e-3]
%   options.approx_boundary: when to expand on the boundary [1 | {inf}]
%   options.approx_subiter: number of subiterations of power iterations
%     [integer | {5}]
%
% Example:
%   load cs-stanford;
%   p = pagerank(A);
%   p = pagerank(A,struct('alg','linsys',... 
%                  'linsys_solver',@(f,v,tol,its) gmres(f,v,[],tol, its)));
%   pagerank(A,struct('alg','eval'));


%
% pagerank.m
% David Gleich
%

%
% 21 February 2006
% -- added approximate PageRank
%
% Revision 1.10
% 28 January 2006
%  -- added different computational modes and timing information
%
% Revision 1.00
% 19 Octoboer 2005
%

%
% The driver does mainly parameter checking, then sends things off to one
% of the computational routines.
% 

[m n] = size(A);
if (m ~= n)
    error('pagerank:invalidParameter', 'the matrix A must be square');
end;    

options = struct('tol', 1e-7, 'maxiter', 500, 'v', ones(n,1)./n, ...
    'c', 0.85, 'verbose', 0, 'alg', 'arnoldi', ...
    'linsys_solver', @(f,v,tol,its) bicgstab(f,v,tol,its), ...
    'arnoldi_k', 8, 'approx_bp', 1e-3, 'approx_boundary', inf,...
    'approx_subiter', 5);

if (nargin > 1)
    options = merge_structs(optionsu, options);
end;


if (size(options.v) ~= size(A,1))
    error('pagerank:invalidParameter', ...
        'the vector v must have the same size as A');
end;

if (~issparse(A))
    A = sparse(A);
end;

% normalize the matrix
P = normout(A);

switch (options.alg)
    case 'dense'
        [x flag hist dt] = pagerank_dense(P, options);
    case 'linsys'
        [x flag hist dt] = pagerank_linsys(P, options);
    case 'gs'
        [x flag hist dt] = pagerank_gs(P, options);
    case 'power'
        [x flag hist dt] = pagerank_power(P, options);
    case 'arnoldi'
        [x flag hist dt] = pagerank_arnoldi(P, options);
    case 'approx'
        [x flag hist dt] = pagerank_approx(P, options);
    case 'eval'
        [x flag hist dt] = pagerank_eval(P, options);
    otherwise
        error('pagerank:invalidParameter', ...
            'invalid computation mode specified.');
end;

   

% ===================================
% pagerank_linsys
% ===================================

function [x flag hist dt] = pagerank_linsys(P, options)

if (options.verbose > 0)
   fprintf('linear system computation...\n');
end;

tol = options.tol;
v = options.v;
maxiter = options.maxiter;
c = options.c;

solver = options.linsys_solver;

% transpose P (see pagerank_linsys_mult docs)
P = P';

f = @(x,varargin) pagerank_linsys_mult(x,P,c,length(varargin));

tic;
[x flag ignore1 ignore2 hist] = solver(f,v,tol,maxiter);
dt = toc;

% renormalize the vector to have norm 1
x = x./norm(x,1);



function y = pagerank_linsys_mult(x,P,c,tflag)
% compute the matrix vector product for the linear system.  This function
% includes the transpose flag (tflag > 0) to indicate a transpose multiply.
% Because many of the algorithms just use A*x (and not A'*x) the matrix P
% should have already been transposed.
if (tflag > 0)
    %y = x - c*P'*x;
    y = x - c*spmatvec_transmult(P,x);
else
    %y = x - c*P*x;
    y = x - c*spmatvec_mult(P,x);
end;
    

% ===================================
% pagerank_dense
% ===================================

function [x flag hist dt]  = pagerank_dense(P, options)
% solve as a dense linear system

if (options.verbose > 0)
   fprintf('dense computation...\n');
end;

v = options.v;
c = options.c;

n = size(P,1);

P = eye(n) - c*full(P)';

tic;
x = P \ v;
dt = toc;

hist = norm(P*x - v,1);
flag = 0;

% renormalize the vector to have norm 1
x = x./norm(x,1);


% ===================================
% pagerank_gs
% ===================================
function [x flag hist dt]  = pagerank_gs(P, options)
% use gauss-seidel computation

if (options.verbose > 0)
   fprintf('gauss-seidel computation...\n');
end;

tol = options.tol;
v = options.v;
maxiter = options.maxiter;
c = options.c;

x = v;
if (isfield(options, 'x0'))
    x = options.x0;
else
    % this is dumb, but we need to make sure
    % we actually get x it's own memory...
    
    % right now, Matlab just has a ``shadow copy''
    x(1) = x(1)-1.0;
    x(1) = x(1)+1.0;    
end;

delta = 1;
iter = 0;
P = -c*P;

hist = zeros(maxiter,1);

dt = 0;
while (delta > tol && iter < maxiter)
    tic;
    xold = pagerank_gs_mult(P,x,(1-c)*v);
    dt = dt + toc;
    
    delta = norm(x - xold,1);
    hist(iter+1) = delta;
    
    if (options.verbose > 0)
        fprintf('iter=%d; delta=%f\n', iter, delta);
    end;
    
    iter = iter + 1;
end;

% resize hist
hist = hist(1:iter);

% renormalize the vector to have norm 1
x = x./norm(x,1);

% default is convergence
flag = 0;

if (delta > tol && iter == maxiter)
    warning('pagerank:didNotConverge', ...
        'The PageRank algorithm did not converge after %i iterations', ...
        maxiter);
    flag = 1;
end;

% ===================================
% pagerank_power
% ===================================

function [x flag hist dt] = pagerank_power(P, options)
% use the power iteration algorithm

if (options.verbose > 0)
   fprintf('power iteration computation...\n');
end;
 

tol = options.tol;
v = options.v;
maxiter = options.maxiter;
c = options.c;

x = v;
if (isfield(options, 'x0'))
    x = options.x0;
end;
    
hist = zeros(maxiter,1);
delta = 1;
iter = 0;
dt = 0;
while (delta > tol && iter < maxiter)
    tic;
    y =c* spmatvec_transmult(P,x);
    w = 1 - norm(y,1);
    y = y + w*v;
    dt = dt + toc;
    
    delta = norm(x - y,1);
    
    hist(iter+1) = delta;
    
    tic;
    x = y;
    dt = dt + toc;
    
    if (options.verbose > 0)
        fprintf('iter=%d; delta=%f\n', iter, delta);
    end;
    
    iter = iter + 1;
end;

% resize hist
hist = hist(1:iter);

flag = 0;

if (delta > tol && iter == maxiter)
    warning('pagerank:didNotConverge', ...
        'The PageRank algorithm did not converge after %i iterations', ...
        maxiter);
    flag = 1;
end;

% ===================================
% pagerank_arnoldi
% ===================================

function [x flag hist dt] = pagerank_arnoldi(P, options)
% use the power iteration algorithm

if (options.verbose > 0)
   fprintf('arnoldi method computation...\n');
end;
 

tol = options.tol;
v = options.v;
maxiter = options.maxiter;
c = options.c;
k = options.arnoldi_k;

x = v;
if (isfield(options, 'x0'))
    x = options.x0;
end;
    
hist = zeros(maxiter,1);

d = dangling(P);
d = double(d);
P = P';

%f = @(x) pagerank_arnoldi_mult(x,P,c,d,v);
f = @(x) pagerank_mult(x,P,c,d,v);

iter = 0;
dt = 0;
delta = 1;
while (delta > tol && iter < maxiter)
    tic;
    [Q H] = pagerank_arnoldi_fact(f,x,k);
    [u,s,v]=svd(H-[speye(k);zeros(1,k)]);  
    x=Q(:,1:k)*v(:,k);
    dt = dt + toc;
    
    % for statistics purposes only
    delta=norm(f(x)-x,1)/norm(x,1);
    
    hist(iter+1) = delta;
    
    if (options.verbose > 0)
        fprintf('iter=%d; delta=%f\n', iter, delta);
    end;
    
    iter = iter + 1;
end;

% ensure correct normalization
x = sign(sum(x))*x;
x = x/norm(x,1);

% resize hist
hist = hist(1:iter);

flag = 0;

if (delta > tol && iter == maxiter)
    warning('pagerank:didNotConverge', ...
        'The PageRank algorithm did not converge after %i iterations', ...
        maxiter);
    flag = 1;
end;


function [V,H] = pagerank_arnoldi_fact(A,V,k)
% [Q,H] = ARNOLDI7(A,Q0,K,c,d,e,v)
%
% ARNOLDI: Reduce an n x n  matrix A to upper Hessenberg form.
%     [Q,H] = ARNOLDI(A,Q0,K) computes (k+1) x k upper
%     Hessenberg matrix H and n x k matrix Q with orthonormal
%     columns and  Q(:,1) = Q0/NORM(Q0), such that 
%     Q(:,1:k+1)'*A*Q(:,1:k) = H.
%
% A can also be a function_handle to return A*x
%

%
% Written by Chen Grief
% modified by David Gleich
%

V(:,1) = V(:,1)/norm(V(:,1));
if (~isa(A,'function_handle'))
    f = @(x) A*x;
    A = f;
end;

w = A(V(:,1));
alpha=V(:,1)'*w;
H(1,1)=alpha;
f(:,1)=w-V(:,1)*alpha;
12 下一页
💿 文件大小 274 K
👤 上传用户 GreatMarxist
📂 所属分类 matlab例程
🏷️ 相关标签

#pagerank #matlab #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -