alg_config.cal

来自「Calc Software Package for Number Calc」· CAL 代码 · 共 1,254 行 · 第 1/3 页

CAL
1,254
字号
/* * alg_config - help determine optimal values for algorithm levels * * Copyright (C) 2006  Landon Curt Noll * * Calc is open software; you can redistribute it and/or modify it under * the terms of the version 2.1 of the GNU Lesser General Public License * as published by the Free Software Foundation. * * Calc is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU Lesser General * Public License for more details. * * A copy of version 2.1 of the GNU Lesser General Public License is * distributed with calc under the filename COPYING-LGPL.  You should have * received a copy with calc; if not, write to Free Software Foundation, Inc. * 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA. * * @(#) $Revision: 29.15 $ * @(#) $Id: alg_config.cal,v 29.15 2006/12/16 11:18:46 chongo Exp $ * @(#) $Source: /usr/local/src/cmd/calc/cal/RCS/alg_config.cal,v $ * * Under source code control:	2006/06/07 14:10:11 * File existed as early as:	2006 * * chongo <was here> /\oo/\	http://www.isthe.com/chongo/ * Share and enjoy!  :-)	http://www.isthe.com/chongo/tech/comp/calc/ */global test_time;	/* try for this many seconds in loop test *//* * mul_loop - measure the CPU time to perform a set of multiply loops * * given: *	repeat	number of multiply loops to perform *	x	array of 5 values, each the same length in BASEB-bit words * *		NOTE: When their lengths are 1 BASEB-bit word, then a *		      dummy loop of simple constants are used.  Thus the *		      length == 1 is an approximation of loop overhead. * * returns: *	approximate runtime to perform repeat the multiply loops * * NOTE: This is an internal support function that is normally *	 not called directly from the command line.  Call the *	 function best_mul2() instead. */define mul_loop(repeat, x){    local start;	/* start of execution */    local end;		/* end of execution */    local answer;	/* multiplicand */    local len;		/* length of each element */    local baseb_bytes;	/* bytes in a BASEB-bit word */    local i;    /* firewall */    if (!isint(repeat) || repeat < 0) {	quit "mul_loop: 1st arg: repeat must be an integer > 0";    }    if (size(*x) != 5) {	quit "mul_loop: 2nd arg matrix does not have 5 elements";    }    if (matdim(*x) != 1) {	quit "mul_loop: 2nd arg matrix is not 1 dimensional";    }    if (matmin(*x, 1) != 0) {	quit "mul_loop: 2nd arg matrix index range does not start with 0";    }    if (matmax(*x, 1) != 4) {	quit "mul_loop: 2nd arg matrix index range does not end with 4";    }    baseb_bytes = config("baseb") / 8;    len = sizeof((*x)[0]) / baseb_bytes;    for (i=1; i < 4; ++i) {	if ((sizeof((*x)[i]) / baseb_bytes) != len) {	    quit "mul_loop: 2nd arg matrix elements are not of equal BASEB-bit word length";	}    }    /* multiply pairwise, all sets of a given length */    start = usertime();    for (i=0; i < repeat; ++i) {	if (len == 1) {	    /* we use len == 1 to test this tester loop overhead */	    answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0;	    /**/	    answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0;	    /**/	    answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0;	    /**/	    answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0;	    /**/	    answer = 0 * 0; answer = 0 * 0; answer = 0 * 0; answer = 0 * 0;	} else {	    answer = (*x)[0] * (*x)[1];	    answer = (*x)[0] * (*x)[2];	    answer = (*x)[0] * (*x)[3];	    answer = (*x)[0] * (*x)[4];	    /**/	    answer = (*x)[1] * (*x)[0];	    answer = (*x)[1] * (*x)[2];	    answer = (*x)[1] * (*x)[3];	    answer = (*x)[1] * (*x)[4];	    /**/	    answer = (*x)[2] * (*x)[0];	    answer = (*x)[2] * (*x)[1];	    answer = (*x)[2] * (*x)[3];	    answer = (*x)[2] * (*x)[4];	    /**/	    answer = (*x)[3] * (*x)[0];	    answer = (*x)[3] * (*x)[1];	    answer = (*x)[3] * (*x)[2];	    answer = (*x)[3] * (*x)[4];	    /**/	    answer = (*x)[4] * (*x)[0];	    answer = (*x)[4] * (*x)[1];	    answer = (*x)[4] * (*x)[2];	    answer = (*x)[4] * (*x)[3];	}    }    /*     * return duration     */    end = usertime();    return end-start;}/* * mul_ratio - ratio of rates of 1st and 2nd multiply algorithms * * given: *	len	length in BASEB-bit words to multiply * * return: *	ratio of (1st / 2nd) algorithm rate * * When want to determine a rate to a precision of 1 part in 1000. * Most systems today return CPU time to at least 10 msec precision. * So to get rates to that precision, we need to time loops to at * least 1000 times as long as the precision (10 msec * 1000) * which usually requires timing of loops that last 10 seconds or more. * * NOTE: This is an internal support function that is normally *	 not called directly from the command line.  Call the *	 function best_mul2() instead. */define mul_ratio(len){    local mat x[5];		/* array of values for mul_loop to multiply */    local mat one[5];		/* array if single BASEB-bit values */    local baseb;		/* calc word size in bits */    local orig_cfg;		/* caller configuration */    local loops;		/* number of multiply loops to time */    local tlen;			/* time to perform some number of loops */    local tover;		/* est of time for loop overhead */    local alg1_rate;		/* loop rate of 1st algorithm */    local alg2_rate;		/* loop rate of 2nd algorithm */    local i;    /*     * firewall     */    if (!isint(len) || len < 2) {	quit "mul_ratio: 1st arg: len is not an integer > 1";    }    /*     * remember the caller's config state     */    orig_cfg = config("all");    config("mul2", 0),;    config("sq2", 0),;    config("pow2", 0),;    config("redc2", 0),;    config("tilde", 0),;    /*     * initialize x, the values we will multiply     *     * We want these tests to be repeatable as possible, so we will seed     * the PRNG in a deterministic way.     */    baseb = config("baseb");    srand(sha1(sha1(baseb, config("version"))));    for (i=0; i < 5; ++i) {	/* force the values to be a full len words long */	x[i] = ((1<<(((len-1) * baseb) + baseb-1)) |		    randbit(((len-1) * baseb) + baseb-2));	/* single BASEB-bit values */        one[i] = 1;    }    /*     * determine the number of loops needed to test 1st alg     */    config("mul2", 2^31-1),;    loops = 1/2;    do {	loops *= 2;	tlen = mul_loop(loops, &x);	if (config("user_debug") > 3) {	    printf("\t    alg1 loops %d took %.3f sec\n", loops, tlen);	}    } while (tlen < 1.0);    /*     * determine the 1st algorithm rate     */    loops = max(1, ceil(loops * test_time / tlen));    if (loops < 8) {	if (config("user_debug") > 1) {	    printf("    we must expand loop test time to more than %d secs\n",		ceil(test_time * (8 / loops)));	}	loops = 8;    }    if (config("user_debug") > 3) {	printf("\t    will try alg1 %d loops\n", loops);    }    tlen = mul_loop(loops, &x);    if (config("user_debug") > 3) {	printf("\t    alg1 time = %.3f secs\n", tlen);    }    tover = mul_loop(loops, &one);    if (config("user_debug") > 3) {	printf("\t    alg1 overhead look %.3f secs\n", tover);    }    if (tlen <= tover) {	quit "mul_ratio: overhead >= loop time";    }    alg1_rate = loops / (tlen - tover);    if (config("user_debug") > 2) {	printf("\tmultiply alg1 rate = %.3f loopsets/sec\n", alg1_rate);    }    if (alg1_rate <= 0.0) {	quit "mul_ratio: alg1 rate was <= 0.0";    }    /*     * determine the number of loops needed to test 1st alg     */    config("mul2", 2),;    loops = 1/2;    do {	loops *= 2;	tlen = mul_loop(loops, &x);	if (config("user_debug") > 3) {	    printf("\t    alg2 loops %d took %.3f sec\n", loops, tlen);	}    } while (tlen < 1.0);    /*     * determine the 2nd algorithm rate     */    loops = max(1, ceil(loops * test_time / tlen));    if (loops < 8) {	if (config("user_debug") > 1) {	    printf("    we must expand loop test time to more than %d secs\n",		ceil(test_time * (8 / loops)));	}	loops = 8;    }    tlen = mul_loop(loops, &x);    if (config("user_debug") > 3) {	printf("\t    alg2 time = %.3f secs\n", tlen);    }    tover = mul_loop(loops, &one);    if (config("user_debug") > 3) {	printf("\t    alg2 overhead look %.3f secs\n", tover);    }    if (tlen <= tover) {	quit "mul_ratio: overhead >= loop time";    }    alg2_rate = loops / (tlen - tover);    if (config("user_debug") > 2) {	printf("\tmultiply alg2 rate = %.3f loopsets/sec\n", alg2_rate);    }    if (alg2_rate <= 0.0) {	quit "mul_ratio: alg2 rate was <= 0.0";    }    /*     * restore old config     */    config("all", orig_cfg),;    /*     * return alg1 / alg2 rate ratio     */    return (alg1_rate / alg2_rate);}/* * best_mul2 - determine the best config("mul2") parameter * * NOTE: Due to precision problems with CPU measurements, it is not *	 unusual for the output of this function to vary slightly *	 from run to run. * * NOTE: This function is designed to take a long time to run. *	  We recommend setting: * *		config("user_debug", 2) * *	  so that yon can watch the progress of this function. */define best_mul2(){    local ratio;	/* previously calculated alg1/alg2 ratio */    local low;		/* low loop value tested */    local high;		/* low loop value tested */    local expand;	/* how fast to expand the length */    /*     * setup     */    test_time = 15.0;    if (config("user_debug") > 0) {	printf("will start with loop test time of %d secs\n", test_time);    }    /*     * firewall - must have a >1 ratio for the initial length     */    high = 16;    if (config("user_debug") > 0) {	printf("testing multiply alg1/alg2 ratio for len = %d\n", high);    }    ratio = mul_ratio(high);    if (config("user_debug") > 1) {	printf("    multiply alg1/alg2 ratio = %.3f\n", ratio);    }    if (ratio <= 1.0) {	quit "best_mul2: tests imply config(\"mul2\") should be < 4";    }    /*     * expand lengths until the ratio flips     */    do {	/*	 * determine the paramters of the next ratio test	 *	 * We will multiplicatively expand our test level until	 * the ratio drops below 1.0.	 */	expand = ((ratio >= 3.5) ? 16 : 2^round(ratio));	low = high;	high *= expand;	if (config("user_debug") > 1) {	    printf("    expand the next test range by a factor of %d\n",	           expand);	}	/*	 * determine the alg1/alg2 test ratio for this new length	 */	if (high >= 2^31) {	    quit "best_mul2: tests imply config(\"mul2\") should be >= 2^31";	}	if (config("user_debug") > 0) {	    printf("testing multiply alg1/alg2 ratio for len = %d\n", high);	}	ratio = mul_ratio(high);	if (config("user_debug") > 1) {	    printf("    multiply alg1/alg2 ratio = %.3f\n", ratio);	}    } while (ratio >= 1.0);    if (config("user_debug") > 0) {	printf("alg1/alg2 ratio now < 1.0, starting binary search between %d and %d\n",	    low, high);    }    /*     * binary search between low and high, for where ratio is just under 1.0     */    while (low+1 < high) {    	/* try the mid-point */	if (config("user_debug") > 0) {	    printf("testing multiply alg1/alg2 ratio for len = %d\n",	    	int((low+high)/2));	}	ratio = mul_ratio(int((low+high)/2));	if (config("user_debug") > 1) {	    printf("    multiply alg1/alg2 ratio = %.3f\n", ratio);	}	/* bump lower range up if we went over */	if (ratio >= 1.0) {	    if (config("user_debug") > 2) {	    	printf("\tmove low from %d up to %d\n",		    low, int((low+high)/2));	    }	    low = int((low+high)/2);	/* drop higher range down if we went under */	} else {	    if (config("user_debug") > 2) {	    	printf("\tmove high from %d down to %d\n",		     high, int((low+high)/2));	    }	    high = int((low+high)/2);	}    }    /*

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?