📄 bte_regr_test.c
字号:
 * our node to all the other nodes (including ourself). We
 * accomplish this with both a memcpy and a bte_copy. Timings
 * for both are printed.
 *
 **********************************************************************/

/* Convert a count of ITC (interval time counter) ticks to nanoseconds
 * using this cpu's ITC frequency. */
#define NSEC(x) ((x) * (1000000000UL / local_cpu_data->itc_freq))

/*
 * Migrate to each cpu. When on the desired cpu, time transfers to
 * each node by calling brt_time_xfer.
 *
 * Two iteration orders are supported: with tm_alternate set, the cpu
 * is cycled inside each (node, size) combination; otherwise all sizes
 * and nodes are timed from one cpu before moving to the next.
 * Transfer sizes double from tm_min_lines up to tm_max_lines.
 * Always returns 0.
 */
static int
brt_tst_time_xfers(void)
{
	int tst_cpu;		/* cpu the test is currently pinned to */
	int dest_node;		/* node receiving the transfer */
	int xfer_lines;		/* transfer size, in cache lines */
	int i;

	/* CSV column headers matching the printk in brt_time_xfer. */
	if (tm_memcpy) {
		printk("Cpu,Src,Dst,Lines,Stup,Transfr,Fin,Execute,"
		       "Overall,Memcpy\n");
	} else {
		printk("Cpu,Src,Dst,Lines,Stup,Transfr,Fin,Execute,"
		       "Overall\n");
	}

	if (tm_alternate) {
		/* Now transfer from this node to all the others. */
		for (dest_node = 0; dest_node < numnodes; dest_node++) {
			for (xfer_lines = tm_min_lines;
			     xfer_lines <= tm_max_lines;) {
				for (i = 0; i < tm_iterations; i++) {
					for (tst_cpu = 0;
					     tst_cpu < smp_num_cpus;
					     tst_cpu++) {
						/* Move to the desired CPU. */
						set_cpus_allowed(current,
							(1UL << tst_cpu));
						brt_time_xfer(dest_node, 1,
							      xfer_lines);
					}
				}

				/* Handle a min of 0 */
				if (xfer_lines < 1) {
					xfer_lines = 1;
				} else {
					xfer_lines *= 2;
				}
			}
		}
	} else {
		for (tst_cpu = 0; tst_cpu < smp_num_cpus; tst_cpu++) {
			/* Move to the desired CPU. */
			set_cpus_allowed(current, (1UL << tst_cpu));

			/* Now transfer from this node to all the others. */
			for (dest_node = 0; dest_node < numnodes;
			     dest_node++) {
				for (xfer_lines = tm_min_lines;
				     xfer_lines <= tm_max_lines;) {
					brt_time_xfer(dest_node,
						      tm_iterations,
						      xfer_lines);

					/* Handle a min of 0 */
					if (xfer_lines < 1) {
						xfer_lines = 1;
					} else {
						xfer_lines *= 2;
					}
				}
			}
		}
	}

	return (0);
}

/*
 * Transfer the bte_test_buffer from our node to the specified
 * destination and print out timing results.
 */
static void
brt_time_xfer(int dest_node, int iterations, int xfer_lines)
{
	int iteration;
	char *src, *dst;
	u64 xfer_len, src_phys, dst_phys;
	u64 itc_before, itc_after, mem_intvl, bte_intvl;

	/* Total transfer size in bytes. */
	xfer_len = xfer_lines * L1_CACHE_BYTES;

	/* Source is this node's test buffer; destination is the test
	 * buffer belonging to dest_node's second bte interface. */
	src = nodepda->bte_if[0].bte_test_buf;
	src_phys = __pa(src);
	dst = NODEPDA(dest_node)->bte_if[1].bte_test_buf;
	dst_phys = __pa(dst);
	/* Zeroed so the tm_memcpy printk never reads an unset value. */
	mem_intvl = 0;

	for (iteration = 0; iteration < iterations; iteration++) {
		/* Optionally time the processor doing the same copy,
		 * for comparison with the BTE. */
		if (tm_memcpy) {
			itc_before = ia64_get_itc();
			memcpy(dst, src, xfer_len);
			itc_after = ia64_get_itc();
			mem_intvl = itc_after - itc_before;
		}

		/* Time the BTE doing the transfer.  The ITC reads must
		 * bracket the call immediately; do not reorder. */
		itc_before = ia64_get_itc();
		bte_copy(src_phys, dst_phys, xfer_len, BTE_NOTIFY, NULL);
		itc_after = ia64_get_itc();
		bte_intvl = itc_after - itc_before;

		/* Emit one CSV row per iteration; the extra Memcpy column
		 * is present only when tm_memcpy is set. */
		if (tm_memcpy) {
			printk("%3d,%3d,%3d,%5d,%4ld,%7ld,%3ld,"
			       "%7ld,%7ld,%7ld\n",
			       smp_processor_id(), NASID_GET(src),
			       NASID_GET(dst), xfer_lines,
			       NSEC(bte_setup_time),
			       NSEC(bte_transfer_time),
			       NSEC(bte_tear_down_time),
			       NSEC(bte_execute_time), NSEC(bte_intvl),
			       NSEC(mem_intvl));
		} else {
			printk("%3d,%3d,%3d,%5d,%4ld,%7ld,%3ld,"
			       "%7ld,%7ld\n",
			       smp_processor_id(), NASID_GET(src),
			       NASID_GET(dst), xfer_lines,
			       NSEC(bte_setup_time),
			       NSEC(bte_transfer_time),
			       NSEC(bte_tear_down_time),
			       NSEC(bte_execute_time), NSEC(bte_intvl));
		}
	}
}

/***********************************************************************
 * Notification Hang Test. -- NOTE: Has never actually caused a hang.
 *
 * The next set of code checks to see if the Notification Hang
 * occurs. It does this by starting one thread per cpu, pinning
 * the thread to its assigned cpu. After it is pinned, we lock
 * the associated bte. Source, Dest, and Notification are
 * assigned.
 *
 * Inside of a loop, we set the length and trigger the
 * transfer. We use the ITC to determine when the transfer should
 * complete. Whenever the IBLS_BUSY bit is cleared, the transfer
 * has completed. We occasionally call schedule (since all CPUs
 * have a pinned process the machine will be doing nothing but
 * our transfers) and loop.
* * If twice max normal time has passed without seeing the * notification, we check the Length/Status register to see if * IBLS_BUSY is still asserted and length is zero. This is an * indication of the hang. * **********************************************************************//* * Launch one thread per cpu. When all threads are started, sleep * the specified timeout and then notify the other threads that it * is time to exit. */static intbrt_tst_notify_hang(void){ int tst_cpu; printk("Waiting %d seconds to complete test.\n", hang_timeout); atomic_set(&brt_thread_cnt, 0); brt_exit_flag = 0; for (tst_cpu = 0; tst_cpu < smp_num_cpus; tst_cpu++) { if ((kernel_thread(brt_notify_thrd, (void *)(long)tst_cpu, 0)) < 0) { printk("Failed to start thread.\n"); } } set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(hang_timeout * HZ); set_current_state(TASK_RUNNING); printk("Flagging an exit.\n"); brt_exit_flag = 1; while (atomic_read(&brt_thread_cnt)) { /* Wait until everyone else is done. */ schedule(); } printk("All threads have exited.\n"); return (0);}/* * One of these threads is started per cpu. Each thread is responsible * for loading that cpu's bte interface and then writing to the * test buffer. The transfers are set in a round-robin fashion. * The end result is that each test buffer is being written into * by the previous node and both cpu's at the same time as the * local bte is transferring it to the next node. */static intbrt_notify_thrd(void *__bind_cpu){ int bind_cpu = (long int)__bind_cpu; int cpu = cpu_logical_map(bind_cpu); nodepda_t *nxt_node; long tmout_itc_intvls; long tmout; long passes; long good_xfer_cnt; u64 src_phys, dst_phys; int i; volatile char *src_buf; u64 *notify; atomic_inc(&brt_thread_cnt); daemonize(); set_user_nice(current, 19); sigfillset(¤t->blocked); /* Migrate to the right CPU */ set_cpus_allowed(current, 1UL << cpu); /* Calculate the uSec timeout itc offset. 
*/ tmout_itc_intvls = local_cpu_data->cyc_per_usec * hang_usec; if (local_cnodeid() == (numnodes - 1)) { nxt_node = NODEPDA(0); } else { nxt_node = NODEPDA(local_cnodeid() + 1); } src_buf = nodepda->bte_if[0].bte_test_buf; src_phys = __pa(src_buf); dst_phys = __pa(nxt_node->bte_if[0].bte_test_buf); notify = kmalloc(L1_CACHE_BYTES, GFP_KERNEL); ASSERT(!((u64) notify & L1_CACHE_MASK)); printk("BTE Hang %d xfer 0x%lx -> 0x%lx, Notify=0x%lx\n", smp_processor_id(), src_phys, dst_phys, (u64) notify); passes = 0; good_xfer_cnt = 0; /* Loop until signalled to exit. */ while (!brt_exit_flag) { /* * A hang will prevent further transfers. * NOTE: Sometimes, it appears like a hang occurred and * then transfers begin again. This just means that * there is NUMA congestion and the hang_usec param * should be increased. */ if (!(*notify & IBLS_BUSY)) { if ((bte_copy(src_phys, dst_phys, 4UL * L1_CACHE_BYTES, BTE_NOTIFY, (void *)notify)) != BTE_SUCCESS) { printk("<0>Cpu %d Could not " "allocate a bte.\n", smp_processor_id()); continue; } tmout = ia64_get_itc() + tmout_itc_intvls; while ((*notify & IBLS_BUSY) && (ia64_get_itc() < tmout)) { /* Push data out with the processor. */ for (i = 0; i < (4 * L1_CACHE_BYTES); i += L1_CACHE_BYTES) { src_buf[i] = (passes % 128); } }; if (*notify & IBLS_BUSY) { printk("<0>Cpu %d BTE appears to have " "hung.\n", smp_processor_id()); } else { good_xfer_cnt++; } } /* Every x passes, take a little break. */ if (!(++passes % 40)) { passes = 0; schedule_timeout(0.01 * HZ); } } kfree(notify); printk("Cpu %d had %ld good passes\n", smp_processor_id(), good_xfer_cnt); atomic_dec(&brt_thread_cnt); return (0);}/*********************************************************************** * Invalid Transfer Test. * * Just transfer from the local node to a nasid which does not * exist. * * >>> Potential Problem: on SN1, HUB interrupt doesn't always * occurr. 
* **********************************************************************//* * Locate a nasid which doesn't exist. Perform a bte_copy from that * node to our local node. */static intbrt_tst_invalid_xfers(void){ int i; int free_nasid = -1; int cpu; int error_cnt; u64 ret_code; if (ix_srcnasid != -1) { free_nasid = ix_srcnasid; } else { /* Only looking for nasids from C-Nodes. */ for (i = 0; i < PLAT_MAX_NODE_NUMBER; i += 2) { if (local_node_data->physical_node_map[i] == -1) { free_nasid = i; break; } } } if (free_nasid == -1) { printk("tst_invalid_xfers: No free nodes found. " "Exiting.\n"); return (0); } printk("tst_invalid_xfers: Using source nasid of %d\n", free_nasid); error_cnt = 0; for (i = 0; i < ix_iterations; i++) { if (verbose >= 1) { printk("-------------------------------" "-------------------------------" "--------------\n"); } if ((verbose >= 1) || !(i % 10)) { printk(" Loop %d\n", i); } for (cpu = 0; cpu < smp_num_cpus; cpu++) { set_cpus_allowed(current, (1UL << cpu)); if (verbose > 1) { printk("Testing with CPU %d\n", smp_processor_id()); } /* >>> Need a better means of calculating a * remote addr. */ ret_code = bte_copy(TO_NODE(free_nasid, 0), __pa(nodepda->bte_if[0]. bte_test_buf), 4 * L1_CACHE_BYTES, BTE_NOTIFY, NULL); error_cnt += (ret_code ? 1 : 0); } } ret_code = ((error_cnt != (ix_iterations * smp_num_cpus)) ? 1 : 0); return (ret_code);}/*********************************************************************** * Kernel command line handler. 
* **********************************************************************/#if !defined(MODULE)static int __initbrt_setup(char *str){ int cur_val; if (get_option(&str, &cur_val)) { selected_tests = cur_val; } if (get_option(&str, &cur_val)) { verbose = cur_val; } if (get_option(&str, &cur_val)) { hang_timeout = cur_val; } if (get_option(&str, &cur_val)) { hang_usec = cur_val; } if (get_option(&str, &cur_val)) { tm_min_lines = cur_val; } if (get_option(&str, &cur_val)) { tm_max_lines = cur_val; } if (get_option(&str, &cur_val)) { tm_iterations = cur_val; } if (get_option(&str, &cur_val)) { tm_alternate = cur_val; } if (get_option(&str, &cur_val)) { tm_memcpy = cur_val; } if (get_option(&str, &cur_val)) { ix_iterations = cur_val; } if (get_option(&str, &cur_val)) { ix_srcnasid = cur_val; } return (1);}#endif /* !defined(MODULE) *//*********************************************************************** * Module parameters. * * The two supported cases are loadable module parms and kernel * command line support. * * The loadable module options are specified below in the * MODULE_PARM macros and have associated descriptions. * * The kernel command line option is btetest=x[,y[,z]] etc. The * individual setting order is constant. NOTE: The btetest flag * is checked for in the bte_init_node function. 
 *
 **********************************************************************/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION("Test the Block Transfer Engine(BTE) "
		   "present on SGI machines.");

/* Test selection and output control. */
MODULE_PARM(selected_tests, "1i");
MODULE_PARM_DESC(selected_tests, "Bitmask of tests to run.");
MODULE_PARM(verbose, "1i");
MODULE_PARM_DESC(verbose, "How much information should be "
		 "printed during the tests.");

/* Notification hang test parameters. */
MODULE_PARM(hang_timeout, "1i");
MODULE_PARM_DESC(hang_timeout, "Number of seconds to wait for the Bte "
		 "Notification Failure.");
MODULE_PARM(hang_usec, "1i");
MODULE_PARM_DESC(hang_usec,
		 "Number of micro-seconds to wait for the 4-line Bte "
		 "transfer to complete.");

/* Transfer timing test parameters. */
MODULE_PARM(tm_min_lines, "1i");
MODULE_PARM_DESC(tm_min_lines, "Minimum number of cache lines"
		 " to time with.");
MODULE_PARM(tm_max_lines, "1i");
MODULE_PARM_DESC(tm_max_lines, "Maximum number of cache lines"
		 " to time with.");
MODULE_PARM(tm_iterations, "1i");
MODULE_PARM_DESC(tm_iterations, "Rerun each timed transfer this "
		 "many times.");
MODULE_PARM(tm_alternate, "1i");
MODULE_PARM_DESC(tm_alternate, "Cycle across cpus between each "
		 "iteration");
MODULE_PARM(tm_memcpy, "1i");
MODULE_PARM_DESC(tm_memcpy, "Use memcpy as a comparison to BTE");

/* Invalid transfer test parameters. */
MODULE_PARM(ix_iterations, "1i");
MODULE_PARM_DESC(ix_iterations, "Rerun each transfer from an "
		 "invalid nasid this many times.");
MODULE_PARM(ix_srcnasid, "1i");
MODULE_PARM_DESC(ix_srcnasid, "Nasid to attempt xfer from.");

#if !defined(MODULE)
/* Kernel command line entry point; parsed by brt_setup. */
__setup("btetest=", brt_setup);
#endif /* !defined(MODULE) */

module_init(brt_test_init);
module_exit(brt_test_exit);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -