/*
 * smp.c
 *
 * Provenance (translated from the viewer banner): from the "YL-2410
 * Linux 2.6.8 kernel source" code viewer - C source, 1,285 lines
 * total, page 1 of 3.  (Viewer chrome commented out so the listing
 * remains valid C.)
 */
/* NOTE(review): this page begins mid-function.  The lines below are the
 * tail of an mm-wide TLB flush routine (it flushes a batch of pending
 * user TLB entries); its head is on the previous page, so only the
 * visible portion is reproduced here, token-for-token.
 */
	mm->cpu_vm_mask = cpumask_of_cpu(cpu);
		goto local_flush_and_out;
	} else {
		/* This optimization is not valid.  Normally
		 * we will be holding the page_table_lock, but
		 * there is an exception which is copy_page_range()
		 * when forking.  The lock is held during the individual
		 * page table updates in the parent, but not at the
		 * top level, which is where we are invoked.
		 */
		if (0) {
			cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);

			/* By virtue of running under the mm->page_table_lock,
			 * and mmu_context.h:switch_mm doing the same, the
			 * following operation is safe.
			 */
			if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
				goto local_flush_and_out;
		}
	}

	/* Cross-call the cpus in mm->cpu_vm_mask, then flush locally too. */
	smp_cross_call_masked(&xcall_flush_tlb_pending,
			      ctx, nr, (unsigned long) vaddrs,
			      mm->cpu_vm_mask);

local_flush_and_out:
	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

/* Flush a page-aligned range of kernel virtual addresses on every cpu:
 * cross-call all remote cpus, then perform the same flush locally.
 */
void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end    = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

/* CPU capture. */
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

/* Corral every other online cpu into smp_penguin_jailcell() and spin
 * until all of them have checked in to smp_capture_registry.  Calls
 * nest: only the outermost capture (depth transition 0 -> 1) actually
 * sends the cross-call.
 */
void smp_capture(void)
{
	int result = __atomic_add(1, &smp_capture_depth);

	/* Order the depth bump before the flag/registry stores below. */
	membar("#StoreStore | #LoadStore");
	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		membar("#StoreStore | #LoadStore");
		/* Count ourselves in, then wait until all ncpus online
		 * cpus (us plus the captured ones) are accounted for.
		 */
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			membar("#LoadLoad");
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

/* Undo one level of smp_capture(); the outermost release (depth back
 * to 0) clears the flag the captured cpus are spinning on.
 */
void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar("#StoreStore | #StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
 * can service tlb flush xcalls...
 */
extern void prom_world(int);
extern void save_alternate_globals(unsigned long *);
extern void restore_alternate_globals(unsigned long *);

/* Cross-call target for xcall_capture: register this cpu in
 * smp_capture_registry, switch to the PROM world, and spin until
 * smp_release() clears penguins_are_doing_time.
 */
void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	unsigned long global_save[24];

	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	save_alternate_globals(global_save);
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar("#StoreLoad | #StoreStore");
	while (penguins_are_doing_time)
		membar("#LoadLoad");
	restore_alternate_globals(global_save);
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

extern unsigned long xcall_promstop;

/* Cross-call every other cpu with xcall_promstop. */
void smp_promstop_others(void)
{
	smp_cross_call(&xcall_promstop, 0, 0, 0);
}

extern void sparc64_do_profile(struct pt_regs *regs);

#define prof_multiplier(__cpu)		cpu_data(__cpu).multiplier
#define prof_counter(__cpu)		cpu_data(__cpu).counter

/* Per-cpu timer tick handler.  Profiles on every tick; runs the full
 * timer bookkeeping only every prof_multiplier(cpu) ticks, with the
 * boot cpu additionally driving timer_tick_interrupt().  The do-while
 * loop catches up if we fell behind the tick compare register.
 */
void smp_percpu_timer_interrupt(struct pt_regs *regs)
{
	unsigned long compare, tick, pstate;
	int cpu = smp_processor_id();
	int user = user_mode(regs);

	/*
	 * Check for level 14 softint.
	 */
	{
		unsigned long tick_mask = tick_ops->softint_mask;

		if (!(get_softint() & tick_mask)) {
			extern void handler_irq(int, struct pt_regs *);

			handler_irq(14, regs);
			return;
		}
		clear_softint(tick_mask);
	}

	do {
		sparc64_do_profile(regs);
		if (!--prof_counter(cpu)) {
			irq_enter();

			if (cpu == boot_cpu_id) {
				kstat_this_cpu.irqs[0]++;
				timer_tick_interrupt(regs);
			}

			update_process_times(user);

			irq_exit();

			prof_counter(cpu) = prof_multiplier(cpu);
		}

		/* Guarantee that the following sequences execute
		 * uninterrupted.
		 */
		__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
				     "wrpr	%0, %1, %%pstate"
				     : "=r" (pstate)
				     : "i" (PSTATE_IE));

		compare = tick_ops->add_compare(current_tick_offset);
		tick = tick_ops->get_tick();

		/* Restore PSTATE_IE. */
		__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
				     : /* no outputs */
				     : "r" (pstate));
	} while (time_after_eq(tick, compare));
}

/* Initialize this cpu's tick compare machinery with interrupts
 * disabled at the PSTATE level.
 */
static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	prof_counter(cpu) = prof_multiplier(cpu) = 1;

	/* Guarantee that the following sequences execute
	 * uninterrupted.
	 */
	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
			     "wrpr	%0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));

	tick_ops->init_tick(current_tick_offset);

	/* Restore PSTATE_IE. */
	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
			     : /* no outputs */
			     : "r" (pstate));
}

/* Boot-time tick setup: record the boot cpu id, seed the tick offset,
 * and mark the boot cpu online.
 */
void __init smp_tick_init(void)
{
	boot_cpu_id = hard_smp_processor_id();
	current_tick_offset = timer_tick_offset;

	cpu_set(boot_cpu_id, cpu_online_map);
	prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
}

cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

extern unsigned long cheetah_tune_scheduling(void);

/* Measure how long flushing the local E-cache takes and derive the
 * scheduler's cache_decay_ticks from that measurement.
 */
static void __init smp_tune_scheduling(void)
{
	unsigned long orig_flush_base, flush_base, flags, *p;
	unsigned int ecache_size, order;
	cycles_t tick1, tick2, raw;
	int cpu_node;

	/* Approximate heuristic for SMP scheduling.  It is an
	 * estimation of the time it takes to flush the L2 cache
	 * on the local processor.
	 *
	 * The ia32 chooses to use the L1 cache flush time instead,
	 * and I consider this complete nonsense.  The Ultra can service
	 * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
	 * L2 misses are what create extra bus traffic (ie. the "cost"
	 * of moving a process from one cpu to another).
	 */
	printk("SMP: Calibrating ecache flush... ");
	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		cacheflush_time = cheetah_tune_scheduling();
		goto report;
	}

	cpu_find_by_instance(0, &cpu_node, NULL);
	ecache_size = prom_getintdefault(cpu_node,
					 "ecache-size", (512 * 1024));
	if (ecache_size > (4 * 1024 * 1024))
		ecache_size = (4 * 1024 * 1024);
	orig_flush_base = flush_base =
		__get_free_pages(GFP_KERNEL, order = get_order(ecache_size));

	if (flush_base != 0UL) {
		local_irq_save(flags);

		/* Scan twice the size once just to get the TLB entries
		 * loaded and make sure the second scan measures pure misses.
		 */
		for (p = (unsigned long *)flush_base;
		     ((unsigned long)p) < (flush_base + (ecache_size<<1));
		     p += (64 / sizeof(unsigned long)))
			*((volatile unsigned long *)p);

		tick1 = tick_ops->get_tick();

		__asm__ __volatile__("1:\n\t"
				     "ldx	[%0 + 0x000], %%g1\n\t"
				     "ldx	[%0 + 0x040], %%g2\n\t"
				     "ldx	[%0 + 0x080], %%g3\n\t"
				     "ldx	[%0 + 0x0c0], %%g5\n\t"
				     "add	%0, 0x100, %0\n\t"
				     "cmp	%0, %2\n\t"
				     "bne,pt	%%xcc, 1b\n\t"
				     " nop"
				     : "=&r" (flush_base)
				     : "0" (flush_base),
				       "r" (flush_base + ecache_size)
				     : "g1", "g2", "g3", "g5");

		tick2 = tick_ops->get_tick();

		local_irq_restore(flags);

		raw = (tick2 - tick1);

		/* Dampen it a little, considering two processes
		 * sharing the cache and fitting.
		 */
		cacheflush_time = (raw - (raw >> 2));

		free_pages(orig_flush_base, order);
	} else {
		/* Allocation failed: fall back to a size-based estimate. */
		cacheflush_time = ((ecache_size << 2) +
				   (ecache_size << 1));
	}
report:
	/* Convert ticks/sticks to jiffies. */
	cache_decay_ticks = cacheflush_time / timer_tick_offset;
	if (cache_decay_ticks < 1)
		cache_decay_ticks = 1;

	printk("Using heuristic of %ld cycles, %ld ticks.\n",
	       cacheflush_time, cache_decay_ticks);
}

/* /proc/profile writes can call this, don't __init it please. */
static spinlock_t prof_setup_lock = SPIN_LOCK_UNLOCKED;

/* Set the profiling rate on all cpus: multiplier profile ticks per
 * timer tick.  Returns 0, or -EINVAL if multiplier is zero or would
 * make the resulting tick offset too small.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	unsigned long flags;
	int i;

	if ((!multiplier) || (timer_tick_offset / multiplier) < 1000)
		return -EINVAL;

	spin_lock_irqsave(&prof_setup_lock, flags);
	for (i = 0; i < NR_CPUS; i++)
		prof_multiplier(i) = multiplier;
	current_tick_offset = (timer_tick_offset / multiplier);
	spin_unlock_irqrestore(&prof_setup_lock, flags);

	return 0;
}

/* Build phys_cpu_present_map from the PROM cpu list, then trim
 * non-boot cpus until at most max_cpus remain possible.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	int instance, mid;

	instance = 0;
	while (!cpu_find_by_instance(instance, NULL, &mid)) {
		if (mid < max_cpus)
			cpu_set(mid, phys_cpu_present_map);
		instance++;
	}

	if (num_possible_cpus() > max_cpus) {
		instance = 0;
		while (!cpu_find_by_instance(instance, NULL, &mid)) {
			if (mid != boot_cpu_id) {
				cpu_clear(mid, phys_cpu_present_map);
				if (num_possible_cpus() <= max_cpus)
					break;
			}
			instance++;
		}
	}

	smp_store_cpu_info(boot_cpu_id);
}

/* Mark the boot cpu online and present; halt via the PROM if its
 * hardware id does not fit in NR_CPUS.
 */
void __devinit smp_prepare_boot_cpu(void)
{
	if (hard_smp_processor_id() >= NR_CPUS) {
		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
		prom_halt();
	}

	current_thread_info()->cpu = hard_smp_processor_id();
	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), phys_cpu_present_map);
}

/* Boot one secondary cpu, wait for it to appear online, and sync its
 * tick counter.  Returns 0 on success, negative error otherwise.
 */
int __devinit __cpu_up(unsigned int cpu)
{
	int ret = smp_boot_one_cpu(cpu);

	if (!ret) {
		cpu_set(cpu, smp_commenced_mask);
		while (!cpu_isset(cpu, cpu_online_map))
			mb();
		/* NOTE(review): this re-check can never take the -ENODEV
		 * branch — the spin loop above only exits once the cpu is
		 * already set in cpu_online_map.
		 */
		if (!cpu_isset(cpu, cpu_online_map)) {
			ret = -ENODEV;
		} else {
			smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

/* Final bringup step: report the total BogoMIPS of all online cpus and
 * run the cache-flush scheduling calibration.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
	unsigned long bogosum = 0;
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			bogosum += cpu_data(i).udelay_val;
	}
	printk("Total of %ld processors activated "
	       "(%lu.%02lu BogoMIPS).\n",
	       (long) num_online_cpus(),
	       bogosum/(500000/HZ),
	       (bogosum/(5000/HZ))%100);

	/* We want to run this with all the other cpus spinning
	 * in the kernel.
	 */
	smp_tune_scheduling();
}

/* This needn't do anything as we do not sleep the cpu
 * inside of the idler task, so an interrupt is not needed
 * to get a clean fast response.
 *
 * XXX Reverify this assumption... -DaveM
 *
 * Addendum: We do want it to do something for the signal
 *           delivery case, we detect that by just seeing
 *           if we are trying to send this to an idler or not.
 */
void smp_send_reschedule(int cpu)
{
	if (cpu_data(cpu).idle_volume == 0)
		smp_receive_signal(cpu);
}

/* This is a nop because we capture all other cpus
 * anyways when making the PROM active.
 */
void smp_send_stop(void)
{
}

/*
 * (Code-viewer UI chrome from the original paste, translated, not part
 * of smp.c: keyboard shortcuts — copy code Ctrl+C, search code Ctrl+F,
 * fullscreen F11, increase font Ctrl+=, decrease font Ctrl+-, show
 * shortcuts "?".)
 */