smp.c
From the "YL2410 (youlong2410) Linux 2.6.8 kernel source" collection · C code · 1,285 lines total · page 1 of 3
C
1,285 lines
/* NOTE(review): the lines below are the tail of smp_flush_tlb_pending();
 * the function header and the declarations of mm, ctx, nr, vaddrs and cpu
 * are on an earlier page of this file, outside the visible chunk.
 */
		/* Only this CPU uses the mm: shrink its cpu mask to just
		 * ourselves and do a purely local flush, skipping the
		 * cross call entirely.
		 */
		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
		goto local_flush_and_out;
	} else {
		/* This optimization is not valid. Normally
		 * we will be holding the page_table_lock, but
		 * there is an exception which is copy_page_range()
		 * when forking. The lock is held during the individual
		 * page table updates in the parent, but not at the
		 * top level, which is where we are invoked.
		 */
		if (0) {
			cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);

			/* By virtue of running under the mm->page_table_lock,
			 * and mmu_context.h:switch_mm doing the same, the
			 * following operation is safe.
			 */
			if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
				goto local_flush_and_out;
		}
	}

	/* Cross-call every cpu in mm's mask to flush the pending
	 * virtual addresses for this context.
	 */
	smp_cross_call_masked(&xcall_flush_tlb_pending,
			      ctx, nr, (unsigned long) vaddrs,
			      mm->cpu_vm_mask);

local_flush_and_out:
	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

/* Flush a kernel virtual address range from the TLB on all cpus.
 * The range is page-aligned first; an empty range is a no-op.
 */
void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

/* CPU capture.
 */
/* #define CAPTURE_DEBUG */

extern unsigned long xcall_capture;

/* Nesting depth of smp_capture() calls; only the outermost capture
 * actually rounds up the other cpus.
 */
static atomic_t smp_capture_depth = ATOMIC_INIT(0);

/* Number of cpus currently checked into smp_penguin_jailcell(). */
static atomic_t smp_capture_registry = ATOMIC_INIT(0);

/* Flag the captured cpus spin on until smp_release() clears it. */
static unsigned long penguins_are_doing_time;

/* Park all other online cpus in a spin loop (the "jail") so the caller
 * can run alone.  Calls may nest; each smp_capture() must be paired
 * with an smp_release().
 */
void smp_capture(void)
{
	int result = __atomic_add(1, &smp_capture_depth);

	/* Order the depth update ahead of the stores and cross call below. */
	membar("#StoreStore | #LoadStore");
	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		membar("#StoreStore | #LoadStore");
		/* Register ourselves, cross-call the rest, then wait
		 * until every online cpu has checked into the jailcell.
		 */
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			membar("#LoadLoad");
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

/* Undo one level of smp_capture(); the outermost release drops the
 * flag and lets the captured cpus out of their spin loop.
 */
void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar("#StoreStore | #StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
 * can service tlb flush xcalls...
 */
extern void prom_world(int);
extern void save_alternate_globals(unsigned long *);
extern void restore_alternate_globals(unsigned long *);

/* Per-cpu target of the capture cross call: save register state, switch
 * to the PROM's view of the world, then spin until smp_release() clears
 * penguins_are_doing_time.
 */
void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	unsigned long global_save[24];

	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	save_alternate_globals(global_save);
	prom_world(1);
	/* Check in with the capturing cpu... */
	atomic_inc(&smp_capture_registry);
	membar("#StoreLoad | #StoreStore");
	/* ...and serve our sentence. */
	while (penguins_are_doing_time)
		membar("#LoadLoad");
	restore_alternate_globals(global_save);
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

extern unsigned long xcall_promstop;

/* Cross-call every other cpu to stop its PROM activity. */
void smp_promstop_others(void)
{
	smp_cross_call(&xcall_promstop, 0, 0, 0);
}

extern void sparc64_do_profile(struct pt_regs *regs);

#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier
#define prof_counter(__cpu) cpu_data(__cpu).counter

/* Per-cpu tick timer interrupt: runs profiling on every tick, and every
 * prof_multiplier(cpu) ticks does the timer bottom half — global
 * timekeeping on the boot cpu plus process time accounting everywhere.
 */
void smp_percpu_timer_interrupt(struct pt_regs *regs)
{
	unsigned long compare, tick, pstate;
	int cpu = smp_processor_id();
	int user = user_mode(regs);

	/*
	 * Check for level 14 softint.
	 */
	{
		unsigned long tick_mask = tick_ops->softint_mask;

		/* Not our tick softint: hand off to the generic irq path. */
		if (!(get_softint() & tick_mask)) {
			extern void handler_irq(int, struct pt_regs *);

			handler_irq(14, regs);
			return;
		}
		clear_softint(tick_mask);
	}

	do {
		sparc64_do_profile(regs);
		if (!--prof_counter(cpu)) {
			irq_enter();

			if (cpu == boot_cpu_id) {
				kstat_this_cpu.irqs[0]++;
				timer_tick_interrupt(regs);
			}

			update_process_times(user);

			irq_exit();

			prof_counter(cpu) = prof_multiplier(cpu);
		}

		/* Guarantee that the following sequences execute
		 * uninterrupted.
		 */
		__asm__ __volatile__("rdpr %%pstate, %0\n\t"
				     "wrpr %0, %1, %%pstate"
				     : "=r" (pstate)
				     : "i" (PSTATE_IE));

		compare = tick_ops->add_compare(current_tick_offset);
		tick = tick_ops->get_tick();

		/* Restore PSTATE_IE.
		 */
		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : /* no outputs */
				     : "r" (pstate));
		/* Loop again if the tick has already passed the new
		 * compare value, so no tick is ever lost.
		 */
	} while (time_after_eq(tick, compare));
}

/* Arm this cpu's tick compare register for periodic interrupts,
 * with interrupts disabled around the tick_ops sequence.
 */
static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	prof_counter(cpu) = prof_multiplier(cpu) = 1;

	/* Guarantee that the following sequences execute
	 * uninterrupted.
	 */
	__asm__ __volatile__("rdpr %%pstate, %0\n\t"
			     "wrpr %0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));

	tick_ops->init_tick(current_tick_offset);

	/* Restore PSTATE_IE. */
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : /* no outputs */
			     : "r" (pstate));
}

/* Early boot: record the boot cpu id, mark it online, and initialize
 * its profiling counters.
 */
void __init smp_tick_init(void)
{
	boot_cpu_id = hard_smp_processor_id();
	current_tick_offset = timer_tick_offset;

	cpu_set(boot_cpu_id, cpu_online_map);
	prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
}

cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

extern unsigned long cheetah_tune_scheduling(void);

/* Measure (or estimate) how long an L2 (ecache) flush takes on this
 * cpu and derive cache_decay_ticks from it for the scheduler.
 */
static void __init smp_tune_scheduling(void)
{
	unsigned long orig_flush_base, flush_base, flags, *p;
	unsigned int ecache_size, order;
	cycles_t tick1, tick2, raw;
	int cpu_node;

	/* Approximate heuristic for SMP scheduling.  It is an
	 * estimation of the time it takes to flush the L2 cache
	 * on the local processor.
	 *
	 * The ia32 chooses to use the L1 cache flush time instead,
	 * and I consider this complete nonsense.  The Ultra can service
	 * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
	 * L2 misses are what create extra bus traffic (ie. the "cost"
	 * of moving a process from one cpu to another).
	 */
	printk("SMP: Calibrating ecache flush... ");
	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		cacheflush_time = cheetah_tune_scheduling();
		goto report;
	}

	cpu_find_by_instance(0, &cpu_node, NULL);
	ecache_size = prom_getintdefault(cpu_node,
					 "ecache-size", (512 * 1024));
	if (ecache_size > (4 * 1024 * 1024))
		ecache_size = (4 * 1024 * 1024);
	orig_flush_base = flush_base =
		__get_free_pages(GFP_KERNEL, order = get_order(ecache_size));

	if (flush_base != 0UL) {
		local_irq_save(flags);

		/* Scan twice the size once just to get the TLB entries
		 * loaded and make sure the second scan measures pure misses.
		 */
		for (p = (unsigned long *)flush_base;
		     ((unsigned long)p) < (flush_base + (ecache_size<<1));
		     p += (64 / sizeof(unsigned long)))
			*((volatile unsigned long *)p);

		tick1 = tick_ops->get_tick();

		/* Timed pass: touch one 64-byte line per iteration over
		 * one full ecache worth of memory.
		 */
		__asm__ __volatile__("1:\n\t"
				     "ldx [%0 + 0x000], %%g1\n\t"
				     "ldx [%0 + 0x040], %%g2\n\t"
				     "ldx [%0 + 0x080], %%g3\n\t"
				     "ldx [%0 + 0x0c0], %%g5\n\t"
				     "add %0, 0x100, %0\n\t"
				     "cmp %0, %2\n\t"
				     "bne,pt %%xcc, 1b\n\t"
				     " nop"
				     : "=&r" (flush_base)
				     : "0" (flush_base),
				       "r" (flush_base + ecache_size)
				     : "g1", "g2", "g3", "g5");

		tick2 = tick_ops->get_tick();

		local_irq_restore(flags);

		raw = (tick2 - tick1);

		/* Dampen it a little, considering two processes
		 * sharing the cache and fitting.
		 */
		cacheflush_time = (raw - (raw >> 2));

		free_pages(orig_flush_base, order);
	} else {
		/* Allocation failed: fall back to a size-based estimate. */
		cacheflush_time = ((ecache_size << 2) +
				   (ecache_size << 1));
	}
report:
	/* Convert ticks/sticks to jiffies. */
	cache_decay_ticks = cacheflush_time / timer_tick_offset;
	if (cache_decay_ticks < 1)
		cache_decay_ticks = 1;

	printk("Using heuristic of %ld cycles, %ld ticks.\n",
	       cacheflush_time, cache_decay_ticks);
}

/* /proc/profile writes can call this, don't __init it please.
 */
static spinlock_t prof_setup_lock = SPIN_LOCK_UNLOCKED;

/* Set the profiling tick multiplier for every cpu.  Rejects zero and
 * multipliers that would shrink the tick interval below 1000 cycles.
 * Returns 0 on success, -EINVAL on a bad multiplier.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	unsigned long flags;
	int i;

	if ((!multiplier) || (timer_tick_offset / multiplier) < 1000)
		return -EINVAL;

	spin_lock_irqsave(&prof_setup_lock, flags);
	for (i = 0; i < NR_CPUS; i++)
		prof_multiplier(i) = multiplier;
	current_tick_offset = (timer_tick_offset / multiplier);
	spin_unlock_irqrestore(&prof_setup_lock, flags);

	return 0;
}

/* Build phys_cpu_present_map from the PROM cpu list, then trim it down
 * (never evicting the boot cpu) until at most max_cpus remain.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	int instance, mid;

	instance = 0;
	while (!cpu_find_by_instance(instance, NULL, &mid)) {
		if (mid < max_cpus)
			cpu_set(mid, phys_cpu_present_map);
		instance++;
	}

	if (num_possible_cpus() > max_cpus) {
		instance = 0;
		while (!cpu_find_by_instance(instance, NULL, &mid)) {
			if (mid != boot_cpu_id) {
				cpu_clear(mid, phys_cpu_present_map);
				if (num_possible_cpus() <= max_cpus)
					break;
			}
			instance++;
		}
	}

	smp_store_cpu_info(boot_cpu_id);
}

/* Record the boot cpu's id and mark it online/present; halts via the
 * PROM if the hardware cpu id does not fit in NR_CPUS.
 */
void __devinit smp_prepare_boot_cpu(void)
{
	if (hard_smp_processor_id() >= NR_CPUS) {
		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
		prom_halt();
	}

	current_thread_info()->cpu = hard_smp_processor_id();

	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), phys_cpu_present_map);
}

/* Bring one secondary cpu up: boot it, let it commence, wait for it to
 * appear online, then synchronize its tick counter with ours.
 * Returns 0 on success or a negative errno.
 */
int __devinit __cpu_up(unsigned int cpu)
{
	int ret = smp_boot_one_cpu(cpu);

	if (!ret) {
		cpu_set(cpu, smp_commenced_mask);
		/* NOTE(review): this wait has no timeout, so if the cpu
		 * never reaches cpu_online_map we spin here forever and
		 * the -ENODEV branch below is unreachable.  A bounded
		 * wait would make the error path live — verify against
		 * smp_boot_one_cpu()'s failure contract before changing.
		 */
		while (!cpu_isset(cpu, cpu_online_map))
			mb();
		if (!cpu_isset(cpu, cpu_online_map)) {
			ret = -ENODEV;
		} else {
			smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

/* Final SMP bringup step: report total cpus and BogoMIPS, then run the
 * cache calibration while the other cpus spin in the kernel.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
	unsigned long bogosum = 0;
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			bogosum += cpu_data(i).udelay_val;
	}
	printk("Total of %ld processors activated "
	       "(%lu.%02lu BogoMIPS).\n",
	       (long) num_online_cpus(),
	       bogosum/(500000/HZ),
	       (bogosum/(5000/HZ))%100);

	/* We want to run this with all the other cpus spinning
	 * in the kernel.
	 */
	smp_tune_scheduling();
}

/* This needn't do anything as we do not sleep the cpu
 * inside of the idler task, so an interrupt is not needed
 * to get a clean fast response.
 *
 * XXX Reverify this assumption... -DaveM
 *
 * Addendum: We do want it to do something for the signal
 *           delivery case, we detect that by just seeing
 *           if we are trying to send this to an idler or not.
 */
void smp_send_reschedule(int cpu)
{
	/* Only poke the target if it is idling; a busy cpu will
	 * reschedule on its own at the next opportunity.
	 */
	if (cpu_data(cpu).idle_volume == 0)
		smp_receive_signal(cpu);
}

/* This is a nop because we capture all other cpus
 * anyways when making the PROM active.
 */
void smp_send_stop(void)
{
}
⌨️ Keyboard shortcuts
Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?