📄 ehca_mrmw.c
字号:
/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  MR/MW functions
 *
 *  Authors: Dietmar Decker <ddecker@de.ibm.com>
 *           Christoph Raisch <raisch@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
*/#include <asm/current.h>#include <rdma/ib_umem.h>#include "ehca_iverbs.h"#include "ehca_mrmw.h"#include "hcp_if.h"#include "hipz_hw.h"#define NUM_CHUNKS(length, chunk_size) \ (((length) + (chunk_size - 1)) / (chunk_size))/* max number of rpages (per hcall register_rpages) */#define MAX_RPAGES 512static struct kmem_cache *mr_cache;static struct kmem_cache *mw_cache;enum ehca_mr_pgsize { EHCA_MR_PGSIZE4K = 0x1000L, EHCA_MR_PGSIZE64K = 0x10000L, EHCA_MR_PGSIZE1M = 0x100000L, EHCA_MR_PGSIZE16M = 0x1000000L};#define EHCA_MR_PGSHIFT4K 12#define EHCA_MR_PGSHIFT64K 16#define EHCA_MR_PGSHIFT1M 20#define EHCA_MR_PGSHIFT16M 24static u32 ehca_encode_hwpage_size(u32 pgsize){ int log = ilog2(pgsize); WARN_ON(log < 12 || log > 24 || log & 3); return (log - 12) / 4;}static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca){ return 1UL << ilog2(shca->hca_cap_mr_pgsize);}static struct ehca_mr *ehca_mr_new(void){ struct ehca_mr *me; me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); if (me) spin_lock_init(&me->mrlock); else ehca_gen_err("alloc failed"); return me;}static void ehca_mr_delete(struct ehca_mr *me){ kmem_cache_free(mr_cache, me);}static struct ehca_mw *ehca_mw_new(void){ struct ehca_mw *me; me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); if (me) spin_lock_init(&me->mwlock); else ehca_gen_err("alloc failed"); return me;}static void ehca_mw_delete(struct ehca_mw *me){ kmem_cache_free(mw_cache, me);}/*----------------------------------------------------------------------*/struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags){ struct ib_mr *ib_mr; int ret; struct ehca_mr *e_maxmr; struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); if (shca->maxmr) { e_maxmr = ehca_mr_new(); if (!e_maxmr) { ehca_err(&shca->ib_device, "out of memory"); ib_mr = ERR_PTR(-ENOMEM); goto get_dma_mr_exit0; } ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, mr_access_flags, e_pd, 
&e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); if (ret) { ehca_mr_delete(e_maxmr); ib_mr = ERR_PTR(ret); goto get_dma_mr_exit0; } ib_mr = &e_maxmr->ib.ib_mr; } else { ehca_err(&shca->ib_device, "no internal max-MR exist!"); ib_mr = ERR_PTR(-EINVAL); goto get_dma_mr_exit0; }get_dma_mr_exit0: if (IS_ERR(ib_mr)) ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", PTR_ERR(ib_mr), pd, mr_access_flags); return ib_mr;} /* end ehca_get_dma_mr() *//*----------------------------------------------------------------------*/struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start){ struct ib_mr *ib_mr; int ret; struct ehca_mr *e_mr; struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); u64 size; if ((num_phys_buf <= 0) || !phys_buf_array) { ehca_err(pd->device, "bad input values: num_phys_buf=%x " "phys_buf_array=%p", num_phys_buf, phys_buf_array); ib_mr = ERR_PTR(-EINVAL); goto reg_phys_mr_exit0; } if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { /* * Remote Write Access requires Local Write Access * Remote Atomic Access requires Local Write Access */ ehca_err(pd->device, "bad input values: mr_access_flags=%x", mr_access_flags); ib_mr = ERR_PTR(-EINVAL); goto reg_phys_mr_exit0; } /* check physical buffer list and calculate size */ ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, iova_start, &size); if (ret) { ib_mr = ERR_PTR(ret); goto reg_phys_mr_exit0; } if ((size == 0) || (((u64)iova_start + size) < (u64)iova_start)) { ehca_err(pd->device, "bad input values: size=%lx iova_start=%p", size, iova_start); ib_mr = ERR_PTR(-EINVAL); goto reg_phys_mr_exit0; } e_mr = ehca_mr_new(); if (!e_mr) { ehca_err(pd->device, "out of memory"); 
ib_mr = ERR_PTR(-ENOMEM); goto reg_phys_mr_exit0; } /* register MR on HCA */ if (ehca_mr_is_maxmr(size, iova_start)) { e_mr->flags |= EHCA_MR_FLAG_MAXMR; ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, e_pd, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ib_mr = ERR_PTR(ret); goto reg_phys_mr_exit1; } } else { struct ehca_mr_pginfo pginfo; u32 num_kpages; u32 num_hwpages; u64 hw_pgsize; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, PAGE_SIZE); /* for kernel space we try most possible pgsize */ hw_pgsize = ehca_get_max_hwpage_size(shca); num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, hw_pgsize); memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_PHYS; pginfo.num_kpages = num_kpages; pginfo.hwpage_size = hw_pgsize; pginfo.num_hwpages = num_hwpages; pginfo.u.phy.num_phys_buf = num_phys_buf; pginfo.u.phy.phys_buf_array = phys_buf_array; pginfo.next_hwpage = ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ib_mr = ERR_PTR(ret); goto reg_phys_mr_exit1; } } /* successful registration of all pages */ return &e_mr->ib.ib_mr;reg_phys_mr_exit1: ehca_mr_delete(e_mr);reg_phys_mr_exit0: if (IS_ERR(ib_mr)) ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " "num_phys_buf=%x mr_access_flags=%x iova_start=%p", PTR_ERR(ib_mr), pd, phys_buf_array, num_phys_buf, mr_access_flags, iova_start); return ib_mr;} /* end ehca_reg_phys_mr() *//*----------------------------------------------------------------------*/struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int mr_access_flags, struct ib_udata *udata){ struct ib_mr *ib_mr; struct ehca_mr *e_mr; struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_mr_pginfo pginfo; int ret, page_shift; u32 num_kpages; u32 
num_hwpages; u64 hwpage_size; if (!pd) { ehca_gen_err("bad pd=%p", pd); return ERR_PTR(-EFAULT); } if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { /* * Remote Write Access requires Local Write Access * Remote Atomic Access requires Local Write Access */ ehca_err(pd->device, "bad input values: mr_access_flags=%x", mr_access_flags); ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit0; } if (length == 0 || virt + length < virt) { ehca_err(pd->device, "bad input values: length=%lx " "virt_base=%lx", length, virt); ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit0; } e_mr = ehca_mr_new(); if (!e_mr) { ehca_err(pd->device, "out of memory"); ib_mr = ERR_PTR(-ENOMEM); goto reg_user_mr_exit0; } e_mr->umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); if (IS_ERR(e_mr->umem)) { ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; } if (e_mr->umem->page_size != PAGE_SIZE) { ehca_err(pd->device, "page size not supported, " "e_mr->umem->page_size=%x", e_mr->umem->page_size); ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit2; } /* determine number of MR pages */ num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); /* select proper hw_pgsize */ page_shift = PAGE_SHIFT; if (e_mr->umem->hugetlb) { /* determine page_shift, clamp between 4K and 16M */ page_shift = (fls64(length - 1) + 3) & ~3; page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), EHCA_MR_PGSHIFT16M); } hwpage_size = 1UL << page_shift; /* now that we have the desired page size, shift until it's * supported, too. 4K is always supported, so this terminates. 
*/ while (!(hwpage_size & shca->hca_cap_mr_pgsize)) hwpage_size >>= 4;reg_user_mr_fallback: num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); /* register MR on HCA */ memset(&pginfo, 0, sizeof(pginfo)); pginfo.type = EHCA_MR_PGI_USER; pginfo.hwpage_size = hwpage_size; pginfo.num_kpages = num_kpages; pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, (&e_mr->umem->chunk_list), list); ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { ehca_warn(pd->device, "failed to register mr " "with hwpage_size=%lx", hwpage_size); ehca_info(pd->device, "try to register mr with " "kpage_size=%lx", PAGE_SIZE); /* * this means kpages are not contiguous for a hw page * try kernel page size as fallback solution */ hwpage_size = PAGE_SIZE; goto reg_user_mr_fallback; } if (ret) { ib_mr = ERR_PTR(ret); goto reg_user_mr_exit2; } /* successful registration of all pages */ return &e_mr->ib.ib_mr;reg_user_mr_exit2: ib_umem_release(e_mr->umem);reg_user_mr_exit1: ehca_mr_delete(e_mr);reg_user_mr_exit0: if (IS_ERR(ib_mr)) ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", PTR_ERR(ib_mr), pd, mr_access_flags, udata); return ib_mr;} /* end ehca_reg_user_mr() *//*----------------------------------------------------------------------*/int ehca_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start){ int ret; struct ehca_shca *shca = container_of(mr->device, struct ehca_shca, ib_device); struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); u64 new_size; u64 *new_start; u32 new_acl; struct ehca_pd *new_pd; u32 
tmp_lkey, tmp_rkey; unsigned long sl_flags; u32 num_kpages = 0; u32 num_hwpages = 0; struct ehca_mr_pginfo pginfo; u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && (my_pd->ownpid != cur_pid)) { ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", cur_pid, my_pd->ownpid); ret = -EINVAL; goto rereg_phys_mr_exit0; } if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { /* TODO not supported, because PHYP rereg hCall needs pages */ ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -