Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_mr.c')
-rw-r--r-- | drivers/infiniband/sw/rxe/rxe_mr.c | 267
1 file changed, 196 insertions(+), 71 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 5890a8246216..53271df10e47 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -24,17 +24,22 @@ u8 rxe_get_next_key(u32 last_key)
 
 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 {
+	struct rxe_map_set *set = mr->cur_map_set;
+
 	switch (mr->type) {
-	case RXE_MR_TYPE_DMA:
+	case IB_MR_TYPE_DMA:
 		return 0;
 
-	case RXE_MR_TYPE_MR:
-		if (iova < mr->iova || length > mr->length ||
-		    iova > mr->iova + mr->length - length)
+	case IB_MR_TYPE_USER:
+	case IB_MR_TYPE_MEM_REG:
+		if (iova < set->iova || length > set->length ||
+		    iova > set->iova + set->length - length)
 			return -EFAULT;
 		return 0;
 
 	default:
+		pr_warn("%s: mr type (%d) not supported\n",
+			__func__, mr->type);
 		return -EFAULT;
 	}
 }
@@ -48,48 +53,101 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
 	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
 	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
 
-	mr->ibmr.lkey = lkey;
-	mr->ibmr.rkey = rkey;
+	/* set ibmr->l/rkey and also copy into private l/rkey
+	 * for user MRs these will always be the same
+	 * for cases where caller 'owns' the key portion
+	 * they may be different until REG_MR WQE is executed.
+	 */
+	mr->lkey = mr->ibmr.lkey = lkey;
+	mr->rkey = mr->ibmr.rkey = rkey;
+
 	mr->state = RXE_MR_STATE_INVALID;
-	mr->type = RXE_MR_TYPE_NONE;
 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
 }
 
-static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
+static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
 {
 	int i;
-	int num_map;
-	struct rxe_map **map = mr->map;
 
-	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
+	for (i = 0; i < num_map; i++)
+		kfree(set->map[i]);
 
-	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
-	if (!mr->map)
-		goto err1;
+	kfree(set->map);
+	kfree(set);
+}
+
+static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
+{
+	int i;
+	struct rxe_map_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set)
+		goto err_out;
+
+	set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
+	if (!set->map)
+		goto err_free_set;
 
 	for (i = 0; i < num_map; i++) {
-		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
-		if (!mr->map[i])
-			goto err2;
+		set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
+		if (!set->map[i])
+			goto err_free_map;
 	}
 
+	*setp = set;
+
+	return 0;
+
+err_free_map:
+	for (i--; i >= 0; i--)
+		kfree(set->map[i]);
+
+	kfree(set->map);
+err_free_set:
+	kfree(set);
+err_out:
+	return -ENOMEM;
+}
+
+/**
+ * rxe_mr_alloc() - Allocate memory map array(s) for MR
+ * @mr: Memory region
+ * @num_buf: Number of buffer descriptors to support
+ * @both: If non zero allocate both mr->map and mr->next_map
+ *	  else just allocate mr->map. Used for fast MRs
+ *
+ * Return: 0 on success else an error
+ */
+static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
+{
+	int ret;
+	int num_map;
+
+	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
+	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
+
 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
 	mr->map_mask = RXE_BUF_PER_MAP - 1;
-
-	mr->num_buf = num_buf;
-	mr->num_map = num_map;
 	mr->max_buf = num_map * RXE_BUF_PER_MAP;
+	mr->num_map = num_map;
 
-	return 0;
+	ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
+	if (ret)
+		goto err_out;
 
-err2:
-	for (i--; i >= 0; i--)
-		kfree(mr->map[i]);
+	if (both) {
+		ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
+		if (ret) {
+			rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+			goto err_out;
+		}
+	}
 
-	kfree(mr->map);
-err1:
+	return 0;
+
+err_out:
 	return -ENOMEM;
 }
 
@@ -100,12 +158,13 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
 	mr->ibmr.pd = &pd->ibpd;
 	mr->access = access;
 	mr->state = RXE_MR_STATE_VALID;
-	mr->type = RXE_MR_TYPE_DMA;
+	mr->type = IB_MR_TYPE_DMA;
 }
 
 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr)
 {
+	struct rxe_map_set *set;
 	struct rxe_map **map;
 	struct rxe_phys_buf *buf = NULL;
 	struct ib_umem *umem;
@@ -113,7 +172,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 	int num_buf;
 	void *vaddr;
 	int err;
-	int i;
 
 	umem = ib_umem_get(pd->ibpd.device, start, length, access);
 	if (IS_ERR(umem)) {
@@ -127,18 +185,20 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 	rxe_mr_init(access, mr);
 
-	err = rxe_mr_alloc(mr, num_buf);
+	err = rxe_mr_alloc(mr, num_buf, 0);
 	if (err) {
 		pr_warn("%s: Unable to allocate memory for map\n",
 				__func__);
 		goto err_release_umem;
 	}
 
-	mr->page_shift = PAGE_SHIFT;
-	mr->page_mask = PAGE_SIZE - 1;
+	set = mr->cur_map_set;
+	set->page_shift = PAGE_SHIFT;
+	set->page_mask = PAGE_SIZE - 1;
+
+	num_buf = 0;
+	map = set->map;
 
-	num_buf = 0;
-	map = mr->map;
 	if (length > 0) {
 		buf = map[0]->buf;
 
@@ -161,26 +221,24 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 			buf->size = PAGE_SIZE;
 			num_buf++;
 			buf++;
-
 		}
 	}
 
 	mr->ibmr.pd = &pd->ibpd;
 	mr->umem = umem;
 	mr->access = access;
-	mr->length = length;
-	mr->iova = iova;
-	mr->va = start;
-	mr->offset = ib_umem_offset(umem);
 	mr->state = RXE_MR_STATE_VALID;
-	mr->type = RXE_MR_TYPE_MR;
+	mr->type = IB_MR_TYPE_USER;
+
+	set->length = length;
+	set->iova = iova;
+	set->va = start;
+	set->offset = ib_umem_offset(umem);
 
 	return 0;
 
 err_cleanup_map:
-	for (i = 0; i < mr->num_map; i++)
-		kfree(mr->map[i]);
-	kfree(mr->map);
+	rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
 err_release_umem:
 	ib_umem_release(umem);
 err_out:
@@ -191,19 +249,17 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
 {
 	int err;
 
-	rxe_mr_init(0, mr);
+	/* always allow remote access for FMRs */
+	rxe_mr_init(IB_ACCESS_REMOTE, mr);
 
-	/* In fastreg, we also set the rkey */
-	mr->ibmr.rkey = mr->ibmr.lkey;
-
-	err = rxe_mr_alloc(mr, max_pages);
+	err = rxe_mr_alloc(mr, max_pages, 1);
 	if (err)
 		goto err1;
 
 	mr->ibmr.pd = &pd->ibpd;
 	mr->max_buf = max_pages;
 	mr->state = RXE_MR_STATE_FREE;
-	mr->type = RXE_MR_TYPE_MR;
+	mr->type = IB_MR_TYPE_MEM_REG;
 
 	return 0;
 
@@ -214,21 +270,24 @@ err1:
 static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 			size_t *offset_out)
 {
-	size_t offset = iova - mr->iova + mr->offset;
+	struct rxe_map_set *set = mr->cur_map_set;
+	size_t offset = iova - set->iova + set->offset;
 	int map_index;
 	int buf_index;
 	u64 length;
+	struct rxe_map *map;
 
-	if (likely(mr->page_shift)) {
-		*offset_out = offset & mr->page_mask;
-		offset >>= mr->page_shift;
+	if (likely(set->page_shift)) {
+		*offset_out = offset & set->page_mask;
+		offset >>= set->page_shift;
 		*n_out = offset & mr->map_mask;
 		*m_out = offset >> mr->map_shift;
 	} else {
 		map_index = 0;
 		buf_index = 0;
 
-		length = mr->map[map_index]->buf[buf_index].size;
+		map = set->map[map_index];
+		length = map->buf[buf_index].size;
 
 		while (offset >= length) {
 			offset -= length;
@@ -238,7 +297,8 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 				map_index++;
 				buf_index = 0;
 			}
-			length = mr->map[map_index]->buf[buf_index].size;
+			map = set->map[map_index];
+			length = map->buf[buf_index].size;
 		}
 
 		*m_out = map_index;
@@ -259,7 +319,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 		goto out;
 	}
 
-	if (!mr->map) {
+	if (!mr->cur_map_set) {
 		addr = (void *)(uintptr_t)iova;
 		goto out;
 	}
@@ -272,13 +332,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 
 	lookup_iova(mr, iova, &m, &n, &offset);
 
-	if (offset + length > mr->map[m]->buf[n].size) {
+	if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
 		pr_warn("crosses page boundary\n");
 		addr = NULL;
 		goto out;
 	}
 
-	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
+	addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;
 
 out:
 	return addr;
@@ -302,7 +362,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	if (length == 0)
 		return 0;
 
-	if (mr->type == RXE_MR_TYPE_DMA) {
+	if (mr->type == IB_MR_TYPE_DMA) {
 		u8 *src, *dest;
 
 		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
@@ -314,7 +374,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		return 0;
 	}
 
-	WARN_ON_ONCE(!mr->map);
+	WARN_ON_ONCE(!mr->cur_map_set);
 
 	err = mr_check_range(mr, iova, length);
 	if (err) {
@@ -324,7 +384,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 	lookup_iova(mr, iova, &m, &i, &offset);
 
-	map = mr->map + m;
+	map = mr->cur_map_set->map + m;
 	buf = map[0]->buf + i;
 
 	while (length > 0) {
@@ -507,8 +567,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 	if (!mr)
 		return NULL;
 
-	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
-		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
+	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
+		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
 		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
 		     mr->state != RXE_MR_STATE_VALID)) {
 		rxe_drop_ref(mr);
@@ -531,9 +591,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
 		goto err;
 	}
 
-	if (rkey != mr->ibmr.rkey) {
-		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
-			__func__, rkey, mr->ibmr.rkey);
+	if (rkey != mr->rkey) {
+		pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n",
+			__func__, rkey, mr->rkey);
 		ret = -EINVAL;
 		goto err_drop_ref;
 	}
@@ -545,6 +605,12 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
 		goto err_drop_ref;
 	}
 
+	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
+		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
+		ret = -EINVAL;
+		goto err_drop_ref;
+	}
+
 	mr->state = RXE_MR_STATE_FREE;
 	ret = 0;
 
@@ -554,6 +620,67 @@ err:
 	return ret;
 }
 
+/* user can (re)register fast MR by executing a REG_MR WQE.
+ * user is expected to hold a reference on the ib mr until the
+ * WQE completes.
+ * Once a fast MR is created this is the only way to change the
+ * private keys. It is the responsibility of the user to maintain
+ * the ib mr keys in sync with rxe mr keys.
+ */
+int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
+	u32 key = wqe->wr.wr.reg.key & 0xff;
+	u32 access = wqe->wr.wr.reg.access;
+	struct rxe_map_set *set;
+
+	/* user can only register MR in free state */
+	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
+		pr_warn("%s: mr->lkey = 0x%x not free\n",
+			__func__, mr->lkey);
+		return -EINVAL;
+	}
+
+	/* user can only register mr with qp in same protection domain */
+	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
+		pr_warn("%s: qp->pd and mr->pd don't match\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	mr->access = access;
+	mr->lkey = (mr->lkey & ~0xff) | key;
+	mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
+	mr->state = RXE_MR_STATE_VALID;
+
+	set = mr->cur_map_set;
+	mr->cur_map_set = mr->next_map_set;
+	mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
+	mr->next_map_set = set;
+
+	return 0;
+}
+
+int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
+{
+	struct rxe_mr *mr = to_rmr(ibmr);
+	struct rxe_map_set *set = mr->next_map_set;
+	struct rxe_map *map;
+	struct rxe_phys_buf *buf;
+
+	if (unlikely(set->nbuf == mr->num_buf))
+		return -ENOMEM;
+
+	map = set->map[set->nbuf / RXE_BUF_PER_MAP];
+	buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];
+
+	buf->addr = addr;
+	buf->size = ibmr->page_size;
+	set->nbuf++;
+
+	return 0;
+}
+
 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
@@ -564,7 +691,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 		return -EINVAL;
 	}
 
-	mr->state = RXE_MR_STATE_ZOMBIE;
+	mr->state = RXE_MR_STATE_INVALID;
 	rxe_drop_ref(mr_pd(mr));
 	rxe_drop_index(mr);
 	rxe_drop_ref(mr);
@@ -575,14 +702,12 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 void rxe_mr_cleanup(struct rxe_pool_entry *arg)
 {
 	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
-	int i;
 
 	ib_umem_release(mr->umem);
 
-	if (mr->map) {
-		for (i = 0; i < mr->num_map; i++)
-			kfree(mr->map[i]);
+	if (mr->cur_map_set)
+		rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
 
-		kfree(mr->map);
-	}
+	if (mr->next_map_set)
+		rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
 }
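The sketch below is illustrative only and is not part of this patch: it shows, under stated assumptions, how a kernel ULP would exercise the fast-registration path the patch reworks. ib_map_mr_sg() ends up in rxe_mr_set_page(), which fills mr->next_map_set, and the posted IB_WR_REG_MR WQE is handled by rxe_reg_fast_mr(), which swaps next_map_set into cur_map_set. The function name my_fastreg() and its qp/pd/sgl parameters are hypothetical; the verbs calls (ib_alloc_mr, ib_map_mr_sg, ib_update_fast_reg_key, ib_inc_rkey, ib_post_send) are the standard kernel RDMA API.

#include <rdma/ib_verbs.h>

/* illustrative sketch, not part of the patch: register an FMR against rxe */
static int my_fastreg(struct ib_qp *qp, struct ib_pd *pd,
		      struct scatterlist *sgl, int nents)
{
	const struct ib_send_wr *bad_wr;
	struct ib_reg_wr reg_wr = {};
	struct ib_mr *mr;
	int n;

	/* IB_MR_TYPE_MEM_REG: rxe_mr_init_fast() allocates both map sets */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* fills mr->next_map_set one page at a time via rxe_mr_set_page() */
	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
	if (n < nents) {
		ib_dereg_mr(mr);
		return n < 0 ? n : -EINVAL;
	}

	/* caller 'owns' the low 8 bits of the key; bump it before reuse */
	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
			IB_ACCESS_REMOTE_WRITE;

	/* executing this WQE reaches rxe_reg_fast_mr(), which swaps the sets */
	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}

Keeping two map sets means the set being filled by ib_map_mr_sg() never disturbs the set that in-flight requests may still be translating against; the swap only happens when the REG_MR WQE executes.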