diff options
author | Dave Airlie <airlied@redhat.com> | 2016-03-15 09:49:19 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2016-03-15 09:49:19 +1000 |
commit | 211afd577a186e18d3eece543c6767420d6f6737 (patch) | |
tree | cc8c4b0bea3dfeda43efca178d1c5931b88ed3ff | |
parent | f2c488212b511f7eadef78c564f1bff8f64db231 (diff) | |
parent | 90d7116061f86c1f8ea460806a0414addea7b58b (diff) |
Merge tag 'drm-vc4-next-2016-03-14' of github.com:anholt/linux into drm-next
This pull request covers what's left for 4.6. Notably, it includes a
significant 3D performance improvement and a fix to HDMI hotplug
detection for the Pi2/3.
* tag 'drm-vc4-next-2016-03-14' of github.com:anholt/linux:
drm/vc4: Recognize a more specific compatible string for V3D.
dt-bindings: Add binding docs for V3D.
drm/vc4: Return -EFAULT on copy_from_user() failure
drm/vc4: Respect GPIO_ACTIVE_LOW on HDMI HPD if set in the devicetree.
drm/vc4: Let gpiolib know that we're OK with sleeping for HPD.
drm/vc4: improve throughput by pipelining binning and rendering jobs
-rw-r--r-- | Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_bo.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 37 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 123 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_hdmi.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_irq.c | 58 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 1 |
7 files changed, 193 insertions, 57 deletions
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt index 56a961aa5061..9f97df4d5152 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt +++ b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt @@ -35,6 +35,12 @@ Optional properties for HDMI: as an interrupt/status bit in the HDMI controller itself). See bindings/pinctrl/brcm,bcm2835-gpio.txt +Required properties for V3D: +- compatible: Should be "brcm,bcm2835-v3d" +- reg: Physical base address and length of the V3D's registers +- interrupts: The interrupt number + See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt + Example: pixelvalve@7e807000 { compatible = "brcm,bcm2835-pixelvalve2"; @@ -60,6 +66,12 @@ hdmi: hdmi@7e902000 { clock-names = "pixel", "hdmi"; }; +v3d: v3d@7ec00000 { + compatible = "brcm,bcm2835-v3d"; + reg = <0x7ec00000 0x1000>; + interrupts = <1 10>; +}; + vc4: gpu { compatible = "brcm,bcm2835-vc4"; }; diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 22278bcfc60e..ac8eafea6361 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -499,11 +499,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); - ret = copy_from_user(bo->base.vaddr, + if (copy_from_user(bo->base.vaddr, (void __user *)(uintptr_t)args->data, - args->size); - if (ret != 0) + args->size)) { + ret = -EFAULT; goto fail; + } /* Clear the rest of the memory from allocating from the BO * cache. */ diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index f53fe6cd72be..fa2ad15d4f62 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -52,7 +52,7 @@ struct vc4_dev { /* Protects bo_cache and the BO stats. */ struct mutex bo_lock; - /* Sequence number for the last job queued in job_list. + /* Sequence number for the last job queued in bin_job_list. * Starts at 0 (no jobs emitted). */ uint64_t emit_seqno; @@ -62,11 +62,19 @@ struct vc4_dev { */ uint64_t finished_seqno; - /* List of all struct vc4_exec_info for jobs to be executed. - * The first job in the list is the one currently programmed - * into ct0ca/ct1ca for execution. + /* List of all struct vc4_exec_info for jobs to be executed in + * the binner. The first job in the list is the one currently + * programmed into ct0ca for execution. */ - struct list_head job_list; + struct list_head bin_job_list; + + /* List of all struct vc4_exec_info for jobs that have + * completed binning and are ready for rendering. The first + * job in the list is the one currently programmed into ct1ca + * for execution. + */ + struct list_head render_job_list; + /* List of the finished vc4_exec_infos waiting to be freed by * job_done_work. */ @@ -296,11 +304,20 @@ struct vc4_exec_info { }; static inline struct vc4_exec_info * -vc4_first_job(struct vc4_dev *vc4) +vc4_first_bin_job(struct vc4_dev *vc4) +{ + if (list_empty(&vc4->bin_job_list)) + return NULL; + return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head); +} + +static inline struct vc4_exec_info * +vc4_first_render_job(struct vc4_dev *vc4) { - if (list_empty(&vc4->job_list)) + if (list_empty(&vc4->render_job_list)) return NULL; - return list_first_entry(&vc4->job_list, struct vc4_exec_info, head); + return list_first_entry(&vc4->render_job_list, + struct vc4_exec_info, head); } /** @@ -414,7 +431,9 @@ int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void vc4_submit_next_job(struct drm_device *dev); +void vc4_submit_next_bin_job(struct drm_device *dev); +void vc4_submit_next_render_job(struct drm_device *dev); +void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec); int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, bool interruptible); void vc4_job_handle_completed(struct vc4_dev *vc4); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 202aa1544acc..8d4384f8b78d 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -141,10 +141,10 @@ vc4_save_hang_state(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_get_hang_state *state; struct vc4_hang_state *kernel_state; - struct vc4_exec_info *exec; + struct vc4_exec_info *exec[2]; struct vc4_bo *bo; unsigned long irqflags; - unsigned int i, unref_list_count; + unsigned int i, j, unref_list_count, prev_idx; kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); if (!kernel_state) @@ -153,37 +153,55 @@ vc4_save_hang_state(struct drm_device *dev) state = &kernel_state->user_state; spin_lock_irqsave(&vc4->job_lock, irqflags); - exec = vc4_first_job(vc4); - if (!exec) { + exec[0] = vc4_first_bin_job(vc4); + exec[1] = vc4_first_render_job(vc4); + if (!exec[0] && !exec[1]) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; } - unref_list_count = 0; - list_for_each_entry(bo, &exec->unref_list, unref_head) - unref_list_count++; + /* Get the bos from both binner and renderer into hang state. */ + state->bo_count = 0; + for (i = 0; i < 2; i++) { + if (!exec[i]) + continue; + + unref_list_count = 0; + list_for_each_entry(bo, &exec[i]->unref_list, unref_head) + unref_list_count++; + state->bo_count += exec[i]->bo_count + unref_list_count; + } + + kernel_state->bo = kcalloc(state->bo_count, + sizeof(*kernel_state->bo), GFP_ATOMIC); - state->bo_count = exec->bo_count + unref_list_count; - kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo), - GFP_ATOMIC); if (!kernel_state->bo) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; } - for (i = 0; i < exec->bo_count; i++) { - drm_gem_object_reference(&exec->bo[i]->base); - kernel_state->bo[i] = &exec->bo[i]->base; - } + prev_idx = 0; + for (i = 0; i < 2; i++) { + if (!exec[i]) + continue; - list_for_each_entry(bo, &exec->unref_list, unref_head) { - drm_gem_object_reference(&bo->base.base); - kernel_state->bo[i] = &bo->base.base; - i++; + for (j = 0; j < exec[i]->bo_count; j++) { + drm_gem_object_reference(&exec[i]->bo[j]->base); + kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; + } + + list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { + drm_gem_object_reference(&bo->base.base); + kernel_state->bo[j + prev_idx] = &bo->base.base; + j++; + } + prev_idx = j + 1; } - state->start_bin = exec->ct0ca; - state->start_render = exec->ct1ca; + if (exec[0]) + state->start_bin = exec[0]->ct0ca; + if (exec[1]) + state->start_render = exec[1]->ct1ca; spin_unlock_irqrestore(&vc4->job_lock, irqflags); @@ -267,13 +285,15 @@ vc4_hangcheck_elapsed(unsigned long data) struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t ct0ca, ct1ca; unsigned long irqflags; - struct vc4_exec_info *exec; + struct vc4_exec_info *bin_exec, *render_exec; spin_lock_irqsave(&vc4->job_lock, irqflags); - exec = vc4_first_job(vc4); + + bin_exec = vc4_first_bin_job(vc4); + render_exec = vc4_first_render_job(vc4); /* If idle, we can stop watching for hangs. */ - if (!exec) { + if (!bin_exec && !render_exec) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; } @@ -284,9 +304,12 @@ vc4_hangcheck_elapsed(unsigned long data) /* If we've made any progress in execution, rearm the timer * and wait. */ - if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) { - exec->last_ct0ca = ct0ca; - exec->last_ct1ca = ct1ca; + if ((bin_exec && ct0ca != bin_exec->last_ct0ca) || + (render_exec && ct1ca != render_exec->last_ct1ca)) { + if (bin_exec) + bin_exec->last_ct0ca = ct0ca; + if (render_exec) + render_exec->last_ct1ca = ct1ca; spin_unlock_irqrestore(&vc4->job_lock, irqflags); vc4_queue_hangcheck(dev); return; @@ -386,11 +409,13 @@ vc4_flush_caches(struct drm_device *dev) * The job_lock should be held during this. */ void -vc4_submit_next_job(struct drm_device *dev) +vc4_submit_next_bin_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_exec_info *exec = vc4_first_job(vc4); + struct vc4_exec_info *exec; +again: + exec = vc4_first_bin_job(vc4); if (!exec) return; @@ -400,11 +425,40 @@ vc4_submit_next_job(struct drm_device *dev) V3D_WRITE(V3D_BPOA, 0); V3D_WRITE(V3D_BPOS, 0); - if (exec->ct0ca != exec->ct0ea) + /* Either put the job in the binner if it uses the binner, or + * immediately move it to the to-be-rendered queue. + */ + if (exec->ct0ca != exec->ct0ea) { submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); + } else { + vc4_move_job_to_render(dev, exec); + goto again; + } +} + +void +vc4_submit_next_render_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_render_job(vc4); + + if (!exec) + return; + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); } +void +vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + bool was_empty = list_empty(&vc4->render_job_list); + + list_move_tail(&exec->head, &vc4->render_job_list); + if (was_empty) + vc4_submit_next_render_job(dev); +} + static void vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) { @@ -443,14 +497,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) exec->seqno = seqno; vc4_update_bo_seqnos(exec, seqno); - list_add_tail(&exec->head, &vc4->job_list); + list_add_tail(&exec->head, &vc4->bin_job_list); /* If no job was executing, kick ours off. Otherwise, it'll - * get started when the previous job's frame done interrupt + * get started when the previous job's flush done interrupt * occurs. */ - if (vc4_first_job(vc4) == exec) { - vc4_submit_next_job(dev); + if (vc4_first_bin_job(vc4) == exec) { + vc4_submit_next_bin_job(dev); vc4_queue_hangcheck(dev); } @@ -859,7 +913,8 @@ vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - INIT_LIST_HEAD(&vc4->job_list); + INIT_LIST_HEAD(&vc4->bin_job_list); + INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->job_done_list); INIT_LIST_HEAD(&vc4->seqno_cb_list); spin_lock_init(&vc4->job_lock); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 56272ca98ab7..d8b864925fd3 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -47,6 +47,7 @@ struct vc4_hdmi { void __iomem *hdmicore_regs; void __iomem *hd_regs; int hpd_gpio; + bool hpd_active_low; struct clk *pixel_clock; struct clk *hsm_clock; @@ -166,7 +167,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_dev *vc4 = to_vc4_dev(dev); if (vc4->hdmi->hpd_gpio) { - if (gpio_get_value(vc4->hdmi->hpd_gpio)) + if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^ + vc4->hdmi->hpd_active_low) return connector_status_connected; else return connector_status_disconnected; @@ -517,11 +519,17 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) * we'll use the HDMI core's register. */ if (of_find_property(dev->of_node, "hpd-gpios", &value)) { - hdmi->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0); + enum of_gpio_flags hpd_gpio_flags; + + hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node, + "hpd-gpios", 0, + &hpd_gpio_flags); if (hdmi->hpd_gpio < 0) { ret = hdmi->hpd_gpio; goto err_unprepare_hsm; } + + hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; } vc4->hdmi = hdmi; diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 78a21357fb2d..b0104a346a74 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -30,6 +30,10 @@ * disables that specific interrupt, and 0s written are ignored * (reading either one returns the set of enabled interrupts). * + * When we take a binning flush done interrupt, we need to submit the + * next frame for binning and move the finished frame to the render + * thread. + * * When we take a render frame interrupt, we need to wake the * processes waiting for some frame to be done, and get the next frame * submitted ASAP (so the hardware doesn't sit idle when there's work @@ -44,6 +48,7 @@ #include "vc4_regs.h" #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \ + V3D_INT_FLDONE | \ V3D_INT_FRDONE) DECLARE_WAIT_QUEUE_HEAD(render_wait); @@ -77,7 +82,7 @@ vc4_overflow_mem_work(struct work_struct *work) unsigned long irqflags; spin_lock_irqsave(&vc4->job_lock, irqflags); - current_exec = vc4_first_job(vc4); + current_exec = vc4_first_bin_job(vc4); if (current_exec) { vc4->overflow_mem->seqno = vc4->finished_seqno + 1; list_add_tail(&vc4->overflow_mem->unref_head, @@ -98,17 +103,43 @@ vc4_overflow_mem_work(struct work_struct *work) } static void -vc4_irq_finish_job(struct drm_device *dev) +vc4_irq_finish_bin_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_bin_job(vc4); + + if (!exec) + return; + + vc4_move_job_to_render(dev, exec); + vc4_submit_next_bin_job(dev); +} + +static void +vc4_cancel_bin_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_bin_job(vc4); + + if (!exec) + return; + + list_move_tail(&exec->head, &vc4->bin_job_list); + vc4_submit_next_bin_job(dev); +} + +static void +vc4_irq_finish_render_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_exec_info *exec = vc4_first_job(vc4); + struct vc4_exec_info *exec = vc4_first_render_job(vc4); if (!exec) return; vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); - vc4_submit_next_job(dev); + vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); schedule_work(&vc4->job_done_work); @@ -125,9 +156,10 @@ vc4_irq(int irq, void *arg) barrier(); intctl = V3D_READ(V3D_INTCTL); - /* Acknowledge the interrupts we're handling here. The render - * frame done interrupt will be cleared, while OUTOMEM will - * stay high until the underlying cause is cleared. + /* Acknowledge the interrupts we're handling here. The binner + * last flush / render frame done interrupt will be cleared, + * while OUTOMEM will stay high until the underlying cause is + * cleared. */ V3D_WRITE(V3D_INTCTL, intctl); @@ -138,9 +170,16 @@ vc4_irq(int irq, void *arg) status = IRQ_HANDLED; } + if (intctl & V3D_INT_FLDONE) { + spin_lock(&vc4->job_lock); + vc4_irq_finish_bin_job(dev); + spin_unlock(&vc4->job_lock); + status = IRQ_HANDLED; + } + if (intctl & V3D_INT_FRDONE) { spin_lock(&vc4->job_lock); - vc4_irq_finish_job(dev); + vc4_irq_finish_render_job(dev); spin_unlock(&vc4->job_lock); status = IRQ_HANDLED; } @@ -205,6 +244,7 @@ void vc4_irq_reset(struct drm_device *dev) V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); spin_lock_irqsave(&vc4->job_lock, irqflags); - vc4_irq_finish_job(dev); + vc4_cancel_bin_job(dev); + vc4_irq_finish_render_job(dev); spin_unlock_irqrestore(&vc4->job_lock, irqflags); } diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 31de5d17bc85..e6d3c6028341 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -268,6 +268,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) } static const struct of_device_id vc4_v3d_dt_match[] = { + { .compatible = "brcm,bcm2835-v3d" }, { .compatible = "brcm,vc4-v3d" }, {} }; |