commit 280b713b5b0fd84cf2469098aee88acbb5de859c Author: Eric Anholt Date: Thu Mar 12 16:56:27 2009 -0700 drm/i915: Allow tiling of objects with bit 17 swizzling by the CPU. Save the bit 17 state of the pages when freeing the page list, and reswizzle them if necessary when rebinding the pages (in case they were swapped out). Since we have userland with expectations that the swizzle enums let it pread and pwrite contents accurately, we can't expose a new swizzle enum for bit 17 (which it would have to GTT map to handle), so we handle it down in pread and pwrite by swizzling the copy when bit 17 of the page address is set. Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index efcd610..bccd414 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -446,6 +446,9 @@ struct drm_i915_gem_object { uint32_t tiling_mode; uint32_t stride; + /** Record of address bit 17 of each page at last unbind. */ + long *bit_17; + /** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */ uint32_t agp_type; @@ -640,6 +643,8 @@ void i915_gem_object_put_pages(struct drm_gem_object *obj); /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); +void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); +void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3a1189d..6dca9fc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -155,6 +155,15 @@ fast_shmem_read(struct page **pages, return 0; } +static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) +{ + drm_i915_private_t *dev_priv = obj->dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + obj_priv->tiling_mode != I915_TILING_NONE; +} + static inline int slow_shmem_copy(struct page *dst_page, int dst_offset, @@ -182,6 +191,64 @@ slow_shmem_copy(struct page *dst_page, return 0; } +static inline int +slow_shmem_bit17_copy(struct page *gpu_page, + int gpu_offset, + struct page *cpu_page, + int cpu_offset, + int length, + int is_read) +{ + char *gpu_vaddr, *cpu_vaddr; + + /* Use the unswizzled path if this page isn't affected. */ + if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { + if (is_read) + return slow_shmem_copy(cpu_page, cpu_offset, + gpu_page, gpu_offset, length); + else + return slow_shmem_copy(gpu_page, gpu_offset, + cpu_page, cpu_offset, length); + } + + gpu_vaddr = kmap_atomic(gpu_page, KM_USER0); + if (gpu_vaddr == NULL) + return -ENOMEM; + + cpu_vaddr = kmap_atomic(cpu_page, KM_USER1); + if (cpu_vaddr == NULL) { + kunmap_atomic(gpu_vaddr, KM_USER0); + return -ENOMEM; + } + + /* Copy the data, XORing A6 with A17 (1). The user already knows he's + * XORing with the other bits (A9 for Y, A9 and A10 for X) + */ + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + if (is_read) { + memcpy(cpu_vaddr + cpu_offset, + gpu_vaddr + swizzled_gpu_offset, + this_length); + } else { + memcpy(gpu_vaddr + swizzled_gpu_offset, + cpu_vaddr + cpu_offset, + this_length); + } + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + kunmap_atomic(cpu_vaddr, KM_USER1); + kunmap_atomic(gpu_vaddr, KM_USER0); + + return 0; +} + /** * This is the fast shmem pread path, which attempts to copy_from_user directly * from the backing pages of the object to the user's address space. On a @@ -270,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -294,6 +362,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -328,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(user_pages[data_page_index], - data_page_offset, - obj_priv->pages[shmem_page_index], - shmem_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 1); + } else { + ret = slow_shmem_copy(user_pages[data_page_index], + data_page_offset, + obj_priv->pages[shmem_page_index], + shmem_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -384,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); - if (ret != 0) + if (i915_gem_object_needs_bit17_swizzle(obj)) { ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); + } else { + ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); + if (ret != 0) + ret = i915_gem_shmem_pread_slow(dev, obj, args, + file_priv); + } drm_gem_object_unreference(obj); @@ -728,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -752,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -786,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 0); + } else { + ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -855,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file_priv); } + } else if (i915_gem_object_needs_bit17_swizzle(obj)) { + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); } else { ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); if (ret == -EFAULT) { @@ -1298,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (--obj_priv->pages_refcount != 0) return; + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_save_bit_17_swizzle(obj); + for (i = 0; i < page_count; i++) if (obj_priv->pages[i] != NULL) { if (obj_priv->dirty) @@ -1923,6 +2024,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) } obj_priv->pages[i] = page; } + + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_do_bit_17_swizzle(obj); + return 0; } @@ -3601,6 +3706,7 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_free_mmap_offset(obj); drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); + kfree(obj_priv->bit_17); drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6be3f92..f27e523 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -25,6 +25,8 @@ * */ +#include "linux/string.h" +#include "linux/bitops.h" #include "drmP.h" #include "drm.h" #include "i915_drm.h" @@ -127,8 +129,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) swizzle_y = I915_BIT_6_SWIZZLE_9_11; } else { /* Bit 17 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; } break; } @@ -288,6 +290,19 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + + /* Hide bit 17 swizzling from the user. This prevents old Mesa + * from aborting the application on sw fallbacks to bit 17, + * and we use the pread/pwrite bit17 paths to swizzle for it. + * If there was a user that was relying on the swizzle + * information for drm_intel_bo_map()ed reads/writes this would + * break it, but we don't have any of those. + */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; @@ -354,8 +369,100 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, DRM_ERROR("unknown tiling mode\n"); } + /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return 0; } + +/** + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. + */ +static int +i915_gem_swizzle_page(struct page *page) +{ + char *vaddr; + int i; + char temp[64]; + + vaddr = kmap(page); + if (vaddr == NULL) + return -ENOMEM; + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); + + return 0; +} + +void +i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) + return; + + for (i = 0; i < page_count; i++) { + char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17; + if ((new_bit_17 & 0x1) != + (test_bit(i, obj_priv->bit_17) != 0)) { + int ret = i915_gem_swizzle_page(obj_priv->pages[i]); + if (ret != 0) { + DRM_ERROR("Failed to swizzle page\n"); + return; + } + set_page_dirty(obj_priv->pages[i]); + } + } +} + +void +i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) { + obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) * + sizeof(long), GFP_KERNEL); + if (obj_priv->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + for (i = 0; i < page_count; i++) { + if (page_to_phys(obj_priv->pages[i]) & (1 << 17)) + __set_bit(i, obj_priv->bit_17); + else + __clear_bit(i, obj_priv->bit_17); + } +} diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 67e3353..95962fa 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -594,6 +594,9 @@ struct drm_i915_gem_busy { #define I915_BIT_6_SWIZZLE_9_10_11 4 /* Not seen by userland */ #define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 struct drm_i915_gem_set_tiling { /** Handle of the buffer to have its tiling state updated */