Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(437)

Unified Diff: src/core/SkBitmapProcState_matrixProcs.cpp

Issue 6453065: arm: dynamic NEON support for SkBitmapProcState matrix operations. (Closed) Base URL: http://skia.googlecode.com/svn/trunk/
Patch Set: Created 12 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gyp/opts.gyp ('k') | src/core/SkBitmapProcState_matrix_clamp.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/core/SkBitmapProcState_matrixProcs.cpp
===================================================================
--- src/core/SkBitmapProcState_matrixProcs.cpp (revision 4857)
+++ src/core/SkBitmapProcState_matrixProcs.cpp (working copy)
@@ -8,6 +8,7 @@
#include "SkPerspIter.h"
#include "SkShader.h"
#include "SkUtils.h"
+#include "SkUtilsArm.h"
// Helper to ensure that when we shift down, we do it w/o sign-extension
// so the caller doesn't have to manually mask off the top 16 bits
@@ -67,27 +68,31 @@
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
+// Compile neon code paths if needed
+#if !SK_ARM_NEON_IS_NONE
+
+// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
+extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
+extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
+
+#endif // !SK_ARM_NEON_IS_NONE
+
+// Compile non-neon code path if needed
+#if !SK_ARM_NEON_IS_ALWAYS
#define MAKENAME(suffix) ClampX_ClampY ## suffix
#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
#define CHECK_FOR_DECAL
-#if defined(__ARM_HAVE_NEON)
- #include "SkBitmapProcState_matrix_clamp.h"
-#else
- #include "SkBitmapProcState_matrix.h"
-#endif
+#include "SkBitmapProcState_matrix.h"
#define MAKENAME(suffix) RepeatX_RepeatY ## suffix
#define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
-#if defined(__ARM_HAVE_NEON)
- #include "SkBitmapProcState_matrix_repeat.h"
-#else
- #include "SkBitmapProcState_matrix.h"
+#include "SkBitmapProcState_matrix.h"
#endif
#define MAKENAME(suffix) GeneralXY ## suffix
@@ -228,52 +233,6 @@
{
int i;
-#if defined(__ARM_HAVE_NEON)
- if (count >= 8) {
- /* SkFixed is 16.16 fixed point */
- SkFixed dx2 = dx+dx;
- SkFixed dx4 = dx2+dx2;
- SkFixed dx8 = dx4+dx4;
-
- /* now build fx/fx+dx/fx+2dx/fx+3dx */
- SkFixed fx1, fx2, fx3;
- int32x2_t lower, upper;
- int32x4_t lbase, hbase;
- uint16_t *dst16 = (uint16_t *)dst;
-
- fx1 = fx+dx;
- fx2 = fx1+dx;
- fx3 = fx2+dx;
-
- /* avoid an 'lbase unitialized' warning */
- lbase = vdupq_n_s32(fx);
- lbase = vsetq_lane_s32(fx1, lbase, 1);
- lbase = vsetq_lane_s32(fx2, lbase, 2);
- lbase = vsetq_lane_s32(fx3, lbase, 3);
- hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));
-
- /* take upper 16 of each, store, and bump everything */
- do {
- int32x4_t lout, hout;
- uint16x8_t hi16;
-
- lout = lbase;
- hout = hbase;
- /* gets hi's of all louts then hi's of all houts */
- asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
- hi16 = vreinterpretq_u16_s32(hout);
- vst1q_u16(dst16, hi16);
-
- /* on to the next */
- lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
- hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
- dst16 += 8;
- count -= 8;
- fx += dx8;
- } while (count >= 8);
- dst = (uint32_t *) dst16;
- }
-#else
for (i = (count >> 2); i > 0; --i)
{
*dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
@@ -282,7 +241,6 @@
fx += dx+dx;
}
count &= 3;
-#endif
uint16_t* xx = (uint16_t*)dst;
for (i = count; i > 0; --i) {
@@ -293,43 +251,7 @@
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
{
-#if defined(__ARM_HAVE_NEON)
- if (count >= 8) {
- int32x4_t wide_fx;
- int32x4_t wide_fx2;
- int32x4_t wide_dx8 = vdupq_n_s32(dx*8);
- wide_fx = vdupq_n_s32(fx);
- wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
- wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
- wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
-
- wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));
-
- while (count >= 8) {
- int32x4_t wide_out;
- int32x4_t wide_out2;
-
- wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
- wide_out = vorrq_s32(wide_out,
- vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));
-
- wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
- wide_out2 = vorrq_s32(wide_out2,
- vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));
-
- vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
- vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));
-
- dst += 8;
- fx += dx*8;
- wide_fx = vaddq_s32(wide_fx, wide_dx8);
- wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
- count -= 8;
- }
- }
-#endif
-
if (count & 1)
{
SkASSERT((fx >> (16 + 14)) == 0);
@@ -574,7 +496,17 @@
// clamp gets special version of filterOne
fFilterOneX = SK_Fixed1;
fFilterOneY = SK_Fixed1;
+#if SK_ARM_NEON_IS_NONE
return ClampX_ClampY_Procs[index];
+#elif SK_ARM_NEON_IS_ALWAYS
+ return ClampX_ClampY_Procs_neon[index];
+#else // SK_ARM_NEON_IS_DYNAMIC
+ if (sk_cpu_arm_has_neon()) {
+ return ClampX_ClampY_Procs_neon[index];
+ } else {
+ return ClampX_ClampY_Procs[index];
+ }
+#endif
}
// all remaining procs use this form for filterOne
@@ -584,9 +516,19 @@
if (SkShader::kRepeat_TileMode == fTileModeX &&
SkShader::kRepeat_TileMode == fTileModeY)
{
+#if SK_ARM_NEON_IS_NONE
return RepeatX_RepeatY_Procs[index];
+#elif SK_ARM_NEON_IS_ALWAYS
+ return RepeatX_RepeatY_Procs_neon[index];
+#else // SK_ARM_NEON_IS_DYNAMIC
+ if (sk_cpu_arm_has_neon()) {
+ return RepeatX_RepeatY_Procs_neon[index];
+ } else {
+ return RepeatX_RepeatY_Procs[index];
+ }
+#endif
}
-
+
fTileProcX = choose_tile_proc(fTileModeX);
fTileProcY = choose_tile_proc(fTileModeY);
fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
« no previous file with comments | « gyp/opts.gyp ('k') | src/core/SkBitmapProcState_matrix_clamp.h » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b