diff --git a/gfx/skia/src/effects/SkGradientShader.cpp b/gfx/skia/src/effects/SkGradientShader.cpp
--- a/gfx/skia/src/effects/SkGradientShader.cpp
+++ b/gfx/skia/src/effects/SkGradientShader.cpp
@@ -167,16 +167,17 @@ private:
 
     mutable uint16_t*   fCache16;   // working ptr. If this is NULL, we need to recompute the cache values
     mutable SkPMColor*  fCache32;   // working ptr. If this is NULL, we need to recompute the cache values
 
     mutable uint16_t*   fCache16Storage;    // storage for fCache16, allocated on demand
     mutable SkMallocPixelRef* fCache32PixelRef;
     mutable unsigned    fCacheAlpha;        // the alpha value we used when we computed the cache. larger than 8bits so we can store uninitialized value
 
+    static SkPMColor PremultiplyColor(SkColor c0, U8CPU alpha);
     static void Build16bitCache(uint16_t[], SkColor c0, SkColor c1, int count);
     static void Build32bitCache(SkPMColor[], SkColor c0, SkColor c1, int count,
                                 U8CPU alpha);
     void setCacheAlpha(U8CPU alpha) const;
     void initCommon();
 
     typedef SkShader INHERITED;
 };
@@ -512,16 +513,31 @@ static inline U8CPU dither_fixed_to_8(Sk
  *  For dithering with premultiply, we want to ceiling the alpha component,
  *  to ensure that it is always >= any color component.
  */
 static inline U8CPU dither_ceil_fixed_to_8(SkFixed n) {
     n >>= 8;
     return ((n << 1) - (n | (n >> 8))) >> 8;
 }
 
+SkPMColor Gradient_Shader::PremultiplyColor(SkColor c0, U8CPU paintAlpha)
+{
+    SkFixed a = SkMulDiv255Round(SkColorGetA(c0), paintAlpha);
+    SkFixed r = SkColorGetR(c0);
+    SkFixed g = SkColorGetG(c0);
+    SkFixed b = SkColorGetB(c0);
+    
+    a = SkIntToFixed(a) + 0x8000;
+    r = SkIntToFixed(r) + 0x8000;
+    g = SkIntToFixed(g) + 0x8000;
+    b = SkIntToFixed(b) + 0x8000;
+        
+    return SkPremultiplyARGBInline(a >> 16, r >> 16, g >> 16, b >> 16);
+}
+
 void Gradient_Shader::Build32bitCache(SkPMColor cache[], SkColor c0, SkColor c1,
                                       int count, U8CPU paintAlpha) {
     SkASSERT(count > 1);
 
     // need to apply paintAlpha to our two endpoints
     SkFixed a = SkMulDiv255Round(SkColorGetA(c0), paintAlpha);
     SkFixed da;
     {
@@ -613,24 +629,24 @@ const uint16_t* Gradient_Shader::getCach
         }
     }
     return fCache16;
 }
 
 const SkPMColor* Gradient_Shader::getCache32() const {
     if (fCache32 == NULL) {
         // double the count for dither entries
-        const int entryCount = kCache32Count * 2;
+        const int entryCount = kCache32Count * 2 + 2;
         const size_t allocSize = sizeof(SkPMColor) * entryCount;
 
         if (NULL == fCache32PixelRef) {
             fCache32PixelRef = SkNEW_ARGS(SkMallocPixelRef,
                                           (NULL, allocSize, NULL));
         }
-        fCache32 = (SkPMColor*)fCache32PixelRef->getAddr();
+        fCache32 = (SkPMColor*)fCache32PixelRef->getAddr() + 1;
         if (fColorCount == 2) {
             Build32bitCache(fCache32, fOrigColors[0], fOrigColors[1],
                             kCache32Count, fCacheAlpha);
         } else {
             Rec* rec = fRecs;
             int prevIndex = 0;
             for (int i = 1; i < fColorCount; i++) {
                 int nextIndex = SkFixedToFFFF(rec[i].fPos) >> (16 - kCache32Bits);
@@ -644,28 +660,31 @@ const SkPMColor* Gradient_Shader::getCac
             }
             SkASSERT(prevIndex == kCache32Count - 1);
         }
 
         if (fMapper) {
             SkMallocPixelRef* newPR = SkNEW_ARGS(SkMallocPixelRef,
                                                  (NULL, allocSize, NULL));
             SkPMColor* linear = fCache32;           // just computed linear data
-            SkPMColor* mapped = (SkPMColor*)newPR->getAddr();    // storage for mapped data
+            SkPMColor* mapped = (SkPMColor*)newPR->getAddr() + 1;    // storage for mapped data
             SkUnitMapper* map = fMapper;
             for (int i = 0; i < kCache32Count; i++) {
                 int index = map->mapUnit16((i << 8) | i) >> 8;
                 mapped[i] = linear[index];
                 mapped[i + kCache32Count] = linear[index + kCache32Count];
             }
             fCache32PixelRef->unref();
             fCache32PixelRef = newPR;
-            fCache32 = (SkPMColor*)newPR->getAddr();
+            fCache32 = (SkPMColor*)newPR->getAddr() + 1;
         }
     }
+    //Write the clamp colours into the first and last entries of fCache32
+    fCache32[-1] = PremultiplyColor(fOrigColors[0], fCacheAlpha);
+    fCache32[kCache32Count * 2] = PremultiplyColor(fOrigColors[fColorCount - 1], fCacheAlpha);
     return fCache32;
 }
 
 /*
  *  Because our caller might rebuild the same (logically the same) gradient
  *  over and over, we'd like to return exactly the same "bitmap" if possible,
  *  allowing the client to utilize a cache of our bitmap (e.g. with a GPU).
  *  To do that, we maintain a private cache of built-bitmaps, based on our
@@ -875,28 +894,38 @@ void Linear_Gradient::shadeSpan(int x, i
             dx = dxStorage[0];
         } else {
             SkASSERT(fDstToIndexClass == kLinear_MatrixClass);
             dx = SkScalarToFixed(fDstToIndex.getScaleX());
         }
 
         if (SkFixedNearlyZero(dx)) {
             // we're a vertical gradient, so no change in a span
-            unsigned fi = proc(fx) >> (16 - kCache32Bits);
-            sk_memset32_dither(dstC, cache[toggle + fi],
-                                     cache[(toggle ^ TOGGLE_MASK) + fi], count);
+            if (proc == clamp_tileproc) {
+                if (fx < 0) {
+                    sk_memset32(dstC, cache[-1], count);
+                } else if (fx > 0xFFFF) {
+                    sk_memset32(dstC, cache[kCache32Count * 2], count);
+                } else {
+                    unsigned fi = proc(fx) >> (16 - kCache32Bits);
+                    sk_memset32_dither(dstC, cache[toggle + fi],
+                                       cache[(toggle ^ TOGGLE_MASK) + fi], count);
+                }
+            } else {
+                unsigned fi = proc(fx) >> (16 - kCache32Bits);
+                sk_memset32_dither(dstC, cache[toggle + fi],
+                                   cache[(toggle ^ TOGGLE_MASK) + fi], count);
+            }
         } else if (proc == clamp_tileproc) {
             SkClampRange range;
-            range.init(fx, dx, count, 0, 0xFF);
+            range.init(fx, dx, count, cache[-1], cache[kCache32Count * 2]);
 
             if ((count = range.fCount0) > 0) {
-                sk_memset32_dither(dstC,
-                                   cache[toggle + range.fV0],
-                                   cache[(toggle ^ TOGGLE_MASK) + range.fV0],
-                                   count);
+                 // Do we really want to dither the clamp values?
+                 sk_memset32(dstC, range.fV0, count);
                 dstC += count;
             }
             if ((count = range.fCount1) > 0) {
                 int unroll = count >> 3;
                 fx = range.fFx1;
                 for (int i = 0; i < unroll; i++) {
                     NO_CHECK_ITER;  NO_CHECK_ITER;
                     NO_CHECK_ITER;  NO_CHECK_ITER;
@@ -905,20 +934,17 @@ void Linear_Gradient::shadeSpan(int x, i
                 }
                 if ((count &= 7) > 0) {
                     do {
                         NO_CHECK_ITER;
                     } while (--count != 0);
                 }
             }
             if ((count = range.fCount2) > 0) {
-                sk_memset32_dither(dstC,
-                                   cache[toggle + range.fV1],
-                                   cache[(toggle ^ TOGGLE_MASK) + range.fV1],
-                                   count);
+                sk_memset32(dstC, range.fV1, count);
             }
         } else if (proc == mirror_tileproc) {
             do {
                 unsigned fi = mirror_8bits(fx >> 8);
                 SkASSERT(fi <= 0xFF);
                 fx += dx;
                 *dstC++ = cache[toggle + fi];
                 toggle ^= TOGGLE_MASK;
@@ -1670,19 +1699,24 @@ public:
             }
             SkScalar b = (SkScalarMul(fDiff.fX, fx) +
                          SkScalarMul(fDiff.fY, fy) - fStartRadius) * 2;
             SkScalar db = (SkScalarMul(fDiff.fX, dx) +
                           SkScalarMul(fDiff.fY, dy)) * 2;
             if (proc == clamp_tileproc) {
                 for (; count > 0; --count) {
                     SkFixed t = two_point_radial(b, fx, fy, fSr2D2, foura, fOneOverTwoA, posRoot);
-                    SkFixed index = SkClampMax(t, 0xFFFF);
-                    SkASSERT(index <= 0xFFFF);
-                    *dstC++ = cache[index >> (16 - kCache32Bits)];
+                    if (t < 0) {
+                      *dstC++ = cache[-1];
+                    } else if (t > 0xFFFF) {
+                      *dstC++ = cache[kCache32Count * 2];
+                    } else {
+                      SkASSERT(t <= 0xFFFF);
+                      *dstC++ = cache[t >> (16 - kCache32Bits)];
+                    }
                     fx += dx;
                     fy += dy;
                     b += db;
                 }
             } else if (proc == mirror_tileproc) {
                 for (; count > 0; --count) {
                     SkFixed t = two_point_radial(b, fx, fy, fSr2D2, foura, fOneOverTwoA, posRoot);
                     SkFixed index = mirror_tileproc(t);
