diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
index 633dd2cfd017a8..dec6446cd7653b 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
@@ -13274,6 +13274,28 @@ ref Unsafe.As<ulong, T>(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9))
             throw new NotSupportedException();
         }
 
+        // ApplyScalar<TOperator> builds its result by calling the scalar TOperator.Invoke on each element; the 256/512-bit overloads recurse on the lower and upper halves.
+        // TODO: The uses of these ApplyScalar methods are all as part of operators when handling edge cases (NaN, Infinity, really large inputs, etc.).
+        // Those edge cases currently fall back to scalar processing; we can look into handling them in a vectorized manner as well.
+
+        private static Vector128<float> ApplyScalar<TOperator>(Vector128<float> floats) where TOperator : IUnaryOperator<float, float> =>
+            Vector128.Create(TOperator.Invoke(floats[0]), TOperator.Invoke(floats[1]), TOperator.Invoke(floats[2]), TOperator.Invoke(floats[3]));
+
+        private static Vector256<float> ApplyScalar<TOperator>(Vector256<float> floats) where TOperator : IUnaryOperator<float, float> =>
+            Vector256.Create(ApplyScalar<TOperator>(floats.GetLower()), ApplyScalar<TOperator>(floats.GetUpper()));
+
+        private static Vector512<float> ApplyScalar<TOperator>(Vector512<float> floats) where TOperator : IUnaryOperator<float, float> =>
+            Vector512.Create(ApplyScalar<TOperator>(floats.GetLower()), ApplyScalar<TOperator>(floats.GetUpper()));
+
+        private static Vector128<double> ApplyScalar<TOperator>(Vector128<double> doubles) where TOperator : IUnaryOperator<double, double> =>
+            Vector128.Create(TOperator.Invoke(doubles[0]), TOperator.Invoke(doubles[1]));
+
+        private static Vector256<double> ApplyScalar<TOperator>(Vector256<double> doubles) where TOperator : IUnaryOperator<double, double> =>
+            Vector256.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
+
+        private static Vector512<double> ApplyScalar<TOperator>(Vector512<double> doubles) where TOperator : IUnaryOperator<double, double> =>
+            Vector512.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
+
         /// <summary>Creates a span of <typeparamref name="TTo"/> from a <typeparamref name="TTo"/> when they're the same type.</summary>
         private static unsafe ReadOnlySpan<TTo> Rename<TFrom, TTo>(ReadOnlySpan<TFrom> span)
         {
@@ -14811,6 +14833,7 @@ public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
         /// <summary>(x * y) + z</summary>
         internal readonly struct MultiplyAddEstimateOperator<T> : ITernaryOperator<T> where T : INumberBase<T>
         {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             public static T Invoke(T x, T y, T z)
             {
                 // TODO https://github.com/dotnet/runtime/issues/98053: Use T.MultiplyAddEstimate when it's available.
@@ -14839,6 +14862,7 @@ public static T Invoke(T x, T y, T z)
                 return (x * y) + z;
             }
 
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z)
             {
                 if (Fma.IsSupported)
@@ -14860,6 +14884,7 @@ public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y, Vector128<T> z
                 return (x * y) + z;
             }
 
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z)
             {
                 if (Fma.IsSupported)
@@ -14871,6 +14896,7 @@ public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y, Vector256<T> z
                 return (x * y) + z;
             }
 
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y, Vector512<T> z)
             {
                 if (Avx512F.IsSupported)
@@ -15952,22 +15978,367 @@ public static Vector512<T> Invoke(Vector512<T> x)
         internal readonly struct CosOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => false; // TODO: Vectorize
+            // This code is based on `vrs4_cos` and `vrd2_cos` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation notes from amd/aocl-libm-ose:
+            // --------------------------------------------
+            // To compute cosf(float x)
+            // Using the identity,
+            // cos(x) = sin(x + pi/2)           (1)
+            //
+            // 1. Argument Reduction
+            //      Now, let x be represented as,
+            //          |x| = N * pi + f        (2) | N is an integer,
+            //                                        -pi/2 <= f <= pi/2
+            //
+            //      From (2), N = int( (x + pi/2) / pi) - 0.5
+            //                f = |x| - (N * pi)
+            //
+            // 2. Polynomial Evaluation
+            //       From (1) and (2), sin(f) can be calculated using a polynomial
+            //       sin(f) = f*(1 + C1*f^2 + C2*f^4 + C3*f^6 + C4*f^8)
+            //
+            // 3. Reconstruction
+            //      Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f)
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); // only float and double have vector implementations below
+
             public static T Invoke(T x) => T.Cos(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+
+            public static Vector128<T> Invoke(Vector128<T> x) // dispatches on T to the float or double implementation
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>(); // reinterpret as float lanes, compute, reinterpret back as T
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable reports true only for float and double; callers are presumably expected to check it first
+                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x) // same dispatch as the Vector128 overload
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x) // same dispatch as the Vector128 overload
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+
+        /// <summary>float.Cos(x)</summary>
+        private readonly struct CosOperatorSingle : IUnaryOperator<float, float>
+        {
+            internal const uint MaxVectorizedValue = 0x4A989680u; // bit pattern of 5,000,000.0f; inputs with larger |x| bits take the scalar path
+            internal const uint SignMask = 0x7FFFFFFFu; // every bit except the sign bit: ANDing with it yields the bits of |x|
+            private const float AlmHuge = 1.2582912e7f; // 1.5 * 2^23: adding it rounds a small value to an integer kept in the low mantissa bits
+            private const float Pi_Tail1 = 8.742278e-8f; // first correction term applied after subtracting dn * float.Pi
+            private const float Pi_Tail2 = 3.430249e-15f; // second correction term
+            private const float C1 = -0.16666657f; // C1..C4: coefficients of the sin(f) polynomial
+            private const float C2 = 0.008332962f;
+            private const float C3 = -1.9801206e-4f;
+            private const float C4 = 2.5867037e-6f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Cos(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<uint> uxMasked = Vector128.Abs(x).AsUInt32(); // bits of |x|
+                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorSingle>(x); // some lane is out of range (NaN/Infinity bit patterns also compare greater): use float.Cos per element
+                }
+
+                Vector128<float> r = uxMasked.AsSingle();
+                Vector128<float> almHuge = Vector128.Create(AlmHuge);
+                Vector128<float> dn = ((r + Vector128.Create(float.Pi / 2)) * Vector128.Create(1 / float.Pi)) + almHuge; // (|x| + pi/2) / pi, rounded to the integer N by the almHuge add
+                Vector128<uint> odd = dn.AsUInt32() << 31; // lowest bit of dn (parity of N) moved into the sign-bit position
+                dn = dn - almHuge - Vector128.Create(0.5f); // N - 1/2
+                Vector128<float> f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2)); // f = |x| - dn * pi, with pi applied in three pieces
+
+                // POLY_EVAL_ODD_9: sin(f) ~= f * (1 + C1*f^2 + C2*f^4 + C3*f^6 + C4*f^8)
+                Vector128<float> f2 = f * f;
+                Vector128<float> f4 = f2 * f2;
+                Vector128<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector128<float>.One);
+                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4);
+                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector128<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ odd).AsSingle(); // flip the sign where N is odd: cos(x) = (-1)^N * sin(f)
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x) // same steps as the Vector128<float> overload
+            {
+                Vector256<uint> uxMasked = Vector256.Abs(x).AsUInt32();
+                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorSingle>(x);
+                }
+
+                Vector256<float> r = uxMasked.AsSingle();
+                Vector256<float> almHuge = Vector256.Create(AlmHuge);
+                Vector256<float> dn = ((r + Vector256.Create(float.Pi / 2)) * Vector256.Create(1 / float.Pi)) + almHuge;
+                Vector256<uint> odd = dn.AsUInt32() << 31;
+                dn = dn - almHuge - Vector256.Create(0.5f);
+                Vector256<float> f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2));
+
+                // POLY_EVAL_ODD_9: sin(f) ~= f * (1 + C1*f^2 + C2*f^4 + C3*f^6 + C4*f^8)
+                Vector256<float> f2 = f * f;
+                Vector256<float> f4 = f2 * f2;
+                Vector256<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector256<float>.One);
+                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4);
+                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector256<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ odd).AsSingle();
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x) // same steps as the Vector128<float> overload
+            {
+                Vector512<uint> uxMasked = Vector512.Abs(x).AsUInt32();
+                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorSingle>(x);
+                }
+
+                Vector512<float> r = uxMasked.AsSingle();
+                Vector512<float> almHuge = Vector512.Create(AlmHuge);
+                Vector512<float> dn = ((r + Vector512.Create(float.Pi / 2)) * Vector512.Create(1 / float.Pi)) + almHuge;
+                Vector512<uint> odd = dn.AsUInt32() << 31;
+                dn = dn - almHuge - Vector512.Create(0.5f);
+                Vector512<float> f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2));
+
+                // POLY_EVAL_ODD_9: sin(f) ~= f * (1 + C1*f^2 + C2*f^4 + C3*f^6 + C4*f^8)
+                Vector512<float> f2 = f * f;
+                Vector512<float> f4 = f2 * f2;
+                Vector512<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector512<float>.One);
+                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4);
+                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector512<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ odd).AsSingle();
+            }
+        }
+
+        /// <summary>double.Cos(x)</summary>
+        internal readonly struct CosOperatorDouble : IUnaryOperator<double, double>
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul; // every bit except the sign bit: ANDing with it yields the bits of |x|
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul; // bit pattern of 2^23 (8388608.0); inputs with larger |x| bits take the scalar path
+            private const double AlmHuge = 6.755399441055744E15; // 1.5 * 2^52: adding it rounds a small value to an integer kept in the low mantissa bits
+            private const double Pi_Tail2 = -1.2246467991473532E-16; // first correction term applied after subtracting dn * double.Pi
+            private const double Pi_Tail3 = 2.9947698097183397E-33; // second correction term
+            private const double C1 = -0.16666666666666666; // C1..C8: coefficients of the sin(f) polynomial
+            private const double C2 = 0.008333333333333165;
+            private const double C3 = -1.984126984120184E-4;
+            private const double C4 = 2.7557319210152756E-6;
+            private const double C5 = -2.5052106798274616E-8;
+            private const double C6 = 1.6058936490373254E-10;
+            private const double C7 = -7.642917806937501E-13;
+            private const double C8 = 2.7204790963151784E-15;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Cos(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<ulong> uxMasked = Vector128.Abs(x).AsUInt64(); // bits of |x|
+                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorDouble>(x); // some lane is out of range (NaN/Infinity bit patterns also compare greater): use double.Cos per element
+                }
+
+                Vector128<double> r = uxMasked.AsDouble();
+                Vector128<double> almHuge = Vector128.Create(AlmHuge);
+                Vector128<double> dn = (r * Vector128.Create(1 / double.Pi)) + Vector128.Create(0.5) + almHuge; // (|x| + pi/2) / pi == |x|/pi + 1/2, rounded to the integer N by the almHuge add
+                Vector128<ulong> odd = dn.AsUInt64() << 63; // lowest bit of dn (parity of N) moved into the sign-bit position
+                dn = dn - almHuge - Vector128.Create(0.5); // N - 1/2
+                Vector128<double> f = r + (dn * Vector128.Create(-double.Pi)) + (dn * Vector128.Create(Pi_Tail2)) + (dn * Vector128.Create(Pi_Tail3)); // f = |x| - dn * pi, with pi applied in three pieces
+
+                // POLY_EVAL_ODD_17: sin(f) ~= f + f * (C1*f^2 + C2*f^4 + ... + C8*f^16)
+                Vector128<double> f2 = f * f;
+                Vector128<double> f4 = f2 * f2;
+                Vector128<double> f6 = f4 * f2;
+                Vector128<double> f10 = f6 * f4;
+                Vector128<double> f14 = f10 * f4;
+                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3));
+                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5));
+                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C8), f2, Vector128.Create(C7));
+                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ odd).AsDouble(); // flip the sign where N is odd: cos(x) = (-1)^N * sin(f)
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x) // same steps as the Vector128<double> overload
+            {
+                Vector256<ulong> uxMasked = Vector256.Abs(x).AsUInt64();
+                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorDouble>(x);
+                }
+
+                Vector256<double> r = uxMasked.AsDouble();
+                Vector256<double> almHuge = Vector256.Create(AlmHuge);
+                Vector256<double> dn = (r * Vector256.Create(1 / double.Pi)) + Vector256.Create(0.5) + almHuge; // |x|/pi + 1/2 (not pi/2), rounded to the integer N
+                Vector256<ulong> odd = dn.AsUInt64() << 63;
+                dn = dn - almHuge - Vector256.Create(0.5);
+                Vector256<double> f = r + (dn * Vector256.Create(-double.Pi)) + (dn * Vector256.Create(Pi_Tail2)) + (dn * Vector256.Create(Pi_Tail3));
+
+                // POLY_EVAL_ODD_17: sin(f) ~= f + f * (C1*f^2 + C2*f^4 + ... + C8*f^16)
+                Vector256<double> f2 = f * f;
+                Vector256<double> f4 = f2 * f2;
+                Vector256<double> f6 = f4 * f2;
+                Vector256<double> f10 = f6 * f4;
+                Vector256<double> f14 = f10 * f4;
+                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3));
+                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5));
+                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C8), f2, Vector256.Create(C7));
+                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ odd).AsDouble();
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x) // same steps as the Vector128<double> overload
+            {
+                Vector512<ulong> uxMasked = Vector512.Abs(x).AsUInt64();
+                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<CosOperatorDouble>(x);
+                }
+
+                Vector512<double> r = uxMasked.AsDouble();
+                Vector512<double> almHuge = Vector512.Create(AlmHuge);
+                Vector512<double> dn = (r * Vector512.Create(1 / double.Pi)) + Vector512.Create(0.5) + almHuge; // |x|/pi + 1/2 (not pi/2), rounded to the integer N
+                Vector512<ulong> odd = dn.AsUInt64() << 63;
+                dn = dn - almHuge - Vector512.Create(0.5);
+                Vector512<double> f = r + (dn * Vector512.Create(-double.Pi)) + (dn * Vector512.Create(Pi_Tail2)) + (dn * Vector512.Create(Pi_Tail3));
+
+                // POLY_EVAL_ODD_17: sin(f) ~= f + f * (C1*f^2 + C2*f^4 + ... + C8*f^16)
+                Vector512<double> f2 = f * f;
+                Vector512<double> f4 = f2 * f2;
+                Vector512<double> f6 = f4 * f2;
+                Vector512<double> f10 = f6 * f4;
+                Vector512<double> f14 = f10 * f4;
+                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
+                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
+                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C8), f2, Vector512.Create(C7));
+                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ odd).AsDouble();
+            }
         }
 
         /// <summary>T.CosPi(x)</summary>
         internal readonly struct CosPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => CosOperator<T>.Vectorizable;
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); // same condition as CosOperator<T>.Vectorizable
+
             public static T Invoke(T x) => T.CosPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => CosOperator<T>.Invoke(x * Vector128.Create(T.Pi));
-            public static Vector256<T> Invoke(Vector256<T> x) => CosOperator<T>.Invoke(x * Vector256.Create(T.Pi));
-            public static Vector512<T> Invoke(Vector512<T> x) => CosOperator<T>.Invoke(x * Vector512.Create(T.Pi));
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                Vector128<T> xpi = x * Vector128.Create(T.Pi); // the argument that would be handed to the vectorized Cos
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue))) // same range test CosOperatorSingle applies, done here on |x * pi|
+                    {
+                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>(); // fall back to scalar CosPi on the original x rather than letting Cos fall back to scalar Cos on x * pi
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable reports true only for float and double
+                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(CosOperatorDouble.SignMask), Vector128.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return CosOperator<T>.Invoke(xpi); // every lane is in range: cos(x * pi) via the vectorized Cos
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x) // same steps as the Vector128 overload
+            {
+                Vector256<T> xpi = x * Vector256.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(CosOperatorDouble.SignMask), Vector256.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return CosOperator<T>.Invoke(xpi);
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x) // same steps as the Vector128 overload
+            {
+                Vector512<T> xpi = x * Vector512.Create(T.Pi);
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<float>>(x.AsSingle()).As<float, T>();
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(CosOperatorDouble.SignMask), Vector512.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
+                    }
+                }
+
+                return CosOperator<T>.Invoke(xpi);
+            }
         }
 
         /// <summary>T.Cosh(x)</summary>
@@ -15995,13 +16366,13 @@ public static Vector512<T> Invoke(Vector512<T> x)
             //
             // coshf = v/2 * exp(x - log(v)) where v = 0x1.0000e8p-1
 
-            private const float SINGLE_LOGV = 0.693161f;
-            private const float SINGLE_HALFV = 1.0000138f;
-            private const float SINGLE_INVV2 = 0.24999309f;
+            private const float Single_LOGV = 0.693161f;
+            private const float Single_HALFV = 1.0000138f;
+            private const float Single_INVV2 = 0.24999309f;
 
-            private const double DOUBLE_LOGV = 0.6931471805599453;
-            private const double DOUBLE_HALFV = 1.0;
-            private const double DOUBLE_INVV2 = 0.25;
+            private const double Double_LOGV = 0.6931471805599453;
+            private const double Double_HALFV = 1.0;
+            private const double Double_INVV2 = 0.25;
 
             public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
 
@@ -16014,8 +16385,8 @@ public static Vector128<T> Invoke(Vector128<T> t)
                     Vector128<float> x = t.AsSingle();
 
                     Vector128<float> y = Vector128.Abs(x);
-                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)SINGLE_LOGV));
-                    return (Vector128.Create((float)SINGLE_HALFV) * (z + (Vector128.Create((float)SINGLE_INVV2) / z))).As<float, T>();
+                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)Single_LOGV));
+                    return (Vector128.Create((float)Single_HALFV) * (z + (Vector128.Create((float)Single_INVV2) / z))).As<float, T>();
                 }
                 else
                 {
@@ -16023,8 +16394,8 @@ public static Vector128<T> Invoke(Vector128<T> t)
                     Vector128<double> x = t.AsDouble();
 
                     Vector128<double> y = Vector128.Abs(x);
-                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(DOUBLE_LOGV));
-                    return (Vector128.Create(DOUBLE_HALFV) * (z + (Vector128.Create(DOUBLE_INVV2) / z))).As<double, T>();
+                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(Double_LOGV));
+                    return (Vector128.Create(Double_HALFV) * (z + (Vector128.Create(Double_INVV2) / z))).As<double, T>();
                 }
             }
 
@@ -16035,8 +16406,8 @@ public static Vector256<T> Invoke(Vector256<T> t)
                     Vector256<float> x = t.AsSingle();
 
                     Vector256<float> y = Vector256.Abs(x);
-                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)SINGLE_LOGV));
-                    return (Vector256.Create((float)SINGLE_HALFV) * (z + (Vector256.Create((float)SINGLE_INVV2) / z))).As<float, T>();
+                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)Single_LOGV));
+                    return (Vector256.Create((float)Single_HALFV) * (z + (Vector256.Create((float)Single_INVV2) / z))).As<float, T>();
                 }
                 else
                 {
@@ -16044,8 +16415,8 @@ public static Vector256<T> Invoke(Vector256<T> t)
                     Vector256<double> x = t.AsDouble();
 
                     Vector256<double> y = Vector256.Abs(x);
-                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(DOUBLE_LOGV));
-                    return (Vector256.Create(DOUBLE_HALFV) * (z + (Vector256.Create(DOUBLE_INVV2) / z))).As<double, T>();
+                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(Double_LOGV));
+                    return (Vector256.Create(Double_HALFV) * (z + (Vector256.Create(Double_INVV2) / z))).As<double, T>();
                 }
             }
 
@@ -16056,8 +16427,8 @@ public static Vector512<T> Invoke(Vector512<T> t)
                     Vector512<float> x = t.AsSingle();
 
                     Vector512<float> y = Vector512.Abs(x);
-                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)SINGLE_LOGV));
-                    return (Vector512.Create((float)SINGLE_HALFV) * (z + (Vector512.Create((float)SINGLE_INVV2) / z))).As<float, T>();
+                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)Single_LOGV));
+                    return (Vector512.Create((float)Single_HALFV) * (z + (Vector512.Create((float)Single_INVV2) / z))).As<float, T>();
                 }
                 else
                 {
@@ -16065,8 +16436,8 @@ public static Vector512<T> Invoke(Vector512<T> t)
                     Vector512<double> x = t.AsDouble();
 
                     Vector512<double> y = Vector512.Abs(x);
-                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(DOUBLE_LOGV));
-                    return (Vector512.Create(DOUBLE_HALFV) * (z + (Vector512.Create(DOUBLE_INVV2) / z))).As<double, T>();
+                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(Double_LOGV));
+                    return (Vector512.Create(Double_HALFV) * (z + (Vector512.Create(Double_INVV2) / z))).As<double, T>();
                 }
             }
         }
@@ -16075,22 +16446,369 @@ public static Vector512<T> Invoke(Vector512<T> t)
         internal readonly struct SinOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => false; // TODO: Vectorize
+            // This code is based on `vrs4_sin` and `vrd2_sin` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation notes from amd/aocl-libm-ose:
+            // -----------------------------------------------------------------
+            // Convert given x into the form
+            // |x| = N * pi + f where N is an integer and f lies in [-pi/2,pi/2]
+            // N is obtained by : N = round(x/pi)
+            // f is obtained by : f = abs(x)-N*pi
+            // sin(x) = sin(N * pi + f) = sin(N * pi)*cos(f) + cos(N*pi)*sin(f)
+            // sin(x) = sign(x)*sin(f)*(-1)**N
+            //
+            // The term sin(f) can be approximated by using a polynomial
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
             public static T Invoke(T x) => T.Sin(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(); // reinterpret lanes as float, run the float kernel, reinterpret back as T
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // double is the only other type for which Vectorizable is true
+                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(); // reinterpret lanes as double, run the double kernel, reinterpret back as T
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(); // float kernel, 256-bit overload
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // double is the only other type for which Vectorizable is true
+                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(); // double kernel, 256-bit overload
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(); // float kernel, 512-bit overload
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // double is the only other type for which Vectorizable is true
+                    return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(); // double kernel, 512-bit overload
+                }
+            }
+        }
+
+        /// <summary>float.Sin(x)</summary>
+        private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
+        {
+            internal const uint SignMask = 0x7FFFFFFFu; // every bit except the sign bit; ~SignMask isolates the sign bit
+            internal const uint MaxVectorizedValue = 0x49800000u; // bit pattern of 2^20 (1048576f); lanes whose |x| bits exceed it (including Infinity/NaN) take the scalar path
+            private const float AlmHuge = 1.2582912e7f; // 1.5 * 2^23: adding it rounds the sum to an integer and leaves that integer in the low mantissa bits
+            private const float Pi_Tail1 = 8.742278e-8f; // float.Pi - pi, i.e. the rounding error of float.Pi
+            private const float Pi_Tail2 = 3.430249e-15f; // second, much smaller correction term for pi
+            private const float C1 = -0.16666657f; // C1..C4: coefficients of f^3, f^5, f^7, f^9 in the sin(f) polynomial below
+            private const float C2 = 0.0083330255f;
+            private const float C3 = -1.980742e-4f;
+            private const float C4 = 2.6019031e-6f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Sin(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~SignMask); // sign bit of each input lane
+                Vector128<uint> uxMasked = Vector128.Abs(x).AsUInt32(); // |x| as raw bits, so the range check is an unsigned integer compare
+
+                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) // any lane out of range: evaluate the whole vector lane-by-lane
+                {
+                    return ApplyScalar<SinOperatorSingle>(x);
+                }
+
+                Vector128<float> r = uxMasked.AsSingle(); // r = |x|
+                Vector128<float> almHuge = Vector128.Create(AlmHuge);
+                Vector128<float> dn = (r * Vector128.Create(1 / float.Pi)) + almHuge; // |x|/pi plus the rounding constant
+                Vector128<uint> odd = dn.AsUInt32() << 31; // lowest mantissa bit is the parity of N; move it into the sign-bit position
+                dn -= almHuge; // dn is now N = round(|x| / pi) as a float
+                Vector128<float> f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2)); // f = |x| - N*pi, with pi split into three parts
+
+                // POLY_EVAL_ODD_9: poly = f + C1*f^3 + C2*f^5 + C3*f^7 + C4*f^9
+                Vector128<float> f2 = f * f;
+                Vector128<float> f4 = f2 * f2;
+                Vector128<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector128<float>.One);
+                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4);
+                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector128<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); // flip the sign bit for negative x and for odd N
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x) // same steps as the Vector128<float> overload
+            {
+                Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~SignMask);
+                Vector256<uint> uxMasked = Vector256.Abs(x).AsUInt32();
+
+                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorSingle>(x);
+                }
+
+                Vector256<float> r = uxMasked.AsSingle();
+                Vector256<float> almHuge = Vector256.Create(AlmHuge);
+                Vector256<float> dn = (r * Vector256.Create(1 / float.Pi)) + almHuge;
+                Vector256<uint> odd = dn.AsUInt32() << 31;
+                dn -= almHuge;
+                Vector256<float> f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2));
+
+                // POLY_EVAL_ODD_9: poly = f + C1*f^3 + C2*f^5 + C3*f^7 + C4*f^9
+                Vector256<float> f2 = f * f;
+                Vector256<float> f4 = f2 * f2;
+                Vector256<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector256<float>.One);
+                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4);
+                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector256<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x) // same steps as the Vector128<float> overload
+            {
+                Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~SignMask);
+                Vector512<uint> uxMasked = Vector512.Abs(x).AsUInt32();
+
+                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorSingle>(x);
+                }
+
+                Vector512<float> r = uxMasked.AsSingle();
+                Vector512<float> almHuge = Vector512.Create(AlmHuge);
+                Vector512<float> dn = (r * Vector512.Create(1 / float.Pi)) + almHuge;
+                Vector512<uint> odd = dn.AsUInt32() << 31;
+                dn -= almHuge;
+                Vector512<float> f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2));
+
+                // POLY_EVAL_ODD_9: poly = f + C1*f^3 + C2*f^5 + C3*f^7 + C4*f^9
+                Vector512<float> f2 = f * f;
+                Vector512<float> f4 = f2 * f2;
+                Vector512<float> a0 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(a0, f2, Vector512<float>.One);
+                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4);
+                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector512<float> poly = f * a3;
+
+                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
+            }
+        }
+
+        /// <summary>double.Sin(x)</summary>
+        private readonly struct SinOperatorDouble : IUnaryOperator<double, double>
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul; // every bit except the sign bit; ~SignMask isolates the sign bit
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul; // bit pattern of 2^23 (8388608.0); lanes whose |x| bits exceed it (including Infinity/NaN) take the scalar path
+            private const double AlmHuge = 6.755399441055744e15; // 1.5 * 2^52: adding it rounds the sum to an integer and leaves that integer in the low mantissa bits
+            private const double Pi_Tail1 = 1.224646799147353e-16; // pi - double.Pi, i.e. the rounding error of double.Pi
+            private const double Pi_Tail2 = 2.165713347843828e-32; // second, much smaller correction term for pi
+            private const double C0 = -0.16666666666666666; // C0..C14: coefficients of f^3, f^5, ..., f^17 in the sin(f) polynomial below
+            private const double C2 = 0.008333333333333165;
+            private const double C4 = -1.984126984120184e-4;
+            private const double C6 = 2.7557319210152756e-6;
+            private const double C8 = -2.5052106798274583e-8;
+            private const double C10 = 1.605893649037159e-10;
+            private const double C12 = -7.642917806891047e-13;
+            private const double C14 = 2.7204790957888847e-15;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Sin(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~SignMask); // sign bit of each input lane
+                Vector128<ulong> uxMasked = Vector128.Abs(x).AsUInt64(); // |x| as raw bits, so the range check is an unsigned integer compare
+
+                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) // any lane out of range: evaluate the whole vector lane-by-lane
+                {
+                    return ApplyScalar<SinOperatorDouble>(x);
+                }
+
+                Vector128<double> r = uxMasked.AsDouble(); // r = |x|
+                Vector128<double> almHuge = Vector128.Create(AlmHuge);
+                Vector128<double> dn = (r * Vector128.Create(1 / double.Pi)) + almHuge; // |x|/pi plus the rounding constant
+                Vector128<ulong> odd = dn.AsUInt64() << 63; // lowest mantissa bit is the parity of N; move it into the sign-bit position
+                dn -= almHuge; // dn is now N = round(|x| / pi) as a double
+                Vector128<double> f = r - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2)); // f = |x| - N*pi, with pi split into three parts
+
+                // POLY_EVAL_ODD_17: poly = f + C0*f^3 + C2*f^5 + C4*f^7 + C6*f^9 + C8*f^11 + C10*f^13 + C12*f^15 + C14*f^17
+                Vector128<double> f2 = f * f;
+                Vector128<double> f4 = f2 * f2;
+                Vector128<double> f6 = f4 * f2;
+                Vector128<double> f10 = f6 * f4;
+                Vector128<double> f14 = f10 * f4;
+                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C0));
+                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C4));
+                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C10), f2, Vector128.Create(C8));
+                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C14), f2, Vector128.Create(C12));
+                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6); // terms f^2 .. f^8
+                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4); // terms f^10 .. f^16
+                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); // flip the sign bit for negative x and for odd N
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x) // same steps as the Vector128<double> overload
+            {
+                Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~SignMask);
+                Vector256<ulong> uxMasked = Vector256.Abs(x).AsUInt64();
+
+                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorDouble>(x);
+                }
+
+                Vector256<double> r = uxMasked.AsDouble();
+                Vector256<double> almHuge = Vector256.Create(AlmHuge);
+                Vector256<double> dn = (r * Vector256.Create(1 / double.Pi)) + almHuge;
+                Vector256<ulong> odd = dn.AsUInt64() << 63;
+                dn -= almHuge;
+                Vector256<double> f = r - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2));
+
+                // POLY_EVAL_ODD_17: poly = f + C0*f^3 + C2*f^5 + C4*f^7 + C6*f^9 + C8*f^11 + C10*f^13 + C12*f^15 + C14*f^17
+                Vector256<double> f2 = f * f;
+                Vector256<double> f4 = f2 * f2;
+                Vector256<double> f6 = f4 * f2;
+                Vector256<double> f10 = f6 * f4;
+                Vector256<double> f14 = f10 * f4;
+                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C0));
+                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C4));
+                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C10), f2, Vector256.Create(C8));
+                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C14), f2, Vector256.Create(C12));
+                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x) // same steps as the Vector128<double> overload
+            {
+                Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~SignMask);
+                Vector512<ulong> uxMasked = Vector512.Abs(x).AsUInt64();
+
+                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<SinOperatorDouble>(x);
+                }
+
+                Vector512<double> r = uxMasked.AsDouble();
+                Vector512<double> almHuge = Vector512.Create(AlmHuge);
+                Vector512<double> dn = (r * Vector512.Create(1 / double.Pi)) + almHuge;
+                Vector512<ulong> odd = dn.AsUInt64() << 63;
+                dn -= almHuge;
+                Vector512<double> f = r - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2));
+
+                // POLY_EVAL_ODD_17: poly = f + C0*f^3 + C2*f^5 + C4*f^7 + C6*f^9 + C8*f^11 + C10*f^13 + C12*f^15 + C14*f^17
+                Vector512<double> f2 = f * f;
+                Vector512<double> f4 = f2 * f2;
+                Vector512<double> f6 = f4 * f2;
+                Vector512<double> f10 = f6 * f4;
+                Vector512<double> f14 = f10 * f4;
+                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C0));
+                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C4));
+                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C10), f2, Vector512.Create(C8));
+                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C14), f2, Vector512.Create(C12));
+                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(a1, f2, a2 * f6);
+                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
+                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
+
+                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
+            }
         }
 
         /// <summary>T.SinPi(x)</summary>
         internal readonly struct SinPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => SinOperator<T>.Vectorizable;
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+
             public static T Invoke(T x) => T.SinPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => SinOperator<T>.Invoke(x * Vector128.Create(T.Pi));
-            public static Vector256<T> Invoke(Vector256<T> x) => SinOperator<T>.Invoke(x * Vector256.Create(T.Pi));
-            public static Vector512<T> Invoke(Vector512<T> x) => SinOperator<T>.Invoke(x * Vector512.Create(T.Pi));
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                Vector128<T> xpi = x * Vector128.Create(T.Pi); // scale by pi so the Sin kernel and its range limit can be reused
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>(); // some |x * pi| is out of range: scalar SinPi on the original x, lane-by-lane
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // double is the only other type for which Vectorizable is true
+                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(SinOperatorDouble.SignMask), Vector128.Create(SinOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>(); // some |x * pi| is out of range: scalar SinPi on the original x, lane-by-lane
+                    }
+                }
+
+                return SinOperator<T>.Invoke(xpi); // every lane is in range: sin(pi * x) via the vectorized Sin
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                Vector256<T> xpi = x * Vector256.Create(T.Pi); // scale by pi so the Sin kernel and its range limit can be reused
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>(); // out of range: scalar SinPi on the original x
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(SinOperatorDouble.SignMask), Vector256.Create(SinOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>(); // out of range: scalar SinPi on the original x
+                    }
+                }
+
+                return SinOperator<T>.Invoke(xpi); // every lane is in range: sin(pi * x) via the vectorized Sin
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                Vector512<T> xpi = x * Vector512.Create(T.Pi); // scale by pi so the Sin kernel and its range limit can be reused
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<float>>(x.AsSingle()).As<float, T>(); // out of range: scalar SinPi on the original x
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double));
+                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(SinOperatorDouble.SignMask), Vector512.Create(SinOperatorDouble.MaxVectorizedValue)))
+                    {
+                        return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>(); // out of range: scalar SinPi on the original x
+                    }
+                }
+
+                return SinOperator<T>.Invoke(xpi); // every lane is in range: sin(pi * x) via the vectorized Sin
+            }
         }
 
         /// <summary>T.Sinh(x)</summary>
@@ -16100,13 +16818,13 @@ public static Vector512<T> Invoke(Vector512<T> t)
             // Same as cosh, but with `z -` rather than `z +`, and with the sign
             // flipped on the result based on the sign of the input.
 
-            private const float SINGLE_LOGV = 0.693161f;
-            private const float SINGLE_HALFV = 1.0000138f;
-            private const float SINGLE_INVV2 = 0.24999309f;
+            private const float Single_LOGV = 0.693161f;
+            private const float Single_HALFV = 1.0000138f;
+            private const float Single_INVV2 = 0.24999309f;
 
-            private const double DOUBLE_LOGV = 0.6931471805599453;
-            private const double DOUBLE_HALFV = 1.0;
-            private const double DOUBLE_INVV2 = 0.25;
+            private const double Double_LOGV = 0.6931471805599453;
+            private const double Double_HALFV = 1.0;
+            private const double Double_INVV2 = 0.25;
 
             public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
 
@@ -16119,8 +16837,8 @@ public static Vector128<T> Invoke(Vector128<T> t)
                     Vector128<float> x = t.AsSingle();
 
                     Vector128<float> y = Vector128.Abs(x);
-                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)SINGLE_LOGV));
-                    Vector128<float> result = Vector128.Create((float)SINGLE_HALFV) * (z - (Vector128.Create((float)SINGLE_INVV2) / z));
+                    Vector128<float> z = ExpOperator<float>.Invoke(y - Vector128.Create((float)Single_LOGV));
+                    Vector128<float> result = Vector128.Create((float)Single_HALFV) * (z - (Vector128.Create((float)Single_INVV2) / z));
                     Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~(uint)int.MaxValue);
                     return (sign ^ result.AsUInt32()).As<uint, T>();
                 }
@@ -16130,8 +16848,8 @@ public static Vector128<T> Invoke(Vector128<T> t)
                     Vector128<double> x = t.AsDouble();
 
                     Vector128<double> y = Vector128.Abs(x);
-                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(DOUBLE_LOGV));
-                    Vector128<double> result = Vector128.Create(DOUBLE_HALFV) * (z - (Vector128.Create(DOUBLE_INVV2) / z));
+                    Vector128<double> z = ExpOperator<double>.Invoke(y - Vector128.Create(Double_LOGV));
+                    Vector128<double> result = Vector128.Create(Double_HALFV) * (z - (Vector128.Create(Double_INVV2) / z));
                     Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~(ulong)long.MaxValue);
                     return (sign ^ result.AsUInt64()).As<ulong, T>();
                 }
@@ -16144,8 +16862,8 @@ public static Vector256<T> Invoke(Vector256<T> t)
                     Vector256<float> x = t.AsSingle();
 
                     Vector256<float> y = Vector256.Abs(x);
-                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)SINGLE_LOGV));
-                    Vector256<float> result = Vector256.Create((float)SINGLE_HALFV) * (z - (Vector256.Create((float)SINGLE_INVV2) / z));
+                    Vector256<float> z = ExpOperator<float>.Invoke(y - Vector256.Create((float)Single_LOGV));
+                    Vector256<float> result = Vector256.Create((float)Single_HALFV) * (z - (Vector256.Create((float)Single_INVV2) / z));
                     Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~(uint)int.MaxValue);
                     return (sign ^ result.AsUInt32()).As<uint, T>();
                 }
@@ -16155,8 +16873,8 @@ public static Vector256<T> Invoke(Vector256<T> t)
                     Vector256<double> x = t.AsDouble();
 
                     Vector256<double> y = Vector256.Abs(x);
-                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(DOUBLE_LOGV));
-                    Vector256<double> result = Vector256.Create(DOUBLE_HALFV) * (z - (Vector256.Create(DOUBLE_INVV2) / z));
+                    Vector256<double> z = ExpOperator<double>.Invoke(y - Vector256.Create(Double_LOGV));
+                    Vector256<double> result = Vector256.Create(Double_HALFV) * (z - (Vector256.Create(Double_INVV2) / z));
                     Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~(ulong)long.MaxValue);
                     return (sign ^ result.AsUInt64()).As<ulong, T>();
                 }
@@ -16169,8 +16887,8 @@ public static Vector512<T> Invoke(Vector512<T> t)
                     Vector512<float> x = t.AsSingle();
 
                     Vector512<float> y = Vector512.Abs(x);
-                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)SINGLE_LOGV));
-                    Vector512<float> result = Vector512.Create((float)SINGLE_HALFV) * (z - (Vector512.Create((float)SINGLE_INVV2) / z));
+                    Vector512<float> z = ExpOperator<float>.Invoke(y - Vector512.Create((float)Single_LOGV));
+                    Vector512<float> result = Vector512.Create((float)Single_HALFV) * (z - (Vector512.Create((float)Single_INVV2) / z));
                     Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~(uint)int.MaxValue);
                     return (sign ^ result.AsUInt32()).As<uint, T>();
                 }
@@ -16180,8 +16898,8 @@ public static Vector512<T> Invoke(Vector512<T> t)
                     Vector512<double> x = t.AsDouble();
 
                     Vector512<double> y = Vector512.Abs(x);
-                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(DOUBLE_LOGV));
-                    Vector512<double> result = Vector512.Create(DOUBLE_HALFV) * (z - (Vector512.Create(DOUBLE_INVV2) / z));
+                    Vector512<double> z = ExpOperator<double>.Invoke(y - Vector512.Create(Double_LOGV));
+                    Vector512<double> result = Vector512.Create(Double_HALFV) * (z - (Vector512.Create(Double_INVV2) / z));
                     Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~(ulong)long.MaxValue);
                     return (sign ^ result.AsUInt64()).As<ulong, T>();
                 }
@@ -16824,7 +17542,7 @@ public static Vector128<float> Invoke(Vector128<float> x)
 
                 vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF);
 
-                Vector128<float> r = vx.AsSingle() - Vector128.Create(1.0f);
+                Vector128<float> r = vx.AsSingle() - Vector128<float>.One;
 
                 Vector128<float> r2 = r * r;
                 Vector128<float> r4 = r2 * r2;
@@ -16893,7 +17611,7 @@ public static Vector256<float> Invoke(Vector256<float> x)
 
                 vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF);
 
-                Vector256<float> r = vx.AsSingle() - Vector256.Create(1.0f);
+                Vector256<float> r = vx.AsSingle() - Vector256<float>.One;
 
                 Vector256<float> r2 = r * r;
                 Vector256<float> r4 = r2 * r2;
@@ -16962,7 +17680,7 @@ public static Vector512<float> Invoke(Vector512<float> x)
 
                 vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF);
 
-                Vector512<float> r = vx.AsSingle() - Vector512.Create(1.0f);
+                Vector512<float> r = vx.AsSingle() - Vector512<float>.One;
 
                 Vector512<float> r2 = r * r;
                 Vector512<float> r4 = r2 * r2;
@@ -17496,7 +18214,7 @@ public static Vector128<float> Invoke(Vector128<float> x)
 
                 vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF);
 
-                Vector128<float> r = vx.AsSingle() - Vector128.Create(1.0f);
+                Vector128<float> r = vx.AsSingle() - Vector128<float>.One;
 
                 Vector128<float> r2 = r * r;
                 Vector128<float> r4 = r2 * r2;
@@ -17565,7 +18283,7 @@ public static Vector256<float> Invoke(Vector256<float> x)
 
                 vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF);
 
-                Vector256<float> r = vx.AsSingle() - Vector256.Create(1.0f);
+                Vector256<float> r = vx.AsSingle() - Vector256<float>.One;
 
                 Vector256<float> r2 = r * r;
                 Vector256<float> r4 = r2 * r2;
@@ -17634,7 +18352,7 @@ public static Vector512<float> Invoke(Vector512<float> x)
 
                 vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF);
 
-                Vector512<float> r = vx.AsSingle() - Vector512.Create(1.0f);
+                Vector512<float> r = vx.AsSingle() - Vector512<float>.One;
 
                 Vector512<float> r2 = r * r;
                 Vector512<float> r4 = r2 * r2;
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
index 2bafef98fd20ea..cd6ae2455491c8 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
@@ -18,9 +18,29 @@ namespace System.Numerics.Tensors.Tests
 {
     public class ConvertTests
     {
-        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBuiltWithAggressiveTrimming))]
+        [Fact]
         [SkipOnCoreClr("Depends heavily on folded type comparisons", RuntimeTestModes.JitMinOpts)]
         public void ConvertTruncatingAndSaturating()
+        {
+            // Inner-loop smoke test over a handful of type pairs; ConvertTruncatingAndSaturating_Outerloop does the more exhaustive testing.
+
+            ConvertTruncatingImpl<float, double>();
+            ConvertTruncatingImpl<double, float>();
+            ConvertTruncatingImpl<long, byte>();
+            ConvertTruncatingImpl<short, uint>();
+            ConvertTruncatingImpl<Half, int>();
+
+            ConvertSaturatingImpl<float, double>();
+            ConvertSaturatingImpl<double, float>();
+            ConvertSaturatingImpl<long, byte>();
+            ConvertSaturatingImpl<short, uint>();
+            ConvertSaturatingImpl<Half, int>();
+        }
+
+        [OuterLoop]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBuiltWithAggressiveTrimming))]
+        [SkipOnCoreClr("Depends heavily on folded type comparisons", RuntimeTestModes.JitMinOpts)]
+        public void ConvertTruncatingAndSaturating_Outerloop()
         {
             MethodInfo convertTruncatingImpl = typeof(ConvertTests).GetMethod(nameof(ConvertTruncatingImpl), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance);
             Assert.NotNull(convertTruncatingImpl);
@@ -54,11 +74,8 @@ public void ConvertChecked()
         {
             // Conversions that never overflow. This isn't an exhaustive list; just a sampling.
             ConvertCheckedImpl<byte, byte>();
-            ConvertCheckedImpl<byte, ushort>();
             ConvertCheckedImpl<byte, short>();
             ConvertCheckedImpl<byte, uint>();
-            ConvertCheckedImpl<byte, int>();
-            ConvertCheckedImpl<byte, ulong>();
             ConvertCheckedImpl<byte, long>();
             ConvertCheckedImpl<byte, float>();
             ConvertCheckedImpl<Half, Half>();
@@ -78,12 +95,12 @@ private static void ConvertTruncatingImpl<TFrom, TTo>()
         {
             AssertExtensions.Throws<ArgumentException>("destination", () => TensorPrimitives.ConvertTruncating<TFrom, TTo>(new TFrom[3], new TTo[2]));
 
+            Random rand = new(42);
             foreach (int tensorLength in Helpers.TensorLengthsIncluding0)
             {
                 using BoundedMemory<TFrom> source = BoundedMemory.Allocate<TFrom>(tensorLength);
                 using BoundedMemory<TTo> destination = BoundedMemory.Allocate<TTo>(tensorLength);
 
-                Random rand = new(42);
                 Span<TFrom> sourceSpan = source.Span;
                 for (int i = 0; i < tensorLength; i++)
                 {
@@ -110,12 +127,12 @@ private static void ConvertSaturatingImpl<TFrom, TTo>()
         {
             AssertExtensions.Throws<ArgumentException>("destination", () => TensorPrimitives.ConvertSaturating<TFrom, TTo>(new TFrom[3], new TTo[2]));
 
+            Random rand = new(42);
             foreach (int tensorLength in Helpers.TensorLengthsIncluding0)
             {
                 using BoundedMemory<TFrom> source = BoundedMemory.Allocate<TFrom>(tensorLength);
                 using BoundedMemory<TTo> destination = BoundedMemory.Allocate<TTo>(tensorLength);
 
-                Random rand = new(42);
                 Span<TFrom> sourceSpan = source.Span;
                 for (int i = 0; i < tensorLength; i++)
                 {
@@ -209,6 +226,8 @@ private static bool IsEqualWithTolerance<T>(T expected, T actual, T? tolerance =
         }
     }
 
+    // Some of the per-type test classes below are marked [OuterLoop] solely to reduce inner loop testing time.
+
     public class DoubleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests<double> { }
     public class SingleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests<float> { }
     public class HalfGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests<Half>
@@ -216,22 +235,31 @@ public class HalfGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimi
         protected override void AssertEqualTolerance(Half expected, Half actual, Half? tolerance = null) =>
             base.AssertEqualTolerance(expected, actual, tolerance ?? Half.CreateTruncating(0.001));
     }
+
+    [OuterLoop]
     public class NFloatGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests<NFloat> { }
 
+    [OuterLoop]
     public class SByteGenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests<sbyte> { }
     public class Int16GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests<short> { }
+    [OuterLoop]
     public class Int32GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests<int> { }
     public class Int64GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests<long> { }
+    [OuterLoop]
     public class IntPtrGenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests<nint> { }
     public class Int128GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests<Int128> { }
 
     public class ByteGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<byte> { }
+    [OuterLoop]
     public class UInt16GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<ushort> { }
+    [OuterLoop]
     public class CharGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<char> { }
     public class UInt32GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<uint> { }
+    [OuterLoop]
     public class UInt64GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<ulong> { }
 
     public class UIntPtrGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<nuint> { }
+    [OuterLoop]
     public class UInt128GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests<UInt128> { }
 
     public unsafe abstract class GenericFloatingPointNumberTensorPrimitivesTests<T> : GenericNumberTensorPrimitivesTests<T>
@@ -270,13 +298,17 @@ protected override IEnumerable<T> GetSpecialValues()
             yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x7FA0_0000)); // +sNaN
 
             // +Infinity, -Infinity
-            yield return T.CreateTruncating(float.PositiveInfinity);
-            yield return T.CreateTruncating(float.NegativeInfinity);
+            yield return T.PositiveInfinity;
+            yield return T.NegativeInfinity;
 
-            // +Zero, -Zero
+            // +0, -0
             yield return T.Zero;
             yield return T.NegativeZero;
 
+            // +1, -1
+            yield return T.One;
+            yield return T.NegativeOne;
+
             // Subnormals
             yield return T.Epsilon;
             yield return -T.Epsilon;
@@ -286,8 +318,15 @@ protected override IEnumerable<T> GetSpecialValues()
             // Normals
             yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x0080_0000));
             yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x8080_0000));
-            yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x7F7F_FFFF)); // MaxValue
-            yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0xFF7F_FFFF)); // MinValue
+            yield return T.CreateTruncating(float.MinValue);
+            yield return T.CreateTruncating(float.MaxValue);
+            yield return T.CreateTruncating(double.MinValue);
+            yield return T.CreateTruncating(double.MaxValue);
+
+            // Other known constants
+            yield return T.E;
+            yield return T.Pi;
+            yield return T.Tau;
         }
 
         protected override void SetSpecialValues(Span<T> x, Span<T> y)
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
index cd0c1c21e201ec..b0b44fddb0aef5 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
@@ -165,7 +165,7 @@ protected T NextRandom(T avoid)
         /// </summary>
         protected void RunForEachSpecialValue(Action action, BoundedMemory<T> x)
         {
-            foreach (T value in GetSpecialValues())
+            Assert.All(GetSpecialValues(), value =>
             {
                 int pos = Random.Next(x.Length);
                 T orig = x[pos];
@@ -174,7 +174,7 @@ protected void RunForEachSpecialValue(Action action, BoundedMemory<T> x)
                 action();
 
                 x[pos] = orig;
-            }
+            });
         }
         #endregion