diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 633dd2cfd017a8..dec6446cd7653b 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -13274,6 +13274,28 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)) throw new NotSupportedException(); } + // TODO: The uses of these ApplyScalar methods are all as part of operators when handling edge cases (NaN, Infinity, really large inputs, etc.) + // Currently, these edge cases are not handled in a vectorized way and instead fall back to scalar processing. We can look into + // handling those in a vectorized manner as well. + + private static Vector128 ApplyScalar(Vector128 floats) where TOperator : IUnaryOperator => + Vector128.Create(TOperator.Invoke(floats[0]), TOperator.Invoke(floats[1]), TOperator.Invoke(floats[2]), TOperator.Invoke(floats[3])); + + private static Vector256 ApplyScalar(Vector256 floats) where TOperator : IUnaryOperator => + Vector256.Create(ApplyScalar(floats.GetLower()), ApplyScalar(floats.GetUpper())); + + private static Vector512 ApplyScalar(Vector512 floats) where TOperator : IUnaryOperator => + Vector512.Create(ApplyScalar(floats.GetLower()), ApplyScalar(floats.GetUpper())); + + private static Vector128 ApplyScalar(Vector128 doubles) where TOperator : IUnaryOperator => + Vector128.Create(TOperator.Invoke(doubles[0]), TOperator.Invoke(doubles[1])); + + private static Vector256 ApplyScalar(Vector256 doubles) where TOperator : IUnaryOperator => + Vector256.Create(ApplyScalar(doubles.GetLower()), ApplyScalar(doubles.GetUpper())); + + private static Vector512 ApplyScalar(Vector512 doubles) where TOperator : IUnaryOperator => + 
Vector512.Create(ApplyScalar(doubles.GetLower()), ApplyScalar(doubles.GetUpper())); + /// Creates a span of from a when they're the same type. private static unsafe ReadOnlySpan Rename(ReadOnlySpan span) { @@ -14811,6 +14833,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) /// (x * y) + z internal readonly struct MultiplyAddEstimateOperator : ITernaryOperator where T : INumberBase { + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Invoke(T x, T y, T z) { // TODO https://github.com/dotnet/runtime/issues/98053: Use T.MultiplyAddEstimate when it's available. @@ -14839,6 +14862,7 @@ public static T Invoke(T x, T y, T z) return (x * y) + z; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Invoke(Vector128 x, Vector128 y, Vector128 z) { if (Fma.IsSupported) @@ -14860,6 +14884,7 @@ public static Vector128 Invoke(Vector128 x, Vector128 y, Vector128 z return (x * y) + z; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Invoke(Vector256 x, Vector256 y, Vector256 z) { if (Fma.IsSupported) @@ -14871,6 +14896,7 @@ public static Vector256 Invoke(Vector256 x, Vector256 y, Vector256 z return (x * y) + z; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Invoke(Vector512 x, Vector512 y, Vector512 z) { if (Avx512F.IsSupported) @@ -15952,22 +15978,367 @@ public static Vector512 Invoke(Vector512 x) internal readonly struct CosOperator : IUnaryOperator where T : ITrigonometricFunctions { - public static bool Vectorizable => false; // TODO: Vectorize + // This code is based on `vrs4_cos` and `vrd2_cos` from amd/aocl-libm-ose + // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. 
+ // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Implementation notes from amd/aocl-libm-ose: + // -------------------------------------------- + // To compute cosf(float x) + // Using the identity, + // cos(x) = sin(x + pi/2) (1) + // + // 1. Argument Reduction + // Now, let x be represented as, + // |x| = N * pi + f (2) | N is an integer, + // -pi/2 <= f <= pi/2 + // + // From (2), N = int( (x + pi/2) / pi) - 0.5 + // f = |x| - (N * pi) + // + // 2. Polynomial Evaluation + // From (1) and (2),sin(f) can be calculated using a polynomial + // sin(f) = f*(1 + C1*f^2 + C2*f^4 + C3*f^6 + c4*f^8) + // + // 3. Reconstruction + // Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f) + + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + public static T Invoke(T x) => T.Cos(x); - public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); - public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); - public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + return CosOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return CosOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + return CosOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return CosOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + + public static Vector512 Invoke(Vector512 x) + { + if (typeof(T) == typeof(float)) + { + return CosOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return CosOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + } + + /// float.Cos(x) + private 
readonly struct CosOperatorSingle : IUnaryOperator + { + internal const uint MaxVectorizedValue = 0x4A989680u; + internal const uint SignMask = 0x7FFFFFFFu; + private const float AlmHuge = 1.2582912e7f; + private const float Pi_Tail1 = 8.742278e-8f; + private const float Pi_Tail2 = 3.430249e-15f; + private const float C1 = -0.16666657f; + private const float C2 = 0.008332962f; + private const float C3 = -1.9801206e-4f; + private const float C4 = 2.5867037e-6f; + + public static bool Vectorizable => true; + + public static float Invoke(float x) => float.Cos(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 uxMasked = Vector128.Abs(x).AsUInt32(); + if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector128 r = uxMasked.AsSingle(); + Vector128 almHuge = Vector128.Create(AlmHuge); + Vector128 dn = ((r + Vector128.Create(float.Pi / 2)) * Vector128.Create(1 / float.Pi)) + almHuge; + Vector128 odd = dn.AsUInt32() << 31; + dn = dn - almHuge - Vector128.Create(0.5f); + Vector128 f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_9 + Vector128 f2 = f * f; + Vector128 f4 = f2 * f2; + Vector128 a0 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1)); + Vector128 a1 = MultiplyAddEstimateOperator.Invoke(a0, f2, Vector128.One); + Vector128 a2 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4); + Vector128 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector128 poly = f * a3; + + return (poly.AsUInt32() ^ odd).AsSingle(); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 uxMasked = Vector256.Abs(x).AsUInt32(); + if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector256 r = uxMasked.AsSingle(); + Vector256 almHuge = Vector256.Create(AlmHuge); + 
Vector256 dn = ((r + Vector256.Create(float.Pi / 2)) * Vector256.Create(1 / float.Pi)) + almHuge; + Vector256 odd = dn.AsUInt32() << 31; + dn = dn - almHuge - Vector256.Create(0.5f); + Vector256 f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_9 + Vector256 f2 = f * f; + Vector256 f4 = f2 * f2; + Vector256 a0 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1)); + Vector256 a1 = MultiplyAddEstimateOperator.Invoke(a0, f2, Vector256.One); + Vector256 a2 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4); + Vector256 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector256 poly = f * a3; + + return (poly.AsUInt32() ^ odd).AsSingle(); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 uxMasked = Vector512.Abs(x).AsUInt32(); + if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector512 r = uxMasked.AsSingle(); + Vector512 almHuge = Vector512.Create(AlmHuge); + Vector512 dn = ((r + Vector512.Create(float.Pi / 2)) * Vector512.Create(1 / float.Pi)) + almHuge; + Vector512 odd = dn.AsUInt32() << 31; + dn = dn - almHuge - Vector512.Create(0.5f); + Vector512 f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_9 + Vector512 f2 = f * f; + Vector512 f4 = f2 * f2; + Vector512 a0 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1)); + Vector512 a1 = MultiplyAddEstimateOperator.Invoke(a0, f2, Vector512.One); + Vector512 a2 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4); + Vector512 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector512 poly = f * a3; + + return (poly.AsUInt32() ^ odd).AsSingle(); + } + } + + /// double.Cos(x) + internal readonly struct CosOperatorDouble : 
IUnaryOperator + { + internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul; + internal const ulong MaxVectorizedValue = 0x4160000000000000ul; + private const double AlmHuge = 6.755399441055744E15; + private const double Pi_Tail2 = -1.2246467991473532E-16; + private const double Pi_Tail3 = 2.9947698097183397E-33; + private const double C1 = -0.16666666666666666; + private const double C2 = 0.008333333333333165; + private const double C3 = -1.984126984120184E-4; + private const double C4 = 2.7557319210152756E-6; + private const double C5 = -2.5052106798274616E-8; + private const double C6 = 1.6058936490373254E-10; + private const double C7 = -7.642917806937501E-13; + private const double C8 = 2.7204790963151784E-15; + + public static bool Vectorizable => true; + + public static double Invoke(double x) => double.Cos(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 uxMasked = Vector128.Abs(x).AsUInt64(); + if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector128 r = uxMasked.AsDouble(); + Vector128 almHuge = Vector128.Create(AlmHuge); + Vector128 dn = (r * Vector128.Create(1 / double.Pi)) + Vector128.Create(0.5) + almHuge; // N = round(|x| / pi + 1/2), i.e. round((|x| + pi/2) / pi); keeps f within [-pi/2, pi/2] + Vector128 odd = dn.AsUInt64() << 63; + dn = dn - almHuge - Vector128.Create(0.5); + Vector128 f = r + (dn * Vector128.Create(-double.Pi)) + (dn * Vector128.Create(Pi_Tail2)) + (dn * Vector128.Create(Pi_Tail3)); + + // POLY_EVAL_ODD_17 + Vector128 f2 = f * f; + Vector128 f4 = f2 * f2; + Vector128 f6 = f4 * f2; + Vector128 f10 = f6 * f4; + Vector128 f14 = f10 * f4; + Vector128 a1 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1)); + Vector128 a2 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3)); + Vector128 a3 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5)); + Vector128 a4 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C8), f2, 
Vector128.Create(C7)); + Vector128 b1 = MultiplyAddEstimateOperator.Invoke(a1, f2, a2 * f6); + Vector128 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); + Vector128 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); + + return (poly.AsUInt64() ^ odd).AsDouble(); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 uxMasked = Vector256.Abs(x).AsUInt64(); + if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector256 r = uxMasked.AsDouble(); + Vector256 almHuge = Vector256.Create(AlmHuge); + Vector256 dn = (r * Vector256.Create(1 / double.Pi)) + Vector256.Create(0.5) + almHuge; // N = round(|x| / pi + 1/2), i.e. round((|x| + pi/2) / pi); keeps f within [-pi/2, pi/2] + Vector256 odd = dn.AsUInt64() << 63; + dn = dn - almHuge - Vector256.Create(0.5); + Vector256 f = r + (dn * Vector256.Create(-double.Pi)) + (dn * Vector256.Create(Pi_Tail2)) + (dn * Vector256.Create(Pi_Tail3)); + + // POLY_EVAL_ODD_17 + Vector256 f2 = f * f; + Vector256 f4 = f2 * f2; + Vector256 f6 = f4 * f2; + Vector256 f10 = f6 * f4; + Vector256 f14 = f10 * f4; + Vector256 a1 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1)); + Vector256 a2 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3)); + Vector256 a3 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5)); + Vector256 a4 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C8), f2, Vector256.Create(C7)); + Vector256 b1 = MultiplyAddEstimateOperator.Invoke(a1, f2, a2 * f6); + Vector256 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); + Vector256 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); + + return (poly.AsUInt64() ^ odd).AsDouble(); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 uxMasked = Vector512.Abs(x).AsUInt64(); + if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector512 r = uxMasked.AsDouble(); + Vector512 
almHuge = Vector512.Create(AlmHuge); + Vector512 dn = (r * Vector512.Create(1 / double.Pi)) + Vector512.Create(0.5) + almHuge; // N = round(|x| / pi + 1/2), i.e. round((|x| + pi/2) / pi); keeps f within [-pi/2, pi/2] + Vector512 odd = dn.AsUInt64() << 63; + dn = dn - almHuge - Vector512.Create(0.5); + Vector512 f = r + (dn * Vector512.Create(-double.Pi)) + (dn * Vector512.Create(Pi_Tail2)) + (dn * Vector512.Create(Pi_Tail3)); + + // POLY_EVAL_ODD_17 + Vector512 f2 = f * f; + Vector512 f4 = f2 * f2; + Vector512 f6 = f4 * f2; + Vector512 f10 = f6 * f4; + Vector512 f14 = f10 * f4; + Vector512 a1 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1)); + Vector512 a2 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3)); + Vector512 a3 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5)); + Vector512 a4 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C8), f2, Vector512.Create(C7)); + Vector512 b1 = MultiplyAddEstimateOperator.Invoke(a1, f2, a2 * f6); + Vector512 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); + Vector512 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); + + return (poly.AsUInt64() ^ odd).AsDouble(); + } } /// T.CosPi(x) internal readonly struct CosPiOperator : IUnaryOperator where T : ITrigonometricFunctions { - public static bool Vectorizable => CosOperator.Vectorizable; + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + public static T Invoke(T x) => T.CosPi(x); - public static Vector128 Invoke(Vector128 x) => CosOperator.Invoke(x * Vector128.Create(T.Pi)); - public static Vector256 Invoke(Vector256 x) => CosOperator.Invoke(x * Vector256.Create(T.Pi)); - public static Vector512 Invoke(Vector512 x) => CosOperator.Invoke(x * Vector512.Create(T.Pi)); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 xpi = x * Vector128.Create(T.Pi); + if (typeof(T) == typeof(float)) + { + if (Vector128.GreaterThanAny(xpi.AsUInt32() & 
Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsSingle()).As(); + } + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(CosOperatorDouble.SignMask), Vector128.Create(CosOperatorDouble.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsDouble()).As(); + } + } + + return CosOperator.Invoke(xpi); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 xpi = x * Vector256.Create(T.Pi); + if (typeof(T) == typeof(float)) + { + if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsSingle()).As(); + } + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(CosOperatorDouble.SignMask), Vector256.Create(CosOperatorDouble.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsDouble()).As(); + } + } + + return CosOperator.Invoke(xpi); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 xpi = x * Vector512.Create(T.Pi); + if (typeof(T) == typeof(float)) + { + if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsSingle()).As(); + } + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(CosOperatorDouble.SignMask), Vector512.Create(CosOperatorDouble.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsDouble()).As(); + } + } + + return CosOperator.Invoke(xpi); + } } /// T.Cosh(x) @@ -15995,13 +16366,13 @@ public static Vector512 Invoke(Vector512 x) // // coshf = v/2 * exp(x - log(v)) where v = 0x1.0000e8p-1 - private const float SINGLE_LOGV = 0.693161f; - private const float SINGLE_HALFV = 1.0000138f; - private const 
float SINGLE_INVV2 = 0.24999309f; + private const float Single_LOGV = 0.693161f; + private const float Single_HALFV = 1.0000138f; + private const float Single_INVV2 = 0.24999309f; - private const double DOUBLE_LOGV = 0.6931471805599453; - private const double DOUBLE_HALFV = 1.0; - private const double DOUBLE_INVV2 = 0.25; + private const double Double_LOGV = 0.6931471805599453; + private const double Double_HALFV = 1.0; + private const double Double_INVV2 = 0.25; public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); @@ -16014,8 +16385,8 @@ public static Vector128 Invoke(Vector128 t) Vector128 x = t.AsSingle(); Vector128 y = Vector128.Abs(x); - Vector128 z = ExpOperator.Invoke(y - Vector128.Create((float)SINGLE_LOGV)); - return (Vector128.Create((float)SINGLE_HALFV) * (z + (Vector128.Create((float)SINGLE_INVV2) / z))).As(); + Vector128 z = ExpOperator.Invoke(y - Vector128.Create((float)Single_LOGV)); + return (Vector128.Create((float)Single_HALFV) * (z + (Vector128.Create((float)Single_INVV2) / z))).As(); } else { @@ -16023,8 +16394,8 @@ public static Vector128 Invoke(Vector128 t) Vector128 x = t.AsDouble(); Vector128 y = Vector128.Abs(x); - Vector128 z = ExpOperator.Invoke(y - Vector128.Create(DOUBLE_LOGV)); - return (Vector128.Create(DOUBLE_HALFV) * (z + (Vector128.Create(DOUBLE_INVV2) / z))).As(); + Vector128 z = ExpOperator.Invoke(y - Vector128.Create(Double_LOGV)); + return (Vector128.Create(Double_HALFV) * (z + (Vector128.Create(Double_INVV2) / z))).As(); } } @@ -16035,8 +16406,8 @@ public static Vector256 Invoke(Vector256 t) Vector256 x = t.AsSingle(); Vector256 y = Vector256.Abs(x); - Vector256 z = ExpOperator.Invoke(y - Vector256.Create((float)SINGLE_LOGV)); - return (Vector256.Create((float)SINGLE_HALFV) * (z + (Vector256.Create((float)SINGLE_INVV2) / z))).As(); + Vector256 z = ExpOperator.Invoke(y - Vector256.Create((float)Single_LOGV)); + return (Vector256.Create((float)Single_HALFV) * (z + 
(Vector256.Create((float)Single_INVV2) / z))).As(); } else { @@ -16044,8 +16415,8 @@ public static Vector256 Invoke(Vector256 t) Vector256 x = t.AsDouble(); Vector256 y = Vector256.Abs(x); - Vector256 z = ExpOperator.Invoke(y - Vector256.Create(DOUBLE_LOGV)); - return (Vector256.Create(DOUBLE_HALFV) * (z + (Vector256.Create(DOUBLE_INVV2) / z))).As(); + Vector256 z = ExpOperator.Invoke(y - Vector256.Create(Double_LOGV)); + return (Vector256.Create(Double_HALFV) * (z + (Vector256.Create(Double_INVV2) / z))).As(); } } @@ -16056,8 +16427,8 @@ public static Vector512 Invoke(Vector512 t) Vector512 x = t.AsSingle(); Vector512 y = Vector512.Abs(x); - Vector512 z = ExpOperator.Invoke(y - Vector512.Create((float)SINGLE_LOGV)); - return (Vector512.Create((float)SINGLE_HALFV) * (z + (Vector512.Create((float)SINGLE_INVV2) / z))).As(); + Vector512 z = ExpOperator.Invoke(y - Vector512.Create((float)Single_LOGV)); + return (Vector512.Create((float)Single_HALFV) * (z + (Vector512.Create((float)Single_INVV2) / z))).As(); } else { @@ -16065,8 +16436,8 @@ public static Vector512 Invoke(Vector512 t) Vector512 x = t.AsDouble(); Vector512 y = Vector512.Abs(x); - Vector512 z = ExpOperator.Invoke(y - Vector512.Create(DOUBLE_LOGV)); - return (Vector512.Create(DOUBLE_HALFV) * (z + (Vector512.Create(DOUBLE_INVV2) / z))).As(); + Vector512 z = ExpOperator.Invoke(y - Vector512.Create(Double_LOGV)); + return (Vector512.Create(Double_HALFV) * (z + (Vector512.Create(Double_INVV2) / z))).As(); } } } @@ -16075,22 +16446,369 @@ public static Vector512 Invoke(Vector512 t) internal readonly struct SinOperator : IUnaryOperator where T : ITrigonometricFunctions { - public static bool Vectorizable => false; // TODO: Vectorize + // This code is based on `vrs4_sin` and `vrd2_sin` from amd/aocl-libm-ose + // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. 
+ // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Implementation notes from amd/aocl-libm-ose: + // ----------------------------------------------------------------- + // Convert given x into the form + // |x| = N * pi + f where N is an integer and f lies in [-pi/2,pi/2] + // N is obtained by : N = round(x/pi) + // f is obtained by : f = abs(x)-N*pi + // sin(x) = sin(N * pi + f) = sin(N * pi)*cos(f) + cos(N*pi)*sin(f) + // sin(x) = sign(x)*sin(f)*(-1)**N + // + // The term sin(f) can be approximated by using a polynomial + + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + public static T Invoke(T x) => T.Sin(x); - public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); - public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); - public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + return SinOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return SinOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + return SinOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return SinOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + + public static Vector512 Invoke(Vector512 x) + { + if (typeof(T) == typeof(float)) + { + return SinOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return SinOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + } + + /// float.Sin(x) + private readonly struct SinOperatorSingle : IUnaryOperator + { + internal const uint SignMask = 0x7FFFFFFFu; + internal const uint MaxVectorizedValue = 0x49800000u; + 
private const float AlmHuge = 1.2582912e7f; + private const float Pi_Tail1 = 8.742278e-8f; + private const float Pi_Tail2 = 3.430249e-15f; + private const float C1 = -0.16666657f; + private const float C2 = 0.0083330255f; + private const float C3 = -1.980742e-4f; + private const float C4 = 2.6019031e-6f; + + public static bool Vectorizable => true; + + public static float Invoke(float x) => float.Sin(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 sign = x.AsUInt32() & Vector128.Create(~SignMask); + Vector128 uxMasked = Vector128.Abs(x).AsUInt32(); + + if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector128 r = uxMasked.AsSingle(); + Vector128 almHuge = Vector128.Create(AlmHuge); + Vector128 dn = (r * Vector128.Create(1 / float.Pi)) + almHuge; + Vector128 odd = dn.AsUInt32() << 31; + dn -= almHuge; + Vector128 f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_9 + Vector128 f2 = f * f; + Vector128 f4 = f2 * f2; + Vector128 a0 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1)); + Vector128 a1 = MultiplyAddEstimateOperator.Invoke(a0, f2, Vector128.One); + Vector128 a2 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C3), f2, Vector128.Create(C4) * f4); + Vector128 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector128 poly = f * a3; + + return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 sign = x.AsUInt32() & Vector256.Create(~SignMask); + Vector256 uxMasked = Vector256.Abs(x).AsUInt32(); + + if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector256 r = uxMasked.AsSingle(); + Vector256 almHuge = Vector256.Create(AlmHuge); + Vector256 dn = (r * Vector256.Create(1 / float.Pi)) + almHuge; + Vector256 odd = dn.AsUInt32() 
<< 31; + dn -= almHuge; + Vector256 f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_9 + Vector256 f2 = f * f; + Vector256 f4 = f2 * f2; + Vector256 a0 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1)); + Vector256 a1 = MultiplyAddEstimateOperator.Invoke(a0, f2, Vector256.One); + Vector256 a2 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C3), f2, Vector256.Create(C4) * f4); + Vector256 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector256 poly = f * a3; + + return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 sign = x.AsUInt32() & Vector512.Create(~SignMask); + Vector512 uxMasked = Vector512.Abs(x).AsUInt32(); + + if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector512 r = uxMasked.AsSingle(); + Vector512 almHuge = Vector512.Create(AlmHuge); + Vector512 dn = (r * Vector512.Create(1 / float.Pi)) + almHuge; + Vector512 odd = dn.AsUInt32() << 31; + dn -= almHuge; + Vector512 f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_9 + Vector512 f2 = f * f; + Vector512 f4 = f2 * f2; + Vector512 a0 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1)); + Vector512 a1 = MultiplyAddEstimateOperator.Invoke(a0, f2, Vector512.One); + Vector512 a2 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C3), f2, Vector512.Create(C4) * f4); + Vector512 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector512 poly = f * a3; + + return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); + } + } + + /// double.Sin(x) + private readonly struct SinOperatorDouble : IUnaryOperator + { + internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul; + internal const ulong MaxVectorizedValue = 0x4160000000000000ul; + 
private const double AlmHuge = 6.755399441055744e15; + private const double Pi_Tail1 = 1.224646799147353e-16; + private const double Pi_Tail2 = 2.165713347843828e-32; + private const double C0 = -0.16666666666666666; + private const double C2 = 0.008333333333333165; + private const double C4 = -1.984126984120184e-4; + private const double C6 = 2.7557319210152756e-6; + private const double C8 = -2.5052106798274583e-8; + private const double C10 = 1.605893649037159e-10; + private const double C12 = -7.642917806891047e-13; + private const double C14 = 2.7204790957888847e-15; + + public static bool Vectorizable => true; + + public static double Invoke(double x) => double.Sin(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 sign = x.AsUInt64() & Vector128.Create(~SignMask); + Vector128 uxMasked = Vector128.Abs(x).AsUInt64(); + + if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector128 r = uxMasked.AsDouble(); + Vector128 almHuge = Vector128.Create(AlmHuge); + Vector128 dn = (r * Vector128.Create(1 / double.Pi)) + almHuge; + Vector128 odd = dn.AsUInt64() << 63; + dn -= almHuge; + Vector128 f = r - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_17 + Vector128 f2 = f * f; + Vector128 f4 = f2 * f2; + Vector128 f6 = f4 * f2; + Vector128 f10 = f6 * f4; + Vector128 f14 = f10 * f4; + Vector128 a1 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C2), f2, Vector128.Create(C0)); + Vector128 a2 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C6), f2, Vector128.Create(C4)); + Vector128 a3 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C10), f2, Vector128.Create(C8)); + Vector128 a4 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C14), f2, Vector128.Create(C12)); + Vector128 b1 = MultiplyAddEstimateOperator.Invoke(a1, f2, a2 * f6); + Vector128 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, 
f14 * a4); + Vector128 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); + + return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 sign = x.AsUInt64() & Vector256.Create(~SignMask); + Vector256 uxMasked = Vector256.Abs(x).AsUInt64(); + + if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector256 r = uxMasked.AsDouble(); + Vector256 almHuge = Vector256.Create(AlmHuge); + Vector256 dn = (r * Vector256.Create(1 / double.Pi)) + almHuge; + Vector256 odd = dn.AsUInt64() << 63; + dn -= almHuge; + Vector256 f = r - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_17 + Vector256 f2 = f * f; + Vector256 f4 = f2 * f2; + Vector256 f6 = f4 * f2; + Vector256 f10 = f6 * f4; + Vector256 f14 = f10 * f4; + Vector256 a1 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C2), f2, Vector256.Create(C0)); + Vector256 a2 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C6), f2, Vector256.Create(C4)); + Vector256 a3 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C10), f2, Vector256.Create(C8)); + Vector256 a4 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C14), f2, Vector256.Create(C12)); + Vector256 b1 = MultiplyAddEstimateOperator.Invoke(a1, f2, a2 * f6); + Vector256 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); + Vector256 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); + + return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 sign = x.AsUInt64() & Vector512.Create(~SignMask); + Vector512 uxMasked = Vector512.Abs(x).AsUInt64(); + + if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector512 r = uxMasked.AsDouble(); + Vector512 almHuge = Vector512.Create(AlmHuge); + Vector512 dn = (r * 
Vector512.Create(1 / double.Pi)) + almHuge; + Vector512 odd = dn.AsUInt64() << 63; + dn -= almHuge; + Vector512 f = r - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2)); + + // POLY_EVAL_ODD_17 + Vector512 f2 = f * f; + Vector512 f4 = f2 * f2; + Vector512 f6 = f4 * f2; + Vector512 f10 = f6 * f4; + Vector512 f14 = f10 * f4; + Vector512 a1 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C2), f2, Vector512.Create(C0)); + Vector512 a2 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C6), f2, Vector512.Create(C4)); + Vector512 a3 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C10), f2, Vector512.Create(C8)); + Vector512 a4 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C14), f2, Vector512.Create(C12)); + Vector512 b1 = MultiplyAddEstimateOperator.Invoke(a1, f2, a2 * f6); + Vector512 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); + Vector512 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); + + return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); + } } /// T.SinPi(x) internal readonly struct SinPiOperator : IUnaryOperator where T : ITrigonometricFunctions { - public static bool Vectorizable => SinOperator.Vectorizable; + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + public static T Invoke(T x) => T.SinPi(x); - public static Vector128 Invoke(Vector128 x) => SinOperator.Invoke(x * Vector128.Create(T.Pi)); - public static Vector256 Invoke(Vector256 x) => SinOperator.Invoke(x * Vector256.Create(T.Pi)); - public static Vector512 Invoke(Vector512 x) => SinOperator.Invoke(x * Vector512.Create(T.Pi)); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 xpi = x * Vector128.Create(T.Pi); + if (typeof(T) == typeof(float)) + { + if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsSingle()).As(); + 
} + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(SinOperatorDouble.SignMask), Vector128.Create(SinOperatorDouble.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsDouble()).As(); + } + } + + return SinOperator.Invoke(xpi); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 xpi = x * Vector256.Create(T.Pi); + if (typeof(T) == typeof(float)) + { + if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsSingle()).As(); + } + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(SinOperatorDouble.SignMask), Vector256.Create(SinOperatorDouble.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsDouble()).As(); + } + } + + return SinOperator.Invoke(xpi); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 xpi = x * Vector512.Create(T.Pi); + if (typeof(T) == typeof(float)) + { + if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsSingle()).As(); + } + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(SinOperatorDouble.SignMask), Vector512.Create(SinOperatorDouble.MaxVectorizedValue))) + { + return ApplyScalar>(x.AsDouble()).As(); + } + } + + return SinOperator.Invoke(xpi); + } } /// T.Sinh(x) @@ -16100,13 +16818,13 @@ public static Vector512 Invoke(Vector512 t) // Same as cosh, but with `z -` rather than `z +`, and with the sign // flipped on the result based on the sign of the input. 
- private const float SINGLE_LOGV = 0.693161f; - private const float SINGLE_HALFV = 1.0000138f; - private const float SINGLE_INVV2 = 0.24999309f; + private const float Single_LOGV = 0.693161f; + private const float Single_HALFV = 1.0000138f; + private const float Single_INVV2 = 0.24999309f; - private const double DOUBLE_LOGV = 0.6931471805599453; - private const double DOUBLE_HALFV = 1.0; - private const double DOUBLE_INVV2 = 0.25; + private const double Double_LOGV = 0.6931471805599453; + private const double Double_HALFV = 1.0; + private const double Double_INVV2 = 0.25; public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); @@ -16119,8 +16837,8 @@ public static Vector128 Invoke(Vector128 t) Vector128 x = t.AsSingle(); Vector128 y = Vector128.Abs(x); - Vector128 z = ExpOperator.Invoke(y - Vector128.Create((float)SINGLE_LOGV)); - Vector128 result = Vector128.Create((float)SINGLE_HALFV) * (z - (Vector128.Create((float)SINGLE_INVV2) / z)); + Vector128 z = ExpOperator.Invoke(y - Vector128.Create((float)Single_LOGV)); + Vector128 result = Vector128.Create((float)Single_HALFV) * (z - (Vector128.Create((float)Single_INVV2) / z)); Vector128 sign = x.AsUInt32() & Vector128.Create(~(uint)int.MaxValue); return (sign ^ result.AsUInt32()).As(); } @@ -16130,8 +16848,8 @@ public static Vector128 Invoke(Vector128 t) Vector128 x = t.AsDouble(); Vector128 y = Vector128.Abs(x); - Vector128 z = ExpOperator.Invoke(y - Vector128.Create(DOUBLE_LOGV)); - Vector128 result = Vector128.Create(DOUBLE_HALFV) * (z - (Vector128.Create(DOUBLE_INVV2) / z)); + Vector128 z = ExpOperator.Invoke(y - Vector128.Create(Double_LOGV)); + Vector128 result = Vector128.Create(Double_HALFV) * (z - (Vector128.Create(Double_INVV2) / z)); Vector128 sign = x.AsUInt64() & Vector128.Create(~(ulong)long.MaxValue); return (sign ^ result.AsUInt64()).As(); } @@ -16144,8 +16862,8 @@ public static Vector256 Invoke(Vector256 t) Vector256 x = t.AsSingle(); Vector256 y = 
Vector256.Abs(x); - Vector256 z = ExpOperator.Invoke(y - Vector256.Create((float)SINGLE_LOGV)); - Vector256 result = Vector256.Create((float)SINGLE_HALFV) * (z - (Vector256.Create((float)SINGLE_INVV2) / z)); + Vector256 z = ExpOperator.Invoke(y - Vector256.Create((float)Single_LOGV)); + Vector256 result = Vector256.Create((float)Single_HALFV) * (z - (Vector256.Create((float)Single_INVV2) / z)); Vector256 sign = x.AsUInt32() & Vector256.Create(~(uint)int.MaxValue); return (sign ^ result.AsUInt32()).As(); } @@ -16155,8 +16873,8 @@ public static Vector256 Invoke(Vector256 t) Vector256 x = t.AsDouble(); Vector256 y = Vector256.Abs(x); - Vector256 z = ExpOperator.Invoke(y - Vector256.Create(DOUBLE_LOGV)); - Vector256 result = Vector256.Create(DOUBLE_HALFV) * (z - (Vector256.Create(DOUBLE_INVV2) / z)); + Vector256 z = ExpOperator.Invoke(y - Vector256.Create(Double_LOGV)); + Vector256 result = Vector256.Create(Double_HALFV) * (z - (Vector256.Create(Double_INVV2) / z)); Vector256 sign = x.AsUInt64() & Vector256.Create(~(ulong)long.MaxValue); return (sign ^ result.AsUInt64()).As(); } @@ -16169,8 +16887,8 @@ public static Vector512 Invoke(Vector512 t) Vector512 x = t.AsSingle(); Vector512 y = Vector512.Abs(x); - Vector512 z = ExpOperator.Invoke(y - Vector512.Create((float)SINGLE_LOGV)); - Vector512 result = Vector512.Create((float)SINGLE_HALFV) * (z - (Vector512.Create((float)SINGLE_INVV2) / z)); + Vector512 z = ExpOperator.Invoke(y - Vector512.Create((float)Single_LOGV)); + Vector512 result = Vector512.Create((float)Single_HALFV) * (z - (Vector512.Create((float)Single_INVV2) / z)); Vector512 sign = x.AsUInt32() & Vector512.Create(~(uint)int.MaxValue); return (sign ^ result.AsUInt32()).As(); } @@ -16180,8 +16898,8 @@ public static Vector512 Invoke(Vector512 t) Vector512 x = t.AsDouble(); Vector512 y = Vector512.Abs(x); - Vector512 z = ExpOperator.Invoke(y - Vector512.Create(DOUBLE_LOGV)); - Vector512 result = Vector512.Create(DOUBLE_HALFV) * (z - 
(Vector512.Create(DOUBLE_INVV2) / z)); + Vector512 z = ExpOperator.Invoke(y - Vector512.Create(Double_LOGV)); + Vector512 result = Vector512.Create(Double_HALFV) * (z - (Vector512.Create(Double_INVV2) / z)); Vector512 sign = x.AsUInt64() & Vector512.Create(~(ulong)long.MaxValue); return (sign ^ result.AsUInt64()).As(); } @@ -16824,7 +17542,7 @@ public static Vector128 Invoke(Vector128 x) vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); - Vector128 r = vx.AsSingle() - Vector128.Create(1.0f); + Vector128 r = vx.AsSingle() - Vector128.One; Vector128 r2 = r * r; Vector128 r4 = r2 * r2; @@ -16893,7 +17611,7 @@ public static Vector256 Invoke(Vector256 x) vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); - Vector256 r = vx.AsSingle() - Vector256.Create(1.0f); + Vector256 r = vx.AsSingle() - Vector256.One; Vector256 r2 = r * r; Vector256 r4 = r2 * r2; @@ -16962,7 +17680,7 @@ public static Vector512 Invoke(Vector512 x) vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); - Vector512 r = vx.AsSingle() - Vector512.Create(1.0f); + Vector512 r = vx.AsSingle() - Vector512.One; Vector512 r2 = r * r; Vector512 r4 = r2 * r2; @@ -17496,7 +18214,7 @@ public static Vector128 Invoke(Vector128 x) vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); - Vector128 r = vx.AsSingle() - Vector128.Create(1.0f); + Vector128 r = vx.AsSingle() - Vector128.One; Vector128 r2 = r * r; Vector128 r4 = r2 * r2; @@ -17565,7 +18283,7 @@ public static Vector256 Invoke(Vector256 x) vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); - Vector256 r = vx.AsSingle() - Vector256.Create(1.0f); + Vector256 r = vx.AsSingle() - Vector256.One; Vector256 r2 = r * r; Vector256 r4 = r2 * r2; @@ -17634,7 +18352,7 @@ public static Vector512 Invoke(Vector512 x) vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); - Vector512 r = vx.AsSingle() - Vector512.Create(1.0f); + Vector512 r = vx.AsSingle() - Vector512.One; Vector512 r2 = r * r; Vector512 r4 = 
r2 * r2; diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index 2bafef98fd20ea..cd6ae2455491c8 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -18,9 +18,29 @@ namespace System.Numerics.Tensors.Tests { public class ConvertTests { - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBuiltWithAggressiveTrimming))] + [Fact] [SkipOnCoreClr("Depends heavily on folded type comparisons", RuntimeTestModes.JitMinOpts)] public void ConvertTruncatingAndSaturating() + { + // A few cases. More exhaustive testing is done in the OuterLoop test. + + ConvertTruncatingImpl(); + ConvertTruncatingImpl(); + ConvertTruncatingImpl(); + ConvertTruncatingImpl(); + ConvertTruncatingImpl(); + + ConvertSaturatingImpl(); + ConvertSaturatingImpl(); + ConvertSaturatingImpl(); + ConvertSaturatingImpl(); + ConvertSaturatingImpl(); + } + + [OuterLoop] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBuiltWithAggressiveTrimming))] + [SkipOnCoreClr("Depends heavily on folded type comparisons", RuntimeTestModes.JitMinOpts)] + public void ConvertTruncatingAndSaturating_Outerloop() { MethodInfo convertTruncatingImpl = typeof(ConvertTests).GetMethod(nameof(ConvertTruncatingImpl), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance); Assert.NotNull(convertTruncatingImpl); @@ -54,11 +74,8 @@ public void ConvertChecked() { // Conversions that never overflow. This isn't an exhaustive list; just a sampling. 
ConvertCheckedImpl(); - ConvertCheckedImpl(); ConvertCheckedImpl(); ConvertCheckedImpl(); - ConvertCheckedImpl(); - ConvertCheckedImpl(); ConvertCheckedImpl(); ConvertCheckedImpl(); ConvertCheckedImpl(); @@ -78,12 +95,12 @@ private static void ConvertTruncatingImpl() { AssertExtensions.Throws("destination", () => TensorPrimitives.ConvertTruncating(new TFrom[3], new TTo[2])); + Random rand = new(42); foreach (int tensorLength in Helpers.TensorLengthsIncluding0) { using BoundedMemory source = BoundedMemory.Allocate(tensorLength); using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); - Random rand = new(42); Span sourceSpan = source.Span; for (int i = 0; i < tensorLength; i++) { @@ -110,12 +127,12 @@ private static void ConvertSaturatingImpl() { AssertExtensions.Throws("destination", () => TensorPrimitives.ConvertSaturating(new TFrom[3], new TTo[2])); + Random rand = new(42); foreach (int tensorLength in Helpers.TensorLengthsIncluding0) { using BoundedMemory source = BoundedMemory.Allocate(tensorLength); using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); - Random rand = new(42); Span sourceSpan = source.Span; for (int i = 0; i < tensorLength; i++) { @@ -209,6 +226,8 @@ private static bool IsEqualWithTolerance(T expected, T actual, T? tolerance = } } + // The tests for some types have been marked as OuterLoop simply to decrease inner loop testing time. + public class DoubleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests { } public class SingleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests { } public class HalfGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests @@ -216,22 +235,31 @@ public class HalfGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimi protected override void AssertEqualTolerance(Half expected, Half actual, Half? tolerance = null) => base.AssertEqualTolerance(expected, actual, tolerance ?? 
Half.CreateTruncating(0.001)); } + + [OuterLoop] public class NFloatGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests { } + [OuterLoop] public class SByteGenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests { } public class Int16GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests { } + [OuterLoop] public class Int32GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests { } public class Int64GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests { } + [OuterLoop] public class IntPtrGenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests { } public class Int128GenericTensorPrimitives : GenericSignedIntegerTensorPrimitivesTests { } public class ByteGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } + [OuterLoop] public class UInt16GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } + [OuterLoop] public class CharGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } public class UInt32GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } + [OuterLoop] public class UInt64GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } public class UIntPtrGenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } + [OuterLoop] public class UInt128GenericTensorPrimitives : GenericIntegerTensorPrimitivesTests { } public unsafe abstract class GenericFloatingPointNumberTensorPrimitivesTests : GenericNumberTensorPrimitivesTests @@ -270,13 +298,17 @@ protected override IEnumerable GetSpecialValues() yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x7FA0_0000)); // +sNaN // +Infinity, -Infinity - yield return T.CreateTruncating(float.PositiveInfinity); - yield return T.CreateTruncating(float.NegativeInfinity); + yield return T.PositiveInfinity; + yield return T.NegativeInfinity; - // +Zero, -Zero + // +0, -0 yield return T.Zero; yield return T.NegativeZero; + // +1, -1 + yield return T.One; + yield return 
T.NegativeOne; + // Subnormals yield return T.Epsilon; yield return -T.Epsilon; @@ -286,8 +318,15 @@ protected override IEnumerable GetSpecialValues() // Normals yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x0080_0000)); yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x8080_0000)); - yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0x7F7F_FFFF)); // MaxValue - yield return T.CreateTruncating(BitConverter.UInt32BitsToSingle(0xFF7F_FFFF)); // MinValue + yield return T.CreateTruncating(float.MinValue); + yield return T.CreateTruncating(float.MaxValue); + yield return T.CreateTruncating(double.MinValue); + yield return T.CreateTruncating(double.MaxValue); + + // Other known constants + yield return T.E; + yield return T.Pi; + yield return T.Tau; } protected override void SetSpecialValues(Span x, Span y) diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index cd0c1c21e201ec..b0b44fddb0aef5 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -165,7 +165,7 @@ protected T NextRandom(T avoid) /// protected void RunForEachSpecialValue(Action action, BoundedMemory x) { - foreach (T value in GetSpecialValues()) + Assert.All(GetSpecialValues(), value => { int pos = Random.Next(x.Length); T orig = x[pos]; @@ -174,7 +174,7 @@ protected void RunForEachSpecialValue(Action action, BoundedMemory x) action(); x[pos] = orig; - } + }); } #endregion