@@ -750,6 +750,12 @@ impl {{ self_t }} {
750750 {{ vec4_t }}::from(self).abs_diff_eq({{ vec4_t }}::from(rhs), max_abs_diff)
751751 }
752752
753+ #[inline(always)]
754+ #[must_use]
755+ fn lerp_impl(self, end: Self, s: {{ scalar_t }}) -> Self {
756+ (self * (1.0 - s) + end * s).normalize()
757+ }
758+
753759 /// Performs a linear interpolation between `self` and `rhs` based on
754760 /// the value `s`.
755761 ///
@@ -767,69 +773,41 @@ impl {{ self_t }} {
767773 glam_assert!(end.is_normalized());
768774
769775 {% if is_scalar %}
770- let start = self;
771- let dot = start.dot(end);
776+ let dot = self.dot(end);
772777 let bias = if dot >= 0.0 { 1.0 } else { -1.0 };
773- let interpolated = start.add(end.mul(bias).sub(start).mul(s));
774- interpolated.normalize()
778+ self.lerp_impl(end * bias, s)
775779 {% elif is_sse2 %}
776780 const NEG_ZERO: __m128 = m128_from_f32x4([-0.0; 4]);
777- let start = self.0;
778- let end = end.0;
779781 unsafe {
780- let dot = dot4_into_m128(start, end);
782+ let dot = dot4_into_m128(self.0, end.0);
781783 // Calculate the bias, if the dot product is positive or zero, there is no bias
782784 // but if it is negative, we want to flip the 'end' rotation XYZW components
783785 let bias = _mm_and_ps(dot, NEG_ZERO);
784- let interpolated = _mm_add_ps(
785- _mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
786- start,
787- );
788- {{ self_t }}(interpolated).normalize()
786+ self.lerp_impl(Self(_mm_xor_ps(end.0, bias)), s)
789787 }
790788 {% elif is_wasm32 %}
791789 const NEG_ZERO: v128 = v128_from_f32x4([-0.0; 4]);
792- let start = self.0;
793- let end = end.0;
794- let dot = dot4_into_v128(start, end);
790+ let dot = dot4_into_v128(self.0, end.0);
795791 // Calculate the bias, if the dot product is positive or zero, there is no bias
796792 // but if it is negative, we want to flip the 'end' rotation XYZW components
797793 let bias = v128_and(dot, NEG_ZERO);
798- let interpolated = f32x4_add(
799- f32x4_mul(f32x4_sub(v128_xor(end, bias), start), f32x4_splat(s)),
800- start,
801- );
802- {{ self_t }}(interpolated).normalize()
794+ self.lerp_impl(Self(v128_xor(end.0, bias)), s)
803795 {% elif is_coresimd %}
804796 const NEG_ZERO: f32x4 = f32x4::from_array([-0.0; 4]);
805- let start = self.0;
806- let end = end.0;
807- let dot = dot4_into_f32x4(start, end);
797+ let dot = dot4_into_f32x4(self.0, end.0);
808798 // Calculate the bias, if the dot product is positive or zero, there is no bias
809799 // but if it is negative, we want to flip the 'end' rotation XYZW components
810800 let bias = f32x4_bitand(dot, NEG_ZERO);
811- let interpolated = start + ((f32x4_bitxor(end, bias) - start) * f32x4::splat(s));
812- {{ self_t }}(interpolated).normalize()
801+ self.lerp_impl(Self(f32x4_bitxor(end.0, bias)), s)
813802 {% elif is_neon %}
814803 const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
815- let start = self.0;
816- let end = end.0;
817804 unsafe {
818- let dot = dot4_into_f32x4(start, end);
805+ let dot = dot4_into_f32x4(self.0, end.0);
819806 // Calculate the bias, if the dot product is positive or zero, there is no bias
820807 // but if it is negative, we want to flip the 'end' rotation XYZW components
821808 let bias = vandq_u32(vreinterpretq_u32_f32(dot), vreinterpretq_u32_f32(NEG_ZERO));
822- let interpolated = vaddq_f32(
823- vmulq_f32(
824- vsubq_f32(
825- vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end), bias)),
826- start,
827- ),
828- vld1q_dup_f32(&s),
829- ),
830- start,
831- );
832- {{ self_t }}(interpolated).normalize()
809+ self.lerp_impl(
810+ Self(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end.0), bias))), s)
833811 }
834812 {% else %}
835813 unimplemented!()
@@ -852,8 +830,6 @@ impl {{ self_t }} {
852830 glam_assert!(self.is_normalized());
853831 glam_assert!(end.is_normalized());
854832
855- const DOT_THRESHOLD: {{ scalar_t }} = 0.9995;
856-
857833 // Note that a rotation can be represented by two quaternions: `q` and
858834 // `-q`. The slerp path between `q` and `end` will be different from the
859835 // path between `-q` and `end`. One path will take the long way around and
@@ -866,20 +842,13 @@ impl {{ self_t }} {
866842 dot = -dot;
867843 }
868844
845+ const DOT_THRESHOLD: {{ scalar_t }} = 1.0 - {{ scalar_t }}::EPSILON;
869846 if dot > DOT_THRESHOLD {
870- // assumes lerp returns a normalized quaternion
871- self.lerp(end, s)
847+ // if above threshold perform linear interpolation to avoid divide by zero
848+ self.lerp_impl(end, s)
872849 } else {
873850 let theta = math::acos_approx(dot);
874- {% if is_scalar %}
875- let scale1 = math::sin(theta * (1.0 - s));
876- let scale2 = math::sin(theta * s);
877- let theta_sin = math::sin(theta);
878-
879- self.mul(scale1)
880- .add(end.mul(scale2))
881- .mul(1.0 / theta_sin)
882- {% elif is_sse2 %}
851+ {% if is_sse2 %}
883852 let x = 1.0 - s;
884853 let y = s;
885854 let z = 1.0;
@@ -897,57 +866,11 @@ impl {{ self_t }} {
897866 theta_sin,
898867 ))
899868 }
900- {% elif is_wasm32 %}
901- // TODO: v128_sin is broken
902- // let x = 1.0 - s;
903- // let y = s;
904- // let z = 1.0;
905- // let w = 0.0;
906- // let tmp = f32x4_mul(f32x4_splat(theta), f32x4(x, y, z, w));
907- // let tmp = v128_sin(tmp);
908- let x = math::sin(theta * (1.0 - s));
909- let y = math::sin(theta * s);
910- let z = math::sin(theta);
911- let w = 0.0;
912- let tmp = f32x4(x, y, z, w);
913-
914- let scale1 = i32x4_shuffle::<0, 0, 4, 4>(tmp, tmp);
915- let scale2 = i32x4_shuffle::<1, 1, 5, 5>(tmp, tmp);
916- let theta_sin = i32x4_shuffle::<2, 2, 6, 6>(tmp, tmp);
917-
918- Self(f32x4_div(
919- f32x4_add(f32x4_mul(self.0, scale1), f32x4_mul(end.0, scale2)),
920- theta_sin,
921- ))
922- {% elif is_coresimd %}
923- let x = math::sin(theta * (1.0 - s));
924- let y = math::sin(theta * s);
925- let z = math::sin(theta);
926- let w = 0.0;
927- let tmp = f32x4::from_array([x, y, z, w]);
928-
929- let scale1 = simd_swizzle!(tmp, [0, 0, 0, 0]);
930- let scale2 = simd_swizzle!(tmp, [1, 1, 1, 1]);
931- let theta_sin = simd_swizzle!(tmp, [2, 2, 2, 2]);
932-
933- Self(self.0.mul(scale1).add(end.0.mul(scale2)).div(theta_sin))
934- {% elif is_neon %}
935- let x = math::sin(theta * (1.0 - s));
936- let y = math::sin(theta * s);
937- let z = math::sin(theta);
938- let w = 0.0;
939- unsafe {
940- let tmp = vld1q_f32([x, y, z, w].as_ptr());
941-
942- let scale1 = vdupq_laneq_f32(tmp, 0);
943- let scale2 = vdupq_laneq_f32(tmp, 1);
944- let theta_sin = vdupq_laneq_f32(tmp, 2);
945-
946- Self(vdivq_f32(
947- vaddq_f32(vmulq_f32(self.0, scale1), vmulq_f32(end.0, scale2)),
948- theta_sin,
949- ))
950- }
869+ {% else %}
870+ let scale1 = math::sin(theta * (1.0 - s));
871+ let scale2 = math::sin(theta * s);
872+ let theta_sin = math::sin(theta);
873+ ((self * scale1) + (end * scale2)) * (1.0 / theta_sin)
951874 {% endif %}
952875 }
953876 }
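
For reference, below is a minimal standalone sketch of what this refactor boils down to on the scalar path. The `Quat` struct and its `scale`/`add` helpers are hypothetical stand-ins for glam's generated types and operator impls, not the actual API; the point is how `lerp` and the near-parallel branch of `slerp` now both delegate to a shared `lerp_impl` (a normalized lerp), and how the slerp threshold moves from the hard-coded 0.9995 to `1.0 - EPSILON`.

// Hypothetical, simplified quaternion type for illustration only.
#[derive(Clone, Copy, Debug)]
struct Quat { x: f32, y: f32, z: f32, w: f32 }

impl Quat {
    fn dot(self, rhs: Self) -> f32 {
        self.x * rhs.x + self.y * rhs.y + self.z * rhs.z + self.w * rhs.w
    }

    fn scale(self, s: f32) -> Self {
        Self { x: self.x * s, y: self.y * s, z: self.z * s, w: self.w * s }
    }

    fn add(self, rhs: Self) -> Self {
        Self { x: self.x + rhs.x, y: self.y + rhs.y, z: self.z + rhs.z, w: self.w + rhs.w }
    }

    fn normalize(self) -> Self {
        self.scale(1.0 / self.dot(self).sqrt())
    }

    // Shared helper: normalized linear interpolation (nlerp), no sign handling.
    fn lerp_impl(self, end: Self, s: f32) -> Self {
        self.scale(1.0 - s).add(end.scale(s)).normalize()
    }

    // Public lerp: flip `end` when the inputs lie in opposite hemispheres so the
    // interpolation takes the short path, then delegate to `lerp_impl`.
    fn lerp(self, end: Self, s: f32) -> Self {
        let bias = if self.dot(end) >= 0.0 { 1.0 } else { -1.0 };
        self.lerp_impl(end.scale(bias), s)
    }

    // Spherical linear interpolation with the near-parallel fallback.
    fn slerp(self, mut end: Self, s: f32) -> Self {
        let mut dot = self.dot(end);
        if dot < 0.0 {
            end = end.scale(-1.0);
            dot = -dot;
        }
        const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
        if dot > DOT_THRESHOLD {
            // sin(theta) is effectively zero here, so fall back to the shared
            // nlerp helper instead of dividing by it.
            self.lerp_impl(end, s)
        } else {
            let theta = dot.acos();
            let scale1 = (theta * (1.0 - s)).sin();
            let scale2 = (theta * s).sin();
            self.scale(scale1).add(end.scale(scale2)).scale(1.0 / theta.sin())
        }
    }
}

fn main() {
    let a = Quat { x: 0.0, y: 0.0, z: 0.0, w: 1.0 };
    let b = Quat { x: 0.0, y: 0.0, z: 0.5f32.sin(), w: 0.5f32.cos() };
    println!("lerp:  {:?}", a.lerp(b, 0.25));
    println!("slerp: {:?}", a.slerp(b, 0.25));
}

Splitting things this way keeps the public `lerp` responsible only for the hemisphere flip (the `bias` sign), while `slerp` reuses the same helper exactly where `sin(theta)` would be too close to zero to divide by safely.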