summaryrefslogtreecommitdiff
diff options
authorPhilippe Michel <[email protected]>2025-09-22 22:10:11 +0159
committerPhilippe Michel <[email protected]>2025-09-22 22:10:11 +0159
commitad740cba533c66f5b5063df6218612b72fd82bdc (patch)
tree6379e2b01a20be1677eae5fbdd95fd95f5c58977
parent8cd047269a57e986fe336af265f7daafb5398805 (diff)
downloadgnubg-master.tar.gz
Fix inconsistency in sigmoid interface [HEAD, master]
The scalar version was called as output = sigmoid(-beta * sum(inputs)), whereas the SIMD version was called as output = sigmoid(beta * sum(inputs)). Fix the SIMD version so that it follows the scalar convention, and update its uses accordingly.
-rw-r--r-- lib/neuralnetsse.c 12
1 files changed, 6 insertions, 6 deletions
diff --git a/lib/neuralnetsse.c b/lib/neuralnetsse.c
index 3251584e..543db552 100644
--- a/lib/neuralnetsse.c
+++ b/lib/neuralnetsse.c
@@ -242,20 +242,20 @@ sigmoid_ps(float_vector xin)
float_vector c;
xin = _mm256_and_ps(xin, abs_mask.ps); /* Abs. value by clearing signbit */
c = sigmoid_positive_ps(xin);
- return _mm256_blendv_ps(_mm256_sub_ps(ones.ps, c), c, mask);
+ return _mm256_blendv_ps(c, _mm256_sub_ps(ones.ps, c), mask);
#elif defined(HAVE_SSE)
float_vector mask = _mm_cmplt_ps(xin, _mm_setzero_ps());
float_vector c;
xin = _mm_and_ps(xin, abs_mask.ps); /* Abs. value by clearing signbit */
c = sigmoid_positive_ps(xin);
/* _mm_blendv_ps() is only available with SSE4.1 or later */
- return _mm_or_ps(_mm_and_ps(mask, c), _mm_andnot_ps(mask, _mm_sub_ps(ones.ps, c)));
+ return _mm_or_ps(_mm_andnot_ps(mask, c), _mm_and_ps(mask, _mm_sub_ps(ones.ps, c)));
#else
int_vector mask = (int_vector)vcltq_f32(xin, vdupq_n_f32(0.0f));
float_vector c;
xin = (float_vector)vandq_s32((int_vector)xin, (int_vector)abs_mask.ps); /* Abs. value by clearing signbit */
c = sigmoid_positive_ps(xin);
- return vbslq_f32((uint32x4_t)mask, c, vsubq_f32(ones.ps, c));
+ return vbslq_f32((uint32x4_t)mask, vsubq_f32(ones.ps, c), c);
#endif
}
@@ -458,11 +458,11 @@ EvaluateSSE(const neuralnet * restrict pnn, const float arInput[], float ar[], f
#if defined(USE_SSE2) || defined(USE_AVX) || defined(USE_NEON)
#if defined(USE_AVX)
- scalevec = _mm256_set1_ps(pnn->rBetaHidden);
+ scalevec = _mm256_set1_ps(-pnn->rBetaHidden);
#elif defined(HAVE_SSE)
- scalevec = _mm_set1_ps(pnn->rBetaHidden);
+ scalevec = _mm_set1_ps(-pnn->rBetaHidden);
#else
- scalevec = vdupq_n_f32(pnn->rBetaHidden);
+ scalevec = vdupq_n_f32(-pnn->rBetaHidden);
#endif
for (par = ar, i = (cHidden >> LOG2VEC_SIZE); i; i--, par += VEC_SIZE) {