From 79e2d1373add8d9e265d6a16c4952f5273600e97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0?= <1671644+arrufat@users.noreply.github.com>
Date: Fri, 13 Jun 2025 11:11:32 +0900
Subject: [PATCH] Fix: Correct YOLO loss gradient for x,y coordinates (#3088)

* Fix: Correct YOLO loss gradient for x,y coordinates

The gradient calculation for bounding box x and y coordinates in the
`loss_yolo_` helper `impl::yolo_helper_impl::tensor_to_loss` was
missing a factor of 2.0. The loss for these coordinates is computed on
the transformed value `output_scaled = network_output * 2.0 - 0.5`, but
the gradient must be taken with respect to `network_output`. By the
chain rule, d(output_scaled)/d(network_output) = 2.0, so the gradient
terms need that extra factor.

This commit multiplies the affected gradient terms by 2.0f to correctly
apply the chain rule.

* Scale the loss by 0.5 (half the squared norm of the gradient)

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
---
 dlib/dnn/loss.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h
index a569268398..36b37a2956 100644
--- a/dlib/dnn/loss.h
+++ b/dlib/dnn/loss.h
@@ -3827,8 +3827,8 @@ namespace dlib
                         const auto y_idx = tensor_index(output_tensor, n, k + 1, r, c);
                         const auto w_idx = tensor_index(output_tensor, n, k + 2, r, c);
                         const auto h_idx = tensor_index(output_tensor, n, k + 3, r, c);
-                        g[x_idx] = scale_box * put_in_range(-1, 1, (out_data[x_idx] * 2.0 - 0.5 - tx));
-                        g[y_idx] = scale_box * put_in_range(-1, 1, (out_data[y_idx] * 2.0 - 0.5 - ty));
+                        g[x_idx] = scale_box * put_in_range(-1, 1, (out_data[x_idx] * 2.0 - 0.5 - tx)) * 2.0f;
+                        g[y_idx] = scale_box * put_in_range(-1, 1, (out_data[y_idx] * 2.0 - 0.5 - ty)) * 2.0f;
                         g[w_idx] = scale_box * put_in_range(-1, 1, (out_data[w_idx] - tw));
                         g[h_idx] = scale_box * put_in_range(-1, 1, (out_data[h_idx] - th));
 
@@ -3863,7 +3863,7 @@ namespace dlib
                 }
 
                 // The loss is the squared norm of the gradient
-                loss += length_squared(rowm(mat(grad), n));
+                loss += 0.5 * length_squared(rowm(mat(grad), n));
             }
         };
     }