This seems to happen occationally with testAP (potentially others).
The invalid read appears to happen when the frame_xchg_tx_cb detects
an early status and no ACK. In this particular case there is no
retry interval so we reach the retry limit and 'done' the frame.
This frees the 'fx' data all before the destroy callback can get
called. Once we finally return and the destroy callback is called
'fx' is freed and we see the invalid write.
==206== Memcheck, a memory error detector
==206== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==206== Using Valgrind-3.16.1 and LibVEX; rerun with -h for copyright info
==206== Command: iwd -p rad1,rad2,rad3,rad4 -d
==206== Parent PID: 140
==206==
==206== Invalid write of size 4
==206== at 0x4493A0: frame_xchg_tx_destroy (frame-xchg.c:941)
==206== by 0x46DAF6: destroy_request (genl.c:673)
==206== by 0x46DAF6: process_unicast (genl.c:1002)
==206== by 0x46DAF6: received_data (genl.c:1101)
==206== by 0x46AA4B: io_callback (io.c:118)
==206== by 0x469D6C: l_main_iterate (main.c:477)
==206== by 0x469E1B: l_main_run (main.c:524)
==206== by 0x469E1B: l_main_run (main.c:506)
==206== by 0x46A02B: l_main_run_with_signal (main.c:646)
==206== by 0x403E78: main (main.c:490)
==206== Address 0x4c59c6c is 172 bytes inside a block of size 176 free'd
==206== at 0x483B9F5: free (vg_replace_malloc.c:538)
==206== by 0x40F14C: destroy_work (wiphy.c:248)
==206== by 0x40F14C: wiphy_radio_work_done (wiphy.c:1578)
==206== by 0x44A916: frame_xchg_tx_cb (frame-xchg.c:930)
==206== by 0x46DAD9: process_unicast (genl.c:993)
==206== by 0x46DAD9: received_data (genl.c:1101)
==206== by 0x46AA4B: io_callback (io.c:118)
==206== by 0x469D6C: l_main_iterate (main.c:477)
==206== by 0x469E1B: l_main_run (main.c:524)
==206== by 0x469E1B: l_main_run (main.c:506)
==206== by 0x46A02B: l_main_run_with_signal (main.c:646)
==206== by 0x403E78: main (main.c:490)
==206== Block was alloc'd at
==206== at 0x483A809: malloc (vg_replace_malloc.c:307)
==206== by 0x4643CD: l_malloc (util.c:61)
==206== by 0x44AF8C: frame_xchg_startv (frame-xchg.c:1155)
==206== by 0x44B2A4: frame_xchg_start (frame-xchg.c:1108)
==206== by 0x42BC55: ap_send_mgmt_frame (ap.c:709)
==206== by 0x42F513: ap_probe_req_cb (ap.c:1869)
==206== by 0x449752: frame_watch_unicast_notify (frame-xchg.c:233)
==206== by 0x46DA2F: dispatch_unicast_watches (genl.c:961)
==206== by 0x46DA2F: process_unicast (genl.c:980)
==206== by 0x46DA2F: received_data (genl.c:1101)
==206== by 0x46AA4B: io_callback (io.c:118)
==206== by 0x469D6C: l_main_iterate (main.c:477)
==206== by 0x469E1B: l_main_run (main.c:524)
==206== by 0x469E1B: l_main_run (main.c:506)
==206== by 0x46A02B: l_main_run_with_signal (main.c:646)
==206==
---
src/frame-xchg.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
v2:
* Set fx->tx_cmd_id to zero after canceling
diff --git a/src/frame-xchg.c b/src/frame-xchg.c
index 0e3e330d..b4e092b8 100644
--- a/src/frame-xchg.c
+++ b/src/frame-xchg.c
@@ -773,8 +773,10 @@ static void frame_xchg_reset(struct frame_xchg_data *fx)
if (fx->timeout)
l_timeout_remove(fx->timeout);
- if (fx->tx_cmd_id)
+ if (fx->tx_cmd_id) {
l_genl_family_cancel(nl80211, fx->tx_cmd_id);
+ fx->tx_cmd_id = 0;
+ }
l_free(fx->early_frame.mpdu);
fx->early_frame.mpdu = NULL;
@@ -901,6 +903,8 @@ static void frame_xchg_tx_cb(struct l_genl_msg *msg, void *user_data)
uint64_t cookie;
bool early_status;
+ fx->tx_cmd_id = 0;
+
l_debug("err %i", -error);
if (error < 0) {
@@ -934,13 +938,6 @@ error:
frame_xchg_done(fx, error);
}
-static void frame_xchg_tx_destroy(void *user_data)
-{
- struct frame_xchg_data *fx = user_data;
-
- fx->tx_cmd_id = 0;
-}
-
static bool frame_xchg_tx_retry(struct wiphy_radio_work_item *item)
{
struct frame_xchg_data *fx = l_container_of(item,
@@ -974,7 +971,7 @@ static bool frame_xchg_tx_retry(struct wiphy_radio_work_item *item)
&duration);
fx->tx_cmd_id = l_genl_family_send(nl80211, msg, frame_xchg_tx_cb, fx,
- frame_xchg_tx_destroy);
+ NULL);
if (!fx->tx_cmd_id) {
l_error("Error sending frame");
l_genl_msg_unref(msg);
--
2.26.2
Show replies by date
Hi James,
On 10/30/20 11:35 AM, James Prestwood wrote:
This seems to happen occationally with testAP (potentially others).
The invalid read appears to happen when the frame_xchg_tx_cb detects
an early status and no ACK. In this particular case there is no
retry interval so we reach the retry limit and 'done' the frame.
This frees the 'fx' data all before the destroy callback can get
called. Once we finally return and the destroy callback is called
'fx' is freed and we see the invalid write.
Applied, thanks.
Regards,
-Denis