aboutsummaryrefslogtreecommitdiffstats
path: root/CommonLibs/trx_rate_ctr.cpp
blob: 381b387d18260337dee96771c4b112c47c6ba774 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
/*
 * Copyright (C) 2019 sysmocom - s.f.m.c. GmbH
 * All Rights Reserved
 *
 * SPDX-License-Identifier: AGPL-3.0+
 *
 * Author: Pau Espin Pedrol <pespin@sysmocom.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * See the COPYING file in the main directory for details.
 */

/*
 * rate_ctr API uses several osmocom select loop features, and as a result,
 * calls to it must be done through the main thread (the one running the osmocom
 * loop in osmo-trx).
 * Since read/write from/to SDR is done in separate threads (even read and write
 * each use a different thread), we must use some sort of message passing system
 * between main thread feeding rate_ctr structures and the Rx/Tx threads
 * generating the events.
 * The idea is that upon read/write issues, lower layers (SDR APIs) provide us with
 * underrun/overrun/droppedPackets information, and in that case we pass that up
 * the stack through signal <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> with signal_data
 * being a pointer to a "struct device_counters" structure, which contains
 * device (implementation agnostic) stateful counters for different kinds of
 * statistics.
 * That signal is processed here in device_sig_cb, where a copy of the "struct
 * device_counters" structure is held and the main thread is instructed through
 * a timerfd to update rate_ctr APIs against this copy. All this is done inside
 * a mutex to avoid different race conditions (between Rx and Tx threads, and
 * between Rx/Tx and main thread). For the same reason, callers of signal
 * <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> (device_sig_cb), that is Rx/Tx threads,
 * must do so with PTHREAD_CANCEL_DISABLE, in order to avoid possible deadlocks
 * in case the main thread decides to cancel other threads due to a shutdown
 * operation (e.g. SIGKILL received)
 */

#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <arpa/inet.h>

extern "C" {
#include <osmocom/core/talloc.h>
#include <osmocom/core/utils.h>
#include <osmocom/core/rate_ctr.h>
#include <osmocom/core/select.h>
#include <osmocom/core/stats.h>
#include <osmocom/core/timer.h>

#include "osmo_signal.h"
#include "trx_vty.h"
#include "trx_rate_ctr.h"
}
#include "Threads.h"
#include "Logger.h"

/* Used in ctrs_pending, when set it means that channel slot contains unused
   (non-pending) counter data. Stored in the "chan" field of the slot. */
#define PENDING_CHAN_NONE SIZE_MAX

/* talloc context received by trx_rate_ctr_init(); also used as parent for the
   threshold entries allocated in trx_rate_ctr_threshold_add() */
static void *trx_rate_ctr_ctx;

/* Per-channel rate counter groups (chan_len entries), allocated in
   trx_rate_ctr_init() */
static struct rate_ctr_group** rate_ctrs;
/* Per-channel copy of the last counters received in device_sig_cb(), waiting
   to be applied to rate_ctrs by rate_ctr_timerfd_cb() */
static struct device_counters* ctrs_pending;
/* Number of entries in rate_ctrs and ctrs_pending */
static size_t chan_len;
/* timerfd scheduled by device_sig_cb(); rate_ctr_timerfd_cb() is its callback */
static struct osmo_fd rate_ctr_timerfd;
/* Held while ctrs_pending is read/written and while the timerfd is scheduled
   or disabled */
static Mutex rate_ctr_mutex;

/* Timer driving threshold_timer_cb() */
struct osmo_timer_list threshold_timer;
/* List of configured struct ctr_threshold entries */
static LLIST_HEAD(threshold_list);
/* Period, in seconds, used when (re)scheduling threshold_timer */
static unsigned int threshold_timer_sched_secs;
/* Set by trx_rate_ctr_init(); until then threshold_timer_update_intv() does
   not touch the timer */
static bool threshold_initied;

/* Maps each rate counter interval to the keyword printed for it by
   ctr_threshold_2_vty_str(). Terminated by a { 0, NULL } entry. */
const struct value_string rate_ctr_intv[] = {
	{ RATE_CTR_INTV_SEC,	"per-second" },
	{ RATE_CTR_INTV_MIN,	"per-minute" },
	{ RATE_CTR_INTV_HOUR,	"per-hour" },
	{ RATE_CTR_INTV_DAY, 	"per-day" },
	{ 0, NULL }
};

/* Maps each TRX_CTR_* counter id to the short name printed for it by
   ctr_threshold_2_vty_str(). Terminated by a { 0, NULL } entry. */
const struct value_string trx_chan_ctr_names[] = {
	{ TRX_CTR_RX_OVERRUNS,	"rx_overruns" },
	{ TRX_CTR_TX_UNDERRUNS,	"tx_underruns" },
	{ TRX_CTR_RX_DROP_EV,	"rx_drop_events" },
	{ TRX_CTR_RX_DROP_SMPL,	"rx_drop_samples" },
	{ TRX_CTR_TX_DROP_EV,	"tx_drop_events" },
	{ TRX_CTR_TX_DROP_SMPL,	"tx_drop_samples" },
	{ 0, NULL }
};

/* Name and human readable description of every per-channel counter, indexed
   by its TRX_CTR_* id. Referenced by trx_chan_ctr_group_desc. */
static const struct rate_ctr_desc trx_chan_ctr_desc[] = {
	[TRX_CTR_RX_OVERRUNS]		= { "device:rx_overruns",	"Number of Rx overruns in FIFO queue" },
	[TRX_CTR_TX_UNDERRUNS]		= { "device:tx_underruns",	"Number of Tx underruns in FIFO queue" },
	[TRX_CTR_RX_DROP_EV]		= { "device:rx_drop_events",	"Number of times Rx samples were dropped by HW" },
	[TRX_CTR_RX_DROP_SMPL]		= { "device:rx_drop_samples",	"Number of Rx samples dropped by HW" },
	[TRX_CTR_TX_DROP_EV]		= { "device:tx_drop_events",	"Number of times Tx samples were dropped by HW" },
	[TRX_CTR_TX_DROP_SMPL]		= { "device:tx_drop_samples",	"Number of Tx samples dropped by HW" }
};

/* Description of the counter group that trx_rate_ctr_init() allocates once per
   channel; its counters are the ones listed in trx_chan_ctr_desc. */
static const struct rate_ctr_group_desc trx_chan_ctr_group_desc = {
	.group_name_prefix		= "trx:chan",
	.group_description		= "osmo-trx statistics",
	.class_id			= OSMO_STATS_CLASS_GLOBAL,
	.num_ctr			= ARRAY_SIZE(trx_chan_ctr_desc),
	.ctr_desc			= trx_chan_ctr_desc,
};

static int rate_ctr_timerfd_cb(struct osmo_fd *ofd, unsigned int what) {
	size_t chan;
	struct rate_ctr *ctr;
	LOGC(DMAIN, NOTICE) << "Main thread is updating counters";
	rate_ctr_mutex.lock();
	for (chan = 0; chan < chan_len; chan++) {
		if (ctrs_pending[chan].chan == PENDING_CHAN_NONE)
			continue;
		LOGCHAN(chan, DMAIN, INFO) << "rate_ctr update";
		ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_OVERRUNS];
		rate_ctr_add(ctr, ctrs_pending[chan].rx_overruns - ctr->current);
		ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_UNDERRUNS];
		rate_ctr_add(ctr, ctrs_pending[chan].tx_underruns - ctr->current);
		ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_DROP_EV];
		rate_ctr_add(ctr, ctrs_pending[chan].rx_dropped_events - ctr->current);
		ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_DROP_SMPL];
		rate_ctr_add(ctr, ctrs_pending[chan].rx_dropped_samples - ctr->current);
		ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_DROP_EV];
		rate_ctr_add(ctr, ctrs_pending[chan].tx_dropped_events - ctr->current);
		ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_DROP_SMPL];
		rate_ctr_add(ctr, ctrs_pending[chan].tx_dropped_samples - ctr->current);

		/* Mark as done */
		ctrs_pending[chan].chan = PENDING_CHAN_NONE;
	}
	if (osmo_timerfd_disable(&rate_ctr_timerfd) < 0)
		LOGC(DMAIN, ERROR) << "Failed to disable timerfd";
	rate_ctr_mutex.unlock();
	return 0;
}

/* Callback function to be called every time we receive a signal from DEVICE */
static int device_sig_cb(unsigned int subsys, unsigned int signal,
			 void *handler_data, void *signal_data)
{
	struct device_counters *ctr;
	/* Delay sched around 20 ms, in case we receive several calls from several
	 * channels batched */
	struct timespec next_sched = {.tv_sec = 0, .tv_nsec = 20*1000*1000};
	/* no automatic re-trigger */
	struct timespec intv_sched = {.tv_sec = 0, .tv_nsec = 0};

	switch (signal) {
	case S_DEVICE_COUNTER_CHANGE:
		ctr = (struct device_counters *)signal_data;
		LOGCHAN(ctr->chan, DMAIN, NOTICE) << "Received counter change from radioDevice";
		rate_ctr_mutex.lock();
		ctrs_pending[ctr->chan] = *ctr;
		if (osmo_timerfd_schedule(&rate_ctr_timerfd, &next_sched, &intv_sched) < 0) {
			LOGC(DMAIN, ERROR) << "Failed to schedule timerfd: " << errno << " = "<< strerror(errno);
		}
		rate_ctr_mutex.unlock();
		break;
	default:
		break;
	}
	return 0;
}

/************************************
 * ctr_threshold  APIs
 ************************************/
static const char* ctr_threshold_2_vty_str(struct ctr_threshold *ctr)
{
	static char buf[256];
	int rc = 0;
	rc += snprintf(buf, sizeof(buf), "ctr-error-threshold %s", get_value_string(trx_chan_ctr_names, ctr->ctr_id));
	rc += snprintf(buf + rc, sizeof(buf) - rc, " %d %s", ctr->val, get_value_string(rate_ctr_intv, ctr->intv));
	return buf;
}

/* Callback of threshold_timer: check every configured threshold against the
 * matching counter of every channel.
 *
 * As soon as one counter's rate for the threshold's interval has reached the
 * threshold value, S_MAIN_STOP_REQUIRED is dispatched and the function returns
 * without re-scheduling the timer. If no threshold was reached, the timer is
 * scheduled again in threshold_timer_sched_secs seconds. data is unused. */
static void threshold_timer_cb(void *data)
{
	struct ctr_threshold *thr;

	LOGC(DMAIN, DEBUG) << "threshold_timer_cb fired!";

	llist_for_each_entry(thr, &threshold_list, list) {
		for (size_t chan = 0; chan < chan_len; chan++) {
			struct rate_ctr *ctr = &rate_ctrs[chan]->ctr[thr->ctr_id];

			LOGCHAN(chan, DMAIN, INFO) << "checking threshold: " << ctr_threshold_2_vty_str(thr)
						   << " ("<< ctr->intv[thr->intv].rate << " vs " << thr->val << ")";

			/* Still below the limit: move on to the next channel */
			if (ctr->intv[thr->intv].rate < thr->val)
				continue;

			LOGCHAN(chan, DMAIN, FATAL) << "threshold reached, stopping! " << ctr_threshold_2_vty_str(thr)
						   << " ("<< ctr->intv[thr->intv].rate << " vs " << thr->val << ")";
			osmo_signal_dispatch(SS_MAIN, S_MAIN_STOP_REQUIRED, NULL);
			return;
		}
	}

	osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0);
}

/* Return the length, in seconds, of the interval a threshold is defined over.
 * Asserts on an interval value that is none of the RATE_CTR_INTV_* handled
 * below. */
static size_t ctr_threshold_2_seconds(struct ctr_threshold *ctr)
{
	switch (ctr->intv) {
	case RATE_CTR_INTV_SEC:
		return 1;
	case RATE_CTR_INTV_MIN:
		return 60;
	case RATE_CTR_INTV_HOUR:
		return 60*60;
	case RATE_CTR_INTV_DAY:
		return 60*60*24;
	default:
		OSMO_ASSERT(false);
		return 0;
	}
}

/* Recompute how often thresholds are checked and (re)schedule threshold_timer.
 *
 * Does nothing until trx_rate_ctr_init() has set threshold_initied. With an
 * empty threshold list a pending timer is deleted. Otherwise the check period
 * is derived from the shortest interval among the configured thresholds
 * (half of it minus one second, but at least one second) and the timer is
 * scheduled with that period. */
static void threshold_timer_update_intv() {
	struct ctr_threshold *entry;
	size_t shortest;

	/* Timer and related structures are only prepared by trx_rate_ctr_init */
	if (!threshold_initied)
		return;

	/* No thresholds configured: make sure no check is left scheduled */
	if (llist_empty(&threshold_list)) {
		if (osmo_timer_pending(&threshold_timer))
			osmo_timer_del(&threshold_timer);
		return;
	}

	/* The list is not empty here, so the start value is always replaced */
	shortest = SIZE_MAX;
	llist_for_each_entry(entry, &threshold_list, list) {
		const size_t secs = ctr_threshold_2_seconds(entry);

		if (secs < shortest)
			shortest = secs;
	}

	/* For a one second interval the subtraction goes below zero; the (int)
	 * cast is what lets the lower bound of 1 take effect in that case */
	threshold_timer_sched_secs = OSMO_MAX((int)(shortest / 2 - 1), 1);
	LOGC(DMAIN, INFO) << "New ctr-error-threshold check interval: "
			  << threshold_timer_sched_secs << " seconds";
	osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0);
}

/* Init rate_ctr subsystem. Expected to be called during process start by main thread before VTY is ready */
void trx_rate_ctr_init(void *ctx, struct trx_ctx* trx_ctx)
{
	size_t  i;
	trx_rate_ctr_ctx = ctx;
	chan_len = trx_ctx->cfg.num_chans;
	ctrs_pending = (struct device_counters*) talloc_zero_size(ctx, chan_len * sizeof(struct device_counters));
	rate_ctrs = (struct rate_ctr_group**) talloc_zero_size(ctx, chan_len * sizeof(struct rate_ctr_group*));

	for (i = 0; i < chan_len; i++) {
		ctrs_pending[i].chan = PENDING_CHAN_NONE;
		rate_ctrs[i] = rate_ctr_group_alloc(ctx, &trx_chan_ctr_group_desc, i);
		if (!rate_ctrs[i]) {
			LOGCHAN(i, DMAIN, ERROR) << "Failed to allocate rate ctr";
			exit(1);
		}
	}
	rate_ctr_timerfd.fd = -1;
	if (osmo_timerfd_setup(&rate_ctr_timerfd, rate_ctr_timerfd_cb, NULL) < 0) {
		LOGC(DMAIN, ERROR) << "Failed to setup timerfd";
		exit(1);
	}
	osmo_signal_register_handler(SS_DEVICE, device_sig_cb, NULL);

	/* Now set up threshold checks */
	threshold_initied = true;
	osmo_timer_setup(&threshold_timer, threshold_timer_cb, NULL);
	threshold_timer_update_intv();
}

void trx_rate_ctr_threshold_add(struct ctr_threshold *ctr)
{
	struct ctr_threshold *new_ctr;

	new_ctr = talloc_zero(trx_rate_ctr_ctx, struct ctr_threshold);
	*new_ctr = *ctr;
	LOGC(DMAIN, NOTICE) << "Adding new threshold check: " << ctr_threshold_2_vty_str(new_ctr);
	llist_add(&new_ctr->list, &threshold_list);
	threshold_timer_update_intv();
}

/* Remove a threshold check.
 *
 * Looks for the first entry on threshold_list whose interval, counter id and
 * value all equal those of del_ctr. That entry is unlinked and freed, and the
 * check interval is recomputed.
 *
 * Returns 0 if an entry was removed, -1 if none matched. */
int trx_rate_ctr_threshold_del(struct ctr_threshold *del_ctr)
{
	struct ctr_threshold *entry;

	llist_for_each_entry(entry, &threshold_list, list) {
		const bool matches = entry->intv == del_ctr->intv &&
				     entry->ctr_id == del_ctr->ctr_id &&
				     entry->val == del_ctr->val;

		if (matches) {
			LOGC(DMAIN, NOTICE) << "Deleting threshold check: " << ctr_threshold_2_vty_str(del_ctr);
			llist_del(&entry->list);
			talloc_free(entry);
			threshold_timer_update_intv();
			/* Returning right away: the iteration must not go on
			 * from the entry that was just freed */
			return 0;
		}
	}

	return -1;
}

/* Print one configuration line per entry of threshold_list to the given VTY.
 * Each line consists of indent_prefix, the threshold formatted by
 * ctr_threshold_2_vty_str() and the VTY's newline sequence. */
void trx_rate_ctr_threshold_write_config(struct vty *vty, char *indent_prefix)
{
	struct ctr_threshold *entry;

	llist_for_each_entry(entry, &threshold_list, list) {
		const char *cmd = ctr_threshold_2_vty_str(entry);

		vty_out(vty, "%s%s%s", indent_prefix, cmd, VTY_NEWLINE);
	}
}