blob: 1c1c38f536dc35591008931e6d664bf3e4615ca3
1 | /* |
2 | * NTP client/server, based on OpenNTPD 3.9p1 |
3 | * |
4 | * Author: Adam Tkac <vonsch@gmail.com> |
5 | * |
6 | * Licensed under GPLv2, see file LICENSE in this source tree. |
7 | * |
8 | * Parts of OpenNTPD clock synchronization code are replaced by |
9 | * code which is based on ntp-4.2.6, which carries the following |
10 | * copyright notice: |
11 | * |
12 | *********************************************************************** |
13 | * * |
14 | * Copyright (c) University of Delaware 1992-2009 * |
15 | * * |
16 | * Permission to use, copy, modify, and distribute this software and * |
17 | * its documentation for any purpose with or without fee is hereby * |
18 | * granted, provided that the above copyright notice appears in all * |
19 | * copies and that both the copyright notice and this permission * |
20 | * notice appear in supporting documentation, and that the name * |
21 | * University of Delaware not be used in advertising or publicity * |
22 | * pertaining to distribution of the software without specific, * |
23 | * written prior permission. The University of Delaware makes no * |
24 | * representations about the suitability this software for any * |
25 | * purpose. It is provided "as is" without express or implied * |
26 | * warranty. * |
27 | * * |
28 | *********************************************************************** |
29 | */ |
30 | |
31 | //usage:#define ntpd_trivial_usage |
32 | //usage: "[-dnqNw"IF_FEATURE_NTPD_SERVER("l")"] [-S PROG] [-p PEER]..." |
33 | //usage:#define ntpd_full_usage "\n\n" |
34 | //usage: "NTP client/server\n" |
35 | //usage: "\n -d Verbose" |
36 | //usage: "\n -n Do not daemonize" |
37 | //usage: "\n -q Quit after clock is set" |
38 | //usage: "\n -N Run at high priority" |
39 | //usage: "\n -w Do not set time (only query peers), implies -n" |
40 | //usage: IF_FEATURE_NTPD_SERVER( |
41 | //usage: "\n -l Run as server on port 123" |
42 | //usage: ) |
43 | //usage: "\n -S PROG Run PROG after stepping time, stratum change, and every 11 mins" |
44 | //usage: "\n -p PEER Obtain time from PEER (may be repeated)" |
45 | |
46 | #include "libbb.h" |
47 | #include <math.h> |
48 | #include <netinet/ip.h> /* For IPTOS_LOWDELAY definition */ |
49 | #include <sys/resource.h> /* setpriority */ |
50 | |
51 | #ifdef __BIONIC__ |
52 | #include <linux/timex.h> |
53 | extern int adjtimex (struct timex *); |
54 | #else |
55 | #include <sys/timex.h> |
56 | #endif |
57 | #ifndef IPTOS_LOWDELAY |
58 | # define IPTOS_LOWDELAY 0x10 |
59 | #endif |
60 | #ifndef IP_PKTINFO |
61 | # error "Sorry, your kernel has to support IP_PKTINFO" |
62 | #endif |
63 | |
64 | |
65 | /* Verbosity control (max level of -dddd options accepted). |
66 | * max 6 is very talkative (and bloated). 3 is non-bloated, |
67 | * production level setting. |
68 | */ |
69 | #define MAX_VERBOSE 3 |
70 | |
71 | |
72 | /* High-level description of the algorithm: |
73 | * |
74 | * We start running with very small poll_exp, BURSTPOLL, |
75 | * in order to quickly accumulate INITIAL_SAMPLES datapoints |
76 | * for each peer. Then, time is stepped if the offset is larger |
77 | * than STEP_THRESHOLD, otherwise it isn't; anyway, we enlarge |
78 | * poll_exp to MINPOLL and enter frequency measurement step: |
79 | * we collect new datapoints but ignore them for WATCH_THRESHOLD |
80 | * seconds. After WATCH_THRESHOLD seconds we look at accumulated |
81 | * offset and estimate frequency drift. |
82 | * |
83 | * (frequency measurement step seems to not be strictly needed, |
84 | * it is conditionally disabled with USING_INITIAL_FREQ_ESTIMATION |
85 | * define set to 0) |
86 | * |
87 | * After this, we enter "steady state": we collect a datapoint, |
88 | * we select the best peer, if this datapoint is not a new one |
89 | * (IOW: if this datapoint isn't for selected peer), sleep |
90 | * and collect another one; otherwise, use its offset to update |
91 | * frequency drift, if offset is somewhat large, reduce poll_exp, |
92 | * otherwise increase poll_exp. |
93 | * |
94 | * If offset is larger than STEP_THRESHOLD, which shouldn't normally |
95 | * happen, we assume that something "bad" happened (computer |
96 | * was hibernated, someone set totally wrong date, etc), |
97 | * then the time is stepped, all datapoints are discarded, |
98 | * and we go back to steady state. |
99 | * |
100 | * Made some changes to speed up re-syncing after our clock goes bad |
101 | * (tested with suspending my laptop): |
102 | * - if largish offset (>= STEP_THRESHOLD * 8 == 1 sec) is seen |
103 | * from a peer, schedule next query for this peer soon |
104 | * without drastically lowering poll interval for everybody. |
105 | * This makes us collect enough data for step much faster: |
106 | * e.g. at poll = 10 (1024 secs), step was done within 5 minutes |
107 | * after first reply which indicated that our clock is 14 seconds off. |
108 | * - on step, do not discard d_dispersion data of the existing datapoints, |
109 | * do not clear reachable_bits. This prevents discarding first ~8 |
110 | * datapoints after the step. |
111 | */ |
112 | |
113 | #define RETRY_INTERVAL 5 /* on error, retry in N secs */ |
114 | #define RESPONSE_INTERVAL 15 /* wait for reply up to N secs */ |
115 | #define INITIAL_SAMPLES 4 /* how many samples do we want for init */ |
116 | #define BAD_DELAY_GROWTH 4 /* drop packet if its delay grew by more than this */ |
117 | |
118 | /* Clock discipline parameters and constants */ |
119 | |
120 | /* Step threshold (sec). std ntpd uses 0.128. |
121 | * Using exact power of 2 (1/8) results in smaller code */ |
122 | #define STEP_THRESHOLD 0.125 |
123 | #define WATCH_THRESHOLD 128 /* stepout threshold (sec). std ntpd uses 900 (11 mins (!)) */ |
124 | /* NB: set WATCH_THRESHOLD to ~60 when debugging to save time) */ |
125 | //UNUSED: #define PANIC_THRESHOLD 1000 /* panic threshold (sec) */ |
126 | |
127 | #define FREQ_TOLERANCE 0.000015 /* frequency tolerance (15 PPM) */ |
128 | #define BURSTPOLL 0 /* initial poll */ |
129 | #define MINPOLL 5 /* minimum poll interval. std ntpd uses 6 (6: 64 sec) */ |
130 | /* If we got largish offset from a peer, cap next query interval |
131 | * for this peer by this many seconds: |
132 | */ |
133 | #define BIGOFF_INTERVAL (1 << 6) |
134 | /* If offset > discipline_jitter * POLLADJ_GATE, and poll interval is >= 2^BIGPOLL, |
135 | * then it is decreased _at once_. (If < 2^BIGPOLL, it will be decreased _eventually_). |
136 | */ |
137 | #define BIGPOLL 10 /* 2^10 sec ~= 17 min */ |
138 | #define MAXPOLL 12 /* maximum poll interval (12: 1.1h, 17: 36.4h). std ntpd uses 17 */ |
139 | /* Actively lower poll when we see such big offsets. |
140 | * With STEP_THRESHOLD = 0.125, it means we try to sync more aggressively |
141 | * if offset increases over ~0.04 sec */ |
142 | #define POLLDOWN_OFFSET (STEP_THRESHOLD / 3) |
143 | #define MINDISP 0.01 /* minimum dispersion (sec) */ |
144 | #define MAXDISP 16 /* maximum dispersion (sec) */ |
145 | #define MAXSTRAT 16 /* maximum stratum (infinity metric) */ |
146 | #define MAXDIST 1 /* distance threshold (sec) */ |
147 | #define MIN_SELECTED 1 /* minimum intersection survivors */ |
148 | #define MIN_CLUSTERED 3 /* minimum cluster survivors */ |
149 | |
150 | #define MAXDRIFT 0.000500 /* frequency drift we can correct (500 PPM) */ |
151 | |
152 | /* Poll-adjust threshold. |
153 | * When we see that offset is small enough compared to discipline jitter, |
154 | * we grow a counter: += MINPOLL. When counter goes over POLLADJ_LIMIT, |
155 | * we poll_exp++. If offset isn't small, counter -= poll_exp*2, |
156 | * and when it goes below -POLLADJ_LIMIT, we poll_exp--. |
157 | * (Bumped from 30 to 40 since otherwise I often see poll_exp going *2* steps down) |
158 | */ |
159 | #define POLLADJ_LIMIT 40 |
160 | /* If offset < discipline_jitter * POLLADJ_GATE, then we decide to increase |
161 | * poll interval (we think we can't improve timekeeping |
162 | * by staying at smaller poll). |
163 | */ |
164 | #define POLLADJ_GATE 4 |
165 | #define TIMECONST_HACK_GATE 2 |
166 | /* Compromise Allan intercept (sec). doc uses 1500, std ntpd uses 512 */ |
167 | #define ALLAN 512 |
168 | /* PLL loop gain */ |
169 | #define PLL 65536 |
170 | /* FLL loop gain [why it depends on MAXPOLL??] */ |
171 | #define FLL (MAXPOLL + 1) |
172 | /* Parameter averaging constant */ |
173 | #define AVG 4 |
174 | |
175 | |
/* NTP protocol constants: version, packet sizes, and the layout of the
 * status byte (msg_t.m_status): mode in bits 0-2, version in bits 3-5,
 * leap indicator in bits 6-7.
 */
enum {
	NTP_VERSION        = 4,
	NTP_MAXSTRATUM     = 15,

	/* Sizes in bytes */
	NTP_DIGESTSIZE     = 16,
	NTP_MSGSIZE_NOAUTH = 48,
	/* unauthenticated packet + 4-byte key id + digest */
	NTP_MSGSIZE        = NTP_MSGSIZE_NOAUTH + 4 + NTP_DIGESTSIZE,

	/* Status byte field masks */
	MODE_MASK          = 0x07,
	VERSION_SHIFT      = 3,
	VERSION_MASK       = 0x38,
	LI_MASK            = 0xc0,

	/* Leap second codes (high order two bits of m_status) */
	LI_NOWARNING       = 0x00, /* no warning */
	LI_PLUSSEC         = 0x40, /* add a second (61 seconds) */
	LI_MINUSSEC        = 0x80, /* minus a second (59 seconds) */
	LI_ALARM           = 0xc0, /* alarm condition */

	/* Mode values (low order three bits of m_status) */
	MODE_RES0          = 0, /* reserved */
	MODE_SYM_ACT       = 1, /* symmetric active */
	MODE_SYM_PAS       = 2, /* symmetric passive */
	MODE_CLIENT        = 3, /* client */
	MODE_SERVER        = 4, /* server */
	MODE_BROADCAST     = 5, /* broadcast */
	MODE_RES1          = 6, /* reserved for NTP control message */
	MODE_RES2          = 7, /* reserved for private use */
};
206 | |
207 | //TODO: better base selection |
208 | #define OFFSET_1900_1970 2208988800UL /* 1970 - 1900 in seconds */ |
209 | |
210 | #define NUM_DATAPOINTS 8 |
211 | |
/* "Long" fixed-point value as it appears in a packet: two 32-bit words.
 * lfp_to_d() byte-swaps both with ntohl() before use, so the stored
 * representation is network byte order.
 */
typedef struct {
	uint32_t int_partl;  /* integer part */
	uint32_t fractionl;  /* fractional part */
} l_fixedpt_t;
216 | |
/* "Short" fixed-point value as it appears in a packet: two 16-bit words.
 * sfp_to_d() byte-swaps both with ntohs() before use, so the stored
 * representation is network byte order.
 */
typedef struct {
	uint16_t int_parts;  /* integer part */
	uint16_t fractions;  /* fractional part */
} s_fixedpt_t;
221 | |
222 | typedef struct { |
223 | uint8_t m_status; /* status of local clock and leap info */ |
224 | uint8_t m_stratum; |
225 | uint8_t m_ppoll; /* poll value */ |
226 | int8_t m_precision_exp; |
227 | s_fixedpt_t m_rootdelay; |
228 | s_fixedpt_t m_rootdisp; |
229 | uint32_t m_refid; |
230 | l_fixedpt_t m_reftime; |
231 | l_fixedpt_t m_orgtime; |
232 | l_fixedpt_t m_rectime; |
233 | l_fixedpt_t m_xmttime; |
234 | uint32_t m_keyid; |
235 | uint8_t m_digest[NTP_DIGESTSIZE]; |
236 | } msg_t; |
237 | |
/* One sample kept in a peer's filter_datapoint[] array */
typedef struct {
	double d_offset;      /* offset recorded for this sample */
	double d_recv_time;   /* when the sample was taken; dispersion() ages the sample from here */
	double d_dispersion;  /* base dispersion, before the age term is added */
} datapoint_t;
243 | |
244 | typedef struct { |
245 | len_and_sockaddr *p_lsa; |
246 | char *p_dotted; |
247 | int p_fd; |
248 | int datapoint_idx; |
249 | uint32_t lastpkt_refid; |
250 | uint8_t lastpkt_status; |
251 | uint8_t lastpkt_stratum; |
252 | uint8_t reachable_bits; |
253 | /* when to send new query (if p_fd == -1) |
254 | * or when receive times out (if p_fd >= 0): */ |
255 | double next_action_time; |
256 | double p_xmttime; |
257 | double lastpkt_recv_time; |
258 | double lastpkt_delay; |
259 | double lastpkt_rootdelay; |
260 | double lastpkt_rootdisp; |
261 | /* produced by filter algorithm: */ |
262 | double filter_offset; |
263 | double filter_dispersion; |
264 | double filter_jitter; |
265 | datapoint_t filter_datapoint[NUM_DATAPOINTS]; |
266 | /* last sent packet: */ |
267 | msg_t p_xmt_msg; |
268 | } peer_t; |
269 | |
270 | |
271 | #define USING_KERNEL_PLL_LOOP 1 |
272 | #define USING_INITIAL_FREQ_ESTIMATION 0 |
273 | |
274 | enum { |
275 | OPT_n = (1 << 0), |
276 | OPT_q = (1 << 1), |
277 | OPT_N = (1 << 2), |
278 | OPT_x = (1 << 3), |
279 | /* Insert new options above this line. */ |
280 | /* Non-compat options: */ |
281 | OPT_w = (1 << 4), |
282 | OPT_p = (1 << 5), |
283 | OPT_S = (1 << 6), |
284 | OPT_l = (1 << 7) * ENABLE_FEATURE_NTPD_SERVER, |
285 | /* We hijack some bits for other purposes */ |
286 | OPT_qq = (1 << 31), |
287 | }; |
288 | |
289 | struct globals { |
290 | double cur_time; |
291 | /* total round trip delay to currently selected reference clock */ |
292 | double rootdelay; |
293 | /* reference timestamp: time when the system clock was last set or corrected */ |
294 | double reftime; |
295 | /* total dispersion to currently selected reference clock */ |
296 | double rootdisp; |
297 | |
298 | double last_script_run; |
299 | char *script_name; |
300 | llist_t *ntp_peers; |
301 | #if ENABLE_FEATURE_NTPD_SERVER |
302 | int listen_fd; |
303 | # define G_listen_fd (G.listen_fd) |
304 | #else |
305 | # define G_listen_fd (-1) |
306 | #endif |
307 | unsigned verbose; |
308 | unsigned peer_cnt; |
309 | /* refid: 32-bit code identifying the particular server or reference clock |
310 | * in stratum 0 packets this is a four-character ASCII string, |
311 | * called the kiss code, used for debugging and monitoring |
312 | * in stratum 1 packets this is a four-character ASCII string |
313 | * assigned to the reference clock by IANA. Example: "GPS " |
314 | * in stratum 2+ packets, it's IPv4 address or 4 first bytes |
315 | * of MD5 hash of IPv6 |
316 | */ |
317 | uint32_t refid; |
318 | uint8_t ntp_status; |
319 | /* precision is defined as the larger of the resolution and time to |
320 | * read the clock, in log2 units. For instance, the precision of a |
321 | * mains-frequency clock incrementing at 60 Hz is 16 ms, even when the |
322 | * system clock hardware representation is to the nanosecond. |
323 | * |
324 | * Delays, jitters of various kinds are clamped down to precision. |
325 | * |
326 | * If precision_sec is too large, discipline_jitter gets clamped to it |
327 | * and if offset is smaller than discipline_jitter * POLLADJ_GATE, poll |
328 | * interval grows even though we really can benefit from staying at |
329 | * smaller one, collecting non-lagged datapoits and correcting offset. |
330 | * (Lagged datapoits exist when poll_exp is large but we still have |
331 | * systematic offset error - the time distance between datapoints |
332 | * is significant and older datapoints have smaller offsets. |
333 | * This makes our offset estimation a bit smaller than reality) |
334 | * Due to this effect, setting G_precision_sec close to |
335 | * STEP_THRESHOLD isn't such a good idea - offsets may grow |
336 | * too big and we will step. I observed it with -6. |
337 | * |
338 | * OTOH, setting precision_sec far too small would result in futile |
339 | * attempts to syncronize to an unachievable precision. |
340 | * |
341 | * -6 is 1/64 sec, -7 is 1/128 sec and so on. |
342 | * -8 is 1/256 ~= 0.003906 (worked well for me --vda) |
343 | * -9 is 1/512 ~= 0.001953 (let's try this for some time) |
344 | */ |
345 | #define G_precision_exp -9 |
346 | /* |
347 | * G_precision_exp is used only for construction outgoing packets. |
348 | * It's ok to set G_precision_sec to a slightly different value |
349 | * (One which is "nicer looking" in logs). |
350 | * Exact value would be (1.0 / (1 << (- G_precision_exp))): |
351 | */ |
352 | #define G_precision_sec 0.002 |
353 | uint8_t stratum; |
354 | /* Bool. After set to 1, never goes back to 0: */ |
355 | smallint initial_poll_complete; |
356 | |
357 | #define STATE_NSET 0 /* initial state, "nothing is set" */ |
358 | //#define STATE_FSET 1 /* frequency set from file */ |
359 | //#define STATE_SPIK 2 /* spike detected */ |
360 | //#define STATE_FREQ 3 /* initial frequency */ |
361 | #define STATE_SYNC 4 /* clock synchronized (normal operation) */ |
362 | uint8_t discipline_state; // doc calls it c.state |
363 | uint8_t poll_exp; // s.poll |
364 | int polladj_count; // c.count |
365 | long kernel_freq_drift; |
366 | peer_t *last_update_peer; |
367 | double last_update_offset; // c.last |
368 | double last_update_recv_time; // s.t |
369 | double discipline_jitter; // c.jitter |
370 | /* Since we only compare it with ints, can simplify code |
371 | * by not making this variable floating point: |
372 | */ |
373 | unsigned offset_to_jitter_ratio; |
374 | //double cluster_offset; // s.offset |
375 | //double cluster_jitter; // s.jitter |
376 | #if !USING_KERNEL_PLL_LOOP |
377 | double discipline_freq_drift; // c.freq |
378 | /* Maybe conditionally calculate wander? it's used only for logging */ |
379 | double discipline_wander; // c.wander |
380 | #endif |
381 | }; |
382 | #define G (*ptr_to_globals) |
383 | |
384 | static const int const_IPTOS_LOWDELAY = IPTOS_LOWDELAY; |
385 | |
386 | |
387 | #define VERB1 if (MAX_VERBOSE && G.verbose) |
388 | #define VERB2 if (MAX_VERBOSE >= 2 && G.verbose >= 2) |
389 | #define VERB3 if (MAX_VERBOSE >= 3 && G.verbose >= 3) |
390 | #define VERB4 if (MAX_VERBOSE >= 4 && G.verbose >= 4) |
391 | #define VERB5 if (MAX_VERBOSE >= 5 && G.verbose >= 5) |
392 | #define VERB6 if (MAX_VERBOSE >= 6 && G.verbose >= 6) |
393 | |
394 | |
/* Return 2 raised to the power a, as a double.
 * For negative a the result is the reciprocal 1 / 2^(-a).
 * The power is formed by shifting an unsigned long, so |a| must be
 * smaller than the bit width of unsigned long.
 */
static double LOG2D(int a)
{
	unsigned long pow2;

	if (a >= 0) {
		pow2 = 1UL << a;
		return pow2;
	}
	pow2 = 1UL << -a;
	return 1.0 / pow2;
}
401 | static ALWAYS_INLINE double SQUARE(double x) |
402 | { |
403 | return x * x; |
404 | } |
405 | static ALWAYS_INLINE double MAXD(double a, double b) |
406 | { |
407 | if (a > b) |
408 | return a; |
409 | return b; |
410 | } |
411 | static ALWAYS_INLINE double MIND(double a, double b) |
412 | { |
413 | if (a < b) |
414 | return a; |
415 | return b; |
416 | } |
417 | static NOINLINE double my_SQRT(double X) |
418 | { |
419 | union { |
420 | float f; |
421 | int32_t i; |
422 | } v; |
423 | double invsqrt; |
424 | double Xhalf = X * 0.5; |
425 | |
426 | /* Fast and good approximation to 1/sqrt(X), black magic */ |
427 | v.f = X; |
428 | /*v.i = 0x5f3759df - (v.i >> 1);*/ |
429 | v.i = 0x5f375a86 - (v.i >> 1); /* - this constant is slightly better */ |
430 | invsqrt = v.f; /* better than 0.2% accuracy */ |
431 | |
432 | /* Refining it using Newton's method: x1 = x0 - f(x0)/f'(x0) |
433 | * f(x) = 1/(x*x) - X (f==0 when x = 1/sqrt(X)) |
434 | * f'(x) = -2/(x*x*x) |
435 | * f(x)/f'(x) = (X - 1/(x*x)) / (2/(x*x*x)) = X*x*x*x/2 - x/2 |
436 | * x1 = x0 - (X*x0*x0*x0/2 - x0/2) = 1.5*x0 - X*x0*x0*x0/2 = x0*(1.5 - (X/2)*x0*x0) |
437 | */ |
438 | invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); /* ~0.05% accuracy */ |
439 | /* invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); 2nd iter: ~0.0001% accuracy */ |
440 | /* With 4 iterations, more than half results will be exact, |
441 | * at 6th iterations result stabilizes with about 72% results exact. |
442 | * We are well satisfied with 0.05% accuracy. |
443 | */ |
444 | |
445 | return X * invsqrt; /* X * 1/sqrt(X) ~= sqrt(X) */ |
446 | } |
447 | static ALWAYS_INLINE double SQRT(double X) |
448 | { |
449 | /* If this arch doesn't use IEEE 754 floats, fall back to using libm */ |
450 | if (sizeof(float) != 4) |
451 | return sqrt(X); |
452 | |
453 | /* This avoids needing libm, saves about 0.5k on x86-32 */ |
454 | return my_SQRT(X); |
455 | } |
456 | |
457 | static double |
458 | gettime1900d(void) |
459 | { |
460 | struct timeval tv; |
461 | gettimeofday(&tv, NULL); /* never fails */ |
462 | G.cur_time = tv.tv_sec + (1.0e-6 * tv.tv_usec) + OFFSET_1900_1970; |
463 | return G.cur_time; |
464 | } |
465 | |
/* Split a time in seconds, given as a double, into a struct timeval.
 *
 * d:  seconds, may carry a fractional part
 * tv: receives whole seconds in tv_sec and the remainder, in
 *     microseconds, in tv_usec
 *
 * The seconds are rounded toward minus infinity so that tv_usec is never
 * negative: -0.5 becomes { -1, 500000 }. Plain truncation would give
 * { 0, -500000 }, which is not a valid timeval. Results for d >= 0 are
 * the same as with truncation.
 */
static void
d_to_tv(double d, struct timeval *tv)
{
	long sec = (long)d; /* truncates toward zero */

	if (d < sec)
		sec--; /* negative with a fraction: step down to the floor */

	tv->tv_sec = sec;
	tv->tv_usec = (d - sec) * 1000000;
}
472 | |
473 | static double |
474 | lfp_to_d(l_fixedpt_t lfp) |
475 | { |
476 | double ret; |
477 | lfp.int_partl = ntohl(lfp.int_partl); |
478 | lfp.fractionl = ntohl(lfp.fractionl); |
479 | ret = (double)lfp.int_partl + ((double)lfp.fractionl / UINT_MAX); |
480 | return ret; |
481 | } |
482 | static double |
483 | sfp_to_d(s_fixedpt_t sfp) |
484 | { |
485 | double ret; |
486 | sfp.int_parts = ntohs(sfp.int_parts); |
487 | sfp.fractions = ntohs(sfp.fractions); |
488 | ret = (double)sfp.int_parts + ((double)sfp.fractions / USHRT_MAX); |
489 | return ret; |
490 | } |
491 | #if ENABLE_FEATURE_NTPD_SERVER |
492 | static l_fixedpt_t |
493 | d_to_lfp(double d) |
494 | { |
495 | l_fixedpt_t lfp; |
496 | lfp.int_partl = (uint32_t)d; |
497 | lfp.fractionl = (uint32_t)((d - lfp.int_partl) * UINT_MAX); |
498 | lfp.int_partl = htonl(lfp.int_partl); |
499 | lfp.fractionl = htonl(lfp.fractionl); |
500 | return lfp; |
501 | } |
502 | static s_fixedpt_t |
503 | d_to_sfp(double d) |
504 | { |
505 | s_fixedpt_t sfp; |
506 | sfp.int_parts = (uint16_t)d; |
507 | sfp.fractions = (uint16_t)((d - sfp.int_parts) * USHRT_MAX); |
508 | sfp.int_parts = htons(sfp.int_parts); |
509 | sfp.fractions = htons(sfp.fractions); |
510 | return sfp; |
511 | } |
512 | #endif |
513 | |
514 | static double |
515 | dispersion(const datapoint_t *dp) |
516 | { |
517 | return dp->d_dispersion + FREQ_TOLERANCE * (G.cur_time - dp->d_recv_time); |
518 | } |
519 | |
520 | static double |
521 | root_distance(peer_t *p) |
522 | { |
523 | /* The root synchronization distance is the maximum error due to |
524 | * all causes of the local clock relative to the primary server. |
525 | * It is defined as half the total delay plus total dispersion |
526 | * plus peer jitter. |
527 | */ |
528 | return MAXD(MINDISP, p->lastpkt_rootdelay + p->lastpkt_delay) / 2 |
529 | + p->lastpkt_rootdisp |
530 | + p->filter_dispersion |
531 | + FREQ_TOLERANCE * (G.cur_time - p->lastpkt_recv_time) |
532 | + p->filter_jitter; |
533 | } |
534 | |
535 | static void |
536 | set_next(peer_t *p, unsigned t) |
537 | { |
538 | p->next_action_time = G.cur_time + t; |
539 | } |
540 | |
541 | /* |
542 | * Peer clock filter and its helpers |
543 | */ |
544 | static void |
545 | filter_datapoints(peer_t *p) |
546 | { |
547 | int i, idx; |
548 | double sum, wavg; |
549 | datapoint_t *fdp; |
550 | |
551 | #if 0 |
552 | /* Simulations have shown that use of *averaged* offset for p->filter_offset |
553 | * is in fact worse than simply using last received one: with large poll intervals |
554 | * (>= 2048) averaging code uses offset values which are outdated by hours, |
555 | * and time/frequency correction goes totally wrong when fed essentially bogus offsets. |
556 | */ |
557 | int got_newest; |
558 | double minoff, maxoff, w; |
559 | double x = x; /* for compiler */ |
560 | double oldest_off = oldest_off; |
561 | double oldest_age = oldest_age; |
562 | double newest_off = newest_off; |
563 | double newest_age = newest_age; |
564 | |
565 | fdp = p->filter_datapoint; |
566 | |
567 | minoff = maxoff = fdp[0].d_offset; |
568 | for (i = 1; i < NUM_DATAPOINTS; i++) { |
569 | if (minoff > fdp[i].d_offset) |
570 | minoff = fdp[i].d_offset; |
571 | if (maxoff < fdp[i].d_offset) |
572 | maxoff = fdp[i].d_offset; |
573 | } |
574 | |
575 | idx = p->datapoint_idx; /* most recent datapoint's index */ |
576 | /* Average offset: |
577 | * Drop two outliers and take weighted average of the rest: |
578 | * most_recent/2 + older1/4 + older2/8 ... + older5/32 + older6/32 |
579 | * we use older6/32, not older6/64 since sum of weights should be 1: |
580 | * 1/2 + 1/4 + 1/8 + 1/16 + 1/32 + 1/32 = 1 |
581 | */ |
582 | wavg = 0; |
583 | w = 0.5; |
584 | /* n-1 |
585 | * --- dispersion(i) |
586 | * filter_dispersion = \ ------------- |
587 | * / (i+1) |
588 | * --- 2 |
589 | * i=0 |
590 | */ |
591 | got_newest = 0; |
592 | sum = 0; |
593 | for (i = 0; i < NUM_DATAPOINTS; i++) { |
594 | VERB5 { |
595 | bb_error_msg("datapoint[%d]: off:%f disp:%f(%f) age:%f%s", |
596 | i, |
597 | fdp[idx].d_offset, |
598 | fdp[idx].d_dispersion, dispersion(&fdp[idx]), |
599 | G.cur_time - fdp[idx].d_recv_time, |
600 | (minoff == fdp[idx].d_offset || maxoff == fdp[idx].d_offset) |
601 | ? " (outlier by offset)" : "" |
602 | ); |
603 | } |
604 | |
605 | sum += dispersion(&fdp[idx]) / (2 << i); |
606 | |
607 | if (minoff == fdp[idx].d_offset) { |
608 | minoff -= 1; /* so that we don't match it ever again */ |
609 | } else |
610 | if (maxoff == fdp[idx].d_offset) { |
611 | maxoff += 1; |
612 | } else { |
613 | oldest_off = fdp[idx].d_offset; |
614 | oldest_age = G.cur_time - fdp[idx].d_recv_time; |
615 | if (!got_newest) { |
616 | got_newest = 1; |
617 | newest_off = oldest_off; |
618 | newest_age = oldest_age; |
619 | } |
620 | x = oldest_off * w; |
621 | wavg += x; |
622 | w /= 2; |
623 | } |
624 | |
625 | idx = (idx - 1) & (NUM_DATAPOINTS - 1); |
626 | } |
627 | p->filter_dispersion = sum; |
628 | wavg += x; /* add another older6/64 to form older6/32 */ |
629 | /* Fix systematic underestimation with large poll intervals. |
630 | * Imagine that we still have a bit of uncorrected drift, |
631 | * and poll interval is big (say, 100 sec). Offsets form a progression: |
632 | * 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 - 0.7 is most recent. |
633 | * The algorithm above drops 0.0 and 0.7 as outliers, |
634 | * and then we have this estimation, ~25% off from 0.7: |
635 | * 0.1/32 + 0.2/32 + 0.3/16 + 0.4/8 + 0.5/4 + 0.6/2 = 0.503125 |
636 | */ |
637 | x = oldest_age - newest_age; |
638 | if (x != 0) { |
639 | x = newest_age / x; /* in above example, 100 / (600 - 100) */ |
640 | if (x < 1) { /* paranoia check */ |
641 | x = (newest_off - oldest_off) * x; /* 0.5 * 100/500 = 0.1 */ |
642 | wavg += x; |
643 | } |
644 | } |
645 | p->filter_offset = wavg; |
646 | |
647 | #else |
648 | |
649 | fdp = p->filter_datapoint; |
650 | idx = p->datapoint_idx; /* most recent datapoint's index */ |
651 | |
652 | /* filter_offset: simply use the most recent value */ |
653 | p->filter_offset = fdp[idx].d_offset; |
654 | |
655 | /* n-1 |
656 | * --- dispersion(i) |
657 | * filter_dispersion = \ ------------- |
658 | * / (i+1) |
659 | * --- 2 |
660 | * i=0 |
661 | */ |
662 | wavg = 0; |
663 | sum = 0; |
664 | for (i = 0; i < NUM_DATAPOINTS; i++) { |
665 | sum += dispersion(&fdp[idx]) / (2 << i); |
666 | wavg += fdp[idx].d_offset; |
667 | idx = (idx - 1) & (NUM_DATAPOINTS - 1); |
668 | } |
669 | wavg /= NUM_DATAPOINTS; |
670 | p->filter_dispersion = sum; |
671 | #endif |
672 | |
673 | /* +----- -----+ ^ 1/2 |
674 | * | n-1 | |
675 | * | --- | |
676 | * | 1 \ 2 | |
677 | * filter_jitter = | --- * / (avg-offset_j) | |
678 | * | n --- | |
679 | * | j=0 | |
680 | * +----- -----+ |
681 | * where n is the number of valid datapoints in the filter (n > 1); |
682 | * if filter_jitter < precision then filter_jitter = precision |
683 | */ |
684 | sum = 0; |
685 | for (i = 0; i < NUM_DATAPOINTS; i++) { |
686 | sum += SQUARE(wavg - fdp[i].d_offset); |
687 | } |
688 | sum = SQRT(sum / NUM_DATAPOINTS); |
689 | p->filter_jitter = sum > G_precision_sec ? sum : G_precision_sec; |
690 | |
691 | VERB4 bb_error_msg("filter offset:%+f disp:%f jitter:%f", |
692 | p->filter_offset, |
693 | p->filter_dispersion, |
694 | p->filter_jitter); |
695 | } |
696 | |
697 | static void |
698 | reset_peer_stats(peer_t *p, double offset) |
699 | { |
700 | int i; |
701 | bool small_ofs = fabs(offset) < 16 * STEP_THRESHOLD; |
702 | |
703 | /* Used to set p->filter_datapoint[i].d_dispersion = MAXDISP |
704 | * and clear reachable bits, but this proved to be too agressive: |
705 | * after step (tested with suspinding laptop for ~30 secs), |
706 | * this caused all previous data to be considered invalid, |
707 | * making us needing to collect full ~8 datapoins per peer |
708 | * after step in order to start trusting them. |
709 | * In turn, this was making poll interval decrease even after |
710 | * step was done. (Poll interval decreases already before step |
711 | * in this scenario, because we see large offsets and end up with |
712 | * no good peer to select). |
713 | */ |
714 | |
715 | for (i = 0; i < NUM_DATAPOINTS; i++) { |
716 | if (small_ofs) { |
717 | p->filter_datapoint[i].d_recv_time += offset; |
718 | if (p->filter_datapoint[i].d_offset != 0) { |
719 | p->filter_datapoint[i].d_offset -= offset; |
720 | //bb_error_msg("p->filter_datapoint[%d].d_offset %f -> %f", |
721 | // i, |
722 | // p->filter_datapoint[i].d_offset + offset, |
723 | // p->filter_datapoint[i].d_offset); |
724 | } |
725 | } else { |
726 | p->filter_datapoint[i].d_recv_time = G.cur_time; |
727 | p->filter_datapoint[i].d_offset = 0; |
728 | /*p->filter_datapoint[i].d_dispersion = MAXDISP;*/ |
729 | } |
730 | } |
731 | if (small_ofs) { |
732 | p->lastpkt_recv_time += offset; |
733 | } else { |
734 | /*p->reachable_bits = 0;*/ |
735 | p->lastpkt_recv_time = G.cur_time; |
736 | } |
737 | filter_datapoints(p); /* recalc p->filter_xxx */ |
738 | VERB6 bb_error_msg("%s->lastpkt_recv_time=%f", p->p_dotted, p->lastpkt_recv_time); |
739 | } |
740 | |
741 | static void |
742 | add_peers(char *s) |
743 | { |
744 | peer_t *p; |
745 | |
746 | p = xzalloc(sizeof(*p)); |
747 | p->p_lsa = xhost2sockaddr(s, 123); |
748 | p->p_dotted = xmalloc_sockaddr2dotted_noport(&p->p_lsa->u.sa); |
749 | p->p_fd = -1; |
750 | p->p_xmt_msg.m_status = MODE_CLIENT | (NTP_VERSION << 3); |
751 | p->next_action_time = G.cur_time; /* = set_next(p, 0); */ |
752 | reset_peer_stats(p, 16 * STEP_THRESHOLD); |
753 | |
754 | llist_add_to(&G.ntp_peers, p); |
755 | G.peer_cnt++; |
756 | } |
757 | |
758 | static int |
759 | do_sendto(int fd, |
760 | const struct sockaddr *from, const struct sockaddr *to, socklen_t addrlen, |
761 | msg_t *msg, ssize_t len) |
762 | { |
763 | ssize_t ret; |
764 | |
765 | errno = 0; |
766 | if (!from) { |
767 | ret = sendto(fd, msg, len, MSG_DONTWAIT, to, addrlen); |
768 | } else { |
769 | ret = send_to_from(fd, msg, len, MSG_DONTWAIT, to, from, addrlen); |
770 | } |
771 | if (ret != len) { |
772 | bb_perror_msg("send failed"); |
773 | return -1; |
774 | } |
775 | return 0; |
776 | } |
777 | |
778 | static void |
779 | send_query_to_peer(peer_t *p) |
780 | { |
781 | /* Why do we need to bind()? |
782 | * See what happens when we don't bind: |
783 | * |
784 | * socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 3 |
785 | * setsockopt(3, SOL_IP, IP_TOS, [16], 4) = 0 |
786 | * gettimeofday({1259071266, 327885}, NULL) = 0 |
787 | * sendto(3, "xxx", 48, MSG_DONTWAIT, {sa_family=AF_INET, sin_port=htons(123), sin_addr=inet_addr("10.34.32.125")}, 16) = 48 |
788 | * ^^^ we sent it from some source port picked by kernel. |
789 | * time(NULL) = 1259071266 |
790 | * write(2, "ntpd: entering poll 15 secs\n", 28) = 28 |
791 | * poll([{fd=3, events=POLLIN}], 1, 15000) = 1 ([{fd=3, revents=POLLIN}]) |
792 | * recv(3, "yyy", 68, MSG_DONTWAIT) = 48 |
793 | * ^^^ this recv will receive packets to any local port! |
794 | * |
795 | * Uncomment this and use strace to see it in action: |
796 | */ |
797 | #define PROBE_LOCAL_ADDR /* { len_and_sockaddr lsa; lsa.len = LSA_SIZEOF_SA; getsockname(p->query.fd, &lsa.u.sa, &lsa.len); } */ |
798 | |
799 | if (p->p_fd == -1) { |
800 | int fd, family; |
801 | len_and_sockaddr *local_lsa; |
802 | |
803 | family = p->p_lsa->u.sa.sa_family; |
804 | p->p_fd = fd = xsocket_type(&local_lsa, family, SOCK_DGRAM); |
805 | /* local_lsa has "null" address and port 0 now. |
806 | * bind() ensures we have a *particular port* selected by kernel |
807 | * and remembered in p->p_fd, thus later recv(p->p_fd) |
808 | * receives only packets sent to this port. |
809 | */ |
810 | PROBE_LOCAL_ADDR |
811 | xbind(fd, &local_lsa->u.sa, local_lsa->len); |
812 | PROBE_LOCAL_ADDR |
813 | #if ENABLE_FEATURE_IPV6 |
814 | if (family == AF_INET) |
815 | #endif |
816 | setsockopt(fd, IPPROTO_IP, IP_TOS, &const_IPTOS_LOWDELAY, sizeof(const_IPTOS_LOWDELAY)); |
817 | free(local_lsa); |
818 | } |
819 | |
820 | /* Emit message _before_ attempted send. Think of a very short |
821 | * roundtrip networks: we need to go back to recv loop ASAP, |
822 | * to reduce delay. Printing messages after send works against that. |
823 | */ |
824 | VERB1 bb_error_msg("sending query to %s", p->p_dotted); |
825 | |
826 | /* |
827 | * Send out a random 64-bit number as our transmit time. The NTP |
828 | * server will copy said number into the originate field on the |
829 | * response that it sends us. This is totally legal per the SNTP spec. |
830 | * |
831 | * The impact of this is two fold: we no longer send out the current |
832 | * system time for the world to see (which may aid an attacker), and |
833 | * it gives us a (not very secure) way of knowing that we're not |
834 | * getting spoofed by an attacker that can't capture our traffic |
835 | * but can spoof packets from the NTP server we're communicating with. |
836 | * |
837 | * Save the real transmit timestamp locally. |
838 | */ |
839 | p->p_xmt_msg.m_xmttime.int_partl = random(); |
840 | p->p_xmt_msg.m_xmttime.fractionl = random(); |
841 | p->p_xmttime = gettime1900d(); |
842 | |
843 | /* Were doing it only if sendto worked, but |
844 | * loss of sync detection needs reachable_bits updated |
845 | * even if sending fails *locally*: |
846 | * "network is unreachable" because cable was pulled? |
847 | * We still need to declare "unsync" if this condition persists. |
848 | */ |
849 | p->reachable_bits <<= 1; |
850 | |
851 | if (do_sendto(p->p_fd, /*from:*/ NULL, /*to:*/ &p->p_lsa->u.sa, /*addrlen:*/ p->p_lsa->len, |
852 | &p->p_xmt_msg, NTP_MSGSIZE_NOAUTH) == -1 |
853 | ) { |
854 | close(p->p_fd); |
855 | p->p_fd = -1; |
856 | /* |
857 | * We know that we sent nothing. |
858 | * We can retry *soon* without fearing |
859 | * that we are flooding the peer. |
860 | */ |
861 | set_next(p, RETRY_INTERVAL); |
862 | return; |
863 | } |
864 | |
865 | set_next(p, RESPONSE_INTERVAL); |
866 | } |
867 | |
868 | |
869 | /* Note that there is no provision to prevent several run_scripts |
870 | * to be started in quick succession. In fact, it happens rather often |
871 | * if initial syncronization results in a step. |
872 | * You will see "step" and then "stratum" script runs, sometimes |
873 | * as close as only 0.002 seconds apart. |
874 | * Script should be ready to deal with this. |
875 | */ |
876 | static void run_script(const char *action, double offset) |
877 | { |
878 | char *argv[3]; |
879 | char *env1, *env2, *env3, *env4; |
880 | |
881 | G.last_script_run = G.cur_time; |
882 | |
883 | if (!G.script_name) |
884 | return; |
885 | |
886 | argv[0] = (char*) G.script_name; |
887 | argv[1] = (char*) action; |
888 | argv[2] = NULL; |
889 | |
890 | VERB1 bb_error_msg("executing '%s %s'", G.script_name, action); |
891 | |
892 | env1 = xasprintf("%s=%u", "stratum", G.stratum); |
893 | putenv(env1); |
894 | env2 = xasprintf("%s=%ld", "freq_drift_ppm", G.kernel_freq_drift); |
895 | putenv(env2); |
896 | env3 = xasprintf("%s=%u", "poll_interval", 1 << G.poll_exp); |
897 | putenv(env3); |
898 | env4 = xasprintf("%s=%f", "offset", offset); |
899 | putenv(env4); |
900 | /* Other items of potential interest: selected peer, |
901 | * rootdelay, reftime, rootdisp, refid, ntp_status, |
902 | * last_update_offset, last_update_recv_time, discipline_jitter, |
903 | * how many peers have reachable_bits = 0? |
904 | */ |
905 | |
906 | /* Don't want to wait: it may run hwclock --systohc, and that |
907 | * may take some time (seconds): */ |
908 | /*spawn_and_wait(argv);*/ |
909 | spawn(argv); |
910 | |
911 | unsetenv("stratum"); |
912 | unsetenv("freq_drift_ppm"); |
913 | unsetenv("poll_interval"); |
914 | unsetenv("offset"); |
915 | free(env1); |
916 | free(env2); |
917 | free(env3); |
918 | free(env4); |
919 | } |
920 | |
921 | static NOINLINE void |
922 | step_time(double offset) |
923 | { |
924 | llist_t *item; |
925 | double dtime; |
926 | struct timeval tvc, tvn; |
927 | char buf[sizeof("yyyy-mm-dd hh:mm:ss") + /*paranoia:*/ 4]; |
928 | time_t tval; |
929 | |
930 | gettimeofday(&tvc, NULL); /* never fails */ |
931 | dtime = tvc.tv_sec + (1.0e-6 * tvc.tv_usec) + offset; |
932 | d_to_tv(dtime, &tvn); |
933 | if (settimeofday(&tvn, NULL) == -1) |
934 | bb_perror_msg_and_die("settimeofday"); |
935 | |
936 | VERB2 { |
937 | tval = tvc.tv_sec; |
938 | strftime_YYYYMMDDHHMMSS(buf, sizeof(buf), &tval); |
939 | bb_error_msg("current time is %s.%06u", buf, (unsigned)tvc.tv_usec); |
940 | } |
941 | tval = tvn.tv_sec; |
942 | strftime_YYYYMMDDHHMMSS(buf, sizeof(buf), &tval); |
943 | bb_error_msg("setting time to %s.%06u (offset %+fs)", buf, (unsigned)tvn.tv_usec, offset); |
944 | |
945 | /* Correct various fields which contain time-relative values: */ |
946 | |
947 | /* Globals: */ |
948 | G.cur_time += offset; |
949 | G.last_update_recv_time += offset; |
950 | G.last_script_run += offset; |
951 | |
952 | /* p->lastpkt_recv_time, p->next_action_time and such: */ |
953 | for (item = G.ntp_peers; item != NULL; item = item->link) { |
954 | peer_t *pp = (peer_t *) item->data; |
955 | reset_peer_stats(pp, offset); |
956 | //bb_error_msg("offset:%+f pp->next_action_time:%f -> %f", |
957 | // offset, pp->next_action_time, pp->next_action_time + offset); |
958 | pp->next_action_time += offset; |
959 | if (pp->p_fd >= 0) { |
960 | /* We wait for reply from this peer too. |
961 | * But due to step we are doing, reply's data is no longer |
962 | * useful (in fact, it'll be bogus). Stop waiting for it. |
963 | */ |
964 | close(pp->p_fd); |
965 | pp->p_fd = -1; |
966 | set_next(pp, RETRY_INTERVAL); |
967 | } |
968 | } |
969 | } |
970 | |
971 | |
972 | /* |
973 | * Selection and clustering, and their helpers |
974 | */ |
975 | typedef struct { |
976 | peer_t *p; |
977 | int type; |
978 | double edge; |
979 | double opt_rd; /* optimization */ |
980 | } point_t; |
981 | static int |
982 | compare_point_edge(const void *aa, const void *bb) |
983 | { |
984 | const point_t *a = aa; |
985 | const point_t *b = bb; |
986 | if (a->edge < b->edge) { |
987 | return -1; |
988 | } |
989 | return (a->edge > b->edge); |
990 | } |
991 | typedef struct { |
992 | peer_t *p; |
993 | double metric; |
994 | } survivor_t; |
995 | static int |
996 | compare_survivor_metric(const void *aa, const void *bb) |
997 | { |
998 | const survivor_t *a = aa; |
999 | const survivor_t *b = bb; |
1000 | if (a->metric < b->metric) { |
1001 | return -1; |
1002 | } |
1003 | return (a->metric > b->metric); |
1004 | } |
1005 | static int |
1006 | fit(peer_t *p, double rd) |
1007 | { |
1008 | if ((p->reachable_bits & (p->reachable_bits-1)) == 0) { |
1009 | /* One or zero bits in reachable_bits */ |
1010 | VERB4 bb_error_msg("peer %s unfit for selection: unreachable", p->p_dotted); |
1011 | return 0; |
1012 | } |
1013 | #if 0 /* we filter out such packets earlier */ |
1014 | if ((p->lastpkt_status & LI_ALARM) == LI_ALARM |
1015 | || p->lastpkt_stratum >= MAXSTRAT |
1016 | ) { |
1017 | VERB4 bb_error_msg("peer %s unfit for selection: bad status/stratum", p->p_dotted); |
1018 | return 0; |
1019 | } |
1020 | #endif |
1021 | /* rd is root_distance(p) */ |
1022 | if (rd > MAXDIST + FREQ_TOLERANCE * (1 << G.poll_exp)) { |
1023 | VERB4 bb_error_msg("peer %s unfit for selection: root distance too high", p->p_dotted); |
1024 | return 0; |
1025 | } |
1026 | //TODO |
1027 | // /* Do we have a loop? */ |
1028 | // if (p->refid == p->dstaddr || p->refid == s.refid) |
1029 | // return 0; |
1030 | return 1; |
1031 | } |
1032 | static peer_t* |
1033 | select_and_cluster(void) |
1034 | { |
1035 | peer_t *p; |
1036 | llist_t *item; |
1037 | int i, j; |
1038 | int size = 3 * G.peer_cnt; |
1039 | /* for selection algorithm */ |
1040 | point_t point[size]; |
1041 | int num_points, num_candidates; |
1042 | double low, high; |
1043 | int num_falsetickers; |
1044 | /* for cluster algorithm */ |
1045 | survivor_t survivor[size]; |
1046 | int num_survivors; |
1047 | |
1048 | /* Selection */ |
1049 | |
1050 | num_points = 0; |
1051 | item = G.ntp_peers; |
1052 | if (G.initial_poll_complete) while (item != NULL) { |
1053 | double rd, offset; |
1054 | |
1055 | p = (peer_t *) item->data; |
1056 | rd = root_distance(p); |
1057 | offset = p->filter_offset; |
1058 | if (!fit(p, rd)) { |
1059 | item = item->link; |
1060 | continue; |
1061 | } |
1062 | |
1063 | VERB5 bb_error_msg("interval: [%f %f %f] %s", |
1064 | offset - rd, |
1065 | offset, |
1066 | offset + rd, |
1067 | p->p_dotted |
1068 | ); |
1069 | point[num_points].p = p; |
1070 | point[num_points].type = -1; |
1071 | point[num_points].edge = offset - rd; |
1072 | point[num_points].opt_rd = rd; |
1073 | num_points++; |
1074 | point[num_points].p = p; |
1075 | point[num_points].type = 0; |
1076 | point[num_points].edge = offset; |
1077 | point[num_points].opt_rd = rd; |
1078 | num_points++; |
1079 | point[num_points].p = p; |
1080 | point[num_points].type = 1; |
1081 | point[num_points].edge = offset + rd; |
1082 | point[num_points].opt_rd = rd; |
1083 | num_points++; |
1084 | item = item->link; |
1085 | } |
1086 | num_candidates = num_points / 3; |
1087 | if (num_candidates == 0) { |
1088 | VERB3 bb_error_msg("no valid datapoints%s", ", no peer selected"); |
1089 | return NULL; |
1090 | } |
1091 | //TODO: sorting does not seem to be done in reference code |
1092 | qsort(point, num_points, sizeof(point[0]), compare_point_edge); |
1093 | |
1094 | /* Start with the assumption that there are no falsetickers. |
1095 | * Attempt to find a nonempty intersection interval containing |
1096 | * the midpoints of all truechimers. |
1097 | * If a nonempty interval cannot be found, increase the number |
1098 | * of assumed falsetickers by one and try again. |
1099 | * If a nonempty interval is found and the number of falsetickers |
1100 | * is less than the number of truechimers, a majority has been found |
1101 | * and the midpoint of each truechimer represents |
1102 | * the candidates available to the cluster algorithm. |
1103 | */ |
1104 | num_falsetickers = 0; |
1105 | while (1) { |
1106 | int c; |
1107 | int num_midpoints = 0; |
1108 | |
1109 | low = 1 << 9; |
1110 | high = - (1 << 9); |
1111 | c = 0; |
1112 | for (i = 0; i < (int) num_points; i++) { |
1113 | /* We want to do: |
1114 | * if (point[i].type == -1) c++; |
1115 | * if (point[i].type == 1) c--; |
1116 | * and it's simpler to do it this way: |
1117 | */ |
1118 | c -= point[i].type; |
1119 | if (c >= num_candidates - num_falsetickers) { |
1120 | /* If it was c++ and it got big enough... */ |
1121 | low = point[i].edge; |
1122 | break; |
1123 | } |
1124 | if (point[i].type == 0) |
1125 | num_midpoints++; |
1126 | } |
1127 | c = 0; |
1128 | for (i = num_points-1; i >= 0; i--) { |
1129 | c += point[i].type; |
1130 | if (c >= num_candidates - num_falsetickers) { |
1131 | high = point[i].edge; |
1132 | break; |
1133 | } |
1134 | if (point[i].type == 0) |
1135 | num_midpoints++; |
1136 | } |
1137 | /* If the number of midpoints is greater than the number |
1138 | * of allowed falsetickers, the intersection contains at |
1139 | * least one truechimer with no midpoint - bad. |
1140 | * Also, interval should be nonempty. |
1141 | */ |
1142 | if (num_midpoints <= num_falsetickers && low < high) |
1143 | break; |
1144 | num_falsetickers++; |
1145 | if (num_falsetickers * 2 >= num_candidates) { |
1146 | VERB3 bb_error_msg("falsetickers:%d, candidates:%d%s", |
1147 | num_falsetickers, num_candidates, |
1148 | ", no peer selected"); |
1149 | return NULL; |
1150 | } |
1151 | } |
1152 | VERB4 bb_error_msg("selected interval: [%f, %f]; candidates:%d falsetickers:%d", |
1153 | low, high, num_candidates, num_falsetickers); |
1154 | |
1155 | /* Clustering */ |
1156 | |
1157 | /* Construct a list of survivors (p, metric) |
1158 | * from the chime list, where metric is dominated |
1159 | * first by stratum and then by root distance. |
1160 | * All other things being equal, this is the order of preference. |
1161 | */ |
1162 | num_survivors = 0; |
1163 | for (i = 0; i < num_points; i++) { |
1164 | if (point[i].edge < low || point[i].edge > high) |
1165 | continue; |
1166 | p = point[i].p; |
1167 | survivor[num_survivors].p = p; |
1168 | /* x.opt_rd == root_distance(p); */ |
1169 | survivor[num_survivors].metric = MAXDIST * p->lastpkt_stratum + point[i].opt_rd; |
1170 | VERB5 bb_error_msg("survivor[%d] metric:%f peer:%s", |
1171 | num_survivors, survivor[num_survivors].metric, p->p_dotted); |
1172 | num_survivors++; |
1173 | } |
1174 | /* There must be at least MIN_SELECTED survivors to satisfy the |
1175 | * correctness assertions. Ordinarily, the Byzantine criteria |
1176 | * require four survivors, but for the demonstration here, one |
1177 | * is acceptable. |
1178 | */ |
1179 | if (num_survivors < MIN_SELECTED) { |
1180 | VERB3 bb_error_msg("survivors:%d%s", |
1181 | num_survivors, |
1182 | ", no peer selected"); |
1183 | return NULL; |
1184 | } |
1185 | |
1186 | //looks like this is ONLY used by the fact that later we pick survivor[0]. |
1187 | //we can avoid sorting then, just find the minimum once! |
1188 | qsort(survivor, num_survivors, sizeof(survivor[0]), compare_survivor_metric); |
1189 | |
1190 | /* For each association p in turn, calculate the selection |
1191 | * jitter p->sjitter as the square root of the sum of squares |
1192 | * (p->offset - q->offset) over all q associations. The idea is |
1193 | * to repeatedly discard the survivor with maximum selection |
1194 | * jitter until a termination condition is met. |
1195 | */ |
1196 | while (1) { |
1197 | static int max_idx; |
1198 | double max_selection_jitter = max_selection_jitter; |
1199 | double min_jitter = min_jitter; |
1200 | |
1201 | if (num_survivors <= MIN_CLUSTERED) { |
1202 | VERB4 bb_error_msg("num_survivors %d <= %d, not discarding more", |
1203 | num_survivors, MIN_CLUSTERED); |
1204 | break; |
1205 | } |
1206 | |
1207 | /* To make sure a few survivors are left |
1208 | * for the clustering algorithm to chew on, |
1209 | * we stop if the number of survivors |
1210 | * is less than or equal to MIN_CLUSTERED (3). |
1211 | */ |
1212 | for (i = 0; i < num_survivors; i++) { |
1213 | double selection_jitter_sq; |
1214 | |
1215 | p = survivor[i].p; |
1216 | if (i == 0 || p->filter_jitter < min_jitter) |
1217 | min_jitter = p->filter_jitter; |
1218 | |
1219 | selection_jitter_sq = 0; |
1220 | for (j = 0; j < num_survivors; j++) { |
1221 | peer_t *q = survivor[j].p; |
1222 | selection_jitter_sq += SQUARE(p->filter_offset - q->filter_offset); |
1223 | } |
1224 | if (i == 0 || selection_jitter_sq > max_selection_jitter) { |
1225 | max_selection_jitter = selection_jitter_sq; |
1226 | max_idx = i; |
1227 | } |
1228 | VERB6 bb_error_msg("survivor %d selection_jitter^2:%f", |
1229 | i, selection_jitter_sq); |
1230 | } |
1231 | max_selection_jitter = SQRT(max_selection_jitter / num_survivors); |
1232 | VERB5 bb_error_msg("max_selection_jitter (at %d):%f min_jitter:%f", |
1233 | max_idx, max_selection_jitter, min_jitter); |
1234 | |
1235 | /* If the maximum selection jitter is less than the |
1236 | * minimum peer jitter, then tossing out more survivors |
1237 | * will not lower the minimum peer jitter, so we might |
1238 | * as well stop. |
1239 | */ |
1240 | if (max_selection_jitter < min_jitter) { |
1241 | VERB4 bb_error_msg("max_selection_jitter:%f < min_jitter:%f, num_survivors:%d, not discarding more", |
1242 | max_selection_jitter, min_jitter, num_survivors); |
1243 | break; |
1244 | } |
1245 | |
1246 | /* Delete survivor[max_idx] from the list |
1247 | * and go around again. |
1248 | */ |
1249 | VERB6 bb_error_msg("dropping survivor %d", max_idx); |
1250 | num_survivors--; |
1251 | while (max_idx < num_survivors) { |
1252 | survivor[max_idx] = survivor[max_idx + 1]; |
1253 | max_idx++; |
1254 | } |
1255 | } |
1256 | |
1257 | if (0) { |
1258 | /* Combine the offsets of the clustering algorithm survivors |
1259 | * using a weighted average with weight determined by the root |
1260 | * distance. Compute the selection jitter as the weighted RMS |
1261 | * difference between the first survivor and the remaining |
1262 | * survivors. In some cases the inherent clock jitter can be |
1263 | * reduced by not using this algorithm, especially when frequent |
1264 | * clockhopping is involved. bbox: thus we don't do it. |
1265 | */ |
1266 | double x, y, z, w; |
1267 | y = z = w = 0; |
1268 | for (i = 0; i < num_survivors; i++) { |
1269 | p = survivor[i].p; |
1270 | x = root_distance(p); |
1271 | y += 1 / x; |
1272 | z += p->filter_offset / x; |
1273 | w += SQUARE(p->filter_offset - survivor[0].p->filter_offset) / x; |
1274 | } |
1275 | //G.cluster_offset = z / y; |
1276 | //G.cluster_jitter = SQRT(w / y); |
1277 | } |
1278 | |
1279 | /* Pick the best clock. If the old system peer is on the list |
1280 | * and at the same stratum as the first survivor on the list, |
1281 | * then don't do a clock hop. Otherwise, select the first |
1282 | * survivor on the list as the new system peer. |
1283 | */ |
1284 | p = survivor[0].p; |
1285 | if (G.last_update_peer |
1286 | && G.last_update_peer->lastpkt_stratum <= p->lastpkt_stratum |
1287 | ) { |
1288 | /* Starting from 1 is ok here */ |
1289 | for (i = 1; i < num_survivors; i++) { |
1290 | if (G.last_update_peer == survivor[i].p) { |
1291 | VERB5 bb_error_msg("keeping old synced peer"); |
1292 | p = G.last_update_peer; |
1293 | goto keep_old; |
1294 | } |
1295 | } |
1296 | } |
1297 | G.last_update_peer = p; |
1298 | keep_old: |
1299 | VERB4 bb_error_msg("selected peer %s filter_offset:%+f age:%f", |
1300 | p->p_dotted, |
1301 | p->filter_offset, |
1302 | G.cur_time - p->lastpkt_recv_time |
1303 | ); |
1304 | return p; |
1305 | } |
1306 | |
1307 | |
1308 | /* |
1309 | * Local clock discipline and its helpers |
1310 | */ |
1311 | static void |
1312 | set_new_values(int disc_state, double offset, double recv_time) |
1313 | { |
1314 | /* Enter new state and set state variables. Note we use the time |
1315 | * of the last clock filter sample, which must be earlier than |
1316 | * the current time. |
1317 | */ |
1318 | VERB4 bb_error_msg("disc_state=%d last update offset=%f recv_time=%f", |
1319 | disc_state, offset, recv_time); |
1320 | G.discipline_state = disc_state; |
1321 | G.last_update_offset = offset; |
1322 | G.last_update_recv_time = recv_time; |
1323 | } |
/* Discipline the local clock using the latest filtered sample of peer p.
 *
 * If the offset exceeds STEP_THRESHOLD the clock is stepped (step_time),
 * otherwise the offset is handed to the kernel via adjtimex().
 * Along the way the discipline state machine (G.discipline_state),
 * jitter estimate, stratum and root delay/dispersion globals are updated,
 * and the -S script is run on "step" and "stratum" events.
 * With -q the process exits as soon as the time is known to be set.
 *
 * Return: -1: decrease poll interval, 0: leave as is, 1: increase.
 * (In the code as it stands, -1 is returned only from the spike-handling
 * branch, which is compiled out with #if 0.)
 */
static NOINLINE int
update_local_clock(peer_t *p)
{
	int rc;
	struct timex tmx;
	/* Note: can use G.cluster_offset instead: */
	double offset = p->filter_offset;
	double recv_time = p->lastpkt_recv_time;
	double abs_offset;
#if !USING_KERNEL_PLL_LOOP
	double freq_drift;
#endif
	double since_last_update;
	double etemp, dtemp;

	abs_offset = fabs(offset);

#if 0
	/* If needed, -S script can do it by looking at $offset
	 * env var and killing parent */
	/* If the offset is too large, give up and go home */
	if (abs_offset > PANIC_THRESHOLD) {
		bb_error_msg_and_die("offset %f far too big, exiting", offset);
	}
#endif

	/* If this is an old update, for instance as the result
	 * of a system peer change, avoid it. We never use
	 * an old sample or the same sample twice.
	 */
	if (recv_time <= G.last_update_recv_time) {
		VERB3 bb_error_msg("update from %s: same or older datapoint, not using it",
			p->p_dotted);
		return 0; /* "leave poll interval as is" */
	}

	/* Clock state machine transition function. This is where the
	 * action is and defines how the system reacts to large time
	 * and frequency errors.
	 */
	since_last_update = recv_time - G.reftime;
#if !USING_KERNEL_PLL_LOOP
	freq_drift = 0;
#endif
#if USING_INITIAL_FREQ_ESTIMATION
	if (G.discipline_state == STATE_FREQ) {
		/* Ignore updates until the stepout threshold */
		if (since_last_update < WATCH_THRESHOLD) {
			VERB4 bb_error_msg("measuring drift, datapoint ignored, %f sec remains",
					WATCH_THRESHOLD - since_last_update);
			return 0; /* "leave poll interval as is" */
		}
# if !USING_KERNEL_PLL_LOOP
		/* Initial frequency estimate: offset change over elapsed time */
		freq_drift = (offset - G.last_update_offset) / since_last_update;
# endif
	}
#endif

	/* There are two main regimes: when the
	 * offset exceeds the step threshold and when it does not.
	 */
	if (abs_offset > STEP_THRESHOLD) {
#if 0
		double remains;

// This "spike state" seems to be useless, peer selection already drops
// occasional "bad" datapoints. If we are here, there were _many_
// large offsets. When a few first large offsets are seen,
// we end up in "no valid datapoints, no peer selected" state.
// Only when enough of them are seen (which means it's not a fluke),
// we end up here. Looks like _our_ clock is off.
		switch (G.discipline_state) {
		case STATE_SYNC:
			/* The first outlier: ignore it, switch to SPIK state */
			VERB3 bb_error_msg("update from %s: offset:%+f, spike%s",
				p->p_dotted, offset,
				"");
			G.discipline_state = STATE_SPIK;
			return -1; /* "decrease poll interval" */

		case STATE_SPIK:
			/* Ignore succeeding outliers until either an inlier
			 * is found or the stepout threshold is exceeded.
			 */
			remains = WATCH_THRESHOLD - since_last_update;
			if (remains > 0) {
				VERB3 bb_error_msg("update from %s: offset:%+f, spike%s",
					p->p_dotted, offset,
					", datapoint ignored");
				return -1; /* "decrease poll interval" */
			}
			/* fall through: we need to step */
		} /* switch */
#endif

		/* Step the time and clamp down the poll interval.
		 *
		 * In NSET state an initial frequency correction is
		 * not available, usually because the frequency file has
		 * not yet been written. Since the time is outside the
		 * capture range, the clock is stepped. The frequency
		 * will be set directly following the stepout interval.
		 *
		 * In FSET state the initial frequency has been set
		 * from the frequency file. Since the time is outside
		 * the capture range, the clock is stepped immediately,
		 * rather than after the stepout interval. Guys get
		 * nervous if it takes 17 minutes to set the clock for
		 * the first time.
		 *
		 * In SPIK state the stepout threshold has expired and
		 * the phase is still above the step threshold. Note
		 * that a single spike greater than the step threshold
		 * is always suppressed, even at the longer poll
		 * intervals.
		 */
		VERB4 bb_error_msg("stepping time by %+f; poll_exp=MINPOLL", offset);
		step_time(offset);
		if (option_mask32 & OPT_q) {
			/* We were only asked to set time once. Done. */
			exit(0);
		}

		/* Restart polling at the shortest interval and mark ourselves
		 * as MAXSTRAT until a later (non-step) update sets the stratum.
		 */
		G.polladj_count = 0;
		G.poll_exp = MINPOLL;
		G.stratum = MAXSTRAT;

		run_script("step", offset);

		/* The sample was timestamped before the step: move its receive
		 * time by the same offset as was applied to the clock.
		 */
		recv_time += offset;

#if USING_INITIAL_FREQ_ESTIMATION
		if (G.discipline_state == STATE_NSET) {
			set_new_values(STATE_FREQ, /*offset:*/ 0, recv_time);
			return 1; /* "ok to increase poll interval" */
		}
#endif
		/* The clock was just stepped by 'offset', nothing is left to slew */
		abs_offset = offset = 0;
		set_new_values(STATE_SYNC, offset, recv_time);

	} else { /* abs_offset <= STEP_THRESHOLD */

		if (G.poll_exp < MINPOLL && G.initial_poll_complete) {
			VERB4 bb_error_msg("small offset:%+f, disabling burst mode", offset);
			G.polladj_count = 0;
			G.poll_exp = MINPOLL;
		}

		/* Compute the clock jitter as the RMS of exponentially
		 * weighted offset differences. Used by the poll adjust code.
		 */
		etemp = SQUARE(G.discipline_jitter);
		dtemp = SQUARE(offset - G.last_update_offset);
		G.discipline_jitter = SQRT(etemp + (dtemp - etemp) / AVG);

		switch (G.discipline_state) {
		case STATE_NSET:
			if (option_mask32 & OPT_q) {
				/* We were only asked to set time once.
				 * The clock is precise enough, no need to step.
				 */
				exit(0);
			}
#if USING_INITIAL_FREQ_ESTIMATION
			/* This is the first update received and the frequency
			 * has not been initialized. The first thing to do
			 * is directly measure the oscillator frequency.
			 */
			set_new_values(STATE_FREQ, offset, recv_time);
#else
			set_new_values(STATE_SYNC, offset, recv_time);
#endif
			/* NOTE(review): this message says "FREQ" even when
			 * USING_INITIAL_FREQ_ESTIMATION is off and the new state
			 * is actually SYNC.
			 */
			VERB4 bb_error_msg("transitioning to FREQ, datapoint ignored");
			return 0; /* "leave poll interval as is" */

#if 0 /* this is dead code for now */
		case STATE_FSET:
			/* This is the first update and the frequency
			 * has been initialized. Adjust the phase, but
			 * don't adjust the frequency until the next update.
			 */
			set_new_values(STATE_SYNC, offset, recv_time);
			/* freq_drift remains 0 */
			break;
#endif

#if USING_INITIAL_FREQ_ESTIMATION
		case STATE_FREQ:
			/* since_last_update >= WATCH_THRESHOLD, we waited enough.
			 * Correct the phase and frequency and switch to SYNC state.
			 * freq_drift was already estimated (see code above)
			 */
			set_new_values(STATE_SYNC, offset, recv_time);
			break;
#endif

		default:
#if !USING_KERNEL_PLL_LOOP
			/* Compute freq_drift due to PLL and FLL contributions.
			 *
			 * The FLL and PLL frequency gain constants
			 * depend on the poll interval and Allan
			 * intercept. The FLL is not used below one-half
			 * the Allan intercept. Above that the loop gain
			 * increases in steps to 1 / AVG.
			 */
			if ((1 << G.poll_exp) > ALLAN / 2) {
				etemp = FLL - G.poll_exp;
				if (etemp < AVG)
					etemp = AVG;
				freq_drift += (offset - G.last_update_offset) / (MAXD(since_last_update, ALLAN) * etemp);
			}
			/* For the PLL the integration interval
			 * (numerator) is the minimum of the update
			 * interval and poll interval. This allows
			 * oversampling, but not undersampling.
			 */
			etemp = MIND(since_last_update, (1 << G.poll_exp));
			dtemp = (4 * PLL) << G.poll_exp;
			freq_drift += offset * etemp / SQUARE(dtemp);
#endif
			set_new_values(STATE_SYNC, offset, recv_time);
			break;
		}
		/* Our stratum is one more than the peer's; tell the script if it changed */
		if (G.stratum != p->lastpkt_stratum + 1) {
			G.stratum = p->lastpkt_stratum + 1;
			run_script("stratum", offset);
		}
	}

	/* Jitter is never allowed to drop below the clock precision,
	 * which also keeps the division below away from zero.
	 */
	if (G.discipline_jitter < G_precision_sec)
		G.discipline_jitter = G_precision_sec;
	G.offset_to_jitter_ratio = abs_offset / G.discipline_jitter;

	/* Remember from whom and when we synchronized, and derive our own
	 * root delay and root dispersion from the peer's values.
	 */
	G.reftime = G.cur_time;
	G.ntp_status = p->lastpkt_status;
	G.refid = p->lastpkt_refid;
	G.rootdelay = p->lastpkt_rootdelay + p->lastpkt_delay;
	dtemp = p->filter_jitter; // SQRT(SQUARE(p->filter_jitter) + SQUARE(G.cluster_jitter));
	dtemp += MAXD(p->filter_dispersion + FREQ_TOLERANCE * (G.cur_time - p->lastpkt_recv_time) + abs_offset, MINDISP);
	G.rootdisp = p->lastpkt_rootdisp + dtemp;
	VERB4 bb_error_msg("updating leap/refid/reftime/rootdisp from peer %s", p->p_dotted);

	/* We are in STATE_SYNC now, but did not do adjtimex yet.
	 * (Any other state does not reach this, they all return earlier)
	 * By this time, freq_drift and offset are set
	 * to values suitable for adjtimex.
	 */
#if !USING_KERNEL_PLL_LOOP
	/* Calculate the new frequency drift and frequency stability (wander).
	 * Compute the clock wander as the RMS of exponentially weighted
	 * frequency differences. This is not used directly, but can,
	 * along with the jitter, be a highly useful monitoring and
	 * debugging tool.
	 */
	dtemp = G.discipline_freq_drift + freq_drift;
	G.discipline_freq_drift = MAXD(MIND(MAXDRIFT, dtemp), -MAXDRIFT);
	etemp = SQUARE(G.discipline_wander);
	dtemp = SQUARE(dtemp);
	G.discipline_wander = SQRT(etemp + (dtemp - etemp) / AVG);

	VERB4 bb_error_msg("discipline freq_drift=%.9f(int:%ld corr:%e) wander=%f",
			G.discipline_freq_drift,
			(long)(G.discipline_freq_drift * 65536e6),
			freq_drift,
			G.discipline_wander);
#endif
	/* Debugging aid: with modes == 0, adjtimex only reads the
	 * kernel's current values, which are then logged.
	 */
	VERB4 {
		memset(&tmx, 0, sizeof(tmx));
		if (adjtimex(&tmx) < 0)
			bb_perror_msg_and_die("adjtimex");
		bb_error_msg("p adjtimex freq:%ld offset:%+ld status:0x%x tc:%ld",
				tmx.freq, tmx.offset, tmx.status, tmx.constant);
	}

	memset(&tmx, 0, sizeof(tmx));
#if 0
//doesn't work, offset remains 0 (!) in kernel:
//ntpd: set adjtimex freq:1786097 tmx.offset:77487
//ntpd: prev adjtimex freq:1786097 tmx.offset:0
//ntpd: cur adjtimex freq:1786097 tmx.offset:0
	tmx.modes = ADJ_FREQUENCY | ADJ_OFFSET;
	/* 65536 is one ppm */
	tmx.freq = G.discipline_freq_drift * 65536e6;
#endif
	/* Pass the offset, status bits and time constant to the kernel */
	tmx.modes = ADJ_OFFSET | ADJ_STATUS | ADJ_TIMECONST;// | ADJ_MAXERROR | ADJ_ESTERROR;
	tmx.offset = (offset * 1000000); /* usec */
	tmx.status = STA_PLL;
	/* Forward the peer's leap second announcement, if any */
	if (G.ntp_status & LI_PLUSSEC)
		tmx.status |= STA_INS;
	if (G.ntp_status & LI_MINUSSEC)
		tmx.status |= STA_DEL;

	tmx.constant = G.poll_exp - 4;
	/* EXPERIMENTAL.
	 * The below if statement should be unnecessary, but...
	 * It looks like Linux kernel's PLL is far too gentle in changing
	 * tmx.freq in response to clock offset. Offset keeps growing
	 * and eventually we fall back to smaller poll intervals.
	 * We can make correction more aggressive (about x2) by supplying
	 * PLL time constant which is one less than the real one.
	 * To be on a safe side, let's do it only if offset is significantly
	 * larger than jitter.
	 */
	if (tmx.constant > 0 && G.offset_to_jitter_ratio >= TIMECONST_HACK_GATE)
		tmx.constant--;

	//tmx.esterror = (uint32_t)(clock_jitter * 1e6);
	//tmx.maxerror = (uint32_t)((sys_rootdelay / 2 + sys_rootdisp) * 1e6);
	rc = adjtimex(&tmx);
	if (rc < 0)
		bb_perror_msg_and_die("adjtimex");
	/* NB: here kernel returns constant == G.poll_exp, not == G.poll_exp - 4.
	 * Not sure why. Perhaps it is normal.
	 */
	VERB4 bb_error_msg("adjtimex:%d freq:%ld offset:%+ld status:0x%x",
				rc, tmx.freq, tmx.offset, tmx.status);
	/* 65536 units of tmx.freq are one ppm: keep the integer ppm part
	 * (this is what run_script exports as $freq_drift_ppm).
	 */
	G.kernel_freq_drift = tmx.freq / 65536;
	VERB2 bb_error_msg("update from:%s offset:%+f jitter:%f clock drift:%+.3fppm tc:%d",
			p->p_dotted, offset, G.discipline_jitter, (double)tmx.freq / 65536, (int)tmx.constant);

	return 1; /* "ok to increase poll interval" */
}
1648 | |
1649 | |
1650 | /* |
1651 | * We've got a new reply packet from a peer, process it |
1652 | * (helpers first) |
1653 | */ |
1654 | static unsigned |
1655 | retry_interval(void) |
1656 | { |
1657 | /* Local problem, want to retry soon */ |
1658 | unsigned interval, r; |
1659 | interval = RETRY_INTERVAL; |
1660 | r = random(); |
1661 | interval += r % (unsigned)(RETRY_INTERVAL / 4); |
1662 | VERB4 bb_error_msg("chose retry interval:%u", interval); |
1663 | return interval; |
1664 | } |
1665 | static unsigned |
1666 | poll_interval(int exponent) |
1667 | { |
1668 | unsigned interval, r; |
1669 | exponent = G.poll_exp + exponent; |
1670 | if (exponent < 0) |
1671 | exponent = 0; |
1672 | interval = 1 << exponent; |
1673 | r = random(); |
1674 | interval += ((r & (interval-1)) >> 4) + ((r >> 8) & 1); /* + 1/16 of interval, max */ |
1675 | VERB4 bb_error_msg("chose poll interval:%u (poll_exp:%d exp:%d)", interval, G.poll_exp, exponent); |
1676 | return interval; |
1677 | } |
1678 | static NOINLINE void |
1679 | recv_and_process_peer_pkt(peer_t *p) |
1680 | { |
1681 | int rc; |
1682 | ssize_t size; |
1683 | msg_t msg; |
1684 | double T1, T2, T3, T4; |
1685 | double dv, offset; |
1686 | unsigned interval; |
1687 | datapoint_t *datapoint; |
1688 | peer_t *q; |
1689 | |
1690 | offset = 0; |
1691 | |
1692 | /* We can recvfrom here and check from.IP, but some multihomed |
1693 | * ntp servers reply from their *other IP*. |
1694 | * TODO: maybe we should check at least what we can: from.port == 123? |
1695 | */ |
1696 | size = recv(p->p_fd, &msg, sizeof(msg), MSG_DONTWAIT); |
1697 | if (size == -1) { |
1698 | bb_perror_msg("recv(%s) error", p->p_dotted); |
1699 | if (errno == EHOSTUNREACH || errno == EHOSTDOWN |
1700 | || errno == ENETUNREACH || errno == ENETDOWN |
1701 | || errno == ECONNREFUSED || errno == EADDRNOTAVAIL |
1702 | || errno == EAGAIN |
1703 | ) { |
1704 | //TODO: always do this? |
1705 | interval = retry_interval(); |
1706 | goto set_next_and_ret; |
1707 | } |
1708 | xfunc_die(); |
1709 | } |
1710 | |
1711 | if (size != NTP_MSGSIZE_NOAUTH && size != NTP_MSGSIZE) { |
1712 | bb_error_msg("malformed packet received from %s", p->p_dotted); |
1713 | return; |
1714 | } |
1715 | |
1716 | if (msg.m_orgtime.int_partl != p->p_xmt_msg.m_xmttime.int_partl |
1717 | || msg.m_orgtime.fractionl != p->p_xmt_msg.m_xmttime.fractionl |
1718 | ) { |
1719 | /* Somebody else's packet */ |
1720 | return; |
1721 | } |
1722 | |
1723 | /* We do not expect any more packets from this peer for now. |
1724 | * Closing the socket informs kernel about it. |
1725 | * We open a new socket when we send a new query. |
1726 | */ |
1727 | close(p->p_fd); |
1728 | p->p_fd = -1; |
1729 | |
1730 | if ((msg.m_status & LI_ALARM) == LI_ALARM |
1731 | || msg.m_stratum == 0 |
1732 | || msg.m_stratum > NTP_MAXSTRATUM |
1733 | ) { |
1734 | // TODO: stratum 0 responses may have commands in 32-bit m_refid field: |
1735 | // "DENY", "RSTR" - peer does not like us at all |
1736 | // "RATE" - peer is overloaded, reduce polling freq |
1737 | bb_error_msg("reply from %s: peer is unsynced", p->p_dotted); |
1738 | goto pick_normal_interval; |
1739 | } |
1740 | |
1741 | // /* Verify valid root distance */ |
1742 | // if (msg.m_rootdelay / 2 + msg.m_rootdisp >= MAXDISP || p->lastpkt_reftime > msg.m_xmt) |
1743 | // return; /* invalid header values */ |
1744 | |
1745 | p->lastpkt_status = msg.m_status; |
1746 | p->lastpkt_stratum = msg.m_stratum; |
1747 | p->lastpkt_rootdelay = sfp_to_d(msg.m_rootdelay); |
1748 | p->lastpkt_rootdisp = sfp_to_d(msg.m_rootdisp); |
1749 | p->lastpkt_refid = msg.m_refid; |
1750 | |
1751 | /* |
1752 | * From RFC 2030 (with a correction to the delay math): |
1753 | * |
1754 | * Timestamp Name ID When Generated |
1755 | * ------------------------------------------------------------ |
1756 | * Originate Timestamp T1 time request sent by client |
1757 | * Receive Timestamp T2 time request received by server |
1758 | * Transmit Timestamp T3 time reply sent by server |
1759 | * Destination Timestamp T4 time reply received by client |
1760 | * |
1761 | * The roundtrip delay and local clock offset are defined as |
1762 | * |
1763 | * delay = (T4 - T1) - (T3 - T2); offset = ((T2 - T1) + (T3 - T4)) / 2 |
1764 | */ |
1765 | T1 = p->p_xmttime; |
1766 | T2 = lfp_to_d(msg.m_rectime); |
1767 | T3 = lfp_to_d(msg.m_xmttime); |
1768 | T4 = G.cur_time; |
1769 | |
1770 | p->lastpkt_recv_time = T4; |
1771 | VERB6 bb_error_msg("%s->lastpkt_recv_time=%f", p->p_dotted, p->lastpkt_recv_time); |
1772 | |
1773 | /* The delay calculation is a special case. In cases where the |
1774 | * server and client clocks are running at different rates and |
1775 | * with very fast networks, the delay can appear negative. In |
1776 | * order to avoid violating the Principle of Least Astonishment, |
1777 | * the delay is clamped not less than the system precision. |
1778 | */ |
1779 | dv = p->lastpkt_delay; |
1780 | p->lastpkt_delay = (T4 - T1) - (T3 - T2); |
1781 | if (p->lastpkt_delay < G_precision_sec) |
1782 | p->lastpkt_delay = G_precision_sec; |
1783 | /* |
1784 | * If this packet's delay is much bigger than the last one, |
1785 | * it's better to just ignore it than use its much less precise value. |
1786 | */ |
1787 | if (p->reachable_bits && p->lastpkt_delay > dv * BAD_DELAY_GROWTH) { |
1788 | bb_error_msg("reply from %s: delay %f is too high, ignoring", p->p_dotted, p->lastpkt_delay); |
1789 | goto pick_normal_interval; |
1790 | } |
1791 | |
1792 | p->datapoint_idx = p->reachable_bits ? (p->datapoint_idx + 1) % NUM_DATAPOINTS : 0; |
1793 | datapoint = &p->filter_datapoint[p->datapoint_idx]; |
1794 | datapoint->d_recv_time = T4; |
1795 | datapoint->d_offset = offset = ((T2 - T1) + (T3 - T4)) / 2; |
1796 | datapoint->d_dispersion = LOG2D(msg.m_precision_exp) + G_precision_sec; |
1797 | if (!p->reachable_bits) { |
1798 | /* 1st datapoint ever - replicate offset in every element */ |
1799 | int i; |
1800 | for (i = 0; i < NUM_DATAPOINTS; i++) { |
1801 | p->filter_datapoint[i].d_offset = offset; |
1802 | } |
1803 | } |
1804 | |
1805 | p->reachable_bits |= 1; |
1806 | if ((MAX_VERBOSE && G.verbose) || (option_mask32 & OPT_w)) { |
1807 | bb_error_msg("reply from %s: offset:%+f delay:%f status:0x%02x strat:%d refid:0x%08x rootdelay:%f reach:0x%02x", |
1808 | p->p_dotted, |
1809 | offset, |
1810 | p->lastpkt_delay, |
1811 | p->lastpkt_status, |
1812 | p->lastpkt_stratum, |
1813 | p->lastpkt_refid, |
1814 | p->lastpkt_rootdelay, |
1815 | p->reachable_bits |
1816 | /* not shown: m_ppoll, m_precision_exp, m_rootdisp, |
1817 | * m_reftime, m_orgtime, m_rectime, m_xmttime |
1818 | */ |
1819 | ); |
1820 | } |
1821 | |
1822 | /* Muck with statictics and update the clock */ |
1823 | filter_datapoints(p); |
1824 | q = select_and_cluster(); |
1825 | rc = -1; |
1826 | if (q) { |
1827 | rc = 0; |
1828 | if (!(option_mask32 & OPT_w)) { |
1829 | rc = update_local_clock(q); |
1830 | /* If drift is dangerously large, immediately |
1831 | * drop poll interval one step down. |
1832 | */ |
1833 | if (fabs(q->filter_offset) >= POLLDOWN_OFFSET) { |
1834 | VERB4 bb_error_msg("offset:%+f > POLLDOWN_OFFSET", q->filter_offset); |
1835 | goto poll_down; |
1836 | } |
1837 | } |
1838 | } |
1839 | /* else: no peer selected, rc = -1: we want to poll more often */ |
1840 | |
1841 | if (rc != 0) { |
1842 | /* Adjust the poll interval by comparing the current offset |
1843 | * with the clock jitter. If the offset is less than |
1844 | * the clock jitter times a constant, then the averaging interval |
1845 | * is increased, otherwise it is decreased. A bit of hysteresis |
1846 | * helps calm the dance. Works best using burst mode. |
1847 | */ |
1848 | if (rc > 0 && G.offset_to_jitter_ratio <= POLLADJ_GATE) { |
1849 | /* was += G.poll_exp but it is a bit |
1850 | * too optimistic for my taste at high poll_exp's */ |
1851 | G.polladj_count += MINPOLL; |
1852 | if (G.polladj_count > POLLADJ_LIMIT) { |
1853 | G.polladj_count = 0; |
1854 | if (G.poll_exp < MAXPOLL) { |
1855 | G.poll_exp++; |
1856 | VERB4 bb_error_msg("polladj: discipline_jitter:%f ++poll_exp=%d", |
1857 | G.discipline_jitter, G.poll_exp); |
1858 | } |
1859 | } else { |
1860 | VERB4 bb_error_msg("polladj: incr:%d", G.polladj_count); |
1861 | } |
1862 | } else { |
1863 | G.polladj_count -= G.poll_exp * 2; |
1864 | if (G.polladj_count < -POLLADJ_LIMIT || G.poll_exp >= BIGPOLL) { |
1865 | poll_down: |
1866 | G.polladj_count = 0; |
1867 | if (G.poll_exp > MINPOLL) { |
1868 | llist_t *item; |
1869 | |
1870 | G.poll_exp--; |
1871 | /* Correct p->next_action_time in each peer |
1872 | * which waits for sending, so that they send earlier. |
1873 | * Old pp->next_action_time are on the order |
1874 | * of t + (1 << old_poll_exp) + small_random, |
1875 | * we simply need to subtract ~half of that. |
1876 | */ |
1877 | for (item = G.ntp_peers; item != NULL; item = item->link) { |
1878 | peer_t *pp = (peer_t *) item->data; |
1879 | if (pp->p_fd < 0) |
1880 | pp->next_action_time -= (1 << G.poll_exp); |
1881 | } |
1882 | VERB4 bb_error_msg("polladj: discipline_jitter:%f --poll_exp=%d", |
1883 | G.discipline_jitter, G.poll_exp); |
1884 | } |
1885 | } else { |
1886 | VERB4 bb_error_msg("polladj: decr:%d", G.polladj_count); |
1887 | } |
1888 | } |
1889 | } |
1890 | |
1891 | /* Decide when to send new query for this peer */ |
1892 | pick_normal_interval: |
1893 | interval = poll_interval(0); |
1894 | if (fabs(offset) >= STEP_THRESHOLD * 8 && interval > BIGOFF_INTERVAL) { |
1895 | /* If we are synced, offsets are less than STEP_THRESHOLD, |
1896 | * or at the very least not much larger than it. |
1897 | * Now we see a largish one. |
1898 | * Either this peer is feeling bad, or packet got corrupted, |
1899 | * or _our_ clock is wrong now and _all_ peers will show similar |
1900 | * largish offsets too. |
1901 | * I observed this with laptop suspend stopping clock. |
1902 | * In any case, it makes sense to make next request soonish: |
1903 | * cases 1 and 2: get a better datapoint, |
1904 | * case 3: allows to resync faster. |
1905 | */ |
1906 | interval = BIGOFF_INTERVAL; |
1907 | } |
1908 | |
1909 | set_next_and_ret: |
1910 | set_next(p, interval); |
1911 | } |
1912 | |
1913 | #if ENABLE_FEATURE_NTPD_SERVER |
1914 | static NOINLINE void |
1915 | recv_and_process_client_pkt(void /*int fd*/) |
1916 | { |
1917 | ssize_t size; |
1918 | //uint8_t version; |
1919 | len_and_sockaddr *to; |
1920 | struct sockaddr *from; |
1921 | msg_t msg; |
1922 | uint8_t query_status; |
1923 | l_fixedpt_t query_xmttime; |
1924 | |
1925 | to = get_sock_lsa(G_listen_fd); |
1926 | from = xzalloc(to->len); |
1927 | |
1928 | size = recv_from_to(G_listen_fd, &msg, sizeof(msg), MSG_DONTWAIT, from, &to->u.sa, to->len); |
1929 | if (size != NTP_MSGSIZE_NOAUTH && size != NTP_MSGSIZE) { |
1930 | char *addr; |
1931 | if (size < 0) { |
1932 | if (errno == EAGAIN) |
1933 | goto bail; |
1934 | bb_perror_msg_and_die("recv"); |
1935 | } |
1936 | addr = xmalloc_sockaddr2dotted_noport(from); |
1937 | bb_error_msg("malformed packet received from %s: size %u", addr, (int)size); |
1938 | free(addr); |
1939 | goto bail; |
1940 | } |
1941 | |
1942 | query_status = msg.m_status; |
1943 | query_xmttime = msg.m_xmttime; |
1944 | |
1945 | /* Build a reply packet */ |
1946 | memset(&msg, 0, sizeof(msg)); |
1947 | msg.m_status = G.stratum < MAXSTRAT ? (G.ntp_status & LI_MASK) : LI_ALARM; |
1948 | msg.m_status |= (query_status & VERSION_MASK); |
1949 | msg.m_status |= ((query_status & MODE_MASK) == MODE_CLIENT) ? |
1950 | MODE_SERVER : MODE_SYM_PAS; |
1951 | msg.m_stratum = G.stratum; |
1952 | msg.m_ppoll = G.poll_exp; |
1953 | msg.m_precision_exp = G_precision_exp; |
1954 | /* this time was obtained between poll() and recv() */ |
1955 | msg.m_rectime = d_to_lfp(G.cur_time); |
1956 | msg.m_xmttime = d_to_lfp(gettime1900d()); /* this instant */ |
1957 | if (G.peer_cnt == 0) { |
1958 | /* we have no peers: "stratum 1 server" mode. reftime = our own time */ |
1959 | G.reftime = G.cur_time; |
1960 | } |
1961 | msg.m_reftime = d_to_lfp(G.reftime); |
1962 | msg.m_orgtime = query_xmttime; |
1963 | msg.m_rootdelay = d_to_sfp(G.rootdelay); |
1964 | //simple code does not do this, fix simple code! |
1965 | msg.m_rootdisp = d_to_sfp(G.rootdisp); |
1966 | //version = (query_status & VERSION_MASK); /* ... >> VERSION_SHIFT - done below instead */ |
1967 | msg.m_refid = G.refid; // (version > (3 << VERSION_SHIFT)) ? G.refid : G.refid3; |
1968 | |
1969 | /* We reply from the local address packet was sent to, |
1970 | * this makes to/from look swapped here: */ |
1971 | do_sendto(G_listen_fd, |
1972 | /*from:*/ &to->u.sa, /*to:*/ from, /*addrlen:*/ to->len, |
1973 | &msg, size); |
1974 | |
1975 | bail: |
1976 | free(to); |
1977 | free(from); |
1978 | } |
1979 | #endif |
1980 | |
1981 | /* Upstream ntpd's options: |
1982 | * |
1983 | * -4 Force DNS resolution of host names to the IPv4 namespace. |
1984 | * -6 Force DNS resolution of host names to the IPv6 namespace. |
1985 | * -a Require cryptographic authentication for broadcast client, |
1986 | * multicast client and symmetric passive associations. |
1987 | * This is the default. |
1988 | * -A Do not require cryptographic authentication for broadcast client, |
1989 | * multicast client and symmetric passive associations. |
1990 | * This is almost never a good idea. |
1991 | * -b Enable the client to synchronize to broadcast servers. |
1992 | * -c conffile |
1993 | * Specify the name and path of the configuration file, |
1994 | * default /etc/ntp.conf |
1995 | * -d Specify debugging mode. This option may occur more than once, |
1996 | * with each occurrence indicating greater detail of display. |
1997 | * -D level |
1998 | * Specify debugging level directly. |
1999 | * -f driftfile |
2000 | * Specify the name and path of the frequency file. |
2001 | * This is the same operation as the "driftfile FILE" |
2002 | * configuration command. |
2003 | * -g Normally, ntpd exits with a message to the system log |
2004 | * if the offset exceeds the panic threshold, which is 1000 s |
2005 | * by default. This option allows the time to be set to any value |
2006 | * without restriction; however, this can happen only once. |
2007 | * If the threshold is exceeded after that, ntpd will exit |
2008 | * with a message to the system log. This option can be used |
2009 | * with the -q and -x options. See the tinker command for other options. |
2010 | * -i jaildir |
2011 | * Chroot the server to the directory jaildir. This option also implies |
2012 | * that the server attempts to drop root privileges at startup |
2013 | * (otherwise, chroot gives very little additional security). |
2014 | * You may need to also specify a -u option. |
2015 | * -k keyfile |
2016 | * Specify the name and path of the symmetric key file, |
2017 | * default /etc/ntp/keys. This is the same operation |
2018 | * as the "keys FILE" configuration command. |
2019 | * -l logfile |
2020 | * Specify the name and path of the log file. The default |
2021 | * is the system log file. This is the same operation as |
2022 | * the "logfile FILE" configuration command. |
2023 | * -L Do not listen to virtual IPs. The default is to listen. |
2024 | * -n Don't fork. |
2025 | * -N To the extent permitted by the operating system, |
2026 | * run the ntpd at the highest priority. |
2027 | * -p pidfile |
2028 | * Specify the name and path of the file used to record the ntpd |
2029 | * process ID. This is the same operation as the "pidfile FILE" |
2030 | * configuration command. |
2031 | * -P priority |
2032 | * To the extent permitted by the operating system, |
2033 | * run the ntpd at the specified priority. |
2034 | * -q Exit the ntpd just after the first time the clock is set. |
2035 | * This behavior mimics that of the ntpdate program, which is |
2036 | * to be retired. The -g and -x options can be used with this option. |
2037 | * Note: The kernel time discipline is disabled with this option. |
2038 | * -r broadcastdelay |
2039 | * Specify the default propagation delay from the broadcast/multicast |
2040 | * server to this client. This is necessary only if the delay |
2041 | * cannot be computed automatically by the protocol. |
2042 | * -s statsdir |
2043 | * Specify the directory path for files created by the statistics |
2044 | * facility. This is the same operation as the "statsdir DIR" |
2045 | * configuration command. |
2046 | * -t key |
2047 | * Add a key number to the trusted key list. This option can occur |
2048 | * more than once. |
2049 | * -u user[:group] |
2050 | * Specify a user, and optionally a group, to switch to. |
2051 | * -v variable |
2052 | * -V variable |
2053 | * Add a system variable listed by default. |
2054 | * -x Normally, the time is slewed if the offset is less than the step |
2055 | * threshold, which is 128 ms by default, and stepped if above |
2056 | * the threshold. This option sets the threshold to 600 s, which is |
2057 | * well within the accuracy window to set the clock manually. |
2058 | * Note: since the slew rate of typical Unix kernels is limited |
2059 | * to 0.5 ms/s, each second of adjustment requires an amortization |
2060 | * interval of 2000 s. Thus, an adjustment as much as 600 s |
2061 | * will take almost 14 days to complete. This option can be used |
2062 | * with the -g and -q options. See the tinker command for other options. |
2063 | * Note: The kernel time discipline is disabled with this option. |
2064 | */ |
2065 | |
2066 | /* By doing init in a separate function we decrease stack usage |
2067 | * in main loop. |
2068 | */ |
2069 | static NOINLINE void ntp_init(char **argv) |
2070 | { |
2071 | unsigned opts; |
2072 | llist_t *peers; |
2073 | |
2074 | srandom(getpid()); |
2075 | |
2076 | if (getuid()) |
2077 | bb_error_msg_and_die("%s", bb_msg_you_must_be_root); |
2078 | |
2079 | /* Set some globals */ |
2080 | G.stratum = MAXSTRAT; |
2081 | if (BURSTPOLL != 0) |
2082 | G.poll_exp = BURSTPOLL; /* speeds up initial sync */ |
2083 | G.last_script_run = G.reftime = G.last_update_recv_time = gettime1900d(); /* sets G.cur_time too */ |
2084 | |
2085 | /* Parse options */ |
2086 | peers = NULL; |
2087 | opt_complementary = "dd:p::wn"; /* d: counter; p: list; -w implies -n */ |
2088 | opts = getopt32(argv, |
2089 | "nqNx" /* compat */ |
2090 | "wp:S:"IF_FEATURE_NTPD_SERVER("l") /* NOT compat */ |
2091 | "d" /* compat */ |
2092 | "46aAbgL", /* compat, ignored */ |
2093 | &peers, &G.script_name, &G.verbose); |
2094 | if (!(opts & (OPT_p|OPT_l))) |
2095 | bb_show_usage(); |
2096 | // if (opts & OPT_x) /* disable stepping, only slew is allowed */ |
2097 | // G.time_was_stepped = 1; |
2098 | if (peers) { |
2099 | while (peers) |
2100 | add_peers(llist_pop(&peers)); |
2101 | } else { |
2102 | /* -l but no peers: "stratum 1 server" mode */ |
2103 | G.stratum = 1; |
2104 | } |
2105 | if (!(opts & OPT_n)) { |
2106 | bb_daemonize_or_rexec(DAEMON_DEVNULL_STDIO, argv); |
2107 | logmode = LOGMODE_NONE; |
2108 | } |
2109 | #if ENABLE_FEATURE_NTPD_SERVER |
2110 | G_listen_fd = -1; |
2111 | if (opts & OPT_l) { |
2112 | G_listen_fd = create_and_bind_dgram_or_die(NULL, 123); |
2113 | socket_want_pktinfo(G_listen_fd); |
2114 | setsockopt(G_listen_fd, IPPROTO_IP, IP_TOS, &const_IPTOS_LOWDELAY, sizeof(const_IPTOS_LOWDELAY)); |
2115 | } |
2116 | #endif |
2117 | /* I hesitate to set -20 prio. -15 should be high enough for timekeeping */ |
2118 | if (opts & OPT_N) |
2119 | setpriority(PRIO_PROCESS, 0, -15); |
2120 | |
2121 | /* If network is up, syncronization occurs in ~10 seconds. |
2122 | * We give "ntpd -q" 10 seconds to get first reply, |
2123 | * then another 50 seconds to finish syncing. |
2124 | * |
2125 | * I tested ntpd 4.2.6p1 and apparently it never exits |
2126 | * (will try forever), but it does not feel right. |
2127 | * The goal of -q is to act like ntpdate: set time |
2128 | * after a reasonably small period of polling, or fail. |
2129 | */ |
2130 | if (opts & OPT_q) { |
2131 | option_mask32 |= OPT_qq; |
2132 | alarm(10); |
2133 | } |
2134 | |
2135 | bb_signals(0 |
2136 | | (1 << SIGTERM) |
2137 | | (1 << SIGINT) |
2138 | | (1 << SIGALRM) |
2139 | , record_signo |
2140 | ); |
2141 | bb_signals(0 |
2142 | | (1 << SIGPIPE) |
2143 | | (1 << SIGCHLD) |
2144 | , SIG_IGN |
2145 | ); |
2146 | } |
2147 | |
2148 | int ntpd_main(int argc UNUSED_PARAM, char **argv) MAIN_EXTERNALLY_VISIBLE; |
2149 | int ntpd_main(int argc UNUSED_PARAM, char **argv) |
2150 | { |
2151 | #undef G |
2152 | struct globals G; |
2153 | struct pollfd *pfd; |
2154 | peer_t **idx2peer; |
2155 | unsigned cnt; |
2156 | |
2157 | memset(&G, 0, sizeof(G)); |
2158 | SET_PTR_TO_GLOBALS(&G); |
2159 | |
2160 | ntp_init(argv); |
2161 | |
2162 | /* If ENABLE_FEATURE_NTPD_SERVER, + 1 for listen_fd: */ |
2163 | cnt = G.peer_cnt + ENABLE_FEATURE_NTPD_SERVER; |
2164 | idx2peer = xzalloc(sizeof(idx2peer[0]) * cnt); |
2165 | pfd = xzalloc(sizeof(pfd[0]) * cnt); |
2166 | |
2167 | /* Countdown: we never sync before we sent INITIAL_SAMPLES+1 |
2168 | * packets to each peer. |
2169 | * NB: if some peer is not responding, we may end up sending |
2170 | * fewer packets to it and more to other peers. |
2171 | * NB2: sync usually happens using INITIAL_SAMPLES packets, |
2172 | * since last reply does not come back instantaneously. |
2173 | */ |
2174 | cnt = G.peer_cnt * (INITIAL_SAMPLES + 1); |
2175 | |
2176 | write_pidfile(CONFIG_PID_FILE_PATH "/ntpd.pid"); |
2177 | |
2178 | while (!bb_got_signal) { |
2179 | llist_t *item; |
2180 | unsigned i, j; |
2181 | int nfds, timeout; |
2182 | double nextaction; |
2183 | |
2184 | /* Nothing between here and poll() blocks for any significant time */ |
2185 | |
2186 | nextaction = G.cur_time + 3600; |
2187 | |
2188 | i = 0; |
2189 | #if ENABLE_FEATURE_NTPD_SERVER |
2190 | if (G_listen_fd != -1) { |
2191 | pfd[0].fd = G_listen_fd; |
2192 | pfd[0].events = POLLIN; |
2193 | i++; |
2194 | } |
2195 | #endif |
2196 | /* Pass over peer list, send requests, time out on receives */ |
2197 | for (item = G.ntp_peers; item != NULL; item = item->link) { |
2198 | peer_t *p = (peer_t *) item->data; |
2199 | |
2200 | if (p->next_action_time <= G.cur_time) { |
2201 | if (p->p_fd == -1) { |
2202 | /* Time to send new req */ |
2203 | if (--cnt == 0) { |
2204 | G.initial_poll_complete = 1; |
2205 | } |
2206 | send_query_to_peer(p); |
2207 | } else { |
2208 | /* Timed out waiting for reply */ |
2209 | close(p->p_fd); |
2210 | p->p_fd = -1; |
2211 | timeout = poll_interval(-2); /* -2: try a bit sooner */ |
2212 | bb_error_msg("timed out waiting for %s, reach 0x%02x, next query in %us", |
2213 | p->p_dotted, p->reachable_bits, timeout); |
2214 | set_next(p, timeout); |
2215 | } |
2216 | } |
2217 | |
2218 | if (p->next_action_time < nextaction) |
2219 | nextaction = p->next_action_time; |
2220 | |
2221 | if (p->p_fd >= 0) { |
2222 | /* Wait for reply from this peer */ |
2223 | pfd[i].fd = p->p_fd; |
2224 | pfd[i].events = POLLIN; |
2225 | idx2peer[i] = p; |
2226 | i++; |
2227 | } |
2228 | } |
2229 | |
2230 | timeout = nextaction - G.cur_time; |
2231 | if (timeout < 0) |
2232 | timeout = 0; |
2233 | timeout++; /* (nextaction - G.cur_time) rounds down, compensating */ |
2234 | |
2235 | /* Here we may block */ |
2236 | VERB2 { |
2237 | if (i > (ENABLE_FEATURE_NTPD_SERVER && G_listen_fd != -1)) { |
2238 | /* We wait for at least one reply. |
2239 | * Poll for it, without wasting time for message. |
2240 | * Since replies often come under 1 second, this also |
2241 | * reduces clutter in logs. |
2242 | */ |
2243 | nfds = poll(pfd, i, 1000); |
2244 | if (nfds != 0) |
2245 | goto did_poll; |
2246 | if (--timeout <= 0) |
2247 | goto did_poll; |
2248 | } |
2249 | bb_error_msg("poll:%us sockets:%u interval:%us", timeout, i, 1 << G.poll_exp); |
2250 | } |
2251 | nfds = poll(pfd, i, timeout * 1000); |
2252 | did_poll: |
2253 | gettime1900d(); /* sets G.cur_time */ |
2254 | if (nfds <= 0) { |
2255 | if (!bb_got_signal /* poll wasn't interrupted by a signal */ |
2256 | && G.cur_time - G.last_script_run > 11*60 |
2257 | ) { |
2258 | /* Useful for updating battery-backed RTC and such */ |
2259 | run_script("periodic", G.last_update_offset); |
2260 | gettime1900d(); /* sets G.cur_time */ |
2261 | } |
2262 | goto check_unsync; |
2263 | } |
2264 | |
2265 | /* Process any received packets */ |
2266 | j = 0; |
2267 | #if ENABLE_FEATURE_NTPD_SERVER |
2268 | if (G.listen_fd != -1) { |
2269 | if (pfd[0].revents /* & (POLLIN|POLLERR)*/) { |
2270 | nfds--; |
2271 | recv_and_process_client_pkt(/*G.listen_fd*/); |
2272 | gettime1900d(); /* sets G.cur_time */ |
2273 | } |
2274 | j = 1; |
2275 | } |
2276 | #endif |
2277 | for (; nfds != 0 && j < i; j++) { |
2278 | if (pfd[j].revents /* & (POLLIN|POLLERR)*/) { |
2279 | /* |
2280 | * At init, alarm was set to 10 sec. |
2281 | * Now we did get a reply. |
2282 | * Increase timeout to 50 seconds to finish syncing. |
2283 | */ |
2284 | if (option_mask32 & OPT_qq) { |
2285 | option_mask32 &= ~OPT_qq; |
2286 | alarm(50); |
2287 | } |
2288 | nfds--; |
2289 | recv_and_process_peer_pkt(idx2peer[j]); |
2290 | gettime1900d(); /* sets G.cur_time */ |
2291 | } |
2292 | } |
2293 | |
2294 | check_unsync: |
2295 | if (G.ntp_peers && G.stratum != MAXSTRAT) { |
2296 | for (item = G.ntp_peers; item != NULL; item = item->link) { |
2297 | peer_t *p = (peer_t *) item->data; |
2298 | if (p->reachable_bits) |
2299 | goto have_reachable_peer; |
2300 | } |
2301 | /* No peer responded for last 8 packets, panic */ |
2302 | G.polladj_count = 0; |
2303 | G.poll_exp = MINPOLL; |
2304 | G.stratum = MAXSTRAT; |
2305 | run_script("unsync", 0.0); |
2306 | have_reachable_peer: ; |
2307 | } |
2308 | } /* while (!bb_got_signal) */ |
2309 | |
2310 | remove_pidfile(CONFIG_PID_FILE_PATH "/ntpd.pid"); |
2311 | kill_myself_with_sig(bb_got_signal); |
2312 | } |
2313 | |
2314 | |
2315 | |
2316 | |
2317 | |
2318 | |
2319 | /*** openntpd-4.6 uses only adjtime, not adjtimex ***/ |
2320 | |
/*** ntp-4.2.6/ntpd/ntp_loopfilter.c - adjtimex usage (reference excerpt, kept under "#if 0" and never compiled) ***/
2322 | |
2323 | #if 0 |
2324 | static double |
2325 | direct_freq(double fp_offset) |
2326 | { |
2327 | #ifdef KERNEL_PLL |
2328 | /* |
2329 | * If the kernel is enabled, we need the residual offset to |
2330 | * calculate the frequency correction. |
2331 | */ |
2332 | if (pll_control && kern_enable) { |
2333 | memset(&ntv, 0, sizeof(ntv)); |
2334 | ntp_adjtime(&ntv); |
2335 | #ifdef STA_NANO |
2336 | clock_offset = ntv.offset / 1e9; |
2337 | #else /* STA_NANO */ |
2338 | clock_offset = ntv.offset / 1e6; |
2339 | #endif /* STA_NANO */ |
2340 | drift_comp = FREQTOD(ntv.freq); |
2341 | } |
2342 | #endif /* KERNEL_PLL */ |
2343 | set_freq((fp_offset - clock_offset) / (current_time - clock_epoch) + drift_comp); |
2344 | wander_resid = 0; |
2345 | return drift_comp; |
2346 | } |
2347 | |
2348 | static void |
2349 | set_freq(double freq) /* frequency update */ |
2350 | { |
2351 | char tbuf[80]; |
2352 | |
2353 | drift_comp = freq; |
2354 | |
2355 | #ifdef KERNEL_PLL |
2356 | /* |
2357 | * If the kernel is enabled, update the kernel frequency. |
2358 | */ |
2359 | if (pll_control && kern_enable) { |
2360 | memset(&ntv, 0, sizeof(ntv)); |
2361 | ntv.modes = MOD_FREQUENCY; |
2362 | ntv.freq = DTOFREQ(drift_comp); |
2363 | ntp_adjtime(&ntv); |
2364 | snprintf(tbuf, sizeof(tbuf), "kernel %.3f PPM", drift_comp * 1e6); |
2365 | report_event(EVNT_FSET, NULL, tbuf); |
2366 | } else { |
2367 | snprintf(tbuf, sizeof(tbuf), "ntpd %.3f PPM", drift_comp * 1e6); |
2368 | report_event(EVNT_FSET, NULL, tbuf); |
2369 | } |
2370 | #else /* KERNEL_PLL */ |
2371 | snprintf(tbuf, sizeof(tbuf), "ntpd %.3f PPM", drift_comp * 1e6); |
2372 | report_event(EVNT_FSET, NULL, tbuf); |
2373 | #endif /* KERNEL_PLL */ |
2374 | } |
2375 | |
2376 | ... |
2377 | ... |
2378 | ... |
2379 | |
2380 | #ifdef KERNEL_PLL |
2381 | /* |
2382 | * This code segment works when clock adjustments are made using |
2383 | * precision time kernel support and the ntp_adjtime() system |
2384 | * call. This support is available in Solaris 2.6 and later, |
2385 | * Digital Unix 4.0 and later, FreeBSD, Linux and specially |
2386 | * modified kernels for HP-UX 9 and Ultrix 4. In the case of the |
2387 | * DECstation 5000/240 and Alpha AXP, additional kernel |
2388 | * modifications provide a true microsecond clock and nanosecond |
2389 | * clock, respectively. |
2390 | * |
2391 | * Important note: The kernel discipline is used only if the |
2392 | * step threshold is less than 0.5 s, as anything higher can |
2393 | * lead to overflow problems. This might occur if some misguided |
2394 | * lad set the step threshold to something ridiculous. |
2395 | */ |
2396 | if (pll_control && kern_enable) { |
2397 | |
2398 | #define MOD_BITS (MOD_OFFSET | MOD_MAXERROR | MOD_ESTERROR | MOD_STATUS | MOD_TIMECONST) |
2399 | |
2400 | /* |
2401 | * We initialize the structure for the ntp_adjtime() |
2402 | * system call. We have to convert everything to |
2403 | * microseconds or nanoseconds first. Do not update the |
2404 | * system variables if the ext_enable flag is set. In |
2405 | * this case, the external clock driver will update the |
2406 | * variables, which will be read later by the local |
2407 | * clock driver. Afterwards, remember the time and |
2408 | * frequency offsets for jitter and stability values and |
2409 | * to update the frequency file. |
2410 | */ |
2411 | memset(&ntv, 0, sizeof(ntv)); |
2412 | if (ext_enable) { |
2413 | ntv.modes = MOD_STATUS; |
2414 | } else { |
2415 | #ifdef STA_NANO |
2416 | ntv.modes = MOD_BITS | MOD_NANO; |
2417 | #else /* STA_NANO */ |
2418 | ntv.modes = MOD_BITS; |
2419 | #endif /* STA_NANO */ |
2420 | if (clock_offset < 0) |
2421 | dtemp = -.5; |
2422 | else |
2423 | dtemp = .5; |
2424 | #ifdef STA_NANO |
2425 | ntv.offset = (int32)(clock_offset * 1e9 + dtemp); |
2426 | ntv.constant = sys_poll; |
2427 | #else /* STA_NANO */ |
2428 | ntv.offset = (int32)(clock_offset * 1e6 + dtemp); |
2429 | ntv.constant = sys_poll - 4; |
2430 | #endif /* STA_NANO */ |
2431 | ntv.esterror = (u_int32)(clock_jitter * 1e6); |
2432 | ntv.maxerror = (u_int32)((sys_rootdelay / 2 + sys_rootdisp) * 1e6); |
2433 | ntv.status = STA_PLL; |
2434 | |
2435 | /* |
2436 | * Enable/disable the PPS if requested. |
2437 | */ |
2438 | if (pps_enable) { |
2439 | if (!(pll_status & STA_PPSTIME)) |
2440 | report_event(EVNT_KERN, |
2441 | NULL, "PPS enabled"); |
2442 | ntv.status |= STA_PPSTIME | STA_PPSFREQ; |
2443 | } else { |
2444 | if (pll_status & STA_PPSTIME) |
2445 | report_event(EVNT_KERN, |
2446 | NULL, "PPS disabled"); |
2447 | ntv.status &= ~(STA_PPSTIME | STA_PPSFREQ); |
2448 | } |
2449 | if (sys_leap == LEAP_ADDSECOND) |
2450 | ntv.status |= STA_INS; |
2451 | else if (sys_leap == LEAP_DELSECOND) |
2452 | ntv.status |= STA_DEL; |
2453 | } |
2454 | |
2455 | /* |
2456 | * Pass the stuff to the kernel. If it squeals, turn off |
2457 | * the pps. In any case, fetch the kernel offset, |
2458 | * frequency and jitter. |
2459 | */ |
2460 | if (ntp_adjtime(&ntv) == TIME_ERROR) { |
2461 | if (!(ntv.status & STA_PPSSIGNAL)) |
2462 | report_event(EVNT_KERN, NULL, |
2463 | "PPS no signal"); |
2464 | } |
2465 | pll_status = ntv.status; |
2466 | #ifdef STA_NANO |
2467 | clock_offset = ntv.offset / 1e9; |
2468 | #else /* STA_NANO */ |
2469 | clock_offset = ntv.offset / 1e6; |
2470 | #endif /* STA_NANO */ |
2471 | clock_frequency = FREQTOD(ntv.freq); |
2472 | |
2473 | /* |
2474 | * If the kernel PPS is lit, monitor its performance. |
2475 | */ |
2476 | if (ntv.status & STA_PPSTIME) { |
2477 | #ifdef STA_NANO |
2478 | clock_jitter = ntv.jitter / 1e9; |
2479 | #else /* STA_NANO */ |
2480 | clock_jitter = ntv.jitter / 1e6; |
2481 | #endif /* STA_NANO */ |
2482 | } |
2483 | |
2484 | #if defined(STA_NANO) && NTP_API == 4 |
2485 | /* |
2486 | * If the TAI changes, update the kernel TAI. |
2487 | */ |
2488 | if (loop_tai != sys_tai) { |
2489 | loop_tai = sys_tai; |
2490 | ntv.modes = MOD_TAI; |
2491 | ntv.constant = sys_tai; |
2492 | ntp_adjtime(&ntv); |
2493 | } |
2494 | #endif /* STA_NANO */ |
2495 | } |
2496 | #endif /* KERNEL_PLL */ |
2497 | #endif |
2498 |