File: /Users/vlad/tmp/xnu-3789.41.3/bsd/kern/sys_ulock.c
Warning: line 69, column 8: Assigned value is garbage or undefined
  1 | /*
  2 |  * Copyright (c) 2015 Apple Inc. All rights reserved.
  3 |  *
  4 |  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5 |  *
  6 |  * This file contains Original Code and/or Modifications of Original Code
  7 |  * as defined in and that are subject to the Apple Public Source License
  8 |  * Version 2.0 (the 'License'). You may not use this file except in
  9 |  * compliance with the License. The rights granted to you under the License
 10 |  * may not be used to create, or enable the creation or redistribution of,
 11 |  * unlawful or unlicensed copies of an Apple operating system, or to
 12 |  * circumvent, violate, or enable the circumvention or violation of, any
 13 |  * terms of an Apple operating system software license agreement.
 14 |  *
 15 |  * Please obtain a copy of the License at
 16 |  * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17 |  *
 18 |  * The Original Code and all software distributed under the License are
 19 |  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20 |  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21 |  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22 |  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23 |  * Please see the License for the specific language governing rights and
 24 |  * limitations under the License.
 25 |  *
 26 |  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27 |  */
 28 |
 29 | #include <sys/param.h>
 30 | #include <sys/systm.h>
 31 | #include <sys/ioctl.h>
 32 | #include <sys/file_internal.h>
 33 | #include <sys/proc_internal.h>
 34 | #include <sys/kernel.h>
 35 | #include <sys/guarded.h>
 36 | #include <sys/stat.h>
 37 | #include <sys/malloc.h>
 38 | #include <sys/sysproto.h>
 39 | #include <sys/pthread_shims.h>
 40 |
 41 | #include <mach/mach_types.h>
 42 |
 43 | #include <kern/cpu_data.h>
 44 | #include <kern/mach_param.h>
 45 | #include <kern/kern_types.h>
 46 | #include <kern/assert.h>
 47 | #include <kern/kalloc.h>
 48 | #include <kern/thread.h>
 49 | #include <kern/clock.h>
 50 | #include <kern/ledger.h>
 51 | #include <kern/policy_internal.h>
 52 | #include <kern/task.h>
 53 | #include <kern/telemetry.h>
 54 | #include <kern/waitq.h>
 55 | #include <kern/sched_prim.h>
 56 | #include <kern/zalloc.h>
 57 |
 58 | #include <pexpert/pexpert.h>
 59 |
 60 | #define XNU_TEST_BITMAP
 61 | #include <kern/bits.h>
 62 |
 63 | #include <sys/ulock.h>
 64 |
 65 | /*
 66 |  * How ulock promotion works:
 67 |  *
 68 |  * There's a requested policy field on every thread called 'promotions', which
 69 |  * expresses which ulock promotions are happening to this thread.
 70 |  * The promotion priority saturates until the promotion count goes to 0.
 71 |  *
 72 |  * We also track effective promotion qos, which is the qos before clamping.
 73 |  * This value is used for promoting a thread that another thread is waiting on,
 74 |  * so that the lock owner reinflates to the right priority after unclamping.
 75 |  *
 76 |  * This also works for non-QoS threads, which can donate base priority to QoS
 77 |  * and non-QoS threads alike.
 78 |  *
 79 |  * ulock wait applies a promotion to the owner communicated through
 80 |  * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 81 |  * there is still an owner. In ulock wake, if the waker is still the owner,
 82 |  * then it clears its ownership and drops the boost. It does NOT transfer
 83 |  * ownership/priority boost to the new thread. Instead, it selects the
 84 |  * waiting thread with the highest base priority to be woken next, and
 85 |  * relies on that thread to carry the torch for the other waiting threads.
 86 |  */
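
For context, this is roughly how the two halves described above are driven from userspace. The sketch below is purely illustrative and is not part of sys_ulock.c: it assumes the private __ulock_wait()/__ulock_wake() wrappers exported by libsystem_kernel (their declarations here are an assumption about the private <sys/ulock.h> interface), and it re-derives the UL_*/ULF_* constants from the macro values visible in this listing.

/*
 * Hypothetical userspace sketch of an unfair lock built on ulock wait/wake.
 * Not a supported API; real implementations (e.g. os_unfair_lock) also keep
 * a "waiters" bit so that unlock can skip the syscall in the uncontended case.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <mach/mach.h>          /* mach_thread_self() */

#define UL_UNFAIR_LOCK  2           /* matches UL_UNFAIR_LOCK in this listing */
#define ULF_NO_ERRNO    0x01000000  /* matches ULF_NO_ERRNO in this listing */

/* Private syscall wrappers - assumed signatures. */
extern int __ulock_wait(uint32_t operation, void *addr, uint64_t value, uint32_t timeout_us);
extern int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);

typedef _Atomic uint32_t unfair_lock_t;  /* 0 == unlocked, else owner's thread port name */

static void
unfair_lock_lock(unfair_lock_t *lock)
{
    uint32_t self = mach_thread_self();  /* owner value the kernel can translate back; a real
                                            implementation would cache this port name */
    uint32_t expected = 0;

    while (!atomic_compare_exchange_strong(lock, &expected, self)) {
        /*
         * Contended: ask the kernel to block us on this address. Because the
         * expected value names the owner, ulock_wait() can promote that owner
         * while we sleep, exactly as the comment above describes.
         */
        __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, lock, expected, 0);
        expected = 0;
    }
}

static void
unfair_lock_unlock(unfair_lock_t *lock)
{
    atomic_store(lock, 0);                                 /* release in userspace first */
    __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, lock, 0);  /* then wake one waiter */
}

Handing the kernel the owner's port name in the lock word is what lets ulock_wait() find the thread to promote, and storing 0 before calling wake is what the "Reaching this point means I previously moved the lock to 'unowned' state in userspace" comment in ulock_wake() below relies on.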
 87 |
 88 | static lck_grp_t *ull_lck_grp;
 89 | static lck_mtx_t ull_table_lock;
 90 |
 91 | #define ull_global_lock()       lck_mtx_lock(&ull_table_lock)
 92 | #define ull_global_unlock()     lck_mtx_unlock(&ull_table_lock)
 93 |
 94 | #define ull_lock(ull)           lck_mtx_lock(&ull->ull_lock)
 95 | #define ull_unlock(ull)         lck_mtx_unlock(&ull->ull_lock)
 96 | #define ull_assert_owned(ull)   LCK_MTX_ASSERT(&ull->ull_lock, LCK_MTX_ASSERT_OWNED)
 97 |
 98 | typedef struct __attribute__((packed)) {
 99 |     user_addr_t ulk_addr;
100 |     pid_t       ulk_pid;
101 | } ulk_t;
102 |
103 | inline static bool
104 | ull_key_match(ulk_t *a, ulk_t *b)
105 | {
106 |     return ((a->ulk_pid == b->ulk_pid) &&
107 |             (a->ulk_addr == b->ulk_addr));
108 | }
109 |
110 | typedef struct ull {
111 |     /*
112 |      * ull_owner is the most recent known value for the owner of this ulock
113 |      * i.e. it may be out of date WRT the real value in userspace.
114 |      */
115 |     thread_t        ull_owner; /* holds +1 thread reference */
116 |     ulk_t           ull_key;
117 |     ulk_t           ull_saved_key;
118 |     lck_mtx_t       ull_lock;
119 |     int32_t         ull_nwaiters;
120 |     int32_t         ull_max_nwaiters;
121 |     int32_t         ull_refcount;
122 |     struct promote_token ull_promote_token;
123 |     queue_chain_t   ull_hash_link;
124 |     uint8_t         ull_opcode;
125 | } ull_t;
126 |
127 | static const bool ull_debug = false;
128 |
129 | extern void ulock_initialize(void);
130 |
131 | #define ULL_MUST_EXIST  0x0001
132 | static ull_t *ull_get(ulk_t *, uint32_t);
133 | static void ull_put(ull_t *);
134 |
135 | static thread_t ull_promote_owner_locked(ull_t* ull, thread_t thread);
136 |
137 | #if DEVELOPMENT || DEBUG
138 | static int ull_simulate_copyin_fault = 0;
139 | static int ull_panic_on_corruption = 0;
140 |
141 | static void
142 | ull_dump(ull_t *ull)
143 | {
144 |     kprintf("ull\t%p\n", ull);
145 |     kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
146 |     kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
147 |     kprintf("ull_saved_key.ulk_pid\t%d\n", ull->ull_saved_key.ulk_pid);
148 |     kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(ull->ull_saved_key.ulk_addr));
149 |     kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
150 |     kprintf("ull_max_nwaiters\t%d\n", ull->ull_max_nwaiters);
151 |     kprintf("ull_refcount\t%d\n", ull->ull_refcount);
152 |     kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
153 |     kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
154 |     kprintf("ull_promote_token\t%d, %d\n\n", ull->ull_promote_token.pt_basepri, ull->ull_promote_token.pt_qos);
155 | }
156 | #endif
157 |
158 | static int ull_hash_buckets;
159 | static queue_head_t *ull_bucket;
160 | static uint32_t ull_nzalloc = 0;
161 | static zone_t ull_zone;
162 |
163 | static __inline__ uint32_t
164 | ull_hash_index(char *key, size_t length)
165 | {
166 |     uint32_t hash = jenkins_hash(key, length);
167 |
168 |     hash &= (ull_hash_buckets - 1);
169 |
170 |     return hash;
171 | }
172 |
173 | /* Ensure that the key structure is packed,
174 |  * so that no undefined memory is passed to
175 |  * ull_hash_index()
176 |  */
177 | static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));
178 |
179 | #define ULL_INDEX(keyp) ull_hash_index((char *)keyp, sizeof *keyp)
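
The packing assert above is directly related to the warning reported at the top of this file: ULL_INDEX() feeds the raw bytes of a ulk_t into jenkins_hash() (see the waitq.h listing below, whose line 69, column 8 is the flagged "hash += (uint32_t)key[i]" read). Any compiler-inserted padding inside the key would be uninitialized memory flowing into that hash, which appears to be the scenario the analyzer is modeling. A minimal standalone illustration of the failure mode, using hypothetical key types that are not part of XNU:

/* Userspace C11 sketch: hashing struct bytes picks up padding unless the struct is packed. */
#include <stdint.h>
#include <stdio.h>

typedef struct {                         /* likely 4 bytes of padding after pid on LP64 */
    int32_t  pid;
    uint64_t addr;
} key_bad_t;

typedef struct __attribute__((packed)) { /* same trick as ulk_t: no padding possible */
    uint64_t addr;
    int32_t  pid;
} key_packed_t;

/* Same "one at a time" hash as jenkins_hash() in waitq.h */
static uint32_t
hash_bytes(const char *key, size_t length)
{
    uint32_t hash = 0;
    for (size_t i = 0; i < length; i++) {
        hash += (uint32_t)key[i];        /* reads padding bytes if the struct has any */
        hash += (hash << 10);
        hash ^= (hash >> 6);
    }
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);
    return hash;
}

int
main(void)
{
    /* Two "equal" unpacked keys can hash differently: the padding is indeterminate,
     * which is exactly the kind of read the analyzer calls "garbage or undefined". */
    key_bad_t a, b;                      /* deliberately not zero-initialized */
    a.pid = b.pid = 1234;
    a.addr = b.addr = 0x7fff5fbff000ULL;
    printf("unpacked: %08x vs %08x (may differ)\n",
           hash_bytes((const char *)&a, sizeof a),
           hash_bytes((const char *)&b, sizeof b));

    /* The packed key has no padding, which is what the static_assert above checks. */
    _Static_assert(sizeof(key_packed_t) == sizeof(uint64_t) + sizeof(int32_t),
                   "packed key must have no padding");
    return 0;
}

Since ulk_t is declared __attribute__((packed)) and both of its fields are assigned in ulock_wait()/ulock_wake() before ull_get() is called, the report is most likely a false positive triggered by the byte-wise read, but the assert documents exactly the invariant the hash depends on.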
180 |
181 | void
182 | ulock_initialize(void)
183 | {
184 |     ull_lck_grp = lck_grp_alloc_init("ulocks", NULL);
185 |     lck_mtx_init(&ull_table_lock, ull_lck_grp, NULL);
186 |
187 |     assert(thread_max > 16);
188 |     /* Size ull_hash_buckets based on thread_max.
189 |      * Round up to nearest power of 2, then divide by 4
190 |      */
191 |     ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));
192 |
193 |     kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
194 |     assert(ull_hash_buckets >= thread_max/4);
195 |
196 |     ull_bucket = (queue_head_t *)kalloc(sizeof(queue_head_t) * ull_hash_buckets);
197 |     assert(ull_bucket != NULL);
198 |
199 |     for (int i = 0; i < ull_hash_buckets; i++) {
200 |         queue_init(&ull_bucket[i]);
201 |     }
202 |
203 |     ull_zone = zinit(sizeof(ull_t),
204 |                      thread_max * sizeof(ull_t),
205 |                      0, "ulocks");
206 |
207 |     zone_change(ull_zone, Z_NOENCRYPT, TRUE);
208 |
209 | #if DEVELOPMENT || DEBUG
210 |     if (!PE_parse_boot_argn("ulock_panic_on_corruption",
211 |             &ull_panic_on_corruption, sizeof(ull_panic_on_corruption))) {
212 |         ull_panic_on_corruption = 0;
213 |     }
214 | #endif
215 | }
216 |
217 | #if DEVELOPMENT || DEBUG
218 | /* Count the number of hash entries for a given pid.
219 |  * if pid==0, dump the whole table.
220 |  */
221 | static int
222 | ull_hash_dump(pid_t pid)
223 | {
224 |     int count = 0;
225 |     ull_global_lock();
226 |     if (pid == 0) {
227 |         kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
228 |         kprintf("%s>BEGIN\n", __FUNCTION__);
229 |     }
230 |     for (int i = 0; i < ull_hash_buckets; i++) {
231 |         if (!queue_empty(&ull_bucket[i])) {
232 |             ull_t *elem;
233 |             if (pid == 0) {
234 |                 kprintf("%s>index %d:\n", __FUNCTION__, i);
235 |             }
236 |             qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
237 |                 if ((pid == 0) || (pid == elem->ull_key.ulk_pid)) {
238 |                     ull_dump(elem);
239 |                     count++;
240 |                 }
241 |             }
242 |         }
243 |     }
244 |     if (pid == 0) {
245 |         kprintf("%s>END\n", __FUNCTION__);
246 |         ull_nzalloc = 0;
247 |     }
248 |     ull_global_unlock();
249 |     return count;
250 | }
251 | #endif
252 |
253 | static ull_t *
254 | ull_alloc(ulk_t *key)
255 | {
256 |     ull_t *ull = (ull_t *)zalloc(ull_zone);
257 |     assert(ull != NULL);
258 |
259 |     ull->ull_refcount = 1;
260 |     ull->ull_key = *key;
261 |     ull->ull_saved_key = *key;
262 |     ull->ull_nwaiters = 0;
263 |     ull->ull_max_nwaiters = 0;
264 |     ull->ull_opcode = 0;
265 |
266 |     ull->ull_owner = THREAD_NULL;
267 |     ull->ull_promote_token = PROMOTE_TOKEN_INIT;
268 |
269 |     lck_mtx_init(&ull->ull_lock, ull_lck_grp, NULL);
270 |
271 |     ull_nzalloc++;
272 |     return ull;
273 | }
274 |
275 | static void
276 | ull_free(ull_t *ull)
277 | {
278 |     assert(ull->ull_owner == THREAD_NULL);
279 |
280 |     lck_mtx_assert(&ull->ull_lock, LCK_ASSERT_NOTOWNED);
281 |
282 |     lck_mtx_destroy(&ull->ull_lock, ull_lck_grp);
283 |
284 |     zfree(ull_zone, ull);
285 | }
286 |
287 | /* Finds an existing ulock structure (ull_t), or creates a new one.
288 |  * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
289 |  * The ulock structure is returned with ull_lock locked
290 |  *
291 |  * TODO: Per-bucket lock to reduce contention on global lock
292 |  */
293 | static ull_t *
294 | ull_get(ulk_t *key, uint32_t flags)
295 | {
296 |     ull_t *ull = NULL;
297 |     uint i = ULL_INDEX(key);
298 |     ull_t *elem;
299 |     ull_global_lock();
300 |     qe_foreach_element(elem, &ull_bucket[i], ull_hash_link) {
301 |         ull_lock(elem);
302 |         if (ull_key_match(&elem->ull_key, key)) {
303 |             ull = elem;
304 |             break;
305 |         } else {
306 |             ull_unlock(elem);
307 |         }
308 |     }
309 |     if (ull == NULL) {
310 |         if (flags & ULL_MUST_EXIST) {
311 |             /* Must already exist (called from wake) */
312 |             ull_global_unlock();
313 |             return NULL;
314 |         }
315 |
316 |         /* NRG maybe drop the ull_global_lock before the kalloc,
317 |          * then take the lock and check again for a key match
318 |          * and either use the new ull_t or free it.
319 |          */
320 |
321 |         ull = ull_alloc(key);
322 |
323 |         if (ull == NULL) {
324 |             ull_global_unlock();
325 |             return NULL;
326 |         }
327 |
328 |         ull_lock(ull);
329 |
330 |         enqueue(&ull_bucket[i], &ull->ull_hash_link);
331 |     }
332 |
333 |     ull->ull_refcount++;
334 |
335 |     ull_global_unlock();
336 |
337 |     return ull; /* still locked */
338 | }
339 |
340 | /*
341 |  * Must be called with ull_lock held
342 |  */
343 | static void
344 | ull_put(ull_t *ull)
345 | {
346 |     ull_assert_owned(ull);
347 |     int refcount = --ull->ull_refcount;
348 |     assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);
349 |     ull_unlock(ull);
350 |
351 |     if (refcount > 0) {
352 |         return;
353 |     }
354 |
355 |     ull_global_lock();
356 |     remqueue(&ull->ull_hash_link);
357 |     ull_global_unlock();
358 |
359 | #if DEVELOPMENT || DEBUG
360 |     if (ull_debug) {
361 |         kprintf("%s>", __FUNCTION__);
362 |         ull_dump(ull);
363 |     }
364 | #endif
365 |     ull_free(ull);
366 | }
367 |
368 | int
369 | ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
370 | {
371 |     uint opcode = args->operation & UL_OPCODE_MASK;
372 |     uint flags = args->operation & UL_FLAGS_MASK;
373 |     int ret = 0;
374 |     thread_t self = current_thread();
375 |     int id = thread_tid(self);
376 |     ulk_t key;
377 |
378 |     /* involved threads - each variable holds +1 ref if not null */
379 |     thread_t owner_thread        = THREAD_NULL;
380 |     thread_t old_owner           = THREAD_NULL;
381 |     thread_t old_lingering_owner = THREAD_NULL;
382 |     sched_call_t workq_callback  = NULL;
383 |
384 |     if (ull_debug) {
385 |         kprintf("[%d]%s>ENTER opcode %d addr %llx value %llx timeout %d flags %x\n", id, __FUNCTION__, opcode, (unsigned long long)(args->addr), args->value, args->timeout, flags);
386 |     }
387 |
388 |     if ((flags & ULF_WAIT_MASK) != flags) {
389 |         ret = EINVAL;
390 |         goto munge_retval;
391 |     }
392 |
393 |     boolean_t set_owner = FALSE;
394 |
395 |     switch (opcode) {
396 |     case UL_UNFAIR_LOCK:
397 |         set_owner = TRUE;
398 |         break;
399 |     case UL_COMPARE_AND_WAIT:
400 |         break;
401 |     default:
402 |         if (ull_debug) {
403 |             kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
404 |                 id, __FUNCTION__, opcode,
405 |                 (unsigned long long)(args->addr), flags);
406 |         }
407 |         ret = EINVAL;
408 |         goto munge_retval;
409 |     }
410 |
411 |     /* 32-bit lock type for UL_COMPARE_AND_WAIT and UL_UNFAIR_LOCK */
412 |     uint32_t value = 0;
413 |
414 |     if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {
415 |         ret = EINVAL;
416 |         goto munge_retval;
417 |     }
418 |
419 |     key.ulk_pid = p->p_pid;
420 |     key.ulk_addr = args->addr;
421 |
422 |     if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
423 |         workq_callback = workqueue_get_sched_callback();
424 |         workq_callback = thread_disable_sched_call(self, workq_callback);
425 |     }
426 |
427 |     ull_t *ull = ull_get(&key, 0);
428 |     if (ull == NULL) {
429 |         ret = ENOMEM;
430 |         goto munge_retval;
431 |     }
432 |     /* ull is locked */
433 |
434 |     ull->ull_nwaiters++;
435 |
436 |     if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
437 |         ull->ull_max_nwaiters = ull->ull_nwaiters;
438 |     }
439 |
440 |     if (ull->ull_opcode == 0) {
441 |         ull->ull_opcode = opcode;
442 |     } else if (ull->ull_opcode != opcode) {
443 |         ull_unlock(ull);
444 |         ret = EDOM;
445 |         goto out;
446 |     }
447 |
448 |     /*
449 |      * We don't want this copyin to get wedged behind VM operations,
450 |      * but we have to read the userspace value under the ull lock for correctness.
451 |      *
452 |      * Until <rdar://problem/24999882> exists,
453 |      * fake it by disabling preemption across copyin, which forces any
454 |      * vm_fault we encounter to fail.
455 |      */
456 |     uint64_t val64; /* copyin_word always zero-extends to 64-bits */
457 |
458 |     disable_preemption();
459 |     int copy_ret = copyin_word(args->addr, &val64, sizeof(value));
460 |     enable_preemption();
461 |
462 |     value = (uint32_t)val64;
463 |
464 | #if DEVELOPMENT || DEBUG
465 |     /* Occasionally simulate copyin finding the user address paged out */
466 |     if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
467 |         static _Atomic int fault_inject = 0;
468 |         if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {
469 |             copy_ret = EFAULT;
470 |         }
471 |     }
472 | #endif
473 |     if (copy_ret != 0) {
474 |         ull_unlock(ull);
475 |
476 |         /* copyin() will return an error if the access to the user addr would have faulted,
477 |          * so just return and let the user level code fault it in.
478 |          */
479 |         ret = copy_ret;
480 |         goto out;
481 |     }
482 |
483 |     if (value != args->value) {
484 |         /* Lock value has changed from expected so bail out */
485 |         ull_unlock(ull);
486 |         if (ull_debug) {
487 |             kprintf("[%d]%s>Lock value %d has changed from expected %d so bail out\n",
488 |                 id, __FUNCTION__, value, (uint32_t)(args->value));
489 |         }
490 |         goto out;
491 |     }
492 |
493 |     if (set_owner) {
494 |         mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
495 |         owner_thread = port_name_to_thread_for_ulock(owner_name);
496 |
497 |         /* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
498 |         if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
499 | #if DEBUG || DEVELOPMENT
500 |             if (ull_panic_on_corruption) {
501 |                 if (flags & ULF_NO_ERRNO) {
502 |                     // ULF_NO_ERRNO is used by libplatform ulocks, but not libdispatch ones.
503 |                     // Don't panic on libdispatch ulock corruptions; the userspace likely
504 |                     // mismanaged a dispatch queue.
505 |                     panic("ulock_wait: ulock is corrupted; value=0x%x, ull=%p",
506 |                         (uint32_t)(args->value), ull);
507 |                 }
508 |             }
509 | #endif
510 |             /*
511 |              * Translation failed - even though the lock value is up to date,
512 |              * whatever was stored in the lock wasn't actually a thread port.
513 |              */
514 |             ull_unlock(ull);
515 |             ret = EOWNERDEAD;
516 |             goto out;
517 |         }
518 |         /* owner_thread has a +1 reference */
519 |
520 |         /*
521 |          * At this point, I know:
522 |          * a) owner_thread is definitely the current owner, because I just read the value
523 |          * b) owner_thread is either:
524 |          *    i) holding the user lock or
525 |          *    ii) has just unlocked the user lock after I looked
526 |          *        and is heading toward the kernel to call ull_wake.
527 |          *        If so, it's going to have to wait for the ull mutex.
528 |          *
529 |          * Therefore, I can promote its priority to match mine, and I can rely on it to
530 |          * come by later to issue the wakeup and lose its promotion.
531 |          */
532 |
533 |         old_owner = ull_promote_owner_locked(ull, owner_thread);
534 |     }
535 |
536 |     wait_result_t wr;
537 |     uint32_t timeout = args->timeout;
538 |     if (timeout) {
539 |         wr = assert_wait_timeout((event_t)ull, THREAD_ABORTSAFE, timeout, NSEC_PER_USEC);
540 |     } else {
541 |         wr = assert_wait((event_t)ull, THREAD_ABORTSAFE);
542 |     }
543 |
544 |     ull_unlock(ull);
545 |
546 |     if (ull_debug) {
547 |         kprintf("[%d]%s>after assert_wait() returned %d\n", id, __FUNCTION__, wr);
548 |     }
549 |
550 |     if (set_owner && owner_thread != THREAD_NULL && wr == THREAD_WAITING) {
551 |         wr = thread_handoff(owner_thread);
552 |         /* owner_thread ref is consumed */
553 |         owner_thread = THREAD_NULL;
554 |     } else {
555 |         /* NRG At some point this should be a continuation based block, so that we can avoid saving the full kernel context. */
556 |         wr = thread_block(NULL);
557 |     }
558 |     if (ull_debug) {
559 |         kprintf("[%d]%s>thread_block() returned %d\n", id, __FUNCTION__, wr);
560 |     }
561 |     switch (wr) {
562 |     case THREAD_AWAKENED:
563 |         break;
564 |     case THREAD_TIMED_OUT:
565 |         ret = ETIMEDOUT;
566 |         break;
567 |     case THREAD_INTERRUPTED:
568 |     case THREAD_RESTART:
569 |     default:
570 |         ret = EINTR;
571 |         break;
572 |     }
573 |
574 | out:
575 |     ull_lock(ull);
576 |     *retval = --ull->ull_nwaiters;
577 |     if (ull->ull_nwaiters == 0) {
578 |         /*
579 |          * If the wait was canceled early, we might need to
580 |          * clear out the lingering owner reference before
581 |          * freeing the ull.
582 |          */
583 |         if (ull->ull_owner != THREAD_NULL) {
584 |             old_lingering_owner = ull_promote_owner_locked(ull, THREAD_NULL);
585 |         }
586 |
587 |         assert(ull->ull_owner == THREAD_NULL);
588 |
589 |         ull->ull_key.ulk_pid = 0;
590 |         ull->ull_key.ulk_addr = 0;
591 |         ull->ull_refcount--;
592 |         assert(ull->ull_refcount > 0);
593 |     }
594 |     ull_put(ull);
595 |
596 |     if (owner_thread != THREAD_NULL) {
597 |         thread_deallocate(owner_thread);
598 |     }
599 |
600 |     if (old_owner != THREAD_NULL) {
601 |         thread_deallocate(old_owner);
602 |     }
603 |
604 |     if (old_lingering_owner != THREAD_NULL) {
605 |         thread_deallocate(old_lingering_owner);
606 |     }
607 |
608 |     assert(*retval >= 0);
609 |
610 | munge_retval:
611 |     if (workq_callback) {
612 |         thread_reenable_sched_call(self, workq_callback);
613 |     }
614 |
615 |     if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
616 |         *retval = -ret;
617 |         ret = 0;
618 |     }
619 |     return ret;
620 | }
621 |
622 | int
623 | ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
624 | {
625 |     uint opcode = args->operation & UL_OPCODE_MASK;
626 |     uint flags = args->operation & UL_FLAGS_MASK;
627 |     int ret = 0;
628 |     int id = thread_tid(current_thread());
629 |     ulk_t key;
630 |
631 |     /* involved threads - each variable holds +1 ref if not null */
632 |     thread_t wake_thread = THREAD_NULL;
633 |     thread_t old_owner   = THREAD_NULL;
634 |
635 |     if (ull_debug) {
636 |         kprintf("[%d]%s>ENTER opcode %d addr %llx flags %x\n",
637 |             id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
638 |     }
639 |
640 |     if ((flags & ULF_WAKE_MASK) != flags) {
641 |         ret = EINVAL;
642 |         goto munge_retval;
643 |     }
644 |
645 | #if DEVELOPMENT || DEBUG
646 |     if (opcode == UL_DEBUG_HASH_DUMP_PID) {
647 |         *retval = ull_hash_dump(p->p_pid);
648 |         return ret;
649 |     } else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
650 |         *retval = ull_hash_dump(0);
651 |         return ret;
652 |     } else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
653 |         ull_simulate_copyin_fault = (int)(args->wake_value);
654 |         return ret;
655 |     }
656 | #endif
657 |
658 |     if (args->addr == 0) {
659 |         ret = EINVAL;
660 |         goto munge_retval;
661 |     }
662 |
663 |     if (flags & ULF_WAKE_THREAD) {
664 |         if (flags & ULF_WAKE_ALL) {
665 |             ret = EINVAL;
666 |             goto munge_retval;
667 |         }
668 |         mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
669 |         wake_thread = port_name_to_thread_for_ulock(wake_thread_name);
670 |         if (wake_thread == THREAD_NULL) {
671 |             ret = ESRCH;
672 |             goto munge_retval;
673 |         }
674 |     }
675 |
676 |     key.ulk_pid = p->p_pid;
677 |     key.ulk_addr = args->addr;
678 |
679 |     ull_t *ull = ull_get(&key, ULL_MUST_EXIST);
680 |     if (ull == NULL) {
681 |         if (wake_thread != THREAD_NULL) {
682 |             thread_deallocate(wake_thread);
683 |         }
684 |         ret = ENOENT;
685 |         goto munge_retval;
686 |     }
687 |     /* ull is locked */
688 |
689 |     boolean_t clear_owner = FALSE; /* need to reset owner */
690 |
691 |     switch (opcode) {
692 |     case UL_UNFAIR_LOCK:
693 |         clear_owner = TRUE;
694 |         break;
695 |     case UL_COMPARE_AND_WAIT:
696 |         break;
697 |     default:
698 |         if (ull_debug) {
699 |             kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
700 |                 id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
701 |         }
702 |         ret = EINVAL;
703 |         goto out_locked;
704 |     }
705 |
706 |     if (opcode != ull->ull_opcode) {
707 |         if (ull_debug) {
708 |             kprintf("[%d]%s>EDOM - opcode mismatch - opcode %d addr 0x%llx flags 0x%x\n",
709 |                 id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
710 |         }
711 |         ret = EDOM;
712 |         goto out_locked;
713 |     }
714 |
715 |     if (!clear_owner) {
716 |         assert(ull->ull_owner == THREAD_NULL);
717 |     }
718 |
719 |     if (flags & ULF_WAKE_ALL) {
720 |         thread_wakeup((event_t)ull);
721 |     } else if (flags & ULF_WAKE_THREAD) {
722 |         kern_return_t kr = thread_wakeup_thread((event_t)ull, wake_thread);
723 |         if (kr != KERN_SUCCESS) {
724 |             assert(kr == KERN_NOT_WAITING);
725 |             ret = EALREADY;
726 |         }
727 |     } else {
728 |         /*
729 |          * TODO: WAITQ_SELECT_MAX_PRI forces a linear scan of the (hashed) global waitq.
730 |          * Move to a ulock-private, priority sorted waitq to avoid that.
731 |          *
732 |          * TODO: 'owner is not current_thread (or null)' likely means we can avoid this wakeup
733 |          * <rdar://problem/25487001>
734 |          */
735 |         thread_wakeup_one_with_pri((event_t)ull, WAITQ_SELECT_MAX_PRI);
736 |     }
737 |
738 |     /*
739 |      * Reaching this point means I previously moved the lock to 'unowned' state in userspace.
740 |      * Therefore I need to relinquish my promotion.
741 |      *
742 |      * However, someone else could have locked it after I unlocked, and then had a third thread
743 |      * block on the lock, causing a promotion of some other owner.
744 |      *
745 |      * I don't want to stomp over that, so only remove the promotion if I'm the current owner.
746 |      */
747 |
748 |     if (ull->ull_owner == current_thread()) {
749 |         old_owner = ull_promote_owner_locked(ull, THREAD_NULL);
750 |     }
751 |
752 | out_locked:
753 |     ull_put(ull);
754 |
755 |     if (wake_thread != THREAD_NULL) {
756 |         thread_deallocate(wake_thread);
757 |     }
758 |
759 |     if (old_owner != THREAD_NULL) {
760 |         thread_deallocate(old_owner);
761 |     }
762 |
763 | munge_retval:
764 |     if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
765 |         *retval = -ret;
766 |         ret = 0;
767 |     }
768 |     return ret;
769 | }
770 |
771 | /*
772 |  * Change ull_owner to be new_owner, and update it with the properties
773 |  * of the current thread.
774 |  *
775 |  * Records the highest current promotion value in ull_promote_token, and applies that
776 |  * to any new owner.
777 |  *
778 |  * Returns +1 ref to the old ull_owner if it is going away.
779 |  */
780 | static thread_t
781 | ull_promote_owner_locked(ull_t*    ull,
782 |                          thread_t  new_owner)
783 | {
784 |     if (new_owner != THREAD_NULL && ull->ull_owner == new_owner) {
785 |         thread_user_promotion_update(new_owner, current_thread(), &ull->ull_promote_token);
786 |         return THREAD_NULL;
787 |     }
788 |
789 |     thread_t old_owner = ull->ull_owner;
790 |     ull->ull_owner = THREAD_NULL;
791 |
792 |     if (new_owner != THREAD_NULL) {
793 |         /* The ull_owner field now owns a +1 ref on thread */
794 |         thread_reference(new_owner);
795 |         ull->ull_owner = new_owner;
796 |
797 |         thread_user_promotion_add(new_owner, current_thread(), &ull->ull_promote_token);
798 |     } else {
799 |         /* No new owner - clear the saturated promotion value */
800 |         ull->ull_promote_token = PROMOTE_TOKEN_INIT;
801 |     }
802 |
803 |     if (old_owner != THREAD_NULL) {
804 |         thread_user_promotion_drop(old_owner);
805 |     }
806 |
807 |     /* Return the +1 ref from the ull_owner field */
808 |     return old_owner;
809 | }
810 |

File: kern/waitq.h (inlined by the analyzer; included above via <kern/waitq.h>, and the reported line 69, column 8 falls in its jenkins_hash())

  1 | #ifndef _WAITQ_H_
  2 | #define _WAITQ_H_
  3 | /*
  4 |  * Copyright (c) 2014-2015 Apple Computer, Inc. All rights reserved.
  5 |  *
  6 |  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  7 |  *
  8 |  * This file contains Original Code and/or Modifications of Original Code
  9 |  * as defined in and that are subject to the Apple Public Source License
 10 |  * Version 2.0 (the 'License'). You may not use this file except in
 11 |  * compliance with the License. The rights granted to you under the License
 12 |  * may not be used to create, or enable the creation or redistribution of,
 13 |  * unlawful or unlicensed copies of an Apple operating system, or to
 14 |  * circumvent, violate, or enable the circumvention or violation of, any
 15 |  * terms of an Apple operating system software license agreement.
 16 |  *
 17 |  * Please obtain a copy of the License at
 18 |  * http://www.opensource.apple.com/apsl/ and read it before using this file.
 19 |  *
 20 |  * The Original Code and all software distributed under the License are
 21 |  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 22 |  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 23 |  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 24 |  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 25 |  * Please see the License for the specific language governing rights and
 26 |  * limitations under the License.
 27 |  *
 28 |  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 29 |  */
 30 | #ifdef KERNEL_PRIVATE
 31 |
 32 | #include <mach/mach_types.h>
 33 | #include <mach/sync_policy.h>
 34 | #include <mach/kern_return.h>  /* for kern_return_t */
 35 |
 36 | #include <kern/kern_types.h>   /* for wait_queue_t */
 37 | #include <kern/queue.h>
 38 | #include <kern/assert.h>
 39 |
 40 | #include <sys/cdefs.h>
 41 |
 42 | /*
 43 |  * Constants and types used in the waitq APIs
 44 |  */
 45 | #define WAITQ_ALL_PRIORITIES   (-1)
 46 | #define WAITQ_PROMOTE_PRIORITY (-2)
 47 | #define WAITQ_SELECT_MAX_PRI   (-3)
 48 |
 49 | typedef enum e_waitq_lock_state {
 50 |     WAITQ_KEEP_LOCKED    = 0x01,
 51 |     WAITQ_UNLOCK         = 0x02,
 52 |     WAITQ_SHOULD_LOCK    = 0x04,
 53 |     WAITQ_ALREADY_LOCKED = 0x08,
 54 |     WAITQ_DONT_LOCK      = 0x10,
 55 | } waitq_lock_state_t;
 56 |
 57 | /*
 58 |  * The Jenkins "one at a time" hash.
 59 |  * TBD: There may be some value to unrolling here,
 60 |  * depending on the architecture.
 61 |  */
 62 | static __inline__ uint32_t
 63 | jenkins_hash(char *key, size_t length)
 64 | {
 65 |     uint32_t hash = 0;
 66 |     size_t i;
 67 |
 68 |     for (i = 0; i < length; i++) {
 69 |         hash += (uint32_t)key[i];
    |         ^ reported warning: "Assigned value is garbage or undefined" (line 69, column 8)
 70 |         hash += (hash << 10);
 71 |         hash ^= (hash >> 6);
 72 |     }
 73 |
 74 |     hash += (hash << 3);
 75 |     hash ^= (hash >> 11);
 76 |     hash += (hash << 15);
 77 |
 78 |     return hash;
 79 | }
 80 |
 81 | /* Opaque sizes and alignment used for struct verification */
 82 | #if __x86_64__
 83 | #define WQ_OPAQUE_ALIGN   8
 84 | #define WQS_OPAQUE_ALIGN  8
 85 | #define WQ_OPAQUE_SIZE    48
 86 | #define WQS_OPAQUE_SIZE   64
 87 | #else
 88 | #error Unknown size requirement
 89 | #endif
 90 |
 91 | #ifndef MACH_KERNEL_PRIVATE
 92 |
 93 | /*
 94 |  * The opaque waitq structure is here mostly for AIO and selinfo,
 95 |  * but could potentially be used by other BSD subsystems.
 96 |  */
 97 | struct waitq { char opaque[WQ_OPAQUE_SIZE]; } __attribute__((aligned(WQ_OPAQUE_ALIGN)));
 98 | struct waitq_set { char opaque[WQS_OPAQUE_SIZE]; } __attribute__((aligned(WQS_OPAQUE_ALIGN)));
 99 |
100 | #else /* MACH_KERNEL_PRIVATE */
101 |
102 | #include <kern/spl.h>
103 | #include <kern/simple_lock.h>
104 | #include <mach/branch_predicates.h>
105 |
106 | #include <machine/cpu_number.h>
107 | #include <machine/machine_routines.h> /* machine_timeout_suspended() */
108 |
109 | /*
110 |  * The event mask is of 59 bits on 64 bit architecture and 27 bits on
111 |  * 32 bit architecture and so we calculate its size using sizeof(long).
112 |  * If the bitfield for wq_type and wq_fifo is changed, then value of
113 |  * EVENT_MASK_BITS will also change.
114 |  *
115 |  * New plan: this is an optimization anyway, so I'm stealing 32bits
116 |  * from the mask to shrink the waitq object even further.
117 |  */
118 | #define _EVENT_MASK_BITS   ((sizeof(uint32_t) * 8) - 6)
119 |
120 | #define WAITQ_BOOST_PRIORITY 31
121 |
122 | enum waitq_type {
123 |     WQT_INVALID = 0,
124 |     WQT_QUEUE   = 0x2,
125 |     WQT_SET     = 0x3,
126 | };
127 |
128 | #if CONFIG_WAITQ_STATS
129 | #define NWAITQ_BTFRAMES 5
130 | struct wq_stats {
131 |     uint64_t waits;
132 |     uint64_t wakeups;
133 |     uint64_t clears;
134 |     uint64_t failed_wakeups;
135 |
136 |     uintptr_t last_wait[NWAITQ_BTFRAMES];
137 |     uintptr_t last_wakeup[NWAITQ_BTFRAMES];
138 |     uintptr_t last_failed_wakeup[NWAITQ_BTFRAMES];
139 | };
140 | #endif
141 |
142 | /*
143 |  * struct waitq
144 |  *
145 |  * This is the definition of the common event wait queue
146 |  * that the scheduler APIs understand. It is used
147 |  * internally by the generalized event waiting mechanism
148 |  * (assert_wait), and also for items that maintain their
149 |  * own wait queues (such as ports and semaphores).
150 |  *
151 |  * It is not published to other kernel components.
152 |  *
153 |  * NOTE: Hardware locks are used to protect event wait
154 |  * queues since interrupt code is free to post events to
155 |  * them.
156 |  */
157 | struct waitq {
158 |     uint32_t /* flags */
159 |         waitq_type:2,    /* only public field */
160 |         waitq_fifo:1,    /* fifo wakeup policy? */
161 |         waitq_prepost:1, /* waitq supports prepost? */
162 |         waitq_irq:1,     /* waitq requires interrupts disabled */
163 |         waitq_isvalid:1, /* waitq structure is valid */
164 |         waitq_eventmask:_EVENT_MASK_BITS;
165 |     /* the wait queue set (set-of-sets) to which this queue belongs */
166 |     hw_lock_data_t waitq_interlock; /* interlock */
167 |
168 |     uint64_t     waitq_set_id;
169 |     uint64_t     waitq_prepost_id;
170 |     queue_head_t waitq_queue; /* queue of elements */
171 | };
172 |
173 | static_assert(sizeof(struct waitq) == WQ_OPAQUE_SIZE, "waitq structure size mismatch");
174 | static_assert(__alignof(struct waitq) == WQ_OPAQUE_ALIGN, "waitq structure alignment mismatch");
175 |
176 | /*
177 |  * struct waitq_set
178 |  *
179 |  * This is the common definition for a set wait queue.
180 |  */
181 | struct waitq_set {
182 |     struct waitq wqset_q;
183 |     uint64_t     wqset_id;
184 |     union {
185 |         uint64_t wqset_prepost_id;
186 |         void    *wqset_prepost_hook;
187 |     };
188 | };
189 |
190 | static_assert(sizeof(struct waitq_set) == WQS_OPAQUE_SIZE, "waitq_set structure size mismatch");
191 | static_assert(__alignof(struct waitq_set) == WQS_OPAQUE_ALIGN, "waitq_set structure alignment mismatch");
192 |
193 | extern void waitq_bootstrap(void);
194 |
195 | #define waitq_is_queue(wq) \
196 |     ((wq)->waitq_type == WQT_QUEUE)
197 |
198 | #define waitq_is_set(wq) \
199 |     ((wq)->waitq_type == WQT_SET && ((struct waitq_set *)(wq))->wqset_id != 0)
200 |
201 | #define waitqs_is_set(wqs) \
202 |     (((wqs)->wqset_q.waitq_type == WQT_SET) && ((wqs)->wqset_id != 0))
203 |
204 | #define waitq_valid(wq) \
205 |     ((wq) != NULL && (wq)->waitq_isvalid && ((wq)->waitq_type & ~1) == WQT_QUEUE)
206 |
207 | /*
208 |  * Invalidate a waitq. The only valid waitq functions to call after this are:
209 |  *    waitq_deinit()
210 |  *    waitq_set_deinit()
211 |  */
212 | extern void waitq_invalidate_locked(struct waitq *wq);
213 |
214 | #define waitq_empty(wq) \
215 |     (queue_empty(&(wq)->waitq_queue))
216 |
217 |
218 | #define waitq_held(wq) \
219 |     (hw_lock_held(&(wq)->waitq_interlock))
220 |
221 | #define waitq_lock_try(wq) \
222 |     (hw_lock_try(&(wq)->waitq_interlock))
223 |
224 |
225 | #define waitq_wait_possible(thread) \
226 |     ((thread)->waitq == NULL)
227 |
228 | extern void waitq_lock(struct waitq *wq);
229 | extern void waitq_unlock(struct waitq *wq);
230 |
231 | #define waitq_set_lock(wqs)      waitq_lock(&(wqs)->wqset_q)
232 | #define waitq_set_unlock(wqs)    waitq_unlock(&(wqs)->wqset_q)
233 | #define waitq_set_lock_try(wqs)  waitq_lock_try(&(wqs)->wqset_q)
234 | #define waitq_set_can_prepost(wqs) (waitqs_is_set(wqs) && \
235 |                                     (wqs)->wqset_q.waitq_prepost)
236 | #define waitq_set_maybe_preposted(wqs) ((wqs)->wqset_q.waitq_prepost && \
237 |                                         (wqs)->wqset_prepost_id > 0)
238 | #define waitq_set_has_prepost_hook(wqs) (waitqs_is_set(wqs) && \
239 |                                          !((wqs)->wqset_q.waitq_prepost) && \
240 |                                          (wqs)->wqset_prepost_hook)
241 |
242 | /* assert intent to wait on a locked wait queue */
243 | extern wait_result_t waitq_assert_wait64_locked(struct waitq *waitq,
244 |                                                 event64_t wait_event,
245 |                                                 wait_interrupt_t interruptible,
246 |                                                 wait_timeout_urgency_t urgency,
247 |                                                 uint64_t deadline,
248 |                                                 uint64_t leeway,
249 |                                                 thread_t thread);
250 |
251 | /* pull a thread from its wait queue */
252 | extern int waitq_pull_thread_locked(struct waitq *waitq, thread_t thread);
253 |
254 | /* wakeup all threads waiting for a particular event on locked queue */
255 | extern kern_return_t waitq_wakeup64_all_locked(struct waitq *waitq,
256 |                                                event64_t wake_event,
257 |                                                wait_result_t result,
258 |                                                uint64_t *reserved_preposts,
259 |                                                int priority,
260 |                                                waitq_lock_state_t lock_state);
261 |
262 | /* wakeup one thread waiting for a particular event on locked queue */
263 | extern kern_return_t waitq_wakeup64_one_locked(struct waitq *waitq,
264 |                                                event64_t wake_event,
265 |                                                wait_result_t result,
266 |                                                uint64_t *reserved_preposts,
267 |                                                int priority,
268 |                                                waitq_lock_state_t lock_state);
269 |
270 | /* return identity of a thread awakened for a particular <wait_queue,event> */
271 | extern thread_t
272 | waitq_wakeup64_identify_locked(struct waitq *waitq,
273 |                                event64_t wake_event,
274 |                                wait_result_t result,
275 |                                spl_t *spl,
276 |                                uint64_t *reserved_preposts,
277 |                                int priority,
278 |                                waitq_lock_state_t lock_state);
279 |
280 | /* wakeup thread iff its still waiting for a particular event on locked queue */
281 | extern kern_return_t waitq_wakeup64_thread_locked(struct waitq *waitq,
282 |                                                   event64_t wake_event,
283 |                                                   thread_t thread,
284 |                                                   wait_result_t result,
285 |                                                   waitq_lock_state_t lock_state);
286 |
287 | /* clear all preposts generated by the given waitq */
288 | extern int waitq_clear_prepost_locked(struct waitq *waitq);
289 |
290 | /* clear all preposts from the given wait queue set */
291 | extern void waitq_set_clear_preposts_locked(struct waitq_set *wqset);
292 |
293 | /* unlink the given waitq from all sets - returns unlocked */
294 | extern kern_return_t waitq_unlink_all_unlock(struct waitq *waitq);
295 |
296 | /* unlink the given waitq set from all waitqs and waitq sets - returns unlocked */
297 | extern kern_return_t waitq_set_unlink_all_unlock(struct waitq_set *wqset);
298 |
299 |
300 |
301 | /*
302 |  * clear a thread's boosted priority
303 |  * (given via WAITQ_PROMOTE_PRIORITY in the wakeup function)
304 |  */
305 | extern void waitq_clear_promotion_locked(struct waitq *waitq,
306 |                                          thread_t thread);
307 |
308 | /*
309 |  * waitq iteration
310 |  */
311 |
312 | enum waitq_iteration_constant {
313 |     WQ_ITERATE_DROPPED             = -4,
314 |     WQ_ITERATE_INVALID             = -3,
315 |     WQ_ITERATE_ABORTED             = -2,
316 |     WQ_ITERATE_FAILURE             = -1,
317 |     WQ_ITERATE_SUCCESS             =  0,
318 |     WQ_ITERATE_CONTINUE            =  1,
319 |     WQ_ITERATE_BREAK               =  2,
320 |     WQ_ITERATE_BREAK_KEEP_LOCKED   =  3,
321 |     WQ_ITERATE_INVALIDATE_CONTINUE =  4,
322 |     WQ_ITERATE_RESTART             =  5,
323 |     WQ_ITERATE_FOUND               =  6,
324 |     WQ_ITERATE_UNLINKED            =  7,
325 | };
326 |
327 | /* callback invoked with both 'waitq' and 'wqset' locked */
328 | typedef int (*waitq_iterator_t)(void *ctx, struct waitq *waitq,
329 |                                 struct waitq_set *wqset);
330 |
331 | /* iterate over all sets to which waitq belongs */
332 | extern int waitq_iterate_sets(struct waitq *waitq, void *ctx,
333 |                               waitq_iterator_t it);
334 |
335 | /* iterator over all waitqs that have preposted to wqset */
336 | extern int waitq_set_iterate_preposts(struct waitq_set *wqset,
337 |                                       void *ctx, waitq_iterator_t it);
338 |
339 | /*
340 |  * prepost reservation
341 |  */
342 | extern uint64_t waitq_prepost_reserve(struct waitq *waitq, int extra,
343 |                                       waitq_lock_state_t lock_state);
344 |
345 | extern void waitq_prepost_release_reserve(uint64_t id);
346 |
347 | #endif /* MACH_KERNEL_PRIVATE */
348 |
349 |
350 | __BEGIN_DECLS
351 |
352 | /*
353 |  * waitq init
354 |  */
355 | extern kern_return_t waitq_init(struct waitq *waitq, int policy);
356 | extern void waitq_deinit(struct waitq *waitq);
357 |
358 | /*
359 |  * global waitqs
360 |  */
361 | extern struct waitq *_global_eventq(char *event, size_t event_length);
362 | #define global_eventq(event) _global_eventq((char *)&(event), sizeof(event))
363 |
364 | extern struct waitq *global_waitq(int index);
365 |
366 | /*
367 |  * set alloc/init/free
368 |  */
369 | extern struct waitq_set *waitq_set_alloc(int policy, void *prepost_hook);
370 |
371 | extern kern_return_t waitq_set_init(struct waitq_set *wqset,
372 |                                     int policy, uint64_t *reserved_link,
373 |                                     void *prepost_hook);
374 |
375 | extern void waitq_set_deinit(struct waitq_set *wqset);
376 |
377 | extern kern_return_t waitq_set_free(struct waitq_set *wqset);
378 |
379 | #if defined(DEVELOPMENT) || defined(DEBUG)
380 | #if CONFIG_WAITQ_DEBUG
381 | extern uint64_t wqset_id(struct waitq_set *wqset);
382 |
383 | struct waitq *wqset_waitq(struct waitq_set *wqset);
384 | #endif /* CONFIG_WAITQ_DEBUG */
385 | #endif /* DEVELOPMENT || DEBUG */
386 |
387 |
388 | /*
389 |  * set membership
390 |  */
391 | extern uint64_t waitq_link_reserve(struct waitq *waitq);
392 |
393 | extern void waitq_link_release(uint64_t id);
394 |
395 | extern boolean_t waitq_member(struct waitq *waitq, struct waitq_set *wqset);
396 |
397 | /* returns true if the waitq is in at least 1 set */
398 | extern boolean_t waitq_in_set(struct waitq *waitq);
399 |
400 |
401 | /* on success, consumes an reserved_link reference */
402 | extern kern_return_t waitq_link(struct waitq *waitq,
403 |                                 struct waitq_set *wqset,
404 |                                 waitq_lock_state_t lock_state,
405 |                                 uint64_t *reserved_link);
406 |
407 | extern kern_return_t waitq_unlink(struct waitq *waitq, struct waitq_set *wqset);
408 |
409 | extern kern_return_t waitq_unlink_all(struct waitq *waitq);
410 |
411 | extern kern_return_t waitq_set_unlink_all(struct waitq_set *wqset);
412 |
413 | /*
414 |  * preposts
415 |  */
416 | extern void waitq_clear_prepost(struct waitq *waitq);
417 |
418 | extern void waitq_set_clear_preposts(struct waitq_set *wqset);
419 |
420 | /*
421 |  * interfaces used primarily by the select/kqueue subsystems
422 |  */
423 | extern uint64_t waitq_get_prepost_id(struct waitq *waitq);
424 | extern void waitq_unlink_by_prepost_id(uint64_t wqp_id, struct waitq_set *wqset);
425 |
426 | /*
427 |  * waitq attributes
428 |  */
429 | extern int waitq_is_valid(struct waitq *waitq);
430 |
431 | extern int waitq_set_is_valid(struct waitq_set *wqset);
432 |
433 | extern int waitq_is_global(struct waitq *waitq);
434 |
435 | extern int waitq_irq_safe(struct waitq *waitq);
436 |
437 | #if CONFIG_WAITQ_STATS
438 | /*
439 |  * waitq statistics
440 |  */
441 | #define WAITQ_STATS_VERSION 1
442 | struct wq_table_stats {
443 |     uint32_t version;
444 |     uint32_t table_elements;
445 |     uint32_t table_used_elems;
446 |     uint32_t table_elem_sz;
447 |     uint32_t table_slabs;
448 |     uint32_t table_slab_sz;
449 |
450 |     uint64_t table_num_allocs;
451 |     uint64_t table_num_preposts;
452 |     uint64_t table_num_reservations;
453 |
454 |     uint64_t table_max_used;
455 |     uint64_t table_avg_used;
456 |     uint64_t table_max_reservations;
457 |     uint64_t table_avg_reservations;
458 | };
459 |
460 | extern void waitq_link_stats(struct wq_table_stats *stats);
461 | extern void waitq_prepost_stats(struct wq_table_stats *stats);
462 | #endif /* CONFIG_WAITQ_STATS */
463 |
464 | /*
465 |  *
466 |  * higher-level waiting APIs
467 |  *
468 |  */
469 |
470 | /* assert intent to wait on <waitq,event64> pair */
471 | extern wait_result_t waitq_assert_wait64(struct waitq *waitq,
472 |                                          event64_t wait_event,
473 |                                          wait_interrupt_t interruptible,
474 |                                          uint64_t deadline);
475 |
476 | extern wait_result_t waitq_assert_wait64_leeway(struct waitq *waitq,
477 |                                                 event64_t wait_event,
478 |                                                 wait_interrupt_t interruptible,
479 |                                                 wait_timeout_urgency_t urgency,
480 |                                                 uint64_t deadline,
481 |                                                 uint64_t leeway);
482 |
483 | /* wakeup the most appropriate thread waiting on <waitq,event64> pair */
484 | extern kern_return_t waitq_wakeup64_one(struct waitq *waitq,
485 |                                         event64_t wake_event,
486 |                                         wait_result_t result,
487 |                                         int priority);
488 |
489 | /* wakeup all the threads waiting on <waitq,event64> pair */
490 | extern kern_return_t waitq_wakeup64_all(struct waitq *waitq,
491 |                                         event64_t wake_event,
492 |                                         wait_result_t result,
493 |                                         int priority);
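
To make the higher-level declarations above concrete, here is a schematic sketch of the assert-then-block pattern they are designed for, mirroring how sys_ulock.c pairs assert_wait()/thread_block() with a wakeup on the other side. It is illustrative only: my_waitq, MY_EVENT and the surrounding functions are hypothetical, thread_block()/THREAD_UNINT come from other kernel headers rather than this one, and the snippet sketches typical usage under those assumptions rather than being buildable outside the kernel.

/* Hypothetical in-kernel user of the waitq_assert_wait64()/waitq_wakeup64_one() pair. */
#include <kern/waitq.h>
#include <kern/sched_prim.h>            /* thread_block() */

static struct waitq my_waitq;           /* assume waitq_init(&my_waitq, SYNC_POLICY_FIFO) ran at boot */
#define MY_EVENT ((event64_t)0x1234)    /* any stable 64-bit event id agreed on by both sides */

/* Waiter side: declare intent to wait, then actually block. */
static wait_result_t
my_wait_for_event(uint64_t deadline)    /* mach absolute time deadline */
{
    wait_result_t wr;

    wr = waitq_assert_wait64(&my_waitq, MY_EVENT, THREAD_UNINT, deadline);
    if (wr == THREAD_WAITING) {
        /* Nothing raced in between the assert and the block, so go to sleep. */
        wr = thread_block(THREAD_CONTINUE_NULL);
    }
    return wr;                          /* THREAD_AWAKENED, THREAD_TIMED_OUT, ... */
}

/* Waker side: wake one waiter; ulock_wake() above effectively requests WAITQ_SELECT_MAX_PRI here. */
static void
my_post_event(void)
{
    (void)waitq_wakeup64_one(&my_waitq, MY_EVENT, THREAD_AWAKENED,
                             WAITQ_ALL_PRIORITIES);
}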
494 |
495 | #ifdef XNU_KERNEL_PRIVATE
496 |
497 | /* wakeup a specified thread iff it's waiting on <waitq,event64> pair */
498 | extern kern_return_t waitq_wakeup64_thread(struct waitq *waitq,
499 |                                            event64_t wake_event,
500 |                                            thread_t thread,
501 |                                            wait_result_t result);
502 |
503 | /* return a reference to the thread that was woken up */
504 | extern thread_t
505 | waitq_wakeup64_identify(struct waitq *waitq,
506 |                         event64_t wake_event,
507 |                         wait_result_t result,
508 |                         int priority);
509 |
510 | #endif /* XNU_KERNEL_PRIVATE */
511 |
512 | __END_DECLS
513 |
514 | #endif /* KERNEL_PRIVATE */
515 | #endif /* _WAITQ_H_ */