/sys/sys/proc.h
My machine
Linux debian-laptop 5.4.0-4-amd64 #1 SMP Debian 5.4.19-1 (2020-02-13) x86_64 GNU/Linux
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
Address sizes: 39 bits physical, 48 bits virtual
CPU(s): 4
On-line CPU(s) list: 0-3
Thread(s) per core: 2
Core(s) per socket: 2
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 142
Model name: Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz
Stepping: 9
CPU MHz: 3100.016
CPU max MHz: 3100.0000
CPU min MHz: 400.0000
BogoMIPS: 5399.81
Virtualization: VT-x
L1d cache: 64 KiB
L1i cache: 64 KiB
L2 cache: 512 KiB
L3 cache: 3 MiB
NUMA node0 CPU(s): 0-3
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx
pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64
monitor ds_cpl vmx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
lahf_lm abm 3dnowprefetch cpuid_fault epb invpcid_single ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase
tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln
pts hwp hwp_notify hwp_act_window hwp_epp md_clear flush_l1d
gcc version 9.2.1 20200224 (Debian 9.2.1-30)
clang version 9.0.1-8
Target: x86_64-pc-linux-gnu
Thread model: posix
Description of a process
line 127
We should note this:
/*
* Below is a key of locks used to protect each member of struct proc. The
* lock is indicated by a reference to a specific character in parens in the
* associated comment.
*/
Proc structure
line 583
The comments are detailed and easy to understand; just reread them when you forget something.
Then let’s check line 598
int p_flag; /* (c) P_* flags. */
int p_flag2; /* (c) P2_* flags. */
and the following line 737
/*
 * These flags are kept in p_flag.
 * Every P_* constant below occupies its own single bit, so several flags can
 * be OR-ed together in one word and each one tested with a bitwise AND.
 */
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
#define P_CONTROLT 0x00002 /* Has a controlling terminal. */
#define P_KPROC 0x00004 /* Kernel process. */
#define P_UNUSED3 0x00008 /* --available-- */
#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */
#define P_PROFIL 0x00020 /* Has started profiling. */
#define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */
#define P_HADTHREADS 0x00080 /* Has had threads (no cleanup shortcuts) */
#define P_SUGID 0x00100 /* Had set id privileges since last exec. */
#define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */
#define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait. */
#define P_TRACED 0x00800 /* Debugged process being traced. */
#define P_WAITED 0x01000 /* Someone is waiting for us. */
#define P_WEXIT 0x02000 /* Working on exiting. */
#define P_EXEC 0x04000 /* Process called exec. */
#define P_WKILLED 0x08000 /* Killed, go to kernel/user boundary ASAP. */
#define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */
#define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */
#define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */
#define P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */
#define P_PROTECTED 0x100000 /* Do not kill on memory overcommit. */
#define P_SIGEVENT 0x200000 /* Process pending signals changed. */
#define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
#define P_HWPMC 0x800000 /* Process is using HWPMCs */
#define P_JAILED 0x1000000 /* Process is in jail. */
#define P_TOTAL_STOP 0x2000000 /* Stopped in stop_all_proc. */
#define P_INEXEC 0x4000000 /* Process is in execve(). */
#define P_STATCHILD 0x8000000 /* Child process stopped or exited. */
#define P_INMEM 0x10000000 /* Loaded into memory. */
#define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */
#define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */
#define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */
/* Mask combining the three stop-reason bits (signal, single-threading, trace). */
#define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
/* Nonzero when any bit of P_STOPPED is set in (p)->p_flag. */
#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
/* Nonzero when P_WKILLED is set in (p)->p_flag. */
#define P_KILLED(p) ((p)->p_flag & P_WKILLED)
/*
 * These flags are kept in p_flag2.
 * As with the P_* flags, every P2_* constant is a distinct single bit.
 */
#define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */
#define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */
#define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOTRACE on exec(2). */
#define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */
#define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
#define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */
#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */
#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */
#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */
#define P2_PROTMAX_ENABLE 0x00000200 /* Force enable implied PROT_MAX. */
#define P2_PROTMAX_DISABLE 0x00000400 /* Force disable implied PROT_MAX. */
#define P2_STKGAP_DISABLE 0x00000800 /* Disable stack gap for MAP_STACK */
#define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */
Packing the flags this way makes use of every bit in each flag word and keeps flag checks cheap: setting or testing a flag is a single bitwise operation.
Note:
Address Space Layout Randomization (ASLR)
Hardware Performance Monitoring Counter (HWPMC)
The td argument in any system call is actually a pointer to the calling thread’s thread structure, which describes the thread.
line 625
/* The following fields are all zeroed upon creation in fork. */
#define p_startzero p_vmspace
struct vmspace *p_vmspace; /* (b) Address space. */
u_int p_swtick; /* (c) Tick when swapped in or out. */
u_int p_cowgen; /* (c) Generation of COW pointers. */
struct itimerval p_realtimer; /* (c) Alarm timer. */
struct rusage p_ru; /* (a) Exit information. */
struct rusage_ext p_rux; /* (cu) Internal resource usage. */
struct rusage_ext p_crux; /* (c) Internal child resource usage. */
int p_profthreads; /* (c) Num threads in addupc_task. */
volatile int p_exitthreads; /* (j) Number of threads exiting */
int p_traceflag; /* (o) Kernel trace points. */
struct vnode *p_tracevp; /* (c + o) Trace to vnode. */
struct ucred *p_tracecred; /* (o) Credentials to trace with. */
struct vnode *p_textvp; /* (b) Vnode of executable. */
u_int p_lock; /* (c) Proclock (prevent swap) count. */
struct sigiolst p_sigiolst; /* (c) List of sigio sources. */
int p_sigparent; /* (c) Signal to parent on exit. */
int p_sig; /* (n) For core dump/debugger XXX. */
u_int p_stops; /* (c) Stop event bitmask. */
u_int p_stype; /* (c) Stop event type. */
char p_step; /* (c) Process is stopped. */
u_char p_pfsflags; /* (c) Procfs flags. */
u_int p_ptevents; /* (c + e) ptrace() event mask. */
struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */
struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */
struct thread *p_singlethread;/* (c + j) If single threading this is it */
int p_suspcount; /* (j) Num threads in suspended mode. */
struct thread *p_xthread; /* (c) Trap thread */
int p_boundary_count;/* (j) Num threads at user boundary */
int p_pendingcnt; /* how many signals are pending */
struct itimers *p_itimers; /* (c) POSIX interval timers. */
struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */
u_int p_treeflag; /* (e) P_TREE flags */
int p_pendingexits; /* (c) Count of pending thread exits. */
struct filemon *p_filemon; /* (c) filemon-specific data. */
int p_pdeathsig; /* (c) Signal from parent on exit. */
/* End area that is zeroed on creation. */
#define p_endzero p_magic
/* The following fields are all copied upon creation in fork. */
#define p_startcopy p_endzero
u_int p_magic; /* (b) Magic number. */
p_startzero and p_endzero are used in sys/kern/kern_fork.c
line 371
bzero(&p2->p_startzero,
__rangeof(struct proc, p_startzero, p_endzero));
This indirectly depends on __builtin_memset and __builtin_offsetof on my platform; the macros involved can be found in sys/sys/systm.h and sys/sys/cdefs.h
/* Excerpt: the bzero definition in effect on the author's platform (the #else branch below). */
#define bzero(buf, len) __builtin_memset((buf), 0, (len))
/*
 * Full context: the memory helpers are mapped to the kcsan_* functions when
 * KCSAN is defined, and to the compiler builtins otherwise.
 * Note that bcopy takes (from, to) while memmove takes (to, from); the macros
 * swap the first two arguments accordingly.
 */
#ifdef KCSAN
void *kcsan_memset(void *, int, size_t);
void *kcsan_memcpy(void *, const void *, size_t);
void *kcsan_memmove(void *, const void *, size_t);
int kcsan_memcmp(const void *, const void *, size_t);
#define bcopy(from, to, len) kcsan_memmove((to), (from), (len))
#define bzero(buf, len) kcsan_memset((buf), 0, (len)) // comment by Kowalski: note the KCSAN variant here
#define bcmp(b1, b2, len) kcsan_memcmp((b1), (b2), (len))
#define memset(buf, c, len) kcsan_memset((buf), (c), (len))
#define memcpy(to, from, len) kcsan_memcpy((to), (from), (len))
#define memmove(dest, src, n) kcsan_memmove((dest), (src), (n))
#define memcmp(b1, b2, len) kcsan_memcmp((b1), (b2), (len))
#else
#define bcopy(from, to, len) __builtin_memmove((to), (from), (len))
#define bzero(buf, len) __builtin_memset((buf), 0, (len)) // comment by Kowalski: and here, the variant selected on my platform
#define bcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len))
#define memset(buf, c, len) __builtin_memset((buf), (c), (len))
#define memcpy(to, from, len) __builtin_memcpy((to), (from), (len))
#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
#define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len))
#endif
/*
 * Difference between the offsets of members `end` and `start` within `type`,
 * i.e. the size of the region that begins at `start` and stops just before
 * `end`. The bzero call in kern_fork.c uses it as the length to clear.
 */
#define __rangeof(type, start, end) \
(__offsetof(type, end) - __offsetof(type, start))
#if __GNUC_PREREQ__(4, 1)
#define __offsetof(type, field) __builtin_offsetof(type, field) // comment by Kowalski, selected on my platform
#else
#ifndef __cplusplus
#define __offsetof(type, field) \
((__size_t)(__uintptr_t)((const volatile void *)&((type *)0)->field))
#else
#define __offsetof(type, field) \
(__offsetof__ (reinterpret_cast <__size_t> \
(&reinterpret_cast <const volatile char &> \
(static_cast<type *> (0)->field))))
#endif