junlon2006 opened this issue 4 years ago
As shown in the figure, on a 32-bit system the virtual address space is 4 GB: the top 1 GB (3 GB-4 GB) is the kernel mapping area, and the lower 3 GB (0-3 GB) is accessible from user mode. On a 64-bit system, 48-bit virtual addresses give a 256 TB space (only the low 48 bits are used for translation; the upper 16 bits are not independent address bits), split into 128 TB for the kernel and 128 TB for user space.
As shown in the figure, the user-space virtual address layout, from low to high addresses, is:
- text: the code segment
- data: initialized global variables and initialized static variables (the initializer must be non-zero, otherwise the variable is optimized into BSS)
- BSS: uninitialized global variables and uninitialized static variables
- heap: grows toward higher addresses
- mmap area: shared objects (.so), file mappings, and large malloc allocations; grows toward lower addresses
- stack: grows toward lower addresses
Note: the contents of the data segment are baked into the executable file itself, so they take up flash (storage) space. A small program that prints one address from each of these regions is sketched below.
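To make the ordering concrete, here is a minimal sketch (my addition, with illustrative variable names) that prints one address from each region; on a typical Linux build the addresses grow from text through data, BSS and heap, with the stack near the top of user space:

```c
#include <stdio.h>
#include <stdlib.h>

int initialized_global = 42;   /* .data: initialized, non-zero global */
int uninitialized_global;      /* .bss:  uninitialized global         */

int main(void) {
    int local_on_stack = 0;                 /* stack          */
    char *on_heap = malloc(16);             /* heap (via brk) */

    printf("text  (main)                 : %p\n", (void *)main);
    printf(".data (initialized_global)   : %p\n", (void *)&initialized_global);
    printf(".bss  (uninitialized_global) : %p\n", (void *)&uninitialized_global);
    printf("heap  (malloc)               : %p\n", (void *)on_heap);
    printf("stack (local)                : %p\n", (void *)&local_on_stack);

    free(on_heap);
    return 0;
}
```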
glibc ptmalloc: run the code below, compare the values of p1, p2, p3 and p4, and explain why they are ordered that way.
#include <stdio.h>
#include <stdlib.h>

#define SMALL_BINS (1024 * 16)    /* 16 KiB: below the mmap threshold  */
#define BIG_BINS   (1024 * 256)   /* 256 KiB: above the mmap threshold */

int main() {
    char *p1 = (char *)malloc(SMALL_BINS);
    printf("p1=%p\n", p1);
    char *p2 = (char *)malloc(BIG_BINS);
    printf("p2=%p\n", p2);
    char *p3 = (char *)malloc(SMALL_BINS);
    printf("p3=%p\n", p3);
    char *p4 = (char *)malloc(BIG_BINS);
    printf("p4=%p\n", p4);

    free(p1);
    free(p2);
    free(p3);
    free(p4);
    return 0;
}
The malloc allocator:
1. malloc goes through glibc's wrappers down to the brk and mmap system calls. Small requests are served from the heap via brk, which grows toward higher addresses (when the current break is exhausted, brk is simply moved further up); large requests, above the mmap threshold (128 KiB by default), are served by mmap, and the mmap area grows toward lower addresses. The code in the previous comment is exactly this case: p1 and p3 come from the heap via brk, so their addresses are low and p1 < p3 (the heap grows upward), while p2 and p4 are mmap-mapped, so their addresses are high and p2 > p4 (the mmap area grows downward). (A sketch for observing this follows the list.)
2. In glibc, malloc is implemented by ptmalloc by default.
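To see the brk/mmap split in action, the sketch below (my addition, not part of the original comment) watches the program break with sbrk(0) around a small allocation, and then uses glibc's mallopt(M_MMAP_THRESHOLD, ...) to lower the threshold so that even a 16 KiB request becomes mmap-backed; exact addresses and the size of the break movement will vary between runs:

```c
#include <malloc.h>   /* mallopt, M_MMAP_THRESHOLD (glibc) */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>   /* sbrk */

int main(void) {
    void *brk_before = sbrk(0);
    char *small = malloc(16 * 1024);     /* below threshold -> served from the heap (brk) */
    void *brk_after = sbrk(0);
    printf("brk moved by %ld bytes for the 16 KiB request\n",
           (long)((char *)brk_after - (char *)brk_before));

    char *large = malloc(256 * 1024);    /* above threshold -> served by mmap */
    printf("small=%p (heap, low)  large=%p (mmap area, high)\n",
           (void *)small, (void *)large);

    /* Lower the threshold so that even 16 KiB requests go through mmap. */
    mallopt(M_MMAP_THRESHOLD, 4 * 1024);
    char *small2 = malloc(16 * 1024);
    printf("small2=%p (now mmap-backed)\n", (void *)small2);

    free(small);
    free(large);
    free(small2);
    return 0;
}
```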
Each process has its own independent virtual address space; virtual memory is mapped onto physical memory through paging (and segmentation), so from the process's point of view it looks as if it has physical memory all to itself. The kernel, by contrast, is shared by all processes: there is only one copy not just of its physical memory but of its virtual address space as well. Kernel code, global variables and BSS all live in the kernel's own segments (the kernel image).
The kernel-mode virtual address space is not tied to any particular process: once a process enters the kernel through a system call, the kernel virtual address space it sees is the same for everyone.
In other words, the kernel is able to modify the memory of any process; it just doesn't do so, because while it can, there is no need to.
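A quick way to look at a process's own mapping list from user space is /proc/self/maps, which prints the VMAs the kernel tracks for that process (the same list that mm_struct below keeps in its mmap field). A minimal sketch, assuming a Linux /proc filesystem:

```c
#include <stdio.h>

int main(void) {
    /* Each line is one VMA: address range, permissions, offset, backing file. */
    FILE *f = fopen("/proc/self/maps", "r");
    if (!f) {
        perror("fopen /proc/self/maps");
        return 1;
    }
    char line[512];
    while (fgets(line, sizeof line, f))
        fputs(line, stdout);   /* look for [heap], [stack], and mmap'd .so entries */
    fclose(f);
    return 0;
}
```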
The relevant structure definition in the kernel:
struct mm_struct {
        struct vm_area_struct *mmap;            /* list of VMAs */
        struct rb_root mm_rb;
        u32 vmacache_seqnum;                    /* per-thread vmacache */
#ifdef CONFIG_MMU
        unsigned long (*get_unmapped_area) (struct file *filp,
                        unsigned long addr, unsigned long len,
                        unsigned long pgoff, unsigned long flags);
#endif
        unsigned long mmap_base;                /* base of mmap area */
        unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
        /* Base addresses for compatible mmap() */
        unsigned long mmap_compat_base;
        unsigned long mmap_compat_legacy_base;
#endif
        unsigned long task_size;                /* size of task vm space */
        unsigned long highest_vm_end;           /* highest vma end address */
        pgd_t * pgd;

        /**
         * @mm_users: The number of users including userspace.
         *
         * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
         * to 0 (i.e. when the task exits and there are no other temporary
         * reference holders), we also release a reference on @mm_count
         * (which may then free the &struct mm_struct if @mm_count also
         * drops to 0).
         */
        atomic_t mm_users;

        /**
         * @mm_count: The number of references to &struct mm_struct
         * (@mm_users count as 1).
         *
         * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
         * &struct mm_struct is freed.
         */
        atomic_t mm_count;

        atomic_long_t nr_ptes;                  /* PTE page table pages */
#if CONFIG_PGTABLE_LEVELS > 2
        atomic_long_t nr_pmds;                  /* PMD page table pages */
#endif
        int map_count;                          /* number of VMAs */
        spinlock_t page_table_lock;             /* Protects page tables and some counters */
        struct rw_semaphore mmap_sem;
        struct list_head mmlist;                /* List of maybe swapped mm's. These are globally strung
                                                 * together off init_mm.mmlist, and are protected
                                                 * by mmlist_lock
                                                 */
        unsigned long hiwater_rss;              /* High-watermark of RSS usage */
        unsigned long hiwater_vm;               /* High-water virtual memory usage */
        unsigned long total_vm;                 /* Total pages mapped */
        unsigned long locked_vm;                /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;                /* Refcount permanently increased */
        unsigned long data_vm;                  /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
        unsigned long exec_vm;                  /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
        unsigned long stack_vm;                 /* VM_STACK */
        unsigned long def_flags;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
        unsigned long arg_start, arg_end, env_start, env_end;
        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

        /*
         * Special counters, in some configurations protected by the
         * page_table_lock, in other configurations by being atomic.
         */
        struct mm_rss_stat rss_stat;

        struct linux_binfmt *binfmt;
        cpumask_var_t cpu_vm_mask_var;

        /* Architecture-specific MM context */
        mm_context_t context;

        unsigned long flags;                    /* Must use atomic bitops to access the bits */
        struct core_state *core_state;          /* coredumping support */
#ifdef CONFIG_AIO
        spinlock_t ioctx_lock;
        struct kioctx_table __rcu *ioctx_table;
#endif
#ifdef CONFIG_MEMCG
        /*
         * "owner" points to a task that is regarded as the canonical
         * user/owner of this mm. All of the following must be true in
         * order for it to be changed:
         *
         * current == mm->owner
         * current->mm != mm
         * new_owner->mm == mm
         * new_owner->alloc_lock is held
         */
        struct task_struct __rcu *owner;
#endif
        struct user_namespace *user_ns;

        /* store ref to file /proc/<pid>/exe symlink points to */
        struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
        struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
        pgtable_t pmd_huge_pte;                 /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
        struct cpumask cpumask_allocation;
#endif
#ifdef CONFIG_NUMA_BALANCING
        /*
         * numa_next_scan is the next time that the PTEs will be marked
         * pte_numa. NUMA hinting faults will gather statistics and migrate
         * pages to new nodes if necessary.
         */
        unsigned long numa_next_scan;

        /* Restart point for scanning and setting pte_numa */
        unsigned long numa_scan_offset;

        /* numa_scan_seq prevents two threads setting pte_numa */
        int numa_scan_seq;
#endif
        /*
         * An operation with batched TLB flushing is going on. Anything that
         * can move process memory needs to flush the TLB when moving a
         * PROT_NONE or PROT_NUMA mapped page.
         */
        atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
        /* See flush_tlb_batched_pending() */
        bool tlb_flush_batched;
#endif
        struct uprobes_state uprobes_state;
#ifdef CONFIG_HUGETLB_PAGE
        atomic_long_t hugetlb_usage;
#endif
        struct work_struct async_put_work;
} __randomize_layout;
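Several of the fields above have user-visible counterparts: end_code, end_data and start_brk line up roughly with the linker-provided symbols etext, edata and end (see the end(3) man page), and brk is what sbrk(0) reports. The sketch below is my own illustration of that correspondence (the field names in the comments refer to mm_struct above), not kernel code:

```c
#include <stdio.h>
#include <unistd.h>

/* Linker-provided symbols on Linux (see end(3)): end of text,
 * end of initialized data, end of BSS. */
extern char etext, edata, end;

int main(void) {
    printf("etext   (~ mm->end_code)  : %p\n", (void *)&etext);
    printf("edata   (~ mm->end_data)  : %p\n", (void *)&edata);
    printf("end     (~ mm->start_brk) : %p\n", (void *)&end);
    printf("sbrk(0) (~ mm->brk)       : %p\n", sbrk(0));
    return 0;
}
```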
32-bit kernel virtual memory layout: in the direct (linear) mapping, a kernel virtual address and the physical address it maps differ by the constant PAGE_OFFSET, as the conversion macros below show.
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define __pa(x) __phys_addr((unsigned long)(x))
#define __phys_addr(x) __phys_addr_nodebug(x)
#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET)
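These macros are just constant arithmetic: __va adds PAGE_OFFSET to a physical address, __pa subtracts it. As an illustration only (not kernel code), the sketch below hard-codes the offset to 0xC0000000, the typical PAGE_OFFSET for a 32-bit kernel with the default 3G/1G split, and mirrors the two macros in user space under hypothetical demo_ names:

```c
#include <stdio.h>

/* Assumption for the demo: 0xC0000000 is the usual PAGE_OFFSET for a
 * 32-bit kernel with the default 3G/1G user/kernel split; real kernels
 * take it from their configuration. */
#define DEMO_PAGE_OFFSET 0xC0000000UL
#define demo_va(x) ((void *)((unsigned long)(x) + DEMO_PAGE_OFFSET))
#define demo_pa(x) ((unsigned long)(x) - DEMO_PAGE_OFFSET)

int main(void) {
    unsigned long phys = 0x00100000UL;   /* a physical address: 1 MiB           */
    void *virt = demo_va(phys);          /* -> 0xC0100000 in the direct mapping */
    printf("phys 0x%lx  <->  virt %p  <->  back to phys 0x%lx\n",
           phys, virt, demo_pa(virt));
    return 0;
}
```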
Based on Linux kernel 4.13.