junlon2006 / linux-c

computer system & Linux

Linux Memory Management #17

Open junlon2006 opened 4 years ago

junlon2006 commented 4 years ago

Based on Linux kernel 4.13.

junlon2006 commented 4 years ago

[Figure: 32-bit and 64-bit virtual address space layout]

As the figure shows, a 32-bit system has a 4 GB virtual address space: the top 1 GB (3 GB to 4 GB) is the kernel mapping area, and the lower 3 GB (0 to 3 GB) is accessible from user space. On a 64-bit system only 48 address bits are used (the high 16 bits are zeroed by default), giving 2^48 bytes = 256 TB of virtual space, split into 128 TB for the kernel mapping area and 128 TB for user space.

junlon2006 commented 4 years ago

[Figure: user-space virtual address space layout]

As the figure shows, the user-space virtual address layout, from low addresses to high, is:

- text: the code segment
- data: initialized global variables and initialized static variables (the initial value must be non-zero, otherwise the variable is optimized into BSS)
- BSS: uninitialized global variables and uninitialized static variables
- heap: grows toward higher addresses
- mmap: the mmap mapping area, holding shared objects, mapped files, and large-bin allocations; grows toward lower addresses
- stack: grows toward lower addresses

Note: data-segment contents are stored in the executable image itself, so they consume flash/storage space.
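To see this layout directly, here is a minimal sketch (the variable names are illustrative, not from the original) that prints one address from each region; the ordering should match the figure above:

#include <stdio.h>
#include <stdlib.h>

int global_initialized = 42;   /* data: initialized, non-zero global */
int global_uninitialized;      /* BSS: uninitialized global */

int main(void) {
  int local_on_stack = 0;                  /* stack */
  char *on_heap = (char *)malloc(64);      /* heap: small request, served from the brk heap */

  printf("text : %p\n", (void *)main);
  printf("data : %p\n", (void *)&global_initialized);
  printf("bss  : %p\n", (void *)&global_uninitialized);
  printf("heap : %p\n", (void *)on_heap);
  printf("stack: %p\n", (void *)&local_on_stack);

  free(on_heap);
  return 0;
}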

With glibc's ptmalloc, run the code below and compare the values of p1, p2, p3, and p4. Explain the ordering.

#include <stdio.h>
#include <stdlib.h>

#define SMALL_BINS   (1024 * 16)    /* 16 KiB: below glibc's default mmap threshold (128 KiB) */
#define BIG_BINS     (1024 * 256)   /* 256 KiB: above the mmap threshold */

int main() {
  char *p1 = (char *)malloc(SMALL_BINS);
  printf("p1=%p\n", p1);

  char *p2 = (char *)malloc(BIG_BINS);
  printf("p2=%p\n", p2);

  char *p3 = (char *)malloc(SMALL_BINS);
  printf("p3=%p\n", p3);

  char *p4 = (char *)malloc(BIG_BINS);
  printf("p4=%p\n", p4);

  free(p1);
  free(p2);
  free(p3);
  free(p4);

  return 0;
}
junlon2006 commented 4 years ago

The malloc allocator:

1. malloc, through glibc's various wrappers, ends up in the brk and mmap system calls. Small requests are served via brk, which extends the heap toward higher addresses (when the current break is reached, brk is moved further up to keep growing); large requests are served via mmap, and the mmap area grows toward lower addresses. The code in the previous comment illustrates this: comparing p1, p2, p3, and p4, p1 and p3 are allocated from the heap via brk, so their addresses are low (p1 < p3, growing upward), while p2 and p4 are mapped via mmap, so their addresses are high (p2 > p4, growing downward).
2. In glibc, malloc uses ptmalloc by default.
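The small/large boundary mentioned above is glibc's mmap threshold (M_MMAP_THRESHOLD, 128 KiB by default). A minimal sketch, assuming glibc, that lowers the threshold via mallopt() so that even a 16 KiB request is served by mmap rather than brk:

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  /* glibc-specific knob: any request of 4 KiB or more now goes to mmap
   * instead of the brk heap (value is in bytes). */
  mallopt(M_MMAP_THRESHOLD, 4 * 1024);

  char *p = (char *)malloc(1024 * 16);   /* same size as SMALL_BINS above */
  printf("p=%p\n", p);                   /* now lands in the mmap area, not the heap */

  free(p);                               /* released with munmap(), not returned to the heap */
  return 0;
}

Note that setting M_MMAP_THRESHOLD explicitly also disables glibc's dynamic adjustment of the threshold, so this knob is mainly useful for experiments like this one.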

junlon2006 commented 4 years ago

Each process has its own independent virtual address space; virtual memory is mapped onto physical memory through paging and segmentation, so from the process's point of view it appears to own physical memory exclusively. For the kernel there is only one copy across all processes, not only of its physical memory but also of its virtual address space. Kernel code, global variables, and the BSS are all in the (kernel) code segment.
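One way to see each process's private view is /proc/self/maps. A minimal sketch that dumps the calling process's VMA list (the text segment, [heap], mapped libraries, [stack], and so on all appear here):

#include <stdio.h>

int main(void) {
  /* Each line of /proc/self/maps is one VMA: address range, permissions,
   * offset, device, inode, and the backing file (or [heap]/[stack]). */
  FILE *fp = fopen("/proc/self/maps", "r");
  if (fp == NULL) {
    perror("fopen");
    return 1;
  }

  char line[512];
  while (fgets(line, sizeof(line), fp) != NULL) {
    fputs(line, stdout);
  }

  fclose(fp);
  return 0;
}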

junlon2006 commented 4 years ago

The kernel's virtual address space is not tied to any particular process: once any process enters the kernel through a system call, the kernel virtual address space it sees is the same.

In other words, the kernel could modify the memory of any process, but it does not: it can, but there is no need to.

junlon2006 commented 4 years ago

The structure definition in the kernel:

struct mm_struct {
    struct vm_area_struct *mmap;        /* list of VMAs */
    struct rb_root mm_rb;
    u32 vmacache_seqnum;                   /* per-thread vmacache */
#ifdef CONFIG_MMU
    unsigned long (*get_unmapped_area) (struct file *filp,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags);
#endif
    unsigned long mmap_base;        /* base of mmap area */
    unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
    /* Base adresses for compatible mmap() */
    unsigned long mmap_compat_base;
    unsigned long mmap_compat_legacy_base;
#endif
    unsigned long task_size;        /* size of task vm space */
    unsigned long highest_vm_end;       /* highest vma end address */
    pgd_t * pgd;

    /**
     * @mm_users: The number of users including userspace.
     *
     * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
     * to 0 (i.e. when the task exits and there are no other temporary
     * reference holders), we also release a reference on @mm_count
     * (which may then free the &struct mm_struct if @mm_count also
     * drops to 0).
     */
    atomic_t mm_users;

    /**
     * @mm_count: The number of references to &struct mm_struct
     * (@mm_users count as 1).
     *
     * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
     * &struct mm_struct is freed.
     */
    atomic_t mm_count;

    atomic_long_t nr_ptes;          /* PTE page table pages */
#if CONFIG_PGTABLE_LEVELS > 2
    atomic_long_t nr_pmds;          /* PMD page table pages */
#endif
    int map_count;              /* number of VMAs */

    spinlock_t page_table_lock;     /* Protects page tables and some counters */
    struct rw_semaphore mmap_sem;

    struct list_head mmlist;        /* List of maybe swapped mm's.  These are globally strung
                         * together off init_mm.mmlist, and are protected
                         * by mmlist_lock
                         */

    unsigned long hiwater_rss;  /* High-watermark of RSS usage */
    unsigned long hiwater_vm;   /* High-water virtual memory usage */

    unsigned long total_vm;     /* Total pages mapped */
    unsigned long locked_vm;    /* Pages that have PG_mlocked set */
    unsigned long pinned_vm;    /* Refcount permanently increased */
    unsigned long data_vm;      /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
    unsigned long exec_vm;      /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
    unsigned long stack_vm;     /* VM_STACK */
    unsigned long def_flags;
    unsigned long start_code, end_code, start_data, end_data;
    unsigned long start_brk, brk, start_stack;
    unsigned long arg_start, arg_end, env_start, env_end;

    unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

    /*
     * Special counters, in some configurations protected by the
     * page_table_lock, in other configurations by being atomic.
     */
    struct mm_rss_stat rss_stat;

    struct linux_binfmt *binfmt;

    cpumask_var_t cpu_vm_mask_var;

    /* Architecture-specific MM context */
    mm_context_t context;

    unsigned long flags; /* Must use atomic bitops to access the bits */

    struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_AIO
    spinlock_t          ioctx_lock;
    struct kioctx_table __rcu   *ioctx_table;
#endif
#ifdef CONFIG_MEMCG
    /*
     * "owner" points to a task that is regarded as the canonical
     * user/owner of this mm. All of the following must be true in
     * order for it to be changed:
     *
     * current == mm->owner
     * current->mm != mm
     * new_owner->mm == mm
     * new_owner->alloc_lock is held
     */
    struct task_struct __rcu *owner;
#endif
    struct user_namespace *user_ns;

    /* store ref to file /proc/<pid>/exe symlink points to */
    struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
    struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
    pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
    struct cpumask cpumask_allocation;
#endif
#ifdef CONFIG_NUMA_BALANCING
    /*
     * numa_next_scan is the next time that the PTEs will be marked
     * pte_numa. NUMA hinting faults will gather statistics and migrate
     * pages to new nodes if necessary.
     */
    unsigned long numa_next_scan;

    /* Restart point for scanning and setting pte_numa */
    unsigned long numa_scan_offset;

    /* numa_scan_seq prevents two threads setting pte_numa */
    int numa_scan_seq;
#endif
    /*
     * An operation with batched TLB flushing is going on. Anything that
     * can move process memory needs to flush the TLB when moving a
     * PROT_NONE or PROT_NUMA mapped page.
     */
    atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
    /* See flush_tlb_batched_pending() */
    bool tlb_flush_batched;
#endif
    struct uprobes_state uprobes_state;
#ifdef CONFIG_HUGETLB_PAGE
    atomic_long_t hugetlb_usage;
#endif
    struct work_struct async_put_work;
} __randomize_layout;
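The start_code/end_code, start_data/end_data, start_brk, and brk fields above delimit the regions discussed earlier. A minimal user-space sketch, relying on the traditional Linux linker symbols etext/edata/end and on sbrk(0), that prints the corresponding boundaries for the current process:

#define _DEFAULT_SOURCE   /* for sbrk() */
#include <stdio.h>
#include <unistd.h>

/* Linker-provided symbols: end of text, end of initialized data,
 * end of BSS (the latter is also the initial program break). */
extern char etext, edata, end;

int main(void) {
  printf("end of text (etext): %p\n", (void *)&etext);
  printf("end of data (edata): %p\n", (void *)&edata);
  printf("end of bss  (end)  : %p\n", (void *)&end);
  printf("current break      : %p\n", sbrk(0));   /* mm->brk, moved as malloc grows the heap */
  return 0;
}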
junlon2006 commented 4 years ago

32-bit kernel-space virtual memory layout:

[Figure: 32-bit kernel virtual address space layout]

    #define __va(x)      ((void *)((unsigned long)(x)+PAGE_OFFSET))  
    #define __pa(x)    __phys_addr((unsigned long)(x))  
    #define __phys_addr(x)    __phys_addr_nodebug(x)  
    #define __phys_addr_nodebug(x)  ((x) - PAGE_OFFSET)
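These macros describe the kernel's direct (linear) mapping: on 32-bit x86, PAGE_OFFSET defaults to 0xC0000000, so physical and virtual addresses in this range differ by a constant. A minimal user-space sketch of the same arithmetic (PAGE_OFFSET is hard-coded here purely for illustration; VA/PA mirror __va/__pa):

#include <stdio.h>

#define PAGE_OFFSET 0xC0000000UL   /* default user/kernel split on 32-bit x86: kernel starts at 3 GB */

/* Same arithmetic as the kernel's __va()/__pa() for the direct-mapped range:
 * virtual = physical + PAGE_OFFSET, physical = virtual - PAGE_OFFSET. */
#define VA(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
#define PA(x) ((unsigned long)(x) - PAGE_OFFSET)

int main(void) {
  unsigned long phys = 0x00100000UL;                     /* physical address 1 MB */
  void *virt = VA(phys);

  printf("phys 0x%08lx -> virt %p\n", phys, virt);       /* 0xc0100000 */
  printf("virt %p -> phys 0x%08lx\n", virt, PA(virt));
  return 0;
}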