The big picture... (following only the path that allocates pages for an initial slab):
kmem_cache_alloc() ->
slab_alloc() ->
__slab_alloc() ->
new_slab_objects() ->
new_slab() ->
allocate_slab() ->
// The page order is decided here (s->oo).
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
    struct page *page;
    struct kmem_cache_order_objects oo = s->oo;
    ...
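As far as I remember, s->oo packs both the slab page order and the number of objects per slab into a single word (the OO_SHIFT scheme in mm/slub.c). A minimal userspace sketch of that packing; the constant and helper names here mirror the kernel only from memory, so treat them as assumptions:

#include <stdio.h>

/* Assumed to mirror mm/slub.c: order in the high bits, object count low. */
#define OO_SHIFT 16
#define OO_MASK  ((1UL << OO_SHIFT) - 1)

struct kmem_cache_order_objects { unsigned long x; };

static unsigned int oo_order(struct kmem_cache_order_objects x)
{
    return x.x >> OO_SHIFT;    /* page order of one slab */
}

static unsigned int oo_objects(struct kmem_cache_order_objects x)
{
    return x.x & OO_MASK;      /* how many objects fit in that slab */
}

int main(void)
{
    /* e.g. an order-1 slab (2 pages) holding 32 objects */
    struct kmem_cache_order_objects oo = { (1UL << OO_SHIFT) | 32 };

    printf("order=%u objects=%u\n", oo_order(oo), oo_objects(oo));
    return 0;
}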
alloc_slab_page() ->
alloc_pages() -> (this is a macro; here the flow crosses over into mm/page_alloc.c)
Following it (with no NUMA configured) ends up in __alloc_pages_nodemask() below.
/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zone *preferred_zone;
    struct page *page = NULL;
    int migratetype = allocflags_to_migratetype(gfp_mask);
... the zone index is decided here by gfp_zone(), and the migrate type by allocflags_to_migratetype() (see the sketch after this snippet).
    /* First allocation attempt */
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
            zonelist, high_zoneidx, alloc_flags,
            preferred_zone, migratetype);
    if (unlikely(!page))
        page = __alloc_pages_slowpath(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
So on this first attempt the page is taken out of some zone's free lists by get_page_from_freelist(), and only if that fails does __alloc_pages_slowpath() take over.
get_page_from_freelist() is what __alloc_pages_nodemask() calls for that first attempt, so that is followed next.
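As for the migrate type decided above, the mapping is simple bit twiddling: as I understand it for this kernel, allocflags_to_migratetype() folds the __GFP_MOVABLE and __GFP_RECLAIMABLE bits into a small index (0 = unmovable, 1 = reclaimable, 2 = movable). Below is a userspace sketch of that idea; the flag values are made up for the demo, only the mapping scheme is the point:

#include <stdio.h>

/* Demo-only flag bits; the real __GFP_* values live in include/linux/gfp.h. */
#define DEMO_GFP_RECLAIMABLE 0x01u
#define DEMO_GFP_MOVABLE     0x02u

enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE };

/* Sketch of the mapping done by allocflags_to_migratetype(). */
static int demo_flags_to_migratetype(unsigned int gfp_flags)
{
    return (((gfp_flags & DEMO_GFP_MOVABLE) != 0) << 1) |
           ((gfp_flags & DEMO_GFP_RECLAIMABLE) != 0);
}

int main(void)
{
    printf("kernel slab alloc (no flags) -> %d (UNMOVABLE)\n",
           demo_flags_to_migratetype(0));
    printf("__GFP_RECLAIMABLE            -> %d (RECLAIMABLE)\n",
           demo_flags_to_migratetype(DEMO_GFP_RECLAIMABLE));
    printf("__GFP_MOVABLE                -> %d (MOVABLE)\n",
           demo_flags_to_migratetype(DEMO_GFP_MOVABLE));
    return 0;
}

A plain slab allocation with no movability flags therefore lands in the MIGRATE_UNMOVABLE free lists.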
get_page_from_freelist() ->
...
try_this_zone:
        page = buffered_rmqueue(preferred_zone, zone, order,
                        gfp_mask, migratetype);
        if (page)
            break;
...
It walks the zonelist, and as soon as a usable zone is found it pulls a page out via buffered_rmqueue() at the try_this_zone label shown above.
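The zonelist itself is just a priority-ordered fallback list (on this machine presumably Normal -> DMA32 -> DMA), and get_page_from_freelist() takes the first zone that still passes the watermark checks. A toy version of that walk, before diving into buffered_rmqueue() itself; the zone names and numbers are invented for the demo and the simple comparison stands in for zone_watermark_ok():

#include <stdio.h>

/* Toy zone: just a name, a free-page count, and a minimum watermark. */
struct toy_zone {
    const char *name;
    unsigned long free_pages;
    unsigned long watermark_min;
};

int main(void)
{
    /* Assumed fallback order for a non-NUMA x86_64 box. */
    struct toy_zone zonelist[] = {
        { "Normal", 120000, 1024 },
        { "DMA32",  200000, 1024 },
        { "DMA",      3900,  128 },
    };
    int i;

    for (i = 0; i < 3; i++) {
        /* Stand-in for the zone_watermark_ok() check. */
        if (zonelist[i].free_pages > zonelist[i].watermark_min) {
            printf("try_this_zone: allocating from %s\n", zonelist[i].name);
            break;    /* buffered_rmqueue() would run here */
        }
    }
    return 0;
}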
struct page *buffered_rmqueue(struct zone *preferred_zone,
            struct zone *zone, int order, gfp_t gfp_flags,
            int migratetype)
{
    unsigned long flags;
    struct page *page;
    int cold = !!(gfp_flags & __GFP_COLD);

again:
    if (likely(order == 0)) {
        struct per_cpu_pages *pcp;
        struct list_head *list;

        local_irq_save(flags);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        list = &pcp->lists[migratetype];
        if (list_empty(list)) {
            pcp->count += rmqueue_bulk(zone, 0,
                    pcp->batch, list,
                    migratetype, cold); // rmqueue!
            if (unlikely(list_empty(list)))
                goto failed;
        }

        if (cold)
            page = list_entry(list->prev, struct page, lru);
        else
            page = list_entry(list->next, struct page, lru);

        list_del(&page->lru);
        pcp->count--;
    } else {
        if (unlikely(gfp_flags & __GFP_NOFAIL)) {
            /*
             * __GFP_NOFAIL is not to be used in new code.
             *
             * All __GFP_NOFAIL callers should be fixed so that they
             * properly detect and handle allocation failures.
             *
             * We most definitely don't want callers attempting to
             * allocate greater than order-1 page units with
             * __GFP_NOFAIL.
             */
            WARN_ON_ONCE(order > 1);
        }
        spin_lock_irqsave(&zone->lock, flags);
        page = __rmqueue(zone, order, migratetype); // rmqueue!
        spin_unlock(&zone->lock);
        if (!page)
            goto failed;
        ...
For order 0, the page comes out of the per-CPU pageset: when its list is empty, rmqueue_bulk() refills it with pcp->batch pages in one go, and a single page is then popped off the list.
For order >= 1, __rmqueue() takes a whole 2^order block straight from the zone's buddy free lists under zone->lock. A sketch of the per-CPU batching idea follows.
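The order-0 path is really just a per-CPU cache sitting in front of the buddy lists: pop from the local list, and when it runs dry, refill a whole batch so the zone lock is only taken once per batch. A made-up userspace sketch of that pattern (refill_batch() stands in for rmqueue_bulk(); none of these names are from the kernel):

#include <stdio.h>

#define BATCH 4

/* Toy per-CPU page cache: 'pages' stands in for pcp->lists[migratetype]. */
struct toy_pcp {
    int pages[64];
    int count;              /* like pcp->count */
};

/* Stand-in for rmqueue_bulk(): pull BATCH "pages" from the buddy lists. */
static int refill_batch(struct toy_pcp *pcp)
{
    static int next_pfn = 1000;    /* pretend buddy-allocator cursor */
    int i;

    for (i = 0; i < BATCH; i++)
        pcp->pages[pcp->count++] = next_pfn++;
    return BATCH;
}

/* Stand-in for the order-0 branch of buffered_rmqueue(). */
static int alloc_order0(struct toy_pcp *pcp)
{
    if (pcp->count == 0)
        refill_batch(pcp);         /* only here would zone->lock be taken */
    return pcp->pages[--pcp->count];
}

int main(void)
{
    struct toy_pcp pcp = { .count = 0 };
    int i;

    for (i = 0; i < 6; i++)
        printf("got page %d (cache left: %d)\n",
               alloc_order0(&pcp), pcp.count);
    return 0;
}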
Following __rmqueue() shows that it keeps trying __rmqueue_smallest() (and falls back to __rmqueue_fallback() on failure), as below.
/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order,
                        int migratetype)
{
    struct page *page;

retry_reserve:
    page = __rmqueue_smallest(zone, order, migratetype);

    if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
        page = __rmqueue_fallback(zone, order, migratetype);
        ...
__rmqueue_smallest() is shown below.
Since this is the initial slab allocation and the zone itself should still have plenty of free pages, the flow presumably ends up right here.
I planted a couple of debug printk()s and ran a quick test, as below.
/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                        int migratetype)
{
    unsigned int current_order;
    struct free_area * area;
    struct page *page;

    // KIMON DEBUG
    printk("zone:%p, zonename:%s, order:%x, migratetype:%x\n",
            zone, zone->name, order, migratetype);

    /* Find a page of the appropriate size in the preferred list */
    for (current_order = order; current_order < MAX_ORDER; ++current_order) {
        // KIMON DEBUG
        printk("current order : %d\n", current_order);

        area = &(zone->free_area[current_order]);
        if (list_empty(&area->free_list[migratetype]))
            continue;

        // think through..!
        page = list_entry(area->free_list[migratetype].next,
                        struct page, lru);
        list_del(&page->lru);
        rmv_page_order(page);
        area->nr_free--;
        expand(zone, page, order, current_order, area, migratetype);
        return page;
    }

    return NULL;
}
The result: the zone name is always "Normal", so everything is being allocated out of ZONE_NORMAL.
Most requests are order 0, but current_order occasionally climbs up to 4~5 before a block is found.
Does it even make sense that an order-0 request sometimes cannot find a free page at its own order? It seemed strange at first, but presumably the order-0 free list for that migrate type just happens to be empty at that moment, so a bigger buddy block gets split down by expand(); a toy model of that split is sketched below.
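To convince myself, here is a toy model of what expand() does when the loop has to start from a bigger block: a free block of order current_order is split in half repeatedly, each upper half goes back on the free list one order below, and the remaining order-`order` piece is handed out. Pure userspace sketch with counts only, no real struct page or lists:

#include <stdio.h>

#define MAX_ORDER 11

/* nr_free[o] plays the role of zone->free_area[o].nr_free. */
static unsigned long nr_free[MAX_ORDER];

/* Sketch of expand(): split an order-'high' block down to order 'low',
 * returning each upper half to the free list one order below. */
static void toy_expand(unsigned int low, unsigned int high)
{
    while (high > low) {
        high--;
        nr_free[high]++;    /* the buddy of the half we keep */
        printf("  put one order-%u block back on the free list\n", high);
    }
}

int main(void)
{
    unsigned int order = 0, current_order;

    nr_free[5] = 1;         /* pretend only an order-5 block is free */

    /* Same shape as the loop in __rmqueue_smallest(). */
    for (current_order = order; current_order < MAX_ORDER; ++current_order) {
        if (nr_free[current_order] == 0)
            continue;
        nr_free[current_order]--;
        printf("found a free block at order %u\n", current_order);
        toy_expand(order, current_order);
        printf("returning one order-%u page block\n", order);
        break;
    }
    return 0;
}

So a "current order : 4" or "current order : 5" line in the debug output simply records how far up the loop had to go before it could split something down to the requested order.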
Incidentally, the zone setup is arch dependent, so the related code lives under arch/x86/mm/ (init.c).
It looks like the zone sizes and types are initialized arch-dependently at boot, all the struct page entries are created up front, and the memory allocator simply hands those out later.
root@ubuntu:/var/www/work/kimon/linux-3.8.1# grep -R 'zone_sizes_init' * -n
arch/m32r/mm/discontig.c:132:unsigned long __init zone_sizes_init(void)
arch/m32r/mm/init.c:63:unsigned long __init zone_sizes_init(void)
arch/m32r/mm/init.c:93:extern unsigned long zone_sizes_init(void);
arch/m32r/mm/init.c:111: hole_pages = zone_sizes_init();
arch/openrisc/mm/init.c:51:static void __init zone_sizes_init(void)
arch/openrisc/mm/init.c:155: zone_sizes_init();
arch/arm64/mm/init.c:72:static void __init zone_sizes_init(unsigned long min, unsigned long max)
arch/arm64/mm/init.c:194: zone_sizes_init(min, max);
arch/tile/kernel/setup.c:660:static void __init zone_sizes_init(void)
arch/tile/kernel/setup.c:1395: zone_sizes_init();
arch/x86/include/asm/init.h:8:extern void __init zone_sizes_init(void);
arch/x86/mm/init.c:407:void __init zone_sizes_init(void)
arch/x86/mm/init_32.c:697: zone_sizes_init();
arch/x86/mm/init_64.c:637: zone_sizes_init();
^C
root@ubuntu:/var/www/work/kimon/linux-3.8.1#
root@ubuntu:/var/www/work/kimon/linux-3.8.1# grep -R 'MAX_DMA32_PFN' * -n
arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h:88:#define MAX_DMA32_PFN (((1ULL << 32) - (1ULL << 28)) >> PAGE_SHIFT)
arch/mips/include/asm/dma.h:94:#ifndef MAX_DMA32_PFN
arch/mips/include/asm/dma.h:95:#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT))
arch/mips/mm/init.c:343: max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
arch/arm64/mm/init.c:70:#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
arch/arm64/mm/init.c:82: max_dma32 = max(min, min(max, MAX_DMA32_PFN));
arch/x86/kernel/pci-swiotlb.c:81: if (!no_iommu && max_pfn > MAX_DMA32_PFN)
arch/x86/kernel/amd_gart_64.c:756: (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
arch/x86/kernel/amd_gart_64.c:759: if (max_pfn > MAX_DMA32_PFN) {
arch/x86/kernel/early-quirks.c:51: if ((max_pfn > MAX_DMA32_PFN || force_iommu) &&
arch/x86/kernel/aperture_64.c:422: max_pfn > MAX_DMA32_PFN &&
arch/x86/kernel/aperture_64.c:463: } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
arch/x86/include/asm/dma.h:76:#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
arch/x86/mm/init.c:417: max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
arch/x86/mm/numa_emulation.c:128: u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
arch/x86/mm/numa_emulation.c:243: u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
arch/x86/xen/pci-swiotlb-xen.c:66: if (max_pfn > MAX_DMA32_PFN)
drivers/iommu/amd_iommu.c:3173: if (unhandled && max_pfn > MAX_DMA32_PFN) {
mm/bootmem.c:787:#ifdef MAX_DMA32_PFN
mm/bootmem.c:793: /* update goal according ...MAX_DMA32_PFN */
mm/bootmem.c:796: if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
mm/bootmem.c:797: (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
mm/bootmem.c:801: new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
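Judging from those hits, the x86 zone_sizes_init() basically fills a max_zone_pfns[] array with each zone's upper PFN boundary (MAX_DMA_PFN, MAX_DMA32_PFN, max_pfn, ...) and hands it to the generic zone init code (free_area_init_nodes(), if I recall correctly), which then builds the zones and their struct page arrays. A rough, runnable sketch of just the boundary arithmetic, assuming x86_64 with 4KB pages and a made-up 8GB of RAM; only the MAX_DMA32_PFN define is copied from the grep output above, the 16MB DMA limit is an assumption:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Taken from the grep hit above (arch/x86/include/asm/dma.h). */
#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
/* Assumed: the classic 16MB ISA DMA limit for ZONE_DMA on x86. */
#define MAX_DMA_PFN   ((16UL * 1024 * 1024) >> PAGE_SHIFT)

enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

int main(void)
{
    /* Pretend the machine has 8GB of RAM. */
    unsigned long max_pfn = (8UL * 1024 * 1024 * 1024) >> PAGE_SHIFT;
    unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };
    const char *names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal" };
    int i;

    /* Same idea as zone_sizes_init(): each entry is the zone's upper PFN,
     * and the generic init code carves the zones out of this array. */
    max_zone_pfns[ZONE_DMA]    = MAX_DMA_PFN;
    max_zone_pfns[ZONE_DMA32]  = MAX_DMA32_PFN;
    max_zone_pfns[ZONE_NORMAL] = max_pfn;

    for (i = 0; i < MAX_NR_ZONES; i++)
        printf("ZONE_%-6s ends at pfn %lu (%lu MB)\n", names[i],
               max_zone_pfns[i], max_zone_pfns[i] >> (20 - PAGE_SHIFT));
    return 0;
}

That would also explain why every allocation in the debug output came from Normal: once the allocator prefers the highest usable zone, DMA32 and DMA only get touched as fallbacks.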