91 messages in com.xensource.lists.xen-devel[Xen-devel] [PATCH 23 of 36] x86_64: ...
FromSent OnAttachments
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:18 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Jeremy Fitzhardinge24 Jun 2008 21:19 
Arjan van de Ven24 Jun 2008 21:44 
Ingo Molnar25 Jun 2008 01:42 
Ingo Molnar25 Jun 2008 01:46 
Jeremy Fitzhardinge25 Jun 2008 04:46 
Jeremy Fitzhardinge25 Jun 2008 04:48 
Andi Kleen25 Jun 2008 05:39 
Ingo Molnar25 Jun 2008 08:21 
Keir Fraser25 Jun 2008 11:45 
Keir Fraser25 Jun 2008 12:22 
Jeremy Fitzhardinge25 Jun 2008 13:03 
Jeremy Fitzhardinge25 Jun 2008 13:12 
Jeremy Fitzhardinge25 Jun 2008 14:08 
Jeremy Fitzhardinge25 Jun 2008 16:05 
Jeremy Fitzhardinge25 Jun 2008 16:37 
Ingo Molnar26 Jun 2008 03:56 
Ingo Molnar26 Jun 2008 03:57 
Jeremy Fitzhardinge26 Jun 2008 07:28 
Jeremy Fitzhardinge26 Jun 2008 07:33 
Jeremy Fitzhardinge26 Jun 2008 11:25 
Jeremy Fitzhardinge26 Jun 2008 12:02 
Ingo Molnar27 Jun 2008 08:56 
Jeremy Fitzhardinge27 Jun 2008 09:01 
Ingo Molnar27 Jun 2008 09:03 
Ingo Molnar27 Jun 2008 09:06 
Jeremy Fitzhardinge27 Jun 2008 09:25 
Jeremy Fitzhardinge27 Jun 2008 12:03 
Ingo Molnar29 Jun 2008 01:42 
Jeremy Fitzhardinge29 Jun 2008 20:02 
Yinghai Lu29 Jun 2008 21:34 
Jeremy Fitzhardinge29 Jun 2008 22:32 
Ingo Molnar30 Jun 2008 01:21 
Ingo Molnar30 Jun 2008 02:21 
Jeremy Fitzhardinge30 Jun 2008 10:17 
Jeremy Fitzhardinge30 Jun 2008 10:57 
Ingo Molnar30 Jun 2008 11:02 
Ingo Molnar30 Jun 2008 11:11 
Jeremy Fitzhardinge30 Jun 2008 11:36 
Ingo Molnar30 Jun 2008 11:43 
Jeremy Fitzhardinge30 Jun 2008 16:04 
Ingo Molnar01 Jul 2008 01:51 
Ingo Molnar01 Jul 2008 02:21 
Jeremy Fitzhardinge01 Jul 2008 09:10 
Jeremy Fitzhardinge01 Jul 2008 09:13 
Ingo Molnar01 Jul 2008 13:31 
Ingo Molnar03 Jul 2008 02:10 
Jeremy Fitzhardinge03 Jul 2008 08:47 
Yinghai Lu03 Jul 2008 11:20 
Jeremy Fitzhardinge03 Jul 2008 11:25 
Yinghai Lu03 Jul 2008 11:29 
Jeremy Fitzhardinge03 Jul 2008 11:40 
Yinghai Lu03 Jul 2008 11:50 
Yinghai Lu03 Jul 2008 12:19 
Yinghai Lu03 Jul 2008 12:29.patch
Ingo Molnar09 Jul 2008 00:42 
Subject:[Xen-devel] [PATCH 23 of 36] x86_64: adjust mapping of physical pagetables to work with Xen
From:Jeremy Fitzhardinge (jer@goop.org)
Date:06/24/2008 09:18:56 PM
List:com.xensource.lists.xen-devel

This makes a few changes to the construction of the initial pagetables to work better with paravirt_ops/Xen. The main areas are:

1. Support non-PSE mapping of memory, since Xen doesn't currently allow 2M pages to be mapped in guests.

2. Make sure that the ioremap alias of each page is dropped before attaching the new page to the pagetable. This avoids having writable aliases of pagetable pages.

3. Preserve existing pagetable entries, rather than overwriting them. It's possible that a fair amount of the pagetable has already been constructed, so reuse what's already in place rather than ignoring it and overwriting it.

The algorithm relies on the invariant that any page which is part of the kernel pagetable is itself mapped in the linear memory area. This way, it can avoid using ioremap on a pagetable page.

The invariant holds because it maps memory from low to high addresses, and also allocates memory from low to high. Each allocated page can map at least 2M of address space, so the mapped area will always progress much faster than the allocated area. It relies on the early boot code mapping enough pages to get started.

Signed-off-by: Jeremy Fitzhardinge <jere@citrix.com>

---
 arch/x86/mm/init_64.c | 94 ++++++++++++++++++++++++++++++++++++++++++-------
 arch/x86/mm/ioremap.c |  2 -
 2 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -257,6 +257,43 @@ early_iounmap(adr, PAGE_SIZE); }

+static void __meminit +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) +{ + unsigned pages = 0; + int i; + pte_t *pte = pte_page + pte_index(addr); + + for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { + + if (addr >= end) { + if (!after_bootmem) { + for(; i < PTRS_PER_PTE; i++, pte++) + set_pte(pte, __pte(0)); + } + break; + } + + if (pte_val(*pte)) + continue; + + if (0) + printk(" pte=%p addr=%lx pte=%016lx\n", + pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); + pages++; + } + update_page_count(PG_LEVEL_4K, pages); +} + +static void __meminit +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + + phys_pte_init(pte, address, end); +} + static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) { @@ -265,7 +302,9 @@ int i = pmd_index(address);

for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { + unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); + pte_t *pte;

if (address >= end) { if (!after_bootmem) { @@ -275,12 +314,23 @@ break; }

- if (pmd_val(*pmd)) + if (pmd_val(*pmd)) { + phys_pte_update(pmd, address, end); continue; + }

- pages++; - set_pte((pte_t *)pmd, - pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + if (cpu_has_pse) { + pages++; + set_pte((pte_t *)pmd, + pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + continue; + } + + pte = alloc_low_page(&pte_phys); + phys_pte_init(pte, address, end); + unmap_low_page(pte); + + pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); } update_page_count(PG_LEVEL_2M, pages); return address; @@ -337,11 +387,11 @@ pmd = alloc_low_page(&pmd_phys);

spin_lock(&init_mm.page_table_lock); + last_map_addr = phys_pmd_init(pmd, addr, end); + unmap_low_page(pmd); pud_populate(&init_mm, pud, __va(pmd_phys)); - last_map_addr = phys_pmd_init(pmd, addr, end); spin_unlock(&init_mm.page_table_lock);

- unmap_low_page(pmd); } __flush_tlb_all(); update_page_count(PG_LEVEL_1G, pages); @@ -349,15 +399,29 @@ return last_map_addr >> PAGE_SHIFT; }

+static unsigned long __meminit +phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + pud_t *pud; + + pud = (pud_t *)pgd_page_vaddr(*pgd); + + return phys_pud_init(pud, addr, end); +} + static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, tables, start; + unsigned long puds, tables, start;

puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); if (!direct_gbpages) { - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + } + if (!cpu_has_pse) { + unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); }

/* @@ -529,19 +593,25 @@ unsigned long pud_phys; pud_t *pud;

+ next = start + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_val(*pgd)) { + last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end)); + continue; + } + if (after_bootmem) pud = pud_offset(pgd, start & PGDIR_MASK); else pud = alloc_low_page(&pud_phys);

- next = start + PGDIR_SIZE; - if (next > end) - next = end; last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); + unmap_low_page(pud); if (!after_bootmem) pgd_populate(&init_mm, pgd_offset_k(start), __va(pud_phys)); - unmap_low_page(pud); }

if (!after_bootmem) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -513,7 +513,7 @@ if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else - pte_clear(NULL, addr, pte); + pte_clear(&init_mm, addr, pte); __flush_tlb_one(addr); }