xen: additional features for 3.19-rc0

- Linear p2m for x86 PV guests which simplifies the p2m code, improves
   performance and will allow for > 512 GB PV guests in the future.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQEcBAABAgAGBQJUjx7OAAoJEFxbo/MsZsTRXLIH/ishF/xDCL6F5r0I0SKDuaz5
 C/BediDcFzbzh4/t3x2PrPooHk4gPmeyIg688ZGgBAxHRXC5OJ2U5tdtZ/qUCnwf
 0J1pdp/yoAOVRJT+Sax10lN4+G8YV7+6Ptikz0C7glXBAg8SgFL3Y6tfBS0jNwYR
 wQph09S9n7gMZTodSBLbb0ymtJMhl16DrETJsYV73sU7bAL5sFDVkMQvY3SxkusX
 GNFeALfqM0cSK9mDI6O9avGJKoIdKlzt7VWHdlc+yKTlQsoyg/cSH3AaihhG6af9
 IElRxwH9Z40VFLKip0gNMOIrUwAjFGSw6N+Uhik27tlmvfI3Dll/+gsMz/5sHc8=
 =OyoK
 -----END PGP SIGNATURE-----

Merge tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull additional xen update from David Vrabel:
 "Xen: additional features for 3.19-rc0

   - Linear p2m for x86 PV guests which simplifies the p2m code,
     improves performance and will allow for > 512 GB PV guests in the
     future.

  A last-minute, configuration specific issue was discovered with this
  change which is why it was not included in my previous pull request.
  This has now been fixed and tested"

* tag 'stable/for-linus-3.19-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: switch to post-init routines in xen mmu.c earlier
  Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single"
  xen: annotate xen_set_identity_and_remap_chunk() with __init
  xen: introduce helper functions to do safe read and write accesses
  xen: Speed up set_phys_to_machine() by using read-only mappings
  xen: switch to linear virtual mapped sparse p2m list
  xen: Hide get_phys_to_machine() to be able to tune common path
  x86: Introduce function to get pmd entry pointer
  xen: Delay invalidating extra memory
  xen: Delay m2p_override initialization
  xen: Delay remapping memory of pv-domain
  xen: use common page allocation function in p2m.c
  xen: Make functions static
  xen: fix some style issues in p2m.c
This commit is contained in:
Linus Torvalds 2014-12-16 13:23:03 -08:00
commit eb64c3c6cd
7 changed files with 803 additions and 989 deletions

View File

@ -452,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level); unsigned int *level);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address); extern phys_addr_t slow_virt_to_phys(void *__address);
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags); unsigned numpages, unsigned long page_flags);

View File

@ -41,10 +41,12 @@ typedef struct xpaddr {
extern unsigned long *machine_to_phys_mapping; extern unsigned long *machine_to_phys_mapping;
extern unsigned long machine_to_phys_nr; extern unsigned long machine_to_phys_nr;
extern unsigned long *xen_p2m_addr;
extern unsigned long xen_p2m_size;
extern unsigned long xen_max_p2m_pfn;
extern unsigned long get_phys_to_machine(unsigned long pfn); extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e); unsigned long pfn_e);
@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops, struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
extern int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op);
extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_map_grant_ref *kmap_ops, struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
extern int m2p_remove_override(struct page *page,
struct gnttab_map_grant_ref *kmap_op,
unsigned long mfn);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
/*
* Helper functions to write or read unsigned long values to/from
* memory, when the access may fault.
*/
static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
{
return __put_user(val, (unsigned long __user *)addr);
}
static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
{
return __get_user(*val, (unsigned long __user *)addr);
}
/*
 * Choosing between pfn_to_mfn(), __pfn_to_mfn() and get_phys_to_machine():
 * - pfn_to_mfn() yields either INVALID_P2M_ENTRY or the plain mfn; the
 *   identity and foreign indicator bits are never set in its result.
 * - __pfn_to_mfn() yields the raw p2m table entry, so an identity or
 *   foreign indicator bit that is set in the entry stays set. It wraps
 *   get_phys_to_machine(), which it calls in special cases only.
 * - get_phys_to_machine() is meant to be called by __pfn_to_mfn() only,
 *   for those special cases that need extended handling.
 */
static inline unsigned long __pfn_to_mfn(unsigned long pfn)
{
	unsigned long entry;

	/* Beyond the linear list: either the slow path or an identity frame. */
	if (pfn >= xen_p2m_size) {
		if (unlikely(pfn < xen_max_p2m_pfn))
			return get_phys_to_machine(pfn);

		return IDENTITY_FRAME(pfn);
	}

	/* Common path: direct lookup in the linear p2m list. */
	entry = xen_p2m_addr[pfn];

	/* An invalid entry is re-checked via the extended handling. */
	if (unlikely(entry == INVALID_P2M_ENTRY))
		return get_phys_to_machine(pfn);

	return entry;
}
static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline unsigned long pfn_to_mfn(unsigned long pfn)
{ {
unsigned long mfn; unsigned long mfn;
@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn; return pfn;
mfn = get_phys_to_machine(pfn); mfn = __pfn_to_mfn(pfn);
if (mfn != INVALID_P2M_ENTRY) if (mfn != INVALID_P2M_ENTRY)
mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
if (xen_feature(XENFEAT_auto_translated_physmap)) if (xen_feature(XENFEAT_auto_translated_physmap))
return 1; return 1;
return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
} }
static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
* In such cases it doesn't matter what we return (we return garbage), * In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing! * but we must handle the fault without crashing!
*/ */
ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn);
if (ret < 0) if (ret < 0)
return ~0; return ~0;
@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
return mfn; return mfn;
pfn = mfn_to_pfn_no_overrides(mfn); pfn = mfn_to_pfn_no_overrides(mfn);
if (get_phys_to_machine(pfn) != mfn) { if (__pfn_to_mfn(pfn) != mfn) {
/* /*
* If this appears to be a foreign mfn (because the pfn * If this appears to be a foreign mfn (because the pfn
* doesn't map back to the mfn), then check the local override * doesn't map back to the mfn), then check the local override
@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
* entry doesn't map back to the mfn and m2p_override doesn't have a * entry doesn't map back to the mfn and m2p_override doesn't have a
* valid entry for it. * valid entry for it.
*/ */
if (pfn == ~0 && if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
pfn = mfn; pfn = mfn;
return pfn; return pfn;
@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
return mfn; return mfn;
pfn = mfn_to_pfn(mfn); pfn = mfn_to_pfn(mfn);
if (get_phys_to_machine(pfn) != mfn) if (__pfn_to_mfn(pfn) != mfn)
return -1; /* force !pfn_valid() */ return -1; /* force !pfn_valid() */
return pfn; return pfn;
} }

View File

@ -383,6 +383,26 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
return lookup_address(address, level); return lookup_address(address, level);
} }
/*
 * Find the PMD entry covering a kernel virtual address.
 *
 * Walks the kernel page tables (pgd_offset_k) down to the PUD level and
 * returns a pointer to the PMD entry for @address. NULL is returned when
 * the PGD entry is empty, or when the PUD entry is empty, maps a large
 * page, or is not present.
 */
pmd_t *lookup_pmd_address(unsigned long address)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;

	if (pgd_none(*pgd))
		return NULL;

	pud = pud_offset(pgd, address);

	/* Checks are kept in the same order as a short-circuit OR chain. */
	if (pud_none(*pud))
		return NULL;
	if (pud_large(*pud))
		return NULL;
	if (!pud_present(*pud))
		return NULL;

	return pmd_offset(pud, address);
}
/* /*
* This is necessary because __pa() does not work on some * This is necessary because __pa() does not work on some
* kinds of memory, like vmalloc() or the alloc_remap() * kinds of memory, like vmalloc() or the alloc_remap()

View File

@ -387,7 +387,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
unsigned long mfn; unsigned long mfn;
if (!xen_feature(XENFEAT_auto_translated_physmap)) if (!xen_feature(XENFEAT_auto_translated_physmap))
mfn = get_phys_to_machine(pfn); mfn = __pfn_to_mfn(pfn);
else else
mfn = pfn; mfn = pfn;
/* /*
@ -1113,20 +1113,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
* instead of somewhere later and be confusing. */ * instead of somewhere later and be confusing. */
xen_mc_flush(); xen_mc_flush();
} }
static void __init xen_pagetable_p2m_copy(void)
static void __init xen_pagetable_p2m_free(void)
{ {
unsigned long size; unsigned long size;
unsigned long addr; unsigned long addr;
unsigned long new_mfn_list;
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
new_mfn_list = xen_revector_p2m_tree();
/* No memory or already called. */ /* No memory or already called. */
if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
return; return;
/* using __ka address and sticking INVALID_P2M_ENTRY! */ /* using __ka address and sticking INVALID_P2M_ENTRY! */
@ -1144,8 +1140,6 @@ static void __init xen_pagetable_p2m_copy(void)
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
memblock_free(__pa(xen_start_info->mfn_list), size); memblock_free(__pa(xen_start_info->mfn_list), size);
/* And revector! Bye bye old array */
xen_start_info->mfn_list = new_mfn_list;
/* At this stage, cleanup_highmap has already cleaned __ka space /* At this stage, cleanup_highmap has already cleaned __ka space
* from _brk_limit way up to the max_pfn_mapped (which is the end of * from _brk_limit way up to the max_pfn_mapped (which is the end of
@ -1169,17 +1163,35 @@ static void __init xen_pagetable_p2m_copy(void)
} }
#endif #endif
/*
 * Set up the p2m list for the running kernel. Nothing is done for
 * auto-translated guests. Otherwise xen_vmalloc_p2m_tree() is called, on
 * 64-bit the hypervisor supplied list is released via
 * xen_pagetable_p2m_free(), and finally xen_start_info->mfn_list is pointed
 * at xen_p2m_addr.
 */
static void __init xen_pagetable_p2m_setup(void)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		xen_vmalloc_p2m_tree();

#ifdef CONFIG_X86_64
		/*
		 * Must run before mfn_list is revectored below: it still
		 * needs the old xen_start_info->mfn_list value.
		 */
		xen_pagetable_p2m_free();
#endif

		/* And revector! Bye bye old array */
		xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
	}
}
static void __init xen_pagetable_init(void) static void __init xen_pagetable_init(void)
{ {
paging_init(); paging_init();
#ifdef CONFIG_X86_64 xen_post_allocator_init();
xen_pagetable_p2m_copy();
#endif xen_pagetable_p2m_setup();
/* Allocate and initialize top and mid mfn levels for p2m structure */ /* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list(); xen_build_mfn_list_list();
/* Remap memory freed due to conflicts with E820 map */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_remap_memory();
xen_setup_shared_info(); xen_setup_shared_info();
xen_post_allocator_init();
} }
static void xen_write_cr2(unsigned long cr2) static void xen_write_cr2(unsigned long cr2)
{ {

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,7 @@
#include "xen-ops.h" #include "xen-ops.h"
#include "vdso.h" #include "vdso.h"
#include "p2m.h" #include "p2m.h"
#include "mmu.h"
/* These are code, but not functions. Defined in entry.S */ /* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[]; extern const char xen_hypervisor_callback[];
@ -47,8 +48,19 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
/* Number of pages released from the initial allocation. */ /* Number of pages released from the initial allocation. */
unsigned long xen_released_pages; unsigned long xen_released_pages;
/* Buffer used to remap identity mapped pages */ /*
unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata; * Buffer used to remap identity mapped pages. We only need the virtual space.
* The physical page behind this address is remapped as needed to different
* buffer pages.
*/
#define REMAP_SIZE (P2M_PER_PAGE - 3)
static struct {
unsigned long next_area_mfn;
unsigned long target_pfn;
unsigned long size;
unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
/* /*
* The maximum amount of extra memory compared to the base size. The * The maximum amount of extra memory compared to the base size. The
@ -64,7 +76,6 @@ unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
static void __init xen_add_extra_mem(u64 start, u64 size) static void __init xen_add_extra_mem(u64 start, u64 size)
{ {
unsigned long pfn;
int i; int i;
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
@ -84,75 +95,76 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
printk(KERN_WARNING "Warning: not enough extra memory regions\n"); printk(KERN_WARNING "Warning: not enough extra memory regions\n");
memblock_reserve(start, size); memblock_reserve(start, size);
xen_max_p2m_pfn = PFN_DOWN(start + size);
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
continue;
WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
pfn, mfn);
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
} }
static unsigned long __init xen_do_chunk(unsigned long start, static void __init xen_del_extra_mem(u64 start, u64 size)
unsigned long end, bool release)
{ {
struct xen_memory_reservation reservation = { int i;
.address_bits = 0, u64 start_r, size_r;
.extent_order = 0,
.domid = DOMID_SELF
};
unsigned long len = 0;
unsigned long pfn;
int ret;
for (pfn = start; pfn < end; pfn++) { for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
unsigned long frame; start_r = xen_extra_mem[i].start;
unsigned long mfn = pfn_to_mfn(pfn); size_r = xen_extra_mem[i].size;
if (release) { /* Start of region. */
/* Make sure pfn exists to start with */ if (start_r == start) {
if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) BUG_ON(size > size_r);
continue; xen_extra_mem[i].start += size;
frame = mfn; xen_extra_mem[i].size -= size;
} else {
if (mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
}
set_xen_guest_handle(reservation.extent_start, &frame);
reservation.nr_extents = 1;
ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
&reservation);
WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
release ? "release" : "populate", pfn, ret);
if (ret == 1) {
if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
if (release)
break;
set_xen_guest_handle(reservation.extent_start, &frame);
reservation.nr_extents = 1;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
break;
}
len++;
} else
break; break;
}
/* End of region. */
if (start_r + size_r == start + size) {
BUG_ON(size > size_r);
xen_extra_mem[i].size -= size;
break;
}
/* Mid of region. */
if (start > start_r && start < start_r + size_r) {
BUG_ON(start + size > start_r + size_r);
xen_extra_mem[i].size = start - start_r;
/* Calling memblock_reserve() again is okay. */
xen_add_extra_mem(start + size, start_r + size_r -
(start + size));
break;
}
} }
if (len) memblock_free(start, size);
printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", }
release ? "Freeing" : "Populating",
start, end, len,
release ? "freed" : "added");
return len; /*
* Called during boot before the p2m list can take entries beyond the
* hypervisor supplied p2m list. Entries in extra mem are to be regarded as
* invalid.
*/
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
	int i;
	/*
	 * Use the physical address type here: PFN_PHYS() can yield a value
	 * wider than unsigned long (e.g. 32-bit with 64-bit physical
	 * addresses), and truncating it would make the range check below
	 * compare against the wrong address.
	 */
	phys_addr_t addr = PFN_PHYS(pfn);

	/* A pfn inside any extra memory region has no valid p2m entry yet. */
	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (addr >= xen_extra_mem[i].start &&
		    addr < xen_extra_mem[i].start + xen_extra_mem[i].size)
			return INVALID_P2M_ENTRY;
	}

	/* Everything else is reported as an identity mapped frame. */
	return IDENTITY_FRAME(pfn);
}
/*
* Mark all pfns of extra mem as invalid in p2m list.
*/
void __init xen_inv_extra_mem(void)
{
unsigned long pfn, pfn_s, pfn_e;
int i;
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
pfn_s = PFN_DOWN(xen_extra_mem[i].start);
pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
for (pfn = pfn_s; pfn < pfn_e; pfn++)
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
} }
/* /*
@ -198,26 +210,62 @@ static unsigned long __init xen_find_pfn_range(
return done; return done;
} }
/*
 * Hand a single machine frame back to the hypervisor.
 *
 * Issues XENMEM_decrease_reservation for exactly one extent (@mfn) of the
 * calling domain and returns the hypercall result unchanged; callers in
 * this file treat a return value of 1 as success.
 */
static int __init xen_free_mfn(unsigned long mfn)
{
	/* Fields not named here (address_bits, extent_order) start out as 0. */
	struct xen_memory_reservation reservation = {
		.nr_extents = 1,
		.domid = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start, &mfn);

	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}
/* /*
* This releases a chunk of memory and then does the identity map. It's used as * This releases a chunk of memory and then does the identity map. It's used
* as a fallback if the remapping fails. * as a fallback if the remapping fails.
*/ */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
unsigned long *released) unsigned long *released)
{ {
unsigned long len = 0;
unsigned long pfn, end;
int ret;
WARN_ON(start_pfn > end_pfn); WARN_ON(start_pfn > end_pfn);
end = min(end_pfn, nr_pages);
for (pfn = start_pfn; pfn < end; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
/* Make sure pfn exists to start with */
if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
continue;
ret = xen_free_mfn(mfn);
WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
if (ret == 1) {
if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
break;
len++;
} else
break;
}
/* Need to release pages first */ /* Need to release pages first */
*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true); *released += len;
*identity += set_phys_range_identity(start_pfn, end_pfn); *identity += set_phys_range_identity(start_pfn, end_pfn);
} }
/* /*
* Helper function to update both the p2m and m2p tables. * Helper function to update the p2m and m2p tables and kernel mapping.
*/ */
static unsigned long __init xen_update_mem_tables(unsigned long pfn, static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
unsigned long mfn)
{ {
struct mmu_update update = { struct mmu_update update = {
.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
@ -225,161 +273,88 @@ static unsigned long __init xen_update_mem_tables(unsigned long pfn,
}; };
/* Update p2m */ /* Update p2m */
if (!early_set_phys_to_machine(pfn, mfn)) { if (!set_phys_to_machine(pfn, mfn)) {
WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
pfn, mfn); pfn, mfn);
return false; BUG();
} }
/* Update m2p */ /* Update m2p */
if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
mfn, pfn); mfn, pfn);
return false; BUG();
} }
return true; /* Update kernel mapping, but not for highmem. */
if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
return;
if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(mfn, PAGE_KERNEL), 0)) {
WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
mfn, pfn);
BUG();
}
} }
/* /*
* This function updates the p2m and m2p tables with an identity map from * This function updates the p2m and m2p tables with an identity map from
* start_pfn to start_pfn+size and remaps the underlying RAM of the original * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
* allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks * original allocation at remap_pfn. The information needed for remapping is
* to not exhaust the reserved brk space. Doing it in properly aligned blocks * saved in the memory itself to avoid the need for allocating buffers. The
* ensures we only allocate the minimum required leaf pages in the p2m table. It * complete remap information is contained in a list of MFNs each containing
* copies the existing mfns from the p2m table under the 1:1 map, overwrites * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
* them with the identity map and then updates the p2m and m2p tables with the * This enables us to preserve the original mfn sequence while doing the
* remapped memory. * remapping at a time when the memory management is capable of allocating
* virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
* its callers.
*/ */
static unsigned long __init xen_do_set_identity_and_remap_chunk( static void __init xen_do_set_identity_and_remap_chunk(
unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{ {
unsigned long buf = (unsigned long)&xen_remap_buf;
unsigned long mfn_save, mfn;
unsigned long ident_pfn_iter, remap_pfn_iter; unsigned long ident_pfn_iter, remap_pfn_iter;
unsigned long ident_start_pfn_align, remap_start_pfn_align; unsigned long ident_end_pfn = start_pfn + size;
unsigned long ident_end_pfn_align, remap_end_pfn_align;
unsigned long ident_boundary_pfn, remap_boundary_pfn;
unsigned long ident_cnt = 0;
unsigned long remap_cnt = 0;
unsigned long left = size; unsigned long left = size;
unsigned long mod; unsigned long ident_cnt = 0;
int i; unsigned int i, chunk;
WARN_ON(size == 0); WARN_ON(size == 0);
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
/* mfn_save = virt_to_mfn(buf);
* Determine the proper alignment to remap memory in P2M_PER_PAGE sized
* blocks. We need to keep track of both the existing pfn mapping and
* the new pfn remapping.
*/
mod = start_pfn % P2M_PER_PAGE;
ident_start_pfn_align =
mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
mod = remap_pfn % P2M_PER_PAGE;
remap_start_pfn_align =
mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
mod = (start_pfn + size) % P2M_PER_PAGE;
ident_end_pfn_align = start_pfn + size - mod;
mod = (remap_pfn + size) % P2M_PER_PAGE;
remap_end_pfn_align = remap_pfn + size - mod;
/* Iterate over each p2m leaf node in each range */ for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align; ident_pfn_iter < ident_end_pfn;
ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align; ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) { chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
/* Check we aren't past the end */
BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
/* Save p2m mappings */ /* Map first pfn to xen_remap_buf */
for (i = 0; i < P2M_PER_PAGE; i++) mfn = pfn_to_mfn(ident_pfn_iter);
xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i); set_pte_mfn(buf, mfn, PAGE_KERNEL);
/* Set identity map which will free a p2m leaf */ /* Save mapping information in page */
xen_remap_buf.next_area_mfn = xen_remap_mfn;
xen_remap_buf.target_pfn = remap_pfn_iter;
xen_remap_buf.size = chunk;
for (i = 0; i < chunk; i++)
xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
/* Put remap buf into list. */
xen_remap_mfn = mfn;
/* Set identity map */
ident_cnt += set_phys_range_identity(ident_pfn_iter, ident_cnt += set_phys_range_identity(ident_pfn_iter,
ident_pfn_iter + P2M_PER_PAGE); ident_pfn_iter + chunk);
#ifdef DEBUG left -= chunk;
/* Helps verify a p2m leaf has been freed */
for (i = 0; i < P2M_PER_PAGE; i++) {
unsigned int pfn = ident_pfn_iter + i;
BUG_ON(pfn_to_mfn(pfn) != pfn);
}
#endif
/* Now remap memory */
for (i = 0; i < P2M_PER_PAGE; i++) {
unsigned long mfn = xen_remap_buf[i];
/* This will use the p2m leaf freed above */
if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
remap_pfn_iter + i, mfn);
return 0;
}
remap_cnt++;
}
left -= P2M_PER_PAGE;
} }
/* Max boundary space possible */ /* Restore old xen_remap_buf mapping */
BUG_ON(left > (P2M_PER_PAGE - 1) * 2); set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
/* Now handle the boundary conditions */
ident_boundary_pfn = start_pfn;
remap_boundary_pfn = remap_pfn;
for (i = 0; i < left; i++) {
unsigned long mfn;
/* These two checks move from the start to end boundaries */
if (ident_boundary_pfn == ident_start_pfn_align)
ident_boundary_pfn = ident_pfn_iter;
if (remap_boundary_pfn == remap_start_pfn_align)
remap_boundary_pfn = remap_pfn_iter;
/* Check we aren't past the end */
BUG_ON(ident_boundary_pfn >= start_pfn + size);
BUG_ON(remap_boundary_pfn >= remap_pfn + size);
mfn = pfn_to_mfn(ident_boundary_pfn);
if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
remap_pfn_iter + i, mfn);
return 0;
}
remap_cnt++;
ident_boundary_pfn++;
remap_boundary_pfn++;
}
/* Finish up the identity map */
if (ident_start_pfn_align >= ident_end_pfn_align) {
/*
* In this case we have an identity range which does not span an
* aligned block so everything needs to be identity mapped here.
* If we didn't check this we might remap too many pages since
* the align boundaries are not meaningful in this case.
*/
ident_cnt += set_phys_range_identity(start_pfn,
start_pfn + size);
} else {
/* Remapped above so check each end of the chunk */
if (start_pfn < ident_start_pfn_align)
ident_cnt += set_phys_range_identity(start_pfn,
ident_start_pfn_align);
if (start_pfn + size > ident_pfn_iter)
ident_cnt += set_phys_range_identity(ident_pfn_iter,
start_pfn + size);
}
BUG_ON(ident_cnt != size);
BUG_ON(remap_cnt != size);
return size;
} }
/* /*
@ -396,8 +371,7 @@ static unsigned long __init xen_do_set_identity_and_remap_chunk(
static unsigned long __init xen_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk(
const struct e820entry *list, size_t map_size, unsigned long start_pfn, const struct e820entry *list, size_t map_size, unsigned long start_pfn,
unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
unsigned long *identity, unsigned long *remapped, unsigned long *identity, unsigned long *released)
unsigned long *released)
{ {
unsigned long pfn; unsigned long pfn;
unsigned long i = 0; unsigned long i = 0;
@ -431,19 +405,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
if (size > remap_range_size) if (size > remap_range_size)
size = remap_range_size; size = remap_range_size;
if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) { xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
cur_pfn, size, remap_pfn);
xen_set_identity_and_release_chunk(cur_pfn,
cur_pfn + left, nr_pages, identity, released);
break;
}
/* Update variables to reflect new mappings. */ /* Update variables to reflect new mappings. */
i += size; i += size;
remap_pfn += size; remap_pfn += size;
*identity += size; *identity += size;
*remapped += size;
} }
/* /*
@ -458,13 +425,12 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
return remap_pfn; return remap_pfn;
} }
static unsigned long __init xen_set_identity_and_remap( static void __init xen_set_identity_and_remap(
const struct e820entry *list, size_t map_size, unsigned long nr_pages, const struct e820entry *list, size_t map_size, unsigned long nr_pages,
unsigned long *released) unsigned long *released)
{ {
phys_addr_t start = 0; phys_addr_t start = 0;
unsigned long identity = 0; unsigned long identity = 0;
unsigned long remapped = 0;
unsigned long last_pfn = nr_pages; unsigned long last_pfn = nr_pages;
const struct e820entry *entry; const struct e820entry *entry;
unsigned long num_released = 0; unsigned long num_released = 0;
@ -494,8 +460,7 @@ static unsigned long __init xen_set_identity_and_remap(
last_pfn = xen_set_identity_and_remap_chunk( last_pfn = xen_set_identity_and_remap_chunk(
list, map_size, start_pfn, list, map_size, start_pfn,
end_pfn, nr_pages, last_pfn, end_pfn, nr_pages, last_pfn,
&identity, &remapped, &identity, &num_released);
&num_released);
start = end; start = end;
} }
} }
@ -503,12 +468,63 @@ static unsigned long __init xen_set_identity_and_remap(
*released = num_released; *released = num_released;
pr_info("Set %ld page(s) to 1-1 mapping\n", identity); pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
last_pfn);
pr_info("Released %ld page(s)\n", num_released); pr_info("Released %ld page(s)\n", num_released);
return last_pfn;
} }
/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn is remapped to which pfn) is contained in
 * the to-be-remapped memory itself, in a linked list anchored at
 * xen_remap_mfn. Each list element is mapped in turn at the virtual address
 * of xen_remap_buf and supplies the target pfn, the number of mfns and the
 * mfn of the next list element.
 * This scheme allows remapping the different chunks in arbitrary order while
 * the resulting mapping is independent of that order.
 *
 * Contiguous target pfn ranges are coalesced and removed from the extra
 * memory regions via xen_del_extra_mem().
 */
void __init xen_remap_memory(void)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn, pfn;
	unsigned long remapped = 0;
	unsigned int i;
	unsigned long pfn_s = ~0UL;
	unsigned long len = 0;

	/* Remember the frame backing xen_remap_buf so it can be restored. */
	mfn_save = virt_to_mfn(buf);

	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
		/* Map the remap information */
		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

		/* A list element is stored in the first frame of its chunk. */
		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

		pfn = xen_remap_buf.target_pfn;
		for (i = 0; i < xen_remap_buf.size; i++) {
			mfn = xen_remap_buf.mfns[i];
			xen_update_mem_tables(pfn, mfn);
			remapped++;
			pfn++;
		}

		/*
		 * pfn is now one past the end of this chunk's target range.
		 * Start a new accumulated range, or extend the current one at
		 * its front or back; otherwise flush the accumulated range
		 * and begin a new one with this chunk.
		 */
		if (pfn_s == ~0UL || pfn == pfn_s) {
			pfn_s = xen_remap_buf.target_pfn;
			len += xen_remap_buf.size;
		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
			len += xen_remap_buf.size;
		} else {
			xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));
			pfn_s = xen_remap_buf.target_pfn;
			len = xen_remap_buf.size;
		}

		/* Advance while the current element is still mapped at buf. */
		xen_remap_mfn = xen_remap_buf.next_area_mfn;
	}

	/* Flush the last accumulated range, if any. */
	if (pfn_s != ~0UL && len)
		xen_del_extra_mem(PFN_PHYS(pfn_s), PFN_PHYS(len));

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

	pr_info("Remapped %ld page(s)\n", remapped);
}
static unsigned long __init xen_get_max_pages(void) static unsigned long __init xen_get_max_pages(void)
{ {
unsigned long max_pages = MAX_DOMAIN_PAGES; unsigned long max_pages = MAX_DOMAIN_PAGES;
@ -569,7 +585,6 @@ char * __init xen_memory_setup(void)
int rc; int rc;
struct xen_memory_map memmap; struct xen_memory_map memmap;
unsigned long max_pages; unsigned long max_pages;
unsigned long last_pfn = 0;
unsigned long extra_pages = 0; unsigned long extra_pages = 0;
int i; int i;
int op; int op;
@ -616,17 +631,14 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn; extra_pages += max_pages - max_pfn;
/* /*
* Set identity map on non-RAM pages and remap the underlying RAM. * Set identity map on non-RAM pages and prepare remapping the
* underlying RAM.
*/ */
last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
&xen_released_pages); &xen_released_pages);
extra_pages += xen_released_pages; extra_pages += xen_released_pages;
if (last_pfn > max_pfn) {
max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
mem_end = PFN_PHYS(max_pfn);
}
/* /*
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
* factor the base size. On non-highmem systems, the base * factor the base size. On non-highmem systems, the base
@ -653,6 +665,7 @@ char * __init xen_memory_setup(void)
size = min(size, (u64)extra_pages * PAGE_SIZE); size = min(size, (u64)extra_pages * PAGE_SIZE);
extra_pages -= size / PAGE_SIZE; extra_pages -= size / PAGE_SIZE;
xen_add_extra_mem(addr, size); xen_add_extra_mem(addr, size);
xen_max_p2m_pfn = PFN_DOWN(addr + size);
} else } else
type = E820_UNUSABLE; type = E820_UNUSABLE;
} }

View File

@ -29,11 +29,13 @@ void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void); void xen_setup_machphys_mapping(void);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void); void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;
void xen_mm_pin_all(void); void xen_mm_pin_all(void);
void xen_mm_unpin_all(void); void xen_mm_unpin_all(void);
unsigned long __ref xen_chk_extra_mem(unsigned long pfn);
void __init xen_inv_extra_mem(void);
void __init xen_remap_memory(void);
char * __init xen_memory_setup(void); char * __init xen_memory_setup(void);
char * xen_auto_xlated_memory_setup(void); char * xen_auto_xlated_memory_setup(void);
void __init xen_arch_setup(void); void __init xen_arch_setup(void);
@ -46,7 +48,7 @@ void xen_hvm_init_shared_info(void);
void xen_unplug_emulated_devices(void); void xen_unplug_emulated_devices(void);
void __init xen_build_dynamic_phys_to_machine(void); void __init xen_build_dynamic_phys_to_machine(void);
unsigned long __init xen_revector_p2m_tree(void); void __init xen_vmalloc_p2m_tree(void);
void xen_init_irq_ops(void); void xen_init_irq_ops(void);
void xen_setup_timer(int cpu); void xen_setup_timer(int cpu);