[S390] Virtual memmap for s390.
Virtual memmap support for s390. Inspired by the ia64 implementation.

Unlike ia64, we need a mechanism that allows us to dynamically attach
shared memory regions. These regions are accessed via the dcss device
driver; dcss implements the 'direct_access' operation, which requires
a struct page for every single shared page. Therefore this
implementation provides an interface to attach/detach shared memory:

int add_shared_memory(unsigned long start, unsigned long size);
int remove_shared_memory(unsigned long start, unsigned long size);

The purpose of the add_shared_memory function is to add the given
memory range to the 1:1 mapping and to make sure that the
corresponding range in the vmemmap is backed with physical pages.
It also initialises the new struct pages.

remove_shared_memory in turn only invalidates the page table entries
in the 1:1 mapping. The page tables and the memory used for struct
pages in the vmemmap are currently not freed; they will be reused
when the next segment is attached.
Given that the maximum size of a shared memory region is 2GB and, in
addition, all regions must reside below 2GB, this is not too much of
a restriction, but there is room for improvement.
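
As an illustration, here is a minimal sketch of how a driver is
expected to use this pair, modelled on the __segment_load() and
segment_unload() changes below. attach_segment() and do_load() are
hypothetical names; the error values and the size convention
(end - start + 1) are the ones used by extmem.c in this patch:

static int attach_segment(unsigned long start, unsigned long end)
{
	int rc;

	/* back [start, end] with a 1:1 mapping and vmemmap struct pages */
	rc = add_shared_memory(start, end - start + 1);
	if (rc)
		return rc; /* e.g. -ENOSPC (overlap) or -ERANGE */

	rc = do_load(start, end); /* stand-in for the DCSS load diagnose */
	if (rc)
		/* invalidate the 1:1 mapping again; the page tables and
		 * vmemmap pages are kept for reuse */
		remove_shared_memory(start, end - start + 1);
	return rc;
}

For scale: a maximal 2GB segment consists of 2GB / 4KB = 524288
pages, so assuming a struct page of 64 bytes (the actual size depends
on the configuration) its vmemmap backing amounts to roughly 32MB,
which is what remove_shared_memory keeps around for reuse.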

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Heiko Carstens authored and Martin Schwidefsky committed Dec 8, 2006
1 parent 7f09014 commit f4eb07c
Showing 9 changed files with 488 additions and 210 deletions.
3 changes: 3 additions & 0 deletions arch/s390/Kconfig
@@ -235,6 +235,9 @@ config WARN_STACK_SIZE
 
 source "mm/Kconfig"
 
+config HOLES_IN_ZONE
+	def_bool y
+
 comment "I/O subsystem configuration"
 
 config MACHCHK_WARNING
2 changes: 1 addition & 1 deletion arch/s390/kernel/setup.c
@@ -64,7 +64,7 @@ unsigned int console_devno = -1;
 unsigned int console_irq = -1;
 unsigned long machine_flags = 0;
 
-struct mem_chunk memory_chunk[MEMORY_CHUNKS];
+struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
 unsigned long __initdata zholes_size[MAX_NR_ZONES];
 static unsigned long __initdata memory_end;
2 changes: 1 addition & 1 deletion arch/s390/mm/Makefile
@@ -2,6 +2,6 @@
 # Makefile for the linux s390-specific parts of the memory manager.
 #
 
-obj-y	 := init.o fault.o ioremap.o extmem.o mmap.o
+obj-y	 := init.o fault.o ioremap.o extmem.o mmap.o vmem.o
 obj-$(CONFIG_CMM)	+= cmm.o
 
106 changes: 26 additions & 80 deletions arch/s390/mm/extmem.c
@@ -16,6 +16,7 @@
 #include <linux/bootmem.h>
 #include <linux/ctype.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #include <asm/ebcdic.h>
 #include <asm/errno.h>
 #include <asm/extmem.h>
@@ -237,65 +238,6 @@ query_segment_type (struct dcss_segment *seg)
 	return rc;
 }
 
-/*
- * check if the given segment collides with guest storage.
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_storage(struct dcss_segment *seg)
-{
-	int i;
-
-	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
-		if (memory_chunk[i].type != CHUNK_READ_WRITE)
-			continue;
-		if ((memory_chunk[i].addr >> 20) > (seg->end >> 20))
-			continue;
-		if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20)
-		    < (seg->start_addr >> 20))
-			continue;
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * check if segment collides with other segments that are currently loaded
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_others (struct dcss_segment *seg)
-{
-	struct list_head *l;
-	struct dcss_segment *tmp;
-
-	BUG_ON(!mutex_is_locked(&dcss_lock));
-	list_for_each(l, &dcss_list) {
-		tmp = list_entry(l, struct dcss_segment, list);
-		if ((tmp->start_addr >> 20) > (seg->end >> 20))
-			continue;
-		if ((tmp->end >> 20) < (seg->start_addr >> 20))
-			continue;
-		if (seg == tmp)
-			continue;
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * check if segment exceeds the kernel mapping range (detected or set via mem=)
- * returns 1 if this is the case, 0 if segment fits into the range
- */
-static inline int
-segment_exceeds_range (struct dcss_segment *seg)
-{
-	int seg_last_pfn = (seg->end) >> PAGE_SHIFT;
-	if (seg_last_pfn > max_pfn)
-		return 1;
-	return 0;
-}
-
 /*
  * get info about a segment
  * possible return values:
@@ -341,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 	rc = query_segment_type (seg);
 	if (rc < 0)
 		goto out_free;
-	if (segment_exceeds_range(seg)) {
-		PRINT_WARN ("segment_load: not loading segment %s - exceeds"
-			" kernel mapping range\n",name);
-		rc = -ERANGE;
+
+	rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+
+	switch (rc) {
+	case 0:
+		break;
+	case -ENOSPC:
+		PRINT_WARN("segment_load: not loading segment %s - overlaps "
+			   "storage/segment\n", name);
 		goto out_free;
-	}
-	if (segment_overlaps_storage(seg)) {
-		PRINT_WARN ("segment_load: not loading segment %s - overlaps"
-			" storage\n",name);
-		rc = -ENOSPC;
+	case -ERANGE:
+		PRINT_WARN("segment_load: not loading segment %s - exceeds "
+			   "kernel mapping range\n", name);
 		goto out_free;
-	}
-	if (segment_overlaps_others(seg)) {
-		PRINT_WARN ("segment_load: not loading segment %s - overlaps"
-			" other segments\n",name);
-		rc = -EBUSY;
+	default:
+		PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
			   name, rc);
 		goto out_free;
 	}
+
 	if (do_nonshared)
 		dcss_command = DCSS_LOADNSR;
 	else
@@ -372,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 		rc = dcss_diag_translate_rc (seg->end);
 		dcss_diag(DCSS_PURGESEG, seg->dcss_name,
 			  &seg->start_addr, &seg->end);
-		goto out_free;
+		goto out_shared;
 	}
 	seg->do_nonshared = do_nonshared;
 	atomic_set(&seg->ref_count, 1);
@@ -391,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 		   (void*)seg->start_addr, (void*)seg->end,
 		   segtype_string[seg->vm_segtype]);
 	goto out;
+ out_shared:
+	remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
  out_free:
 	kfree(seg);
  out:
@@ -530,12 +476,12 @@ segment_unload(char *name)
 			  "please report to linux390@de.ibm.com\n",name);
 		goto out_unlock;
 	}
-	if (atomic_dec_return(&seg->ref_count) == 0) {
-		list_del(&seg->list);
-		dcss_diag(DCSS_PURGESEG, seg->dcss_name,
-			  &dummy, &dummy);
-		kfree(seg);
-	}
+	if (atomic_dec_return(&seg->ref_count) != 0)
+		goto out_unlock;
+	remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+	list_del(&seg->list);
+	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+	kfree(seg);
 out_unlock:
 	mutex_unlock(&dcss_lock);
 }
163 changes: 41 additions & 122 deletions arch/s390/mm/init.c
@@ -69,6 +69,8 @@ void show_mem(void)
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	i = max_mapnr;
 	while (i-- > 0) {
+		if (!pfn_valid(i))
+			continue;
 		page = pfn_to_page(i);
 		total++;
 		if (PageReserved(page))
@@ -84,67 +86,53 @@ void show_mem(void)
 	printk("%d pages swap cached\n",cached);
 }
 
+static void __init setup_ro_region(void)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t new_pte;
+	unsigned long address, end;
+
+	address = ((unsigned long)&__start_rodata) & PAGE_MASK;
+	end = PFN_ALIGN((unsigned long)&__end_rodata);
+
+	for (; address < end; address += PAGE_SIZE) {
+		pgd = pgd_offset_k(address);
+		pmd = pmd_offset(pgd, address);
+		pte = pte_offset_kernel(pmd, address);
+		new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
+		set_pte(pte, new_pte);
+	}
+}
+
 extern unsigned long __initdata zholes_size[];
+extern void vmem_map_init(void);
 /*
  * paging_init() sets up the page tables
  */
-
-#ifndef CONFIG_64BIT
 void __init paging_init(void)
 {
-	pgd_t * pg_dir;
-	pte_t * pg_table;
-	pte_t   pte;
-	int     i;
-	unsigned long tmp;
-	unsigned long pfn = 0;
-	unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
-	static const int ssm_mask = 0x04000000L;
-	unsigned long ro_start_pfn, ro_end_pfn;
+	pgd_t *pg_dir;
+	int i;
+	unsigned long pgdir_k;
+	static const int ssm_mask = 0x04000000L;
 	unsigned long zones_size[MAX_NR_ZONES];
+	unsigned long dma_pfn, high_pfn;
 
-	ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
-	ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
-
-	memset(zones_size, 0, sizeof(zones_size));
-	zones_size[ZONE_DMA] = max_low_pfn;
-	free_area_init_node(0, &contig_page_data, zones_size,
-			    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
-			    zholes_size);
-
-	/* unmap whole virtual address space */
+	pg_dir = swapper_pg_dir;
 
-	pg_dir = swapper_pg_dir;
-
+#ifdef CONFIG_64BIT
+	pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE;
 	for (i = 0; i < PTRS_PER_PGD; i++)
-		pmd_clear((pmd_t *) pg_dir++);
-
-	/*
-	 * map whole physical memory to virtual memory (identity mapping)
-	 */
-
-	pg_dir = swapper_pg_dir;
-
-	while (pfn < max_low_pfn) {
-		/*
-		 * pg_table is physical at this point
-		 */
-		pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
-
-		pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
-		pg_dir++;
-
-		for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
-			if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
-				pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
-			else
-				pte = pfn_pte(pfn, PAGE_KERNEL);
-			if (pfn >= max_low_pfn)
-				pte_val(pte) = _PAGE_TYPE_EMPTY;
-			set_pte(pg_table, pte);
-			pfn++;
-		}
-	}
+		pgd_clear(pg_dir + i);
+#else
+	pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
+	for (i = 0; i < PTRS_PER_PGD; i++)
+		pmd_clear((pmd_t *)(pg_dir + i));
+#endif
+	vmem_map_init();
+	setup_ro_region();
 
 	S390_lowcore.kernel_asce = pgdir_k;
 
@@ -154,31 +142,9 @@ void __init paging_init(void)
 	__ctl_load(pgdir_k, 13, 13);
 	__raw_local_irq_ssm(ssm_mask);
 
-	local_flush_tlb();
-}
-
-#else /* CONFIG_64BIT */
-
-void __init paging_init(void)
-{
-	pgd_t * pg_dir;
-	pmd_t * pm_dir;
-	pte_t * pt_dir;
-	pte_t   pte;
-	int     i,j,k;
-	unsigned long pfn = 0;
-	unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) |
-		_KERN_REGION_TABLE;
-	static const int ssm_mask = 0x04000000L;
-	unsigned long zones_size[MAX_NR_ZONES];
-	unsigned long dma_pfn, high_pfn;
-	unsigned long ro_start_pfn, ro_end_pfn;
-
 	memset(zones_size, 0, sizeof(zones_size));
 	dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
 	high_pfn = max_low_pfn;
-	ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
-	ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
 
 	if (dma_pfn > high_pfn)
 		zones_size[ZONE_DMA] = high_pfn;
@@ -190,56 +156,7 @@ void __init paging_init(void)
 	/* Initialize mem_map[]. */
 	free_area_init_node(0, &contig_page_data, zones_size,
 			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
-
-	/*
-	 * map whole physical memory to virtual memory (identity mapping)
-	 */
-
-	pg_dir = swapper_pg_dir;
-
-	for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) {
-
-		if (pfn >= max_low_pfn) {
-			pgd_clear(pg_dir);
-			continue;
-		}
-
-		pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4);
-		pgd_populate(&init_mm, pg_dir, pm_dir);
-
-		for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) {
-			if (pfn >= max_low_pfn) {
-				pmd_clear(pm_dir);
-				continue;
-			}
-
-			pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
-			pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
-
-			for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) {
-				if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
-					pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
-				else
-					pte = pfn_pte(pfn, PAGE_KERNEL);
-				if (pfn >= max_low_pfn)
-					pte_val(pte) = _PAGE_TYPE_EMPTY;
-				set_pte(pt_dir, pte);
-				pfn++;
-			}
-		}
-	}
-
-	S390_lowcore.kernel_asce = pgdir_k;
-
-	/* enable virtual mapping in kernel mode */
-	__ctl_load(pgdir_k, 1, 1);
-	__ctl_load(pgdir_k, 7, 7);
-	__ctl_load(pgdir_k, 13, 13);
-	__raw_local_irq_ssm(ssm_mask);
-
-	local_flush_tlb();
 }
-#endif /* CONFIG_64BIT */
 
 void __init mem_init(void)
 {
@@ -269,6 +186,8 @@ void __init mem_init(void)
 	printk("Write protected kernel read-only data: %#lx - %#lx\n",
 	       (unsigned long)&__start_rodata,
 	       PFN_ALIGN((unsigned long)&__end_rodata) - 1);
+	printk("Virtual memmap size: %ldk\n",
+	       (max_pfn * sizeof(struct page)) >> 10);
 }
 
 void free_initmem(void)