diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2c69ef8448059..e255d8ae5e77e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1972,6 +1972,7 @@ config INTEL_TDX_HOST
 	depends on KVM_INTEL
 	depends on X86_X2APIC
 	select ARCH_KEEP_MEMBLOCK
+	depends on CONTIG_ALLOC
 	help
 	  Intel Trust Domain Extensions (TDX) protects guest VMs from malicious
 	  host and certain physical attacks. This option enables necessary TDX
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index d7f928713b6c9..c2bff81245986 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -173,7 +173,7 @@ EXPORT_SYMBOL_GPL(tdx_cpu_enable);
  * overlap.
  */
 static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
-			    unsigned long end_pfn)
+			    unsigned long end_pfn, int nid)
 {
 	struct tdx_memblock *tmb;
 
@@ -184,6 +184,7 @@ static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
 	INIT_LIST_HEAD(&tmb->list);
 	tmb->start_pfn = start_pfn;
 	tmb->end_pfn = end_pfn;
+	tmb->nid = nid;
 
 	/* @tmb_list is protected by mem_hotplug_lock */
 	list_add_tail(&tmb->list, tmb_list);
@@ -211,9 +212,9 @@ static void free_tdx_memlist(struct list_head *tmb_list)
 static int build_tdx_memlist(struct list_head *tmb_list)
 {
 	unsigned long start_pfn, end_pfn;
-	int i, ret;
+	int i, nid, ret;
 
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
 		/*
 		 * The first 1MB is not reported as TDX convertible memory.
 		 * Although the first 1MB is always reserved and won't end up
@@ -229,7 +230,7 @@ static int build_tdx_memlist(struct list_head *tmb_list)
 		 * memblock has already guaranteed they are in address
 		 * ascending order and don't overlap.
 		 */
-		ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn);
+		ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid);
 		if (ret)
 			goto err;
 	}
@@ -465,6 +466,202 @@ static int fill_out_tdmrs(struct list_head *tmb_list,
 	return 0;
 }
 
+/*
+ * Calculate PAMT size given a TDMR and a page size. The returned
+ * PAMT size is always aligned up to 4K page boundary.
+ */
+static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
+				      u16 pamt_entry_size)
+{
+	unsigned long pamt_sz, nr_pamt_entries;
+
+	switch (pgsz) {
+	case TDX_PS_4K:
+		nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
+		break;
+	case TDX_PS_2M:
+		nr_pamt_entries = tdmr->size >> PMD_SHIFT;
+		break;
+	case TDX_PS_1G:
+		nr_pamt_entries = tdmr->size >> PUD_SHIFT;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+
+	pamt_sz = nr_pamt_entries * pamt_entry_size;
+	/* TDX requires PAMT size must be 4K aligned */
+	pamt_sz = ALIGN(pamt_sz, PAGE_SIZE);
+
+	return pamt_sz;
+}
+
+/*
+ * Locate a NUMA node which should hold the allocation of the @tdmr
+ * PAMT. This node will have some memory covered by the TDMR. The
+ * relative amount of memory covered is not considered.
+ */
+static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
+{
+	struct tdx_memblock *tmb;
+
+	/*
+	 * A TDMR must cover at least part of one TMB. That TMB will end
+	 * after the TDMR begins. But, that TMB may have started before
+	 * the TDMR. Find the next 'tmb' that _ends_ after this TDMR
+	 * begins. Ignore 'tmb' start addresses. They are irrelevant.
+	 */
+	list_for_each_entry(tmb, tmb_list, list) {
+		if (tmb->end_pfn > PHYS_PFN(tdmr->base))
+			return tmb->nid;
+	}
+
+	/*
+	 * Fall back to allocating the TDMR's metadata from node 0 when
+	 * no TDX memory block can be found. This should never happen
+	 * since TDMRs originate from TDX memory blocks.
+	 */
+	pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, fallback to use node 0.\n",
+			tdmr->base, tdmr_end(tdmr));
+	return 0;
+}
+
+/*
+ * Allocate PAMTs from the local NUMA node of some memory in @tmb_list
+ * within @tdmr, and set up PAMTs for @tdmr.
+ */
+static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
+			    struct list_head *tmb_list,
+			    u16 pamt_entry_size[])
+{
+	unsigned long pamt_base[TDX_PS_NR];
+	unsigned long pamt_size[TDX_PS_NR];
+	unsigned long tdmr_pamt_base;
+	unsigned long tdmr_pamt_size;
+	struct page *pamt;
+	int pgsz, nid;
+
+	nid = tdmr_get_nid(tdmr, tmb_list);
+
+	/*
+	 * Calculate the PAMT size for each TDX supported page size
+	 * and the total PAMT size.
+	 */
+	tdmr_pamt_size = 0;
+	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
+		pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz,
+					pamt_entry_size[pgsz]);
+		tdmr_pamt_size += pamt_size[pgsz];
+	}
+
+	/*
+	 * Allocate one chunk of physically contiguous memory for all
+	 * PAMTs. This helps minimize the PAMT's use of reserved areas
+	 * in overlapped TDMRs.
+	 */
+	pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL,
+			nid, &node_online_map);
+	if (!pamt)
+		return -ENOMEM;
+
+	/*
+	 * Break the contiguous allocation back up into the
+	 * individual PAMTs for each page size.
+	 */
+	tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT;
+	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
+		pamt_base[pgsz] = tdmr_pamt_base;
+		tdmr_pamt_base += pamt_size[pgsz];
+	}
+
+	tdmr->pamt_4k_base = pamt_base[TDX_PS_4K];
+	tdmr->pamt_4k_size = pamt_size[TDX_PS_4K];
+	tdmr->pamt_2m_base = pamt_base[TDX_PS_2M];
+	tdmr->pamt_2m_size = pamt_size[TDX_PS_2M];
+	tdmr->pamt_1g_base = pamt_base[TDX_PS_1G];
+	tdmr->pamt_1g_size = pamt_size[TDX_PS_1G];
+
+	return 0;
+}
+
+static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
+			  unsigned long *pamt_size)
+{
+	unsigned long pamt_bs, pamt_sz;
+
+	/*
+	 * The PAMT was allocated in one contiguous unit. The 4K PAMT
+	 * should always point to the beginning of that allocation.
+	 */
+	pamt_bs = tdmr->pamt_4k_base;
+	pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;
+
+	WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK));
+
+	*pamt_base = pamt_bs;
+	*pamt_size = pamt_sz;
+}
+
+static void tdmr_free_pamt(struct tdmr_info *tdmr)
+{
+	unsigned long pamt_base, pamt_size;
+
+	tdmr_get_pamt(tdmr, &pamt_base, &pamt_size);
+
+	/* Do nothing if PAMT hasn't been allocated for this TDMR */
+	if (!pamt_size)
+		return;
+
+	if (WARN_ON_ONCE(!pamt_base))
+		return;
+
+	free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
+}
+
+static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
+{
+	int i;
+
+	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
+		tdmr_free_pamt(tdmr_entry(tdmr_list, i));
+}
+
+/* Allocate and set up PAMTs for all TDMRs */
+static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
+				 struct list_head *tmb_list,
+				 u16 pamt_entry_size[])
+{
+	int i, ret = 0;
+
+	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+		ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list,
+				pamt_entry_size);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+err:
+	tdmrs_free_pamt_all(tdmr_list);
+	return ret;
+}
+
+static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
+{
+	unsigned long pamt_size = 0;
+	int i;
+
+	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+		unsigned long base, size;
+
+		tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
+		pamt_size += size;
+	}
+
+	return pamt_size / 1024;
+}
+
 /*
  * Construct a list of TDMRs on the preallocated space in @tdmr_list
  * to cover all TDX memory regions in @tmb_list based on the TDX module
@@ -480,10 +677,13 @@ static int construct_tdmrs(struct list_head *tmb_list,
 	if (ret)
 		return ret;
 
+	ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list,
+			tdmr_sysinfo->pamt_entry_size);
+	if (ret)
+		return ret;
 	/*
 	 * TODO:
 	 *
-	 *  - Allocate and set up PAMTs for each TDMR.
 	 *  - Designate reserved areas for each TDMR.
 	 *
 	 * Return -EINVAL until constructing TDMRs is done
@@ -537,7 +737,10 @@ static int init_tdx_module(void)
 	 */
 	ret = -EINVAL;
 	if (ret)
-		goto err_free_tdmrs;
+		goto err_free_pamts;
+
+	pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
+
 out_put_tdxmem:
 	/*
 	 * @tdx_memlist is written here and read at memory hotplug time.
@@ -546,6 +749,8 @@ static int init_tdx_module(void)
 	put_online_mems();
 	return ret;
 
+err_free_pamts:
+	tdmrs_free_pamt_all(&tdx_tdmr_list);
 err_free_tdmrs:
 	free_tdmr_list(&tdx_tdmr_list);
 err_free_tdxmem:
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index f18ce1b88b0af..1b04efece9dbd 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -87,6 +87,7 @@ struct tdx_memblock {
 	struct list_head list;
 	unsigned long start_pfn;
 	unsigned long end_pfn;
+	int nid;
 };
 
 /* "TDMR info" part of "Global Scope Metadata" for constructing TDMRs */
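
Reviewer note, not part of the patch: the per-TDMR metadata overhead implied by tdmr_get_pamt_sz() is easy to sanity-check. The standalone user-space sketch below mirrors the same arithmetic; the 1GB TDMR size and the 16-byte PAMT entry size are illustrative assumptions only, since the real entry sizes come from the TDX module's global metadata (tdmr_sysinfo->pamt_entry_size[]).

#include <stdio.h>

#define SZ_4K		4096UL
#define ALIGN_4K(sz)	(((sz) + SZ_4K - 1) & ~(SZ_4K - 1))

int main(void)
{
	unsigned long tdmr_size = 1UL << 30;	/* assumed: one 1GB TDMR */
	unsigned long entry_size = 16;		/* assumed PAMT entry size */
	int page_shift[] = { 12, 21, 30 };	/* 4K, 2M and 1G page sizes */
	unsigned long total = 0;
	int i;

	for (i = 0; i < 3; i++) {
		/* one PAMT entry per page, rounded up to a 4K boundary */
		unsigned long sz = ALIGN_4K((tdmr_size >> page_shift[i]) * entry_size);

		total += sz;
		printf("PAMT[%d]: %5lu KB\n", i, sz / 1024);
	}

	/* prints 4096 KB + 8 KB + 4 KB = 4108 KB for the 1GB TDMR */
	printf("total:   %5lu KB (%.2f%% of the TDMR)\n",
	       total / 1024, total * 100.0 / tdmr_size);
	return 0;
}

With a 16-byte entry the 4K-granularity PAMT alone is 16/4096 = 1/256th of the covered range and dominates the total, so the "%lu KB allocated for PAMT" line added to init_tdx_module() should report roughly 0.4% of the memory covered by the TDMRs.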