// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022 Alibaba Cloud. or its affiliates. All Rights Reserved.
 *
 * Dragonball pvdump module for kernels without Dragonball platform driver.
 */

#include <linux/pgtable.h>
#include <linux/io.h>
#include <linux/memory.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/utsname.h>
#include <linux/kallsyms.h>
#include <dragonball/dragonball.h>

#ifdef CONFIG_ARM64
#define PHYSICAL_PAGE_MASK PAGE_MASK
#endif

/* Dragonball max guest memory */
#define DRAGONBALL_MAX_MEM     0x10000000000

/* The lowest hotmemory notify priority */
#define PVDUMP_CALLBACK_PRI    10

/* Indicates that pvdump failed to get the section's memmap */
#define MEMMAP_ENTRY_NOTAVAIL    2

/* Dragonball's ioport to send low 32-bit of pvdump_data gpa */
#define MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_LOW 0x03f4
/* Dragonball's ioport to send high 32-bit of pvdump_data gpa */
#define MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_HIGH 0x03f8

extern void arch_outl(unsigned int value, int addr);

#ifdef CONFIG_X86_64
static struct mm_struct *orig_init_mm;
#endif

/*
 * Collection of data which will be used by VMMs to dump vmcore.
 * The physical address of this structure is what is handed to the VMM
 * by send_pvdump_data_to_vmm().
 */
struct pvdump_data {
	/*
	 * Maximal possible section nr according to maximal supported memory.
	 * This is also the number of entries allocated for `memmaps_entry`.
	 */
	unsigned long max_section_nr;
	/*
	 * Physical address of `memmaps_entry`,
	 * which will be used by VMMs to do page exclusion.
	 */
	unsigned long memmaps_entry_gpa;
	/*
	 * Physical address of `new_utsname`,
	 * which will be used by VMMs to identify kernel version in vmcore.
	 */
	unsigned long uts_name_gpa;
};

/*
 * Pvdump feature's private data.
 * `pvdump_priv` is the single instance used throughout this file.
 */
struct pvdump_feature {
	/*
	 * Pvdump data collection entry,
	 * whose physical address will be passed to VMMs.
	 */
	struct pvdump_data *data;
	/*
	 * This array records memmap physical addresses for memory sections,
	 * indexed by section number.
	 * The lowest bit will be set for present sections.
	 * An entry is 0 when the section was never recorded or went offline,
	 * and MEMMAP_ENTRY_NOTAVAIL when the memmap lookup failed.
	 */
	unsigned long *memmaps_entry;
} pvdump_priv;

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Local copy of sparse_decode_mem_map(), whose symbol is not exported
 * for modules.
 *
 * Clears the bits of @coded_mem_map that lie outside SECTION_MAP_MASK and
 * offsets the resulting struct page pointer by the first pfn of section @pnum.
 */
struct page *priv_sparse_decode_mem_map(unsigned long coded_mem_map,
					unsigned long pnum)
{
	/* Drop the extra low bits of information stored next to the pointer. */
	struct page *map_base =
	    (struct page *)(coded_mem_map & SECTION_MAP_MASK);

	return map_base + section_nr_to_pfn(pnum);
}

/*
 * For sparse memory model with vmemmap, we can easily get vaddrs of vmemmaps,
 * but the corresponding paddrs cannot be translated by __pa(), the kernel page
 * table should be walked.
 *
 * @sec_nr: memory section number whose memmap is looked up.
 * @addr:   output. On success, the PMD entry value masked with
 *          PHYSICAL_PAGE_MASK, with the lowest bit set to mark the section
 *          as present. MEMMAP_ENTRY_NOTAVAIL if the section or any level of
 *          the page table walk is missing.
 *
 * Always returns 0; a failed lookup is only reported through *addr.
 *
 * NOTE(review): the address is taken from the PMD entry itself, which
 * presumably relies on the vmemmap being mapped at PMD level - confirm.
 */
static int get_vmemmap_pa(unsigned long sec_nr, unsigned long *addr)
{
	unsigned long map_va;
	pgd_t *pgd = NULL;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	struct mem_section *mem_sec = __nr_to_section(sec_nr);

	/* The section lookup itself may fail; do not dereference NULL. */
	if (!mem_sec)
		goto notavail;

	map_va =
	    (unsigned long)priv_sparse_decode_mem_map(mem_sec->section_mem_map,
						      sec_nr);
#if defined(CONFIG_ARM64)
	pgd = swapper_pg_dir + pgd_index(map_va);	/* Kernel page table */
#elif defined(CONFIG_X86_64)
	pgd = pgd_offset(orig_init_mm, map_va); /* Kernel page table */
#endif

	/*
	 * The *_offset() helpers return a pointer into the table, so a
	 * pointer check cannot detect a hole. Each entry must be tested
	 * with *_none() before descending to the next level.
	 */
	if (!pgd || pgd_none(*pgd))
		goto notavail;

	p4d = p4d_offset(pgd, map_va);
	if (p4d_none(*p4d))
		goto notavail;

	pud = pud_offset(p4d, map_va);
	if (pud_none(*pud))
		goto notavail;

	pmd = pmd_offset(pud, map_va);
	if (pmd_none(*pmd))
		goto notavail;

	*addr = pmd->pmd & PHYSICAL_PAGE_MASK;
	*addr |= 1;	/* Lowest bit marks the section as present */
	return 0;

notavail:
	*addr = MEMMAP_ENTRY_NOTAVAIL;
	pr_err("pvdump: failed to get memmap paddr for sec 0x%lx.\n",
	       sec_nr);
	return 0;
}
#else
/*
 * We don't need to walk page tables for memmaps if SPARSEMEM_VMEMMAP is off.
 * get_vmemmap_pa for classic SPARSEMEM or FLATMEM will be supported on demand
 * in the future.
 *
 * Until then this variant always fails with -EPERM and leaves *addr
 * untouched, which makes init_memmaps_entry() fail on the first valid
 * section.
 */
static int get_vmemmap_pa(unsigned long sec_nr, unsigned long *addr)
{
	return -EPERM;
}
#endif

/*
 * Fill `pvdump_priv.memmaps_entry` for every valid section below
 * `max_section_nr`. Entries of sections that are not valid are left as is.
 * This function will be called when initializing pvdump feature.
 *
 * Returns 0 on success, or the error reported by get_vmemmap_pa().
 */
static int init_memmaps_entry(void)
{
	const unsigned long nr_sections = pvdump_priv.data->max_section_nr;
	unsigned long sec, pa = 0;

	for (sec = 0; sec < nr_sections; sec++) {
		int err;

		/* Nothing to record for sections without a memmap. */
		if (!valid_section(__nr_to_section(sec)))
			continue;

		err = get_vmemmap_pa(sec, &pa);
		if (err)
			return err;

		pvdump_priv.memmaps_entry[sec] = pa;
		pr_debug("pvdump: find valid section 0x%lx at 0x%lx.\n",
			 sec, pa);
	}

	return 0;
}

#ifdef CONFIG_ARM64
/*
 * Send pvdump data to vmm.
 *
 * The physical address of `pvdump_priv->data` is written in two steps:
 * first to MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_LOW, then shifted right by 32
 * bits to MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_HIGH. arch_outl() takes an
 * unsigned int, so each call carries one 32-bit half.
 */
void send_pvdump_data_to_vmm(struct pvdump_feature *pvdump_priv)
{
	uint64_t data_gpa = __pa(pvdump_priv->data);

	arch_outl(data_gpa, MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_LOW);
	arch_outl(data_gpa >> 32, MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_HIGH);
	pr_info("pvdump: pvdump_data at 0x%llx, memmap_entry at 0x%lx.\n",
		data_gpa, pvdump_priv->data->memmaps_entry_gpa);
}
#elif defined(CONFIG_X86_64)
/*
 * Send pvdump data to vmm
 */
void send_pvdump_data_to_vmm(struct pvdump_feature *pvdump_priv)
{
	uint32_t half_addr;

	half_addr = __pa(pvdump_priv->data);
	outl(half_addr, MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_LOW);
	half_addr = __pa(pvdump_priv->data) >> 32;
	outl(half_addr, MAGIC_IOPORT_SIGNAL_GUEST_PVDUMP_HIGH);
	pr_info("pvdump: pvdump_data at 0x%llx, memmap_entry at 0x%lx.\n",
		__pa(pvdump_priv->data), pvdump_priv->data->memmaps_entry_gpa);
}
#endif

/*
 * Prepare pvdump data used by VMMs for pvdump feature.
 */
int init_pvdump_data(void)
{
	int ret;
	unsigned long max_sec_nr;
	unsigned long dragonball_max_memory;

	pvdump_priv.data = kzalloc(sizeof(struct pvdump_data), GFP_KERNEL);
	if (!pvdump_priv.data) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * We cannot get the host physical memory in pvdump module, hence it is fixed here.
	 */
	dragonball_max_memory = DRAGONBALL_MAX_MEM;
	max_sec_nr =
	    min(dragonball_max_memory / (1 << SECTION_SIZE_BITS),
		NR_MEM_SECTIONS);
	pr_info("pvdump: max memory: 0x%lx, max sec nr: %lu.\n",
		dragonball_max_memory, max_sec_nr);

	pvdump_priv.memmaps_entry =
	    kcalloc(max_sec_nr, sizeof(unsigned long), GFP_KERNEL);
	if (!pvdump_priv.memmaps_entry) {
		ret = -ENOMEM;
		goto err;
	}
	pvdump_priv.data->memmaps_entry_gpa = __pa(pvdump_priv.memmaps_entry);
	pvdump_priv.data->max_section_nr = max_sec_nr;

#ifdef CONFIG_X86_64
	orig_init_mm = (struct mm_struct *)kallsyms_lookup_name("init_mm");
	if (!orig_init_mm) {
		ret = -ENODEV;
		goto err;
	}
#endif

	ret = init_memmaps_entry();
	if (ret)
		goto err;

	/*
	 * Get utsname for pvdump data.
	 */
	pvdump_priv.data->uts_name_gpa = __pa(utsname());

	/*
	 * Send pvdump data's physical address to VMMs.
	 * Since 64-bit data cannot be sent via ioport at once,
	 * we send low 32 bits and high 32 bits separately.
	 */
	send_pvdump_data_to_vmm(&pvdump_priv);

	return 0;

err:
	kfree(pvdump_priv.memmaps_entry);	/* Safe to kfree null pointer */
	kfree(pvdump_priv.data);	/* Safe to kfree null pointer */
	pr_err("pvdump: failed to init pvdump data: %d.\n", ret);

	return ret;
}

/*
 * Update memmap_entry for changed sections.
 */
static inline int pvdump_hotmemory_callback(struct notifier_block *self,
					    unsigned long action, void *arg)
{
	unsigned long sec_nr, p;
	unsigned long memmap_pa = 0;
	unsigned long sec_sz = 1 << SECTION_SIZE_BITS;
	struct memory_notify *mnb = arg;

	/*
	 * Memory block is the smallest granularity that can be hot(un)plugged,
	 * which consists of at least one memory section. Besides, hot(un)plugged
	 * memory must be section-aligned in address space. Hence we don't need
	 * to consider the case where mnb->nr_pages < sec_sz and or start_pfn is
	 * not aligned by section.
	 */
	sec_nr = pfn_to_section_nr(mnb->start_pfn);
	for (p = 0;
	     p < mnb->nr_pages && sec_nr < pvdump_priv.data->max_section_nr;
	     sec_nr += 1, p += sec_sz) {
		switch (action) {
		case MEM_ONLINE:
			get_vmemmap_pa(sec_nr, &memmap_pa);
			pvdump_priv.memmaps_entry[sec_nr] = memmap_pa;
			pr_debug("pvdump: online section 0x%lx at 0x%lx.\n",
				 sec_nr, memmap_pa);
			break;
		case MEM_OFFLINE:
			pvdump_priv.memmaps_entry[sec_nr] = 0;
			pr_debug("pvdump: offline section 0x%lx.\n", sec_nr);
		}
	}

	return NOTIFY_OK;
}

/*
 * Notifier block that hooks pvdump_hotmemory_callback() up with priority
 * PVDUMP_CALLBACK_PRI. It is registered in pvdump_init() and unregistered
 * in pvdump_exit().
 */
static struct notifier_block pvdump_hotmemory_callback_nb = {
	.notifier_call = pvdump_hotmemory_callback,
	.priority = PVDUMP_CALLBACK_PRI,
};

/*
 * Module entry point: build the pvdump data, hand it to the VMM and start
 * tracking memory hot(un)plug events.
 *
 * Returns 0 on success or the error from init_pvdump_data(). A failure to
 * register the hotmemory notifier is only logged: the data already handed
 * to the VMM stays valid, it just will not follow later hot(un)plug events.
 */
static int __init pvdump_init(void)
{
	int ret;

	ret = init_pvdump_data();
	if (ret)
		return ret;

	ret = register_hotmemory_notifier(&pvdump_hotmemory_callback_nb);
	if (ret)
		pr_warn("pvdump: failed to register hotmemory notifier: %d.\n",
			ret);

	pr_info("Dragonball pvdump module: init!\n");

	return 0;
}

/*
 * Module exit point: stop tracking memory hot(un)plug events and release
 * the buffers allocated by init_pvdump_data().
 *
 * NOTE(review): the physical addresses of these buffers were sent to the VMM
 * at init time and nothing here tells the VMM they are gone - confirm the
 * VMM does not use them after the module is unloaded.
 */
static void __exit pvdump_exit(void)
{
	/* Unregister first so the callback cannot run on freed buffers. */
	unregister_hotmemory_notifier(&pvdump_hotmemory_callback_nb);
	kfree(pvdump_priv.memmaps_entry);
	kfree(pvdump_priv.data);
	pr_info("Dragonball pvdump module: exit!\n");
}

module_init(pvdump_init);
module_exit(pvdump_exit);

MODULE_LICENSE("GPL");
