diff --git a/Cargo.lock b/Cargo.lock index ea26a3a5b..1e3dfae4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,6 +97,7 @@ dependencies = [ "kvm-bindings", "kvm-ioctls", "libc", + "log", "smbios", "tdx", "utils", @@ -436,6 +437,7 @@ dependencies = [ "virtio-bindings", "vm-fdt", "vm-memory", + "vmm-sys-util 0.14.0", "zerocopy", ] diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index 2b39cfa80..7021ad350 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -18,6 +18,7 @@ vmm-sys-util = "0.14" arch_gen = { path = "../arch_gen" } smbios = { path = "../smbios" } utils = { path = "../utils" } +log = "0.4" [target.'cfg(target_os = "linux")'.dependencies] kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } diff --git a/src/arch/src/lib.rs b/src/arch/src/lib.rs index 10f4de073..c541f98a6 100644 --- a/src/arch/src/lib.rs +++ b/src/arch/src/lib.rs @@ -17,8 +17,10 @@ pub struct ArchMemoryInfo { pub ram_last_addr: u64, pub shm_start_addr: u64, pub page_size: usize, - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "loongarch64"))] pub fdt_addr: u64, + #[cfg(target_arch = "loongarch64")] + pub efi_system_table_addr: u64, pub initrd_addr: u64, pub firmware_addr: u64, } @@ -43,6 +45,16 @@ pub use riscv64::{ layout::IRQ_MAX, layout::RESET_VECTOR, Error, MMIO_MEM_START, }; +/// Module for loongarch64 related functionality. +#[cfg(target_arch = "loongarch64")] +pub mod loongarch64; + +#[cfg(target_arch = "loongarch64")] +pub use loongarch64::{ + arch_memory_regions, configure_system, layout::CMDLINE_MAX_SIZE, layout::IRQ_BASE, + layout::IRQ_MAX, layout::RESET_VECTOR, Error, MMIO_MEM_START, +}; + /// Module for x86_64 related functionality. #[cfg(target_arch = "x86_64")] pub mod x86_64; diff --git a/src/arch/src/loongarch64/layout.rs b/src/arch/src/loongarch64/layout.rs new file mode 100644 index 000000000..4d7f9c688 --- /dev/null +++ b/src/arch/src/loongarch64/layout.rs @@ -0,0 +1,36 @@ +/// Start of RAM on LoongArch. 
+pub const DRAM_MEM_START: u64 = 0x4000_0000; // 1GB + +/// The maximum addressable RAM address. +pub const DRAM_MEM_END: u64 = 0x00FF_8000_0000; // 1022GB + +/// The maximum RAM size. +pub const DRAM_MEM_MAX_SIZE: u64 = DRAM_MEM_END - DRAM_MEM_START; + +/// Kernel command line maximum size. +pub const CMDLINE_MAX_SIZE: usize = 2048; + +/// Kernel command line args size +pub const CMDLINE_GUEST_SIZE: u64 = 0x4000; + +/// Usable CPU hardware interrupt range on LoongArch. +/// +/// The current virt platform injects serial/virtio interrupts through +/// `cpuintc + KVM_INTERRUPT`, so keep the MMIO allocator inside INT_HWI0..7. +pub const IRQ_BASE: u32 = 2; +pub const IRQ_MAX: u32 = 9; + +/// Below this address will reside MMIO devices. +pub const MAPPED_IO_START: u64 = 0x0a00_0000; + +/// Where the PC register will point after reset. +pub const RESET_VECTOR: u64 = 0; + +/// The address to load firmware, if present. +pub const FIRMWARE_START: u64 = 0; + +/// FDT maximum size. +pub const FDT_MAX_SIZE: usize = 0x1_0000; + +/// EFI Guest size. 
+pub const EFI_GUEST_SIZE: u64 = 0x4000; diff --git a/src/arch/src/loongarch64/linux/efi.rs b/src/arch/src/loongarch64/linux/efi.rs new file mode 100644 index 000000000..065324ea0 --- /dev/null +++ b/src/arch/src/loongarch64/linux/efi.rs @@ -0,0 +1,133 @@ +use crate::ArchMemoryInfo; +use log::debug; +use std::mem::size_of; +use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap}; + +const EFI_SYSTEM_TABLE_SIGNATURE: u64 = 0x5453_5953_2049_4249; +const EFI_2_10_SYSTEM_TABLE_REVISION: u32 = (2 << 16) | 10; + +const EFI_CONFIG_TABLE_OFFSET: u64 = 0x100; +const EFI_VENDOR_OFFSET: u64 = 0x200; +const DEVICE_TREE_GUID: EfiGuid = EfiGuid { + data1: 0xb1b621d5, + data2: 0xf19c, + data3: 0x41a5, + data4: [0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0], +}; + +#[repr(C)] +#[derive(Clone, Copy, Default)] +struct EfiGuid { + data1: u32, + data2: u16, + data3: u16, + data4: [u8; 8], +} +unsafe impl ByteValued for EfiGuid {} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +struct EfiTableHeader { + signature: u64, + revision: u32, + headersize: u32, + crc32: u32, + reserved: u32, +} +unsafe impl ByteValued for EfiTableHeader {} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +struct EfiConfigTable64 { + guid: EfiGuid, + table: u64, +} +unsafe impl ByteValued for EfiConfigTable64 {} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +struct EfiSystemTable64 { + hdr: EfiTableHeader, + fw_vendor: u64, + fw_revision: u32, + __pad1: u32, + con_in_handle: u64, + con_in: u64, + con_out_handle: u64, + con_out: u64, + stderr_handle: u64, + stderr: u64, + runtime: u64, + boottime: u64, + nr_tables: u32, + __pad2: u32, + tables: u64, +} +unsafe impl ByteValued for EfiSystemTable64 {} + +#[derive(Debug)] +pub enum Error { + Write(GuestMemoryError), +} + +type Result = std::result::Result; + +pub fn setup_fdt_system_table(mem: &GuestMemoryMmap, info: &ArchMemoryInfo) -> Result<()> { + let systab_addr = GuestAddress(info.efi_system_table_addr); + let 
config_addr = systab_addr.unchecked_add(EFI_CONFIG_TABLE_OFFSET); + let vendor_addr = systab_addr.unchecked_add(EFI_VENDOR_OFFSET); + + let config = EfiConfigTable64 { + guid: DEVICE_TREE_GUID, + table: info.fdt_addr, + }; + mem.write_obj(config, config_addr).map_err(Error::Write)?; + + let systab = EfiSystemTable64 { + hdr: EfiTableHeader { + signature: EFI_SYSTEM_TABLE_SIGNATURE, + revision: EFI_2_10_SYSTEM_TABLE_REVISION, + headersize: size_of::() as u32, + crc32: 0, + reserved: 0, + }, + fw_vendor: vendor_addr.raw_value(), + fw_revision: 0, + __pad1: 0, + con_in_handle: 0, + con_in: 0, + con_out_handle: 0, + con_out: 0, + stderr_handle: 0, + stderr: 0, + runtime: 0, + boottime: 0, + nr_tables: 1, + __pad2: 0, + tables: config_addr.raw_value(), + }; + mem.write_obj(systab, systab_addr).map_err(Error::Write)?; + debug!( + "loongarch efi handoff: systab=0x{:x}, config=0x{:x}, vendor=0x{:x}, fdt=0x{:x}", + systab_addr.raw_value(), + config_addr.raw_value(), + vendor_addr.raw_value(), + info.fdt_addr, + ); + let vendor: [u16; 8] = [ + b'l' as u16, + b'i' as u16, + b'b' as u16, + b'k' as u16, + b'r' as u16, + b'u' as u16, + b'n' as u16, + 0, + ]; + for (i, ch) in vendor.iter().enumerate() { + mem.write_obj(*ch, vendor_addr.unchecked_add((i * 2) as u64)) + .map_err(Error::Write)?; + } + + Ok(()) +} diff --git a/src/arch/src/loongarch64/linux/iocsr.rs b/src/arch/src/loongarch64/linux/iocsr.rs new file mode 100644 index 000000000..03a9addbf --- /dev/null +++ b/src/arch/src/loongarch64/linux/iocsr.rs @@ -0,0 +1,263 @@ +use log::debug; +use std::sync::atomic::{AtomicU64, Ordering}; +/// LoongArch IOCSR Mailbox and Control Registers +/// +/// This module provides emulation for LoongArch IOCSR (I/O Control and Status Register) +/// mailbox system used for inter-processor communication. 
+use std::sync::Arc; + +/// Maximum Number of LoongArch vCpus supported +const MAX_LOONGARCH_VCPUS: usize = 16; + +/// IOCSR Mailbox addresses (each 8 bytes apart) +pub const LOONGARCH_IOCSR_MBUF0: u64 = 0x1020; +pub const LOONGARCH_IOCSR_MBUF1: u64 = 0x1028; +pub const LOONGARCH_IOCSR_MBUF2: u64 = 0x1030; +pub const LOONGARCH_IOCSR_MBUF3: u64 = 0x1038; + +/// IOCSR Mailbox send command register +pub const LOONGARCH_IOCSR_MBUF_SEND: u64 = 0x1048; + +/// IOCSR Any-Send register (for arbitrary CSR access between CPUs) +pub const LOONGARCH_IOCSR_ANY_SEND: u64 = 0x1158; + +/// IOCSR Miscellaneous function register +pub const LOONGARCH_IOCSR_MISC_FUNC: u64 = 0x0420; + +/// IOCSR feature flags (read-only) +pub const LOONGARCH_IOCSR_FEATURES: u64 = 0x0008; + +/// IOCSR identification strings +pub const LOONGARCH_IOCSR_VENDOR: u64 = 0x0010; +pub const LOONGARCH_IOCSR_MODEL: u64 = 0x0020; + +/// Bit field definitions for MBUF_SEND register +pub const IOCSR_MBUF_SEND_BLOCKING: u64 = 1 << 31; +pub const IOCSR_MBUF_SEND_BOX_SHIFT: u32 = 2; +pub const IOCSR_MBUF_SEND_BOX_LO: fn(u32) -> u32 = |box_num| box_num << 1; +pub const IOCSR_MBUF_SEND_BOX_HI: fn(u32) -> u32 = |box_num| (box_num << 1) + 1; +pub const IOCSR_MBUF_SEND_CPU_SHIFT: u32 = 16; +pub const IOCSR_MBUF_SEND_BUF_SHIFT: u32 = 32; +pub const IOCSR_MBUF_SEND_H32_MASK: u64 = 0xFFFFFFFF00000000; + +/// Feature flags for LoongArch +pub const IOCSRF_EXTIOI: u32 = 1 << 3; +pub const IOCSRF_CSRIPI: u32 = 1 << 4; +pub const IOCSRF_VM: u32 = 1 << 11; + +/// Shared IOCSR state for all vCPUs +#[derive(Debug)] +pub struct LoongArchIocsrState { + misc_func: AtomicU64, + mailboxes: Vec<[AtomicU64; 4]>, +} + +impl LoongArchIocsrState { + /// Create a new IOCSR state with the specified number of vCPUs + pub fn new(vcpu_count: usize) -> Self { + let count = vcpu_count.min(MAX_LOONGARCH_VCPUS); + Self { + misc_func: AtomicU64::new(0), + mailboxes: (0..count) + .map(|_| { + [ + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), 
+ AtomicU64::new(0), + ] + }) + .collect(), + } + } + + /// Read miscellaneous function register + pub fn read_misc_func(&self) -> u64 { + self.misc_func.load(Ordering::SeqCst) + } + + /// Write miscellaneous function register + pub fn write_misc_func(&self, value: u64) { + self.misc_func.store(value, Ordering::SeqCst); + } + + /// Read a mailbox slot for the specified CPU + pub fn read_mailbox(&self, cpu_id: usize, mailbox_id: usize) -> u64 { + if cpu_id < self.mailboxes.len() && mailbox_id < 4 { + self.mailboxes[cpu_id][mailbox_id].load(Ordering::SeqCst) + } else { + 0 + } + } + + /// Write a mailbox slot for the specified CPU + pub fn write_mailbox(&self, cpu_id: usize, mailbox_id: usize, value: u64) { + if cpu_id < self.mailboxes.len() && mailbox_id < 4 { + self.mailboxes[cpu_id][mailbox_id].store(value, Ordering::SeqCst); + } + } + + /// Process a mailbox send command + /// + /// This function parses the MBUF_SEND register value and updates the target CPU's mailbox. + /// Currently used only for SMP support, which is disabled in single-vCPU mode. + /// + /// # Arguments + /// * `value` - The 64-bit value written to MBUF_SEND register + /// + /// The value format: + /// - Bits 32-63: 32-bit data to be sent + /// - Bits 16-31: Target CPU ID (14 bits) + /// - Bit 3: HI/LO flag (0=low 32 bits, 1=high 32 bits) + /// - Bits 2-3: Mailbox number encoding + /// - Bit 2: Base mailbox number (0-3) + pub fn process_mbuf_send(&self, value: u64) -> Result<(), String> { + // Extract fields from the value + let target_cpu = ((value >> IOCSR_MBUF_SEND_CPU_SHIFT) & 0x3FFF) as usize; + // Linux encodes mailbox selector as: + // (IOCSR_MBUF_SEND_BOX_{LO,HI}(box) << IOCSR_MBUF_SEND_BOX_SHIFT) + // where BOX_LO(box)=(box<<1), BOX_HI(box)=((box<<1)+1). + // So the packed field is 3 bits: [box_num(2b), hi_low(1b)]. 
+ let box_sel = ((value >> IOCSR_MBUF_SEND_BOX_SHIFT) & 0x7) as u32; + let box_hi = (box_sel & 0x1) != 0; + let box_num = (box_sel >> 1) as usize; + let data32 = ((value >> IOCSR_MBUF_SEND_BUF_SHIFT) & 0xFFFFFFFF) as u32; + // Validate target CPU + if target_cpu >= self.mailboxes.len() { + return Err(format!( + "Invalid target CPU: {} (max: {})", + target_cpu, + self.mailboxes.len() - 1 + )); + } + // Validate mailbox number + if box_num >= 4 { + return Err(format!("Invalid mailbox number: {} (max: 3)", box_num)); + } + // Update the target mailbox + if box_hi { + // Write high 32 bits + let current = self.read_mailbox(target_cpu, box_num); + let new_val = (current & 0xFFFFFFFF) | ((data32 as u64) << 32); + self.write_mailbox(target_cpu, box_num, new_val); + } else { + // Write low 32 bits + let current = self.read_mailbox(target_cpu, box_num); + let new_val = (current & 0xFFFFFFFF00000000) | (data32 as u64); + self.write_mailbox(target_cpu, box_num, new_val); + } + Ok(()) + } + + /// Get the number of configured vCPUs + pub fn vcpu_count(&self) -> usize { + self.mailboxes.len() + } +} + +/// IOCSR read operation result +#[derive(Debug)] +pub enum IocsrReadResult { + /// Successfully read a value + Value(u64), + /// Unhandled register address + Unhandled, +} +/// IOCSR write operation result +#[derive(Debug)] +pub enum IocsrWriteResult { + /// Successfully processed write + Handled, + /// Unhandled register address + Unhandled, +} + +/// Process an IOCSR read operation +pub fn process_iocsr_read( + addr: u64, + data: &mut [u8], + iocsr_state: &Arc, + cpu_id: u8, +) -> IocsrReadResult { + match (addr, data.len()) { + (LOONGARCH_IOCSR_FEATURES, 4) => { + // Feature flags: EXTIOI, CSRIPI, VM support + let features = IOCSRF_EXTIOI | IOCSRF_CSRIPI | IOCSRF_VM; + data.copy_from_slice(&features.to_le_bytes()); + IocsrReadResult::Value(features as u64) + } + (LOONGARCH_IOCSR_VENDOR, 8) => { + // Vendor string: "Loongson" + data.copy_from_slice(b"Loongson"); + 
IocsrReadResult::Value(0) + } + (LOONGARCH_IOCSR_MODEL, 8) => { + // Model string: "KVMGuest" + data.copy_from_slice(b"KVMGuest"); + IocsrReadResult::Value(0) + } + (LOONGARCH_IOCSR_MISC_FUNC, 8) => { + // Miscellaneous function register + let value = iocsr_state.read_misc_func(); + data.copy_from_slice(&value.to_le_bytes()); + IocsrReadResult::Value(value) + } + (LOONGARCH_IOCSR_MBUF0..=LOONGARCH_IOCSR_MBUF3, 8) => { + // Mailbox read operations + let mailbox_idx = ((addr - LOONGARCH_IOCSR_MBUF0) / 8) as usize; + let value = iocsr_state.read_mailbox(cpu_id as usize, mailbox_idx); + data.copy_from_slice(&value.to_le_bytes()); + IocsrReadResult::Value(value) + } + _ => IocsrReadResult::Unhandled, + } +} + +/// Process an IOCSR write operation +pub fn process_iocsr_write( + addr: u64, + data: &[u8], + iocsr_state: &Arc, + cpu_id: u8, +) -> IocsrWriteResult { + match (addr, data.len()) { + (LOONGARCH_IOCSR_MISC_FUNC, 8) => { + // Miscellaneous function register + let value = u64::from_le_bytes(data.try_into().unwrap()); + iocsr_state.write_misc_func(value); + IocsrWriteResult::Handled + } + (LOONGARCH_IOCSR_MBUF0..=LOONGARCH_IOCSR_MBUF3, 8) => { + // Mailbox write operations + let mailbox_idx = ((addr - LOONGARCH_IOCSR_MBUF0) / 8) as usize; + let value = u64::from_le_bytes(data.try_into().unwrap()); + iocsr_state.write_mailbox(cpu_id as usize, mailbox_idx, value); + IocsrWriteResult::Handled + } + (LOONGARCH_IOCSR_MBUF_SEND, 8) => { + // Mailbox send command + let value = u64::from_le_bytes(data.try_into().unwrap()); + match iocsr_state.process_mbuf_send(value) { + Ok(()) => IocsrWriteResult::Handled, + Err(_) => IocsrWriteResult::Unhandled, // Keep it simple for now + } + } + (LOONGARCH_IOCSR_ANY_SEND, 8) => { + // ANY_SEND: Send data to arbitrary CSR of another CPU + // Format: [data:32][cpu:10][mask:4][addr:16] + BLOCKING bit + // For now, just acknowledge the write (no actual cross-CPU CSR emulation needed) + let value = 
u64::from_le_bytes(data.try_into().unwrap()); + let blocking = (value & 0x8000_0000_0000_0000) != 0; + let target_addr = (value & 0xFFFF) as u32; + let target_cpu = ((value >> 16) & 0x3FF) as u32; + let data_val = (value >> 32) as u32; + + debug!( + "IOCSR ANY_SEND: to CPU {}, addr=0x{:x}, data=0x{:x}, blocking={}", + target_cpu, target_addr, data_val, blocking + ); + IocsrWriteResult::Handled + } + _ => IocsrWriteResult::Unhandled, + } +} diff --git a/src/arch/src/loongarch64/linux/mod.rs b/src/arch/src/loongarch64/linux/mod.rs new file mode 100644 index 000000000..3604d301f --- /dev/null +++ b/src/arch/src/loongarch64/linux/mod.rs @@ -0,0 +1,3 @@ +pub mod efi; +pub mod iocsr; +pub mod regs; diff --git a/src/arch/src/loongarch64/linux/regs.rs b/src/arch/src/loongarch64/linux/regs.rs new file mode 100644 index 000000000..730182a8a --- /dev/null +++ b/src/arch/src/loongarch64/linux/regs.rs @@ -0,0 +1,217 @@ +use kvm_bindings::{KVM_REG_LOONGARCH, KVM_REG_SIZE_U64, LOONGARCH_REG_SHIFT}; +use kvm_ioctls::VcpuFd; +use log::{debug, warn}; +use std::arch::asm; +use std::result; + +const KVM_REG_LOONGARCH_CSR: u64 = (KVM_REG_LOONGARCH as u64) | 0x10000; +const LOONGARCH_CSR_CPUID: u64 = 0x20; + +const KVM_REG_LOONGARCH_CPUCFG: u64 = (KVM_REG_LOONGARCH as u64) | 0x40000; +const CPUCFG0_REG_ID: u64 = + KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | (0_u64 << LOONGARCH_REG_SHIFT); +const CSR_CPUID_REG_ID: u64 = + KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | (LOONGARCH_CSR_CPUID << LOONGARCH_REG_SHIFT); +// Keep bits [25:0] only for now. Do not expose MSGINT (bit 26) by default +// on microVM profile because host KVM support is inconsistent. 
+const CPUCFG1_KVM_MASK: u64 = (1u64 << 26) - 1; +const CPUCFG1_PABITS_MASK: u64 = 0xff << 4; +const CPUCFG1_GUEST_PABITS: u64 = 39; // encodes 40-bit PA + +const CPUCFG2_FP: u64 = 1 << 0; +const CPUCFG2_FPSP: u64 = 1 << 1; +const CPUCFG2_FPDP: u64 = 1 << 2; +const CPUCFG2_FPVERS: u64 = 0x7 << 3; +const CPUCFG2_LSX: u64 = 1 << 6; +const CPUCFG2_LASX: u64 = 1 << 7; +const CPUCFG2_LLFTP: u64 = 1 << 14; +const CPUCFG2_LLFTPREV: u64 = 0x7 << 15; +const CPUCFG2_LSPW: u64 = 1 << 21; +const CPUCFG2_LAM: u64 = 1 << 22; + +const CPUCFG3_SFB: u64 = 1 << 1; +const CPUCFG3_KVM_CONSERVATIVE_MASK: u64 = 0x0000_fcff & !CPUCFG3_SFB; +const CPUCFG4_KVM_MASK: u64 = 0xffff_ffff; +const CPUCFG5_KVM_MASK: u64 = 0xffff_ffff; + +const CPUCFG16_CACHE_CONFIG: u64 = 0xF; // L1I, L1D, L2, L3 present +const CPUCFG17_L1I_MASK: u64 = ((5u64) << 24) | ((8u64) << 16) | ((4u64 - 1) << 0); +const CPUCFG18_L1D_MASK: u64 = ((5u64) << 24) | ((8u64) << 16) | ((4u64 - 1) << 0); +const CPUCFG19_L2_MASK: u64 = ((6u64) << 24) | ((9u64) << 16) | ((8u64 - 1) << 0); +const CPUCFG20_L3_MASK: u64 = ((6u64) << 24) | ((10u64) << 16) | ((16u64 - 1) << 0); + +#[derive(Debug)] +pub enum Error { + GetCoreRegs(kvm_ioctls::Error), + SetCoreRegs(kvm_ioctls::Error), + SetOneReg(kvm_ioctls::Error), +} + +type Result = result::Result; + +pub fn setup_regs( + vcpu: &VcpuFd, + cpu_id: u8, + boot_ip: u64, + cmdline_addr: u64, + efi_boot: bool, + system_table: u64, +) -> Result<()> { + setup_cpucfg(vcpu)?; + let mut regs = vcpu.get_regs().map_err(Error::GetCoreRegs)?; + + // Secondary CPUs start from reset state and are brought up later by guest SMP code. 
+ if cpu_id != 0 { + let cpuid = u64::from(cpu_id); + vcpu.set_one_reg(CSR_CPUID_REG_ID, &cpuid.to_le_bytes()) + .map_err(Error::SetOneReg)?; + debug!("loongarch set csr cpuid: {} (ap-reset-state)", cpuid); + return Ok(()); + } + + regs.pc = boot_ip; + regs.gpr[4] = u64::from(efi_boot); + regs.gpr[5] = cmdline_addr; + regs.gpr[6] = system_table; + vcpu.set_regs(®s).map_err(Error::SetCoreRegs)?; + + // KVM starts vCPU with an invalid CPUID (KVM_MAX_PHYID). Program per-vCPU CPUID. + // Do this after set_regs() so it cannot be clobbered by later register writes. + let cpuid = 0u64; + vcpu.set_one_reg(CSR_CPUID_REG_ID, &cpuid.to_le_bytes()) + .map_err(Error::SetOneReg)?; + + let mut cpuid_readback = [0_u8; 8]; + if vcpu + .get_one_reg(CSR_CPUID_REG_ID, &mut cpuid_readback) + .is_ok() + { + debug!( + "loongarch set csr cpuid: {}, readback={}", + cpuid, + u64::from_le_bytes(cpuid_readback) + ); + } else { + debug!("loongarch set csr cpuid: {}", cpuid); + } + + debug!( + "loongarch setup_regs: cpu_id={}, pc=0x{:x}, a0={}, a1=0x{:x}, a2=0x{:x}", + cpu_id, regs.pc, regs.gpr[4], regs.gpr[5], regs.gpr[6], + ); + let mut cpucfg0 = [0_u8; 8]; + if vcpu.get_one_reg(CPUCFG0_REG_ID, &mut cpucfg0).is_ok() { + let cpucfg0 = u64::from_le_bytes(cpucfg0); + debug!("loongarch cpucfg0: 0x{:x}", cpucfg0); + } + + Ok(()) +} + +#[inline] +fn cpucfg_reg_id(index: u64) -> u64 { + KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | (index << LOONGARCH_REG_SHIFT) +} + +#[inline] +fn read_host_cpucfg(index: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "cpucfg {value}, {index}", + value = out(reg) value, + index = in(reg) index, + ); + } + value +} + +#[inline] +fn filter_cpucfg_for_kvm(index: u64, host_value: u64) -> u64 { + match index { + 0 => host_value & 0xffff_ffff, + 1 => { + let mut v = host_value & CPUCFG1_KVM_MASK; + v = (v & !CPUCFG1_PABITS_MASK) | ((CPUCFG1_GUEST_PABITS & 0xff) << 4); + v + } + 2 => filter_cpucfg2_conservative(host_value), + 3 => 
filter_cpucfg3_conservative(host_value), + 4 => host_value & CPUCFG4_KVM_MASK, + 5 => host_value & CPUCFG5_KVM_MASK, + _ => 0, + } +} + +#[inline] +fn filter_cpucfg2_conservative(host_value: u64) -> u64 { + let mut mask = CPUCFG2_FP + | CPUCFG2_FPSP + | CPUCFG2_FPDP + | CPUCFG2_FPVERS + | CPUCFG2_LLFTP + | CPUCFG2_LLFTPREV + | CPUCFG2_LSPW + | CPUCFG2_LAM; + + if host_value & CPUCFG2_LSX != 0 { + mask |= CPUCFG2_LSX; + } + if host_value & CPUCFG2_LASX != 0 { + mask |= CPUCFG2_LASX; + } + + host_value & mask +} + +#[inline] +fn filter_cpucfg3_conservative(host_value: u64) -> u64 { + host_value & CPUCFG3_KVM_CONSERVATIVE_MASK +} + +fn setup_cpucfg(vcpu: &VcpuFd) -> Result<()> { + // Setup CPUCFG 0-5: Basic CPU information + for index in 0..=5u64 { + let host_value = read_host_cpucfg(index); + let guest_value = filter_cpucfg_for_kvm(index, host_value); + let reg_id = cpucfg_reg_id(index); + + if let Err(e) = vcpu.set_one_reg(reg_id, &guest_value.to_le_bytes()) { + warn!( + "loongarch set cpucfg{} failed: {:?} (host=0x{:x}, guest=0x{:x})", + index, e, host_value, guest_value + ); + return Err(Error::SetOneReg(e)); + } + + debug!( + "loongarch set cpucfg{}: host=0x{:x}, guest=0x{:x}", + index, host_value, guest_value, + ); + } + + // Setup CPUCFG 16-20: Cache configuration + // CPUCFG16: Cache configuration (which cache levels exist) + // Format: L1I present | L1D present | L2 present | L3 present + let cpucfg16 = CPUCFG16_CACHE_CONFIG; + vcpu.set_one_reg(cpucfg_reg_id(16), &cpucfg16.to_le_bytes()) + .map_err(Error::SetOneReg)?; + debug!("loongarch set cpucfg16: 0x{:x}", cpucfg16); + // CPUCFG17-20: Cache properties for each level + // Format: ways[13:8] | sets[21:16] | linesz[15:12] | other[11:0] + // We'll use common values for a typical Loongson 3A5000-like CPU + vcpu.set_one_reg(cpucfg_reg_id(17), &CPUCFG17_L1I_MASK.to_le_bytes()) + .map_err(Error::SetOneReg)?; + vcpu.set_one_reg(cpucfg_reg_id(18), &CPUCFG18_L1D_MASK.to_le_bytes()) + .map_err(Error::SetOneReg)?; + 
vcpu.set_one_reg(cpucfg_reg_id(19), &CPUCFG19_L2_MASK.to_le_bytes()) + .map_err(Error::SetOneReg)?; + vcpu.set_one_reg(cpucfg_reg_id(20), &CPUCFG20_L3_MASK.to_le_bytes()) + .map_err(Error::SetOneReg)?; + debug!( + "loongarch set cpucfg17-20: 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}", + CPUCFG17_L1I_MASK, CPUCFG18_L1D_MASK, CPUCFG19_L2_MASK, CPUCFG20_L3_MASK + ); + + Ok(()) +} diff --git a/src/arch/src/loongarch64/mod.rs b/src/arch/src/loongarch64/mod.rs new file mode 100644 index 000000000..683ae874c --- /dev/null +++ b/src/arch/src/loongarch64/mod.rs @@ -0,0 +1,64 @@ +/// Layout for this loongarch64 system. +pub mod layout; + +#[cfg(target_os = "linux")] +pub mod linux; +#[cfg(target_os = "linux")] +pub use self::linux::*; + +use crate::{loongarch64::layout::FIRMWARE_START, ArchMemoryInfo}; +use vm_memory::{GuestAddress, GuestMemoryMmap}; +use vmm_sys_util::align_upwards; + +/// Errors thrown while configuring loongarch64 system. +#[derive(Debug)] +pub enum Error { + /// Failed to compute the initrd address. + InitrdAddress, + /// Failed to setup EFI system table in the FDT. + EfiSystemTable(linux::efi::Error), +} + +/// The start of the memory area reserved for MMIO devices. +pub const MMIO_MEM_START: u64 = layout::MAPPED_IO_START; + +/// Returns a Vec of the valid memory addresses for loongarch64. +pub fn arch_memory_regions( + size: usize, + initrd_size: u64, + _firmware_size: Option, +) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { + let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; + let dram_size = align_upwards!(size, page_size); + let ram_last_addr = layout::DRAM_MEM_START + (dram_size as u64); + // Align SHM start to 1GiB boundaries without forcing an extra 1GiB gap + // when RAM already ends at an aligned boundary. 
+ let shm_start_addr = align_upwards!(ram_last_addr, 0x4000_0000u64); + + let fdt_addr = ram_last_addr - layout::FDT_MAX_SIZE as u64; + let efi_system_table_addr = fdt_addr - layout::EFI_GUEST_SIZE; + let initrd_addr = efi_system_table_addr - initrd_size; + let info = ArchMemoryInfo { + ram_last_addr, + shm_start_addr, + page_size, + fdt_addr, + efi_system_table_addr, + initrd_addr, + firmware_addr: FIRMWARE_START, + }; + let regions = vec![(GuestAddress(layout::DRAM_MEM_START), dram_size)]; + + (info, regions) +} + +/// Configures the system and should be called once per vm before starting vcpu threads. +pub fn configure_system( + _guest_mem: &GuestMemoryMmap, + arch_memory_info: &ArchMemoryInfo, + _smbios_oem_strings: &Option>, +) -> super::Result<()> { + linux::efi::setup_fdt_system_table(_guest_mem, arch_memory_info) + .map_err(Error::EfiSystemTable)?; + Ok(()) +} diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 94a331202..733216590 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -50,6 +50,8 @@ rutabaga_gfx = { path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } kvm-ioctls = "0.22" +vmm-sys-util = ">= 0.14" -[target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] +[target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] vm-fdt = ">= 0.2.0" +log = "0.4" diff --git a/src/devices/src/fdt/loongarch64.rs b/src/devices/src/fdt/loongarch64.rs new file mode 100644 index 000000000..be53fa12a --- /dev/null +++ b/src/devices/src/fdt/loongarch64.rs @@ -0,0 +1,306 @@ +use std::collections::HashMap; +use std::fmt::Debug; +use std::{io, result}; + +use crate::legacy::IrqChip; +use crate::DeviceType; + +use arch::{ArchMemoryInfo, InitrdConfig}; +use vm_fdt::{Error as FdtError, FdtWriter}; +use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap}; 
+
+const CPU_INTC_PHANDLE: u32 = 1;
+const EIOINTC_PHANDLE: u32 = 2;
+const PCH_PIC_PHANDLE: u32 = 3;
+
+const ADDRESS_CELLS: u32 = 0x2;
+const SIZE_CELLS: u32 = 0x2;
+
+const IRQ_TYPE_LEVEL_HI: u32 = 4;
+
+/// Trait for devices to be added to the Flattened Device Tree.
+pub trait DeviceInfoForFDT {
+    /// Returns the address where this device will be loaded.
+    fn addr(&self) -> u64;
+    /// Returns the associated interrupt for this device.
+    fn irq(&self) -> u32;
+    /// Returns the amount of memory that needs to be reserved for this device.
+    fn length(&self) -> u64;
+}
+
+/// Errors thrown while configuring the Flattened Device Tree for loongarch64.
+#[derive(Debug)]
+pub enum Error {
+    /// Creating FDT failed.
+    CreateFDT(FdtError),
+    /// Failure in calling syscall for terminating this FDT.
+    FinishFDTReserveMap(io::Error),
+    /// Failure in writing FDT in memory.
+    WriteFDTToMemory(GuestMemoryError),
+}
+type Result<T> = result::Result<T, Error>;
+
+impl From<FdtError> for Error {
+    fn from(item: FdtError) -> Self {
+        Error::CreateFDT(item)
+    }
+}
+
+/// Creates the flattened device tree for this loongarch64 VM.
+pub fn create_fdt<T: DeviceInfoForFDT + Clone + Debug>(
+    guest_mem: &GuestMemoryMmap,
+    arch_memory_info: &ArchMemoryInfo,
+    num_vcpu: u32,
+    cmdline: &str,
+    device_info: &HashMap<(DeviceType, String), T>,
+    intc: &IrqChip,
+    initrd: &Option<InitrdConfig>,
+) -> Result<Vec<u8>> {
+    // Allocate stuff necessary for holding the blob.
+    let mut fdt = FdtWriter::new()?;
+
+    // For an explanation why these nodes were introduced in the blob take a look at
+    // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845
+    // Look for "Required nodes and properties".
+
+    // Header or the root node as per above mentioned documentation.
+    let root_node = fdt.begin_node("root")?;
+    fdt.property_string("compatible", "linux,dummy-virt")?;
+    // For info on #address-cells and size-cells resort to Table 3.1 Root Node
+    // Properties
+    fdt.property_u32("#address-cells", ADDRESS_CELLS)?;
+    fdt.property_u32("#size-cells", SIZE_CELLS)?;
+    create_cpu_nodes(&mut fdt, num_vcpu)?;
+    create_memory_node(&mut fdt, guest_mem, arch_memory_info)?;
+    create_chosen_node(&mut fdt, cmdline, initrd, device_info)?;
+    create_cpuintc_node(&mut fdt)?;
+    create_eiointc_node(&mut fdt)?;
+    create_pic_node(&mut fdt, intc)?;
+    create_devices_node(&mut fdt, device_info)?;
+    //create_console_node(&mut fdt )?;
+
+    // End Header node.
+    fdt.end_node(root_node)?;
+
+    // Allocate another buffer so we can format and then write fdt to guest.
+    let fdt_final = fdt.finish()?;
+
+    // Write FDT to memory.
+    let fdt_address = GuestAddress(arch_memory_info.fdt_addr);
+    guest_mem
+        .write_slice(fdt_final.as_slice(), fdt_address)
+        .map_err(Error::WriteFDTToMemory)?;
+    debug!(
+        "loongarch fdt written: addr=0x{:x}, size=0x{:x}",
+        arch_memory_info.fdt_addr,
+        fdt_final.len(),
+    );
+    Ok(fdt_final)
+}
+
+// Following are the auxiliary functions for creating the different nodes that we append to our FDT.
+fn create_cpu_nodes(fdt: &mut FdtWriter, num_cpus: u32) -> Result<()> { + let cpus = fdt.begin_node("cpus")?; + fdt.property_u32("#address-cells", 0x01)?; + fdt.property_u32("#size-cells", 0x0)?; + + for cpu_index in 0..num_cpus { + let cpu = fdt.begin_node(&format!("cpu@{cpu_index:x}"))?; + fdt.property_string("device_type", "cpu")?; + fdt.property_string("compatible", "loongson,la664")?; + fdt.property_u32("reg", cpu_index)?; + fdt.end_node(cpu)?; + } + fdt.end_node(cpus)?; + Ok(()) +} +fn create_memory_node( + fdt: &mut FdtWriter, + _guest_mem: &GuestMemoryMmap, + arch_memory_info: &ArchMemoryInfo, +) -> Result<()> { + let mem_size = arch_memory_info.ram_last_addr - arch::loongarch64::layout::DRAM_MEM_START; + // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L960 + // for an explanation of this. + let mem_reg_prop = [arch::loongarch64::layout::DRAM_MEM_START, mem_size]; + + let mem_node = fdt.begin_node("memory")?; + fdt.property_string("device_type", "memory")?; + fdt.property_array_u64("reg", &mem_reg_prop)?; + fdt.end_node(mem_node)?; + Ok(()) +} + +fn create_chosen_node( + fdt: &mut FdtWriter, + cmdline: &str, + initrd: &Option, + dev_info: &HashMap<(DeviceType, String), T>, +) -> Result<()> { + let chosen_node = fdt.begin_node("chosen")?; + fdt.property_string("bootargs", cmdline)?; + + // Only set stdout-path if we have a Serial device (not when using Virtio Console). + // When using Virtio Console (hvc0), kernel uses the console= cmdline parameter instead. + // When using Serial (ttyS0), we point FDT to the serial device node. 
+ let has_serial = dev_info + .keys() + .any(|(device_type, _)| device_type == &DeviceType::Serial); + let has_virtio_console = dev_info + .keys() + .any(|(device_type, _)| matches!(device_type, DeviceType::Virtio(3))); // VIRTIO_ID_CONSOLE = 3 + + if has_serial && !has_virtio_console { + // Only set stdout-path if Serial is the only console device + for ((device_type, _device_id), info) in dev_info { + if device_type == &DeviceType::Serial { + fdt.property_string("stdout-path", &format!("/serial@{:x}", info.addr()))?; + break; + } + } + } + let stdout_path = if has_serial && !has_virtio_console { + dev_info + .iter() + .find_map(|((device_type, _device_id), info)| { + if device_type == &DeviceType::Serial { + Some(format!("/serial@{:x}", info.addr())) + } else { + None + } + }) + } else { + None + }; + + debug!( + "loongarch chosen: has_serial={}, has_virtio_console={}, stdout_path={:?}", + has_serial, has_virtio_console, stdout_path, + ); + + if let Some(path) = &stdout_path { + fdt.property_string("stdout-path", path)?; + } + // If Virtio Console exists, don't set stdout-path; kernel uses console= cmdline parameter + + if let Some(initrd_config) = initrd { + fdt.property_u64("linux,initrd-start", initrd_config.address.raw_value())?; + fdt.property_u64( + "linux,initrd-end", + initrd_config.address.raw_value() + initrd_config.size as u64, + )?; + } + + fdt.end_node(chosen_node)?; + + Ok(()) +} + +fn create_cpuintc_node(fdt: &mut FdtWriter) -> Result<()> { + let cpuintc_node = fdt.begin_node("interrupt-controller")?; + fdt.property_string("compatible", "loongson,cpu-interrupt-controller")?; + fdt.property_u32("#interrupt-cells", 1)?; + fdt.property_null("interrupt-controller")?; + fdt.property_u32("phandle", CPU_INTC_PHANDLE)?; + fdt.end_node(cpuintc_node)?; + Ok(()) +} +fn create_eiointc_node(fdt: &mut FdtWriter) -> Result<()> { + // Keep the external IRQ fabric in the DT for compatibility, even though + // the current serial/virtio path wires devices directly to 
cpuintc.
+    let reg = [0x1fe0_1600_u64, 0xea00_u64];
+
+    let node = fdt.begin_node("interrupt-controller@1fe01600")?;
+    fdt.property_string("compatible", "loongson,ls2k2000-eiointc")?;
+    fdt.property_array_u64("reg", &reg)?;
+    fdt.property_null("interrupt-controller")?;
+    fdt.property_u32("#interrupt-cells", 1)?;
+    fdt.property_u32("phandle", EIOINTC_PHANDLE)?;
+    fdt.property_u32("interrupt-parent", CPU_INTC_PHANDLE)?;
+    fdt.property_array_u32("interrupts", &[3])?;
+    fdt.end_node(node)?;
+    Ok(())
+}
+
+fn create_pic_node(fdt: &mut FdtWriter, intc: &IrqChip) -> Result<()> {
+    let intc = intc.lock().unwrap();
+    let reg = [intc.get_mmio_addr(), intc.get_mmio_size()];
+
+    let node = fdt.begin_node(&format!("interrupt-controller@{:x}", intc.get_mmio_addr()))?;
+    fdt.property_string("compatible", "loongson,pch-pic-1.0")?;
+    fdt.property_array_u64("reg", &reg)?;
+    fdt.property_null("interrupt-controller")?;
+    fdt.property_u32("#interrupt-cells", 2)?;
+    fdt.property_u32("phandle", PCH_PIC_PHANDLE)?;
+    fdt.property_u32("loongson,pic-base-vec", 0)?;
+    fdt.property_u32("interrupt-parent", EIOINTC_PHANDLE)?;
+    fdt.end_node(node)?;
+    Ok(())
+}
+
+// NOTE(review): the generic parameter list was garbled in this paste (angle
+// brackets stripped); the bound below follows the aarch64/riscv64 fdt
+// helpers — confirm against the original patch.
+fn create_serial_node<T: DeviceInfoForFDT + Clone + Debug>(
+    fdt: &mut FdtWriter,
+    dev_info: &T,
+) -> Result<()> {
+    let reg = [dev_info.addr(), dev_info.length()];
+
+    let node = fdt.begin_node(&format!("serial@{:x}", dev_info.addr()))?;
+    fdt.property_string("compatible", "ns16550a")?;
+    fdt.property_array_u64("reg", &reg)?;
+    fdt.property_u32("clock-frequency", 3686400)?;
+    // LoongArch currently injects serial/virtio interrupts through cpuintc
+    // with KVM_INTERRUPT instead of the retained PCH-PIC/EIOINTC path. 
+ let irq = [dev_info.irq()]; + fdt.property_u32("interrupt-parent", CPU_INTC_PHANDLE)?; + fdt.property_array_u32("interrupts", &irq)?; + fdt.end_node(node)?; + // debug!( + // "loongarch serial node: addr=0x{:x}, len=0x{:x}, irq={}, clock-frequency={}", + // dev_info.addr(), + // dev_info.length(), + // dev_info.irq(), + // 3686400u32, + // ); + Ok(()) +} +fn create_virtio_node( + fdt: &mut FdtWriter, + dev_info: &T, +) -> Result<()> { + let reg = [dev_info.addr(), dev_info.length()]; + + // debug!( + // "loongarch virtio node: addr=0x{:x}, irq={}", + // dev_info.addr(), + // dev_info.irq(), + // ); + let node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?; + fdt.property_string("compatible", "virtio,mmio")?; + fdt.property_array_u64("reg", ®)?; + let irq = [dev_info.irq()]; + fdt.property_u32("interrupt-parent", CPU_INTC_PHANDLE)?; + fdt.property_array_u32("interrupts", &irq)?; + fdt.end_node(node)?; + Ok(()) +} +fn create_devices_node( + fdt: &mut FdtWriter, + dev_info: &HashMap<(DeviceType, String), T>, +) -> Result<()> { + let mut ordered_virtio_devices: Vec<&T> = Vec::new(); + + for ((device_type, _device_id), info) in dev_info { + match device_type { + DeviceType::Serial => create_serial_node(fdt, info)?, + DeviceType::Virtio(_) => ordered_virtio_devices.push(info), + } + } + + ordered_virtio_devices.sort_by_key(|info| info.addr()); + for info in ordered_virtio_devices { + create_virtio_node(fdt, info)?; + } + + Ok(()) +} diff --git a/src/devices/src/fdt/mod.rs b/src/devices/src/fdt/mod.rs index 43a81999d..9ed18efb9 100644 --- a/src/devices/src/fdt/mod.rs +++ b/src/devices/src/fdt/mod.rs @@ -10,3 +10,8 @@ pub use aarch64::*; pub mod riscv64; #[cfg(target_arch = "riscv64")] pub use riscv64::*; + +#[cfg(target_arch = "loongarch64")] +pub mod loongarch64; +#[cfg(target_arch = "loongarch64")] +pub use loongarch64::*; diff --git a/src/devices/src/legacy/irqchip.rs b/src/devices/src/legacy/irqchip.rs index ed33bd2a8..35c2dbfb2 100644 --- 
a/src/devices/src/legacy/irqchip.rs +++ b/src/devices/src/legacy/irqchip.rs @@ -35,6 +35,16 @@ impl IrqChipDevice { ) -> Result<(), DeviceError> { self.inner.set_irq(irq_line, interrupt_evt) } + + #[cfg(target_arch = "loongarch64")] + pub fn set_irq_state( + &self, + irq_line: Option, + interrupt_evt: Option<&EventFd>, + active: bool, + ) -> Result<(), DeviceError> { + self.inner.set_irq_state(irq_line, interrupt_evt, active) + } } impl BusDevice for IrqChipDevice { @@ -145,6 +155,29 @@ pub trait IrqChipT: BusDevice + AIADevice { ) -> Result<(), DeviceError>; } +#[cfg(target_arch = "loongarch64")] +pub trait IrqChipT: BusDevice { + fn get_mmio_addr(&self) -> u64; + fn get_mmio_size(&self) -> u64; + fn set_irq( + &self, + irq_line: Option, + interrupt_evt: Option<&EventFd>, + ) -> Result<(), DeviceError>; + fn set_irq_state( + &self, + irq_line: Option, + interrupt_evt: Option<&EventFd>, + active: bool, + ) -> Result<(), DeviceError> { + if active { + self.set_irq(irq_line, interrupt_evt) + } else { + Ok(()) + } + } +} + #[cfg(any(test, feature = "test_utils"))] pub mod test_utils { use super::*; diff --git a/src/devices/src/legacy/kvmloongarchirqchip.rs b/src/devices/src/legacy/kvmloongarchirqchip.rs new file mode 100644 index 000000000..e86539a99 --- /dev/null +++ b/src/devices/src/legacy/kvmloongarchirqchip.rs @@ -0,0 +1,168 @@ +use std::fs::File; +use std::io; + +use crate::bus::BusDevice; +use crate::legacy::irqchip::IrqChipT; +use crate::Error as DeviceError; + +use kvm_bindings::kvm_interrupt; +use kvm_ioctls::{DeviceFd, Error as KvmError, VmFd}; +use utils::eventfd::EventFd; +use vmm_sys_util::ioctl::ioctl_with_ref; +use vmm_sys_util::ioctl_iow_nr; + +ioctl_iow_nr!( + KVM_INTERRUPT_LOONGARCH, + kvm_bindings::KVMIO, + 0x86, + kvm_interrupt +); + +pub struct KvmLoongArchIrqChip { + _ipi_fd: DeviceFd, + _eiointc_fd: DeviceFd, + _pchpic_fd: DeviceFd, + irq_vcpu_fd: File, + _vcpu_count: u32, +} + +impl KvmLoongArchIrqChip { + pub fn new(vm: &VmFd, vcpu_count: 
u32, irq_vcpu_fd: File) -> Result<Self, KvmError> {
+        // NOTE(review): the return type was garbled in this paste (`-> Result {`);
+        // `Result<Self, KvmError>` is reconstructed from the otherwise-unused
+        // `Error as KvmError` import and the bare `?` on kvm-ioctls calls —
+        // confirm against the original patch.
+        //
+        // Keep the in-kernel external irqchip devices around for platform
+        // compatibility; the active serial/virtio injection path uses
+        // KVM_INTERRUPT through cpuintc on vcpu0.
+        let mut ipi_device = kvm_bindings::kvm_create_device {
+            type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_LOONGARCH_IPI,
+            fd: 0,
+            flags: 0,
+        };
+        let ipi_fd = vm.create_device(&mut ipi_device)?;
+
+        let mut eiointc_device = kvm_bindings::kvm_create_device {
+            type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+            fd: 0,
+            flags: 0,
+        };
+        let eiointc_fd = vm.create_device(&mut eiointc_device)?;
+
+        // Tell the in-kernel EIOINTC how many vCPUs to route to. The pointer
+        // passed via `addr` only needs to live across the ioctl call.
+        let nr_cpus = vcpu_count;
+        let nr_cpu_ptr = &nr_cpus as *const u32;
+        let attr = kvm_bindings::kvm_device_attr {
+            group: kvm_bindings::KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+            attr: u64::from(kvm_bindings::KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU),
+            addr: nr_cpu_ptr as u64,
+            flags: 0,
+        };
+        eiointc_fd.set_device_attr(&attr)?;
+        let features: u32 = 0;
+        let features_ptr = &features as *const u32;
+        let attr = kvm_bindings::kvm_device_attr {
+            group: kvm_bindings::KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+            attr: u64::from(kvm_bindings::KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE),
+            addr: features_ptr as u64,
+            flags: 0,
+        };
+        eiointc_fd.set_device_attr(&attr)?;
+
+        let mut pchpic_device = kvm_bindings::kvm_create_device {
+            type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+            fd: 0,
+            flags: 0,
+        };
+        let pchpic_fd = vm.create_device(&mut pchpic_device)?;
+        // Base matches `get_mmio_addr()` below; keep the two in sync.
+        let pch_pic_base: u64 = 0x1000_0000;
+        let pch_pic_base_ptr = &pch_pic_base as *const u64;
+        let attr = kvm_bindings::kvm_device_attr {
+            group: kvm_bindings::KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL,
+            attr: u64::from(kvm_bindings::KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT),
+            addr: pch_pic_base_ptr as u64,
+            flags: 0,
+        };
+        pchpic_fd.set_device_attr(&attr)?;
+
+        Ok(Self {
+            _ipi_fd: ipi_fd,
+            _eiointc_fd: eiointc_fd,
+            _pchpic_fd: pchpic_fd,
+            irq_vcpu_fd,
+            _vcpu_count: 
vcpu_count, + }) + } +} + +impl IrqChipT for KvmLoongArchIrqChip { + fn get_mmio_addr(&self) -> u64 { + 0x1000_0000 + } + + fn get_mmio_size(&self) -> u64 { + 0x400 + } + + fn set_irq( + &self, + irq_line: Option, + interrupt_evt: Option<&EventFd>, + ) -> Result<(), DeviceError> { + //debug!("loongarch irqchip set_irq_state irq_line={:?}", irq_line); + // LoongArch mmio/serial path does not rely on irqfd registration. + // Inject via KVM_INTERRUPT (assert). + if let Err(e) = self.set_irq_state(irq_line, interrupt_evt, true) { + error!("Failed to set irq state: {e:?}"); + return Err(e); + } + //debug!("loongarch irqchip eventfd write ok"); + Ok(()) + } + + fn set_irq_state( + &self, + irq_line: Option, + _interrupt_evt: Option<&EventFd>, + active: bool, + ) -> Result<(), DeviceError> { + let irq = match irq_line { + Some(irq) => irq, + None => { + return Err(DeviceError::FailedSignalingUsedQueue(io::Error::new( + io::ErrorKind::InvalidInput, + "irq_line not set", + ))); + } + }; + + let signed_irq = if active { irq as i32 } else { -(irq as i32) }; + let interrupt = kvm_interrupt { + // KVM uapi exposes `irq` as u32, but LoongArch KVM casts it back to `int` + // and uses the sign to distinguish assert vs deassert. 
+ irq: signed_irq as u32, + }; + + let ret = + unsafe { ioctl_with_ref(&self.irq_vcpu_fd, KVM_INTERRUPT_LOONGARCH(), &interrupt) }; + if ret != 0 { + let e = io::Error::last_os_error(); + error!( + "KVM_INTERRUPT failed: irq={}, signed_irq={}, active={}, err={e:?}", + irq, signed_irq, active + ); + return Err(DeviceError::FailedSignalingUsedQueue(e)); + } + + // debug!( + // "KVM_INTERRUPT ok: irq={}, signed_irq={}, active={}", + // irq, signed_irq, active + // ); + Ok(()) + } +} + +impl BusDevice for KvmLoongArchIrqChip { + fn read(&mut self, _vcpuid: u64, _offset: u64, _data: &mut [u8]) { + unreachable!("MMIO operations are managed in-kernel"); + } + + fn write(&mut self, _vcpuid: u64, _offset: u64, _data: &[u8]) { + unreachable!("MMIO operations are managed in-kernel"); + } +} diff --git a/src/devices/src/legacy/loongarch64/mod.rs b/src/devices/src/legacy/loongarch64/mod.rs new file mode 100644 index 000000000..b1fc0cf1d --- /dev/null +++ b/src/devices/src/legacy/loongarch64/mod.rs @@ -0,0 +1 @@ +pub mod serial; diff --git a/src/devices/src/legacy/loongarch64/serial.rs b/src/devices/src/legacy/loongarch64/serial.rs new file mode 100644 index 000000000..2014bced6 --- /dev/null +++ b/src/devices/src/legacy/loongarch64/serial.rs @@ -0,0 +1,336 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
+ +use std::collections::VecDeque; +use std::io; + +use polly::event_manager::{EventManager, Subscriber}; +use utils::epoll::{EpollEvent, EventSet}; +use utils::eventfd::EventFd; + +use crate::bus::BusDevice; +use crate::legacy::{IrqChip, ReadableFd}; + +const LOOP_SIZE: usize = 0x40; + +const DATA: u8 = 0; // data reg +const IER: u8 = 1; // interrupt enable reg +const IIR: u8 = 2; // interrupt indentify reg +const LCR: u8 = 3; // line control reg +const MCR: u8 = 4; +const LSR: u8 = 5; +const MSR: u8 = 6; +const SCR: u8 = 7; + +const DLAB_LOW: u8 = 0; +const DLAB_HIGH: u8 = 1; + +const IER_RECV_BIT: u8 = 0x1; +const IER_THR_BIT: u8 = 0x2; +const IER_FIFO_BITS: u8 = 0x0f; + +const IIR_FIFO_BITS: u8 = 0xc0; +const IIR_NONE_BIT: u8 = 0x1; +const IIR_THR_BIT: u8 = 0x2; +const IIR_RECV_BIT: u8 = 0x4; + +const LCR_DLAB_BIT: u8 = 0x80; + +const LSR_DATA_BIT: u8 = 0x1; +const LSR_EMPTY_BIT: u8 = 0x20; +const LSR_IDLE_BIT: u8 = 0x40; + +const MCR_LOOP_BIT: u8 = 0x10; + +const DEFAULT_INTERRUPT_IDENTIFICATION: u8 = IIR_NONE_BIT; // no pending interrupt +const DEFAULT_LINE_STATUS: u8 = LSR_EMPTY_BIT | LSR_IDLE_BIT; // THR empty and line is idle +const DEFAULT_LINE_CONTROL: u8 = 0x3; // 8-bits per character +const DEFAULT_MODEM_CONTROL: u8 = 0x8; // Auxiliary output 2 +const DEFAULT_MODEM_STATUS: u8 = 0x20 | 0x10 | 0x80; // data ready, clear to send, carrier detect +const DEFAULT_BAUD_DIVISOR: u16 = 12; // 9600 bps + +/// Emulates serial COM ports commonly seen on x86 I/O ports 0x3f8/0x2f8/0x3e8/0x2e8. +/// +/// This can optionally write the guest's output to a Write trait object. To send input to the +/// guest, use `raw_input`. 
+pub struct Serial { + interrupt_enable: u8, + interrupt_identification: u8, + interrupt_evt: EventFd, + line_control: u8, + line_status: u8, + modem_control: u8, + modem_status: u8, + scratch: u8, + baud_divisor: u16, + in_buffer: VecDeque, + out: Option>, + input: Option>, + intc: Option, + irq_line: Option, +} + +impl Serial { + pub fn new( + interrupt_evt: EventFd, + out: Option>, + input: Option>, + ) -> Serial { + Serial { + interrupt_enable: 0, + interrupt_identification: DEFAULT_INTERRUPT_IDENTIFICATION, + interrupt_evt, + line_control: DEFAULT_LINE_CONTROL, + line_status: DEFAULT_LINE_STATUS, + modem_control: DEFAULT_MODEM_CONTROL, + modem_status: DEFAULT_MODEM_STATUS, + scratch: 0, + baud_divisor: DEFAULT_BAUD_DIVISOR, + in_buffer: VecDeque::new(), + out, + input, + intc: None, + irq_line: None, + } + } + + /// Constructs a Serial port ready for input and output. + pub fn new_in_out( + interrupt_evt: EventFd, + input: Box, + out: Box, + ) -> Serial { + Self::new(interrupt_evt, Some(out), Some(input)) + } + + /// Constructs a Serial port ready for output but with no input. + pub fn new_out(interrupt_evt: EventFd, out: Box) -> Serial { + Self::new(interrupt_evt, Some(out), None) + } + + /// Constructs a Serial port with no connected input or output. + pub fn new_sink(interrupt_evt: EventFd) -> Serial { + Self::new(interrupt_evt, None, None) + } + + pub fn set_intc(&mut self, intc: IrqChip) { + self.intc = Some(intc); + } + + pub fn set_irq_line(&mut self, irq: u32) { + self.irq_line = Some(irq); + } + + /// Provides a reference to the interrupt event fd. 
+ pub fn interrupt_evt(&self) -> &EventFd { + &self.interrupt_evt + } + + fn is_dlab_set(&self) -> bool { + (self.line_control & LCR_DLAB_BIT) != 0 + } + + fn is_recv_intr_enabled(&self) -> bool { + (self.interrupt_enable & IER_RECV_BIT) != 0 + } + + fn is_thr_intr_enabled(&self) -> bool { + (self.interrupt_enable & IER_THR_BIT) != 0 + } + + fn is_loop(&self) -> bool { + (self.modem_control & MCR_LOOP_BIT) != 0 + } + + fn add_intr_bit(&mut self, bit: u8) { + self.interrupt_identification &= !IIR_NONE_BIT; + self.interrupt_identification |= bit; + } + + fn interrupt_active(&self) -> bool { + (self.interrupt_identification & IIR_NONE_BIT) == 0 + } + + fn sync_interrupt(&mut self) -> io::Result<()> { + if let Some(intc) = &self.intc { + intc.lock() + .unwrap() + .set_irq_state( + self.irq_line, + Some(&self.interrupt_evt), + self.interrupt_active(), + ) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{e:?}")))?; + return Ok(()); + } + + if self.interrupt_active() { + self.interrupt_evt.write(1)?; + } + + Ok(()) + } + + fn del_intr_bit(&mut self, bit: u8) -> io::Result<()> { + self.interrupt_identification &= !bit; + if self.interrupt_identification == 0x0 { + self.interrupt_identification = IIR_NONE_BIT; + } + self.sync_interrupt() + } + + fn thr_empty(&mut self) -> io::Result<()> { + if self.is_thr_intr_enabled() { + self.add_intr_bit(IIR_THR_BIT); + self.sync_interrupt()? + } + Ok(()) + } + + fn recv_data(&mut self) -> io::Result<()> { + if self.is_recv_intr_enabled() { + self.add_intr_bit(IIR_RECV_BIT); + self.sync_interrupt()? + } + self.line_status |= LSR_DATA_BIT; + Ok(()) + } + + fn iir_reset(&mut self) -> io::Result<()> { + self.interrupt_identification = DEFAULT_INTERRUPT_IDENTIFICATION; + self.sync_interrupt() + } + + // Handles a write request from the driver. 
+ fn handle_write(&mut self, offset: u8, value: u8) -> io::Result<()> { + match offset { + DLAB_LOW if self.is_dlab_set() => { + self.baud_divisor = (self.baud_divisor & 0xff00) | u16::from(value) + } + DLAB_HIGH if self.is_dlab_set() => { + self.baud_divisor = (self.baud_divisor & 0x00ff) | (u16::from(value) << 8) + } + DATA => { + if self.is_loop() { + if self.in_buffer.len() < LOOP_SIZE { + self.in_buffer.push_back(value); + self.recv_data()?; + } + } else { + if let Some(out) = self.out.as_mut() { + out.write_all(&[value])?; + out.flush()?; + } + self.thr_empty()?; + } + } + IER => { + self.interrupt_enable = value & IER_FIFO_BITS; + self.sync_interrupt()?; + } + LCR => self.line_control = value, + MCR => self.modem_control = value, + SCR => self.scratch = value, + _ => {} + } + Ok(()) + } + + // Handles a read request from the driver. + fn handle_read(&mut self, offset: u8) -> u8 { + match offset { + DLAB_LOW if self.is_dlab_set() => self.baud_divisor as u8, + DLAB_HIGH if self.is_dlab_set() => (self.baud_divisor >> 8) as u8, + DATA => { + self.del_intr_bit(IIR_RECV_BIT).ok(); + if self.in_buffer.len() <= 1 { + self.line_status &= !LSR_DATA_BIT; + } + self.in_buffer.pop_front().unwrap_or_default() + } + IER => self.interrupt_enable, + IIR => { + let v = self.interrupt_identification | IIR_FIFO_BITS; + self.iir_reset().ok(); + v + } + LCR => self.line_control, + MCR => self.modem_control, + LSR => self.line_status, + MSR => self.modem_status, + SCR => self.scratch, + _ => 0, + } + } + + fn raw_input(&mut self, data: &[u8]) -> io::Result<()> { + if !self.is_loop() { + self.in_buffer.extend(data); + self.recv_data()?; + } + Ok(()) + } +} + +impl BusDevice for Serial { + fn read(&mut self, _vcpuid: u64, offset: u64, data: &mut [u8]) { + if data.len() != 1 { + return; + } + + data[0] = self.handle_read(offset as u8); + } + + fn write(&mut self, _vcpuid: u64, offset: u64, data: &[u8]) { + if data.len() != 1 { + return; + } + if let Err(e) = self.handle_write(offset 
as u8, data[0]) { + error!("Failed the write to serial: {e}"); + } + } +} + +impl Subscriber for Serial { + /// Handle a read event (EPOLLIN) on the serial input fd. + fn process(&mut self, event: &EpollEvent, _: &mut EventManager) { + let source = event.fd(); + let event_set = event.event_set(); + + // TODO: also check for errors. Pending high level discussions on how we want + // to handle errors in devices. + let supported_events = EventSet::IN; + if !supported_events.contains(event_set) { + warn!("Received unknown event: {event_set:?} from source: {source:?}"); + return; + } + + if let Some(input) = self.input.as_mut() { + if input.as_raw_fd() == source { + let mut out = [0u8; 32]; + match input.read(&mut out[..]) { + Ok(count) => { + self.raw_input(&out[..count]) + .unwrap_or_else(|e| warn!("Serial error on input: {e}")); + } + Err(e) => { + warn!("error while reading stdin: {e:?}"); + } + } + } + } + } + + /// Initial registration of pollable objects. + /// If serial input is present, register the serial input FD as readable. 
+ fn interest_list(&self) -> Vec { + match &self.input { + Some(input) => vec![EpollEvent::new(EventSet::IN, input.as_raw_fd() as u64)], + None => vec![], + } + } +} diff --git a/src/devices/src/legacy/mod.rs b/src/devices/src/legacy/mod.rs index 52d3e6cb5..2585a3dbf 100644 --- a/src/devices/src/legacy/mod.rs +++ b/src/devices/src/legacy/mod.rs @@ -23,6 +23,8 @@ mod kvmgicv2; mod kvmgicv3; #[cfg(all(target_os = "linux", target_arch = "x86_64"))] mod kvmioapic; +#[cfg(all(target_os = "linux", target_arch = "loongarch64"))] +mod kvmloongarchirqchip; #[cfg(target_arch = "aarch64")] mod rtc_pl031; #[cfg(target_os = "macos")] @@ -43,6 +45,10 @@ use aarch64::serial; mod riscv64; #[cfg(target_arch = "riscv64")] use riscv64::serial; +#[cfg(target_arch = "loongarch64")] +mod loongarch64; +#[cfg(target_arch = "loongarch64")] +use loongarch64::serial; #[cfg(target_arch = "x86_64")] pub use self::cmos::Cmos; @@ -67,6 +73,8 @@ pub use self::kvmgicv2::KvmGicV2; pub use self::kvmgicv3::KvmGicV3; #[cfg(all(target_os = "linux", target_arch = "x86_64"))] pub use self::kvmioapic::KvmIoapic; +#[cfg(all(target_os = "linux", target_arch = "loongarch64"))] +pub use self::kvmloongarchirqchip::KvmLoongArchIrqChip; #[cfg(target_arch = "aarch64")] pub use self::rtc_pl031::RTC; pub use self::serial::Serial; diff --git a/src/devices/src/lib.rs b/src/devices/src/lib.rs index 5df65dcf5..6552bb555 100644 --- a/src/devices/src/lib.rs +++ b/src/devices/src/lib.rs @@ -14,7 +14,11 @@ use std::fmt; use std::io; mod bus; -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" +))] pub mod fdt; pub mod legacy; pub mod virtio; @@ -44,7 +48,11 @@ pub enum DeviceType { #[cfg(target_arch = "aarch64")] Gpio, /// Device Type: Serial. 
- #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] Serial, /// Device Type: RTC. #[cfg(target_arch = "aarch64")] diff --git a/src/devices/src/virtio/console/device.rs b/src/devices/src/virtio/console/device.rs index b1022a38e..99c950787 100644 --- a/src/devices/src/virtio/console/device.rs +++ b/src/devices/src/virtio/console/device.rs @@ -226,6 +226,16 @@ impl Console { if !name.is_empty() { self.control.port_name(cmd.id, name) } + + #[cfg(target_arch = "loongarch64")] + { + // On some LoongArch runs, PORT_OPEN for console can be delayed. + // Start port0 on PORT_READY as a fallback. + let port_id = cmd.id as usize; + if port_id == 0 { + ports_to_start.push(port_id); + } + } } control_event::VIRTIO_CONSOLE_PORT_OPEN => { let opened = match cmd.value { @@ -252,11 +262,19 @@ impl Console { } } + ports_to_start.sort_unstable(); + ports_to_start.dedup(); + for port_id in ports_to_start { log::trace!("Starting port io for port {port_id}"); let rx_idx = port_id_to_queue_idx(QueueDirection::Rx, port_id); let tx_idx = port_id_to_queue_idx(QueueDirection::Tx, port_id); + if self.queues[rx_idx].is_none() || self.queues[tx_idx].is_none() { + log::trace!("Ignoring start for already-started port {port_id}"); + continue; + } + // Take ownership of port queues - they are moved to the port. 
let rx_queue = self.queues[rx_idx] .take() diff --git a/src/devices/src/virtio/fs/fuse.rs b/src/devices/src/virtio/fs/fuse.rs index 442cdad98..f8d0ab62f 100644 --- a/src/devices/src/virtio/fs/fuse.rs +++ b/src/devices/src/virtio/fs/fuse.rs @@ -585,7 +585,11 @@ impl Attr { nlink: st.st_nlink as u32, #[cfg(all( target_os = "linux", - any(target_arch = "aarch64", target_arch = "riscv64") + any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ) ))] nlink: st.st_nlink, #[cfg(target_os = "macos")] diff --git a/src/devices/src/virtio/mmio.rs b/src/devices/src/virtio/mmio.rs index dad448729..d44d36118 100644 --- a/src/devices/src/virtio/mmio.rs +++ b/src/devices/src/virtio/mmio.rs @@ -83,6 +83,8 @@ pub struct MmioTransport { struct InterruptTransportInner { log_target: String, status: AtomicUsize, + #[cfg(target_arch = "loongarch64")] + irq_sync: Mutex<()>, event: EventFd, intc: IrqChip, irq_line: Option, @@ -96,6 +98,8 @@ impl InterruptTransport { Ok(Self(Arc::new(InterruptTransportInner { log_target, status: AtomicUsize::new(0), + #[cfg(target_arch = "loongarch64")] + irq_sync: Mutex::new(()), event: EventFd::new(0).map_err(CreateMmioTransportError::CreateInterruptEventFd)?, intc, irq_line: None, @@ -119,7 +123,7 @@ impl InterruptTransport { } fn set_irq_line(&mut self, irq_line: u32) { - debug!(target: &self.0.log_target, "set_irq_line: {irq_line}"); + // debug!(target: &self.0.log_target, "set_irq_line: {irq_line}"); match Arc::get_mut(&mut self.0) { None => { error!("Cannot change irq_line of activated device"); @@ -131,21 +135,38 @@ impl InterruptTransport { } fn try_signal(&self, status: u32) -> Result<(), crate::Error> { - self.status().fetch_or(status as usize, Ordering::SeqCst); - self.intc() - .lock() - .unwrap() - .set_irq(self.0.irq_line, Some(&self.0.event))?; - Ok(()) + #[cfg(target_arch = "loongarch64")] + { + let _irq_sync = self.0.irq_sync.lock().unwrap(); + let old = self.status().fetch_or(status as usize, 
Ordering::SeqCst); + if old == 0 { + self.intc().lock().unwrap().set_irq_state( + self.0.irq_line, + Some(&self.0.event), + true, + )?; + } + return Ok(()); + } + + #[cfg(not(target_arch = "loongarch64"))] + { + self.status().fetch_or(status as usize, Ordering::SeqCst); + self.intc() + .lock() + .unwrap() + .set_irq(self.0.irq_line, Some(&self.0.event))?; + Ok(()) + } } pub fn try_signal_used_queue(&self) -> Result<(), crate::Error> { - debug!(target: &self.0.log_target, "interrupt: signal_used_queue"); + // debug!(target: &self.0.log_target, "interrupt: signal_used_queue"); self.try_signal(VIRTIO_MMIO_INT_VRING) } pub fn try_signal_config_change(&self) -> Result<(), crate::Error> { - debug!(target: &self.0.log_target, "interrupt: signal_config_change"); + // debug!(target: &self.0.log_target, "interrupt: signal_config_change"); self.try_signal(VIRTIO_MMIO_INT_CONFIG) } @@ -393,7 +414,11 @@ impl BusDevice for MmioTransport { } 0x34 => self.with_queue(0, |q| u32::from(q.get_max_size())), 0x44 => self.with_queue(0, |q| q.ready as u32), - 0x60 => self.interrupt.status().load(Ordering::SeqCst) as u32, + 0x60 => { + let s = self.interrupt.status().load(Ordering::SeqCst) as u32; + // debug!(target: &self.interrupt.0.log_target, "read interrupt status: 0x{:x}", s); + s + } 0x70 => self.device_status, 0xfc => self.config_generation, 0xb0..=0xbc => { @@ -476,7 +501,30 @@ impl BusDevice for MmioTransport { } } 0x64 => { + // debug!(target: &self.interrupt.0.log_target, "write interrupt ack: 0x{:x}", v); if self.check_device_status(device_status::DRIVER_OK, 0) { + #[cfg(target_arch = "loongarch64")] + { + let _irq_sync = self.interrupt.0.irq_sync.lock().unwrap(); + let old = self + .interrupt + .status() + .fetch_and(!(v as usize), Ordering::SeqCst); + let new = old & !(v as usize); + if old != 0 && new == 0 { + if let Err(e) = + self.interrupt.intc().lock().unwrap().set_irq_state( + self.interrupt.0.irq_line, + Some(&self.interrupt.0.event), + false, + ) + { + warn!(target: 
&self.interrupt.0.log_target, "Failed to deassert irq: {e:?}"); + } + } + } + + #[cfg(not(target_arch = "loongarch64"))] self.interrupt .status() .fetch_and(!(v as usize), Ordering::SeqCst); diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index ceae52c80..b8e207e94 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -547,6 +547,14 @@ pub extern "C" fn krun_free_ctx(ctx_id: u32) -> i32 { #[no_mangle] pub extern "C" fn krun_set_vm_config(ctx_id: u32, num_vcpus: u8, ram_mib: u32) -> i32 { + #[cfg(all(target_os = "linux", target_arch = "loongarch64"))] + if num_vcpus > 1 { + warn!( + "LoongArch currently supports only a single vCPU in libkrun (requested: {num_vcpus})" + ); + return -libc::EOPNOTSUPP; + } + let mem_size_mib: usize = match ram_mib.try_into() { Ok(size) => size, Err(e) => { @@ -1901,7 +1909,12 @@ pub extern "C" fn krun_get_max_vcpus() -> i32 { } } - #[cfg(target_os = "linux")] + #[cfg(all(target_os = "linux", target_arch = "loongarch64"))] + { + 1 + } + + #[cfg(all(target_os = "linux", not(target_arch = "loongarch64")))] { use kvm_ioctls::Kvm; match Kvm::new() { @@ -2060,7 +2073,7 @@ pub unsafe extern "C" fn krun_set_kernel( // process and treat it as a bundled kernel. 
#[cfg(all(target_arch = "x86_64", not(feature = "tee")))] 0 => return map_kernel(ctx_id, &path), - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "loongarch64"))] 0 => KernelFormat::Raw, 1 => KernelFormat::Elf, 2 => KernelFormat::PeGz, diff --git a/src/rutabaga_gfx/src/generated/virgl_debug_callback_bindings.rs b/src/rutabaga_gfx/src/generated/virgl_debug_callback_bindings.rs index a6d4e3819..f6c526937 100644 --- a/src/rutabaga_gfx/src/generated/virgl_debug_callback_bindings.rs +++ b/src/rutabaga_gfx/src/generated/virgl_debug_callback_bindings.rs @@ -46,7 +46,11 @@ pub mod stdio { pub type va_list = __builtin_va_list; } -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" +))] pub mod stdio { extern "C" { pub fn vsnprintf( diff --git a/src/rutabaga_gfx/src/virgl_renderer.rs b/src/rutabaga_gfx/src/virgl_renderer.rs index a2627ca0d..6fd52f015 100644 --- a/src/rutabaga_gfx/src/virgl_renderer.rs +++ b/src/rutabaga_gfx/src/virgl_renderer.rs @@ -169,7 +169,8 @@ extern "C" fn debug_callback(fmt: *const ::std::os::raw::c_char, ap: stdio::va_l target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64", - target_arch = "riscv64" + target_arch = "riscv64", + target_arch = "loongarch64" ))] let size = BUF_LEN as ::std::os::raw::c_ulong; #[cfg(target_arch = "arm")] diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 3072d0349..5f4748d64 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -35,6 +35,8 @@ use devices::legacy::Cmos; use devices::legacy::KvmAia; #[cfg(target_arch = "x86_64")] use devices::legacy::KvmIoapic; +#[cfg(all(target_os = "linux", target_arch = "loongarch64"))] +use devices::legacy::KvmLoongArchIrqChip; use devices::legacy::Serial; #[cfg(target_os = "macos")] use devices::legacy::VcpuList; @@ -519,7 +521,11 @@ impl Display for StartMicrovmError { pub enum Payload { 
#[cfg(all(target_arch = "x86_64", not(feature = "tee")))] KernelMmap, - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] KernelCopy, ExternalKernel(ExternalKernel), #[cfg(test)] @@ -542,7 +548,11 @@ fn choose_payload(vm_resources: &VmResources) -> Result std::result::Result<(GuestAddress, Option, Option), StartMicrovmError> { + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + let kernel_load = GuestAddress(0x8000_0000); + #[cfg(target_arch = "loongarch64")] + let kernel_load = GuestAddress(arch::loongarch64::layout::DRAM_MEM_START); + let entry_addr = match external_kernel.format { // Raw images are treated as bundled kernels on x86_64 #[cfg(target_arch = "x86_64")] KernelFormat::Raw => unreachable!(), - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] KernelFormat::Raw => { let data: Vec = std::fs::read(external_kernel.path.clone()) .map_err(StartMicrovmError::RawOpenKernel)?; - guest_mem.write(&data, GuestAddress(0x8000_0000)).unwrap(); - GuestAddress(0x8000_0000) + guest_mem.write(&data, kernel_load).unwrap(); + kernel_load } #[cfg(target_arch = "x86_64")] KernelFormat::Elf => { @@ -1170,14 +1247,106 @@ fn load_external_kernel( let mut kernel_data: Vec = Vec::new(); gz.read_to_end(&mut kernel_data) .map_err(StartMicrovmError::PeGzDecoder)?; - guest_mem - .write(&kernel_data, GuestAddress(0x8000_0000)) - .unwrap(); - GuestAddress(0x8000_0000) + guest_mem.write(&kernel_data, kernel_load).unwrap(); + kernel_load + } else { + return Err(StartMicrovmError::PeGzInvalid); + } + } + + #[cfg(target_arch = "loongarch64")] + KernelFormat::PeGz => { + const LOONGARCH_IMAGE_HEADER_SIZE: usize = 64; + const LOONGARCH_KERNEL_ENTRY_OFFSET: usize = 8; + const LOONGARCH_LOAD_OFFSET_OFFSET: usize = 24; + const 
LOONGARCH_LINUX_PE_MAGIC_OFFSET: usize = 56; + const LOONGARCH_LINUX_PE_MAGIC: u32 = 0x8182_23cd; + const LOONGARCH_VMLINUX_LOAD_ADDRESS: u64 = 0x9000_0000_0020_0000; + let data: Vec = std::fs::read(external_kernel.path.clone()) + .map_err(StartMicrovmError::PeGzOpenKernel)?; + + let kernel_data = if let Some(magic) = data + .windows(3) + .position(|window| window == [0x1f, 0x8b, 0x8]) + { + debug!("Found GZIP header on PE file at: 0x{magic:x}"); + let (_, compressed) = data.split_at(magic); + let mut gz = GzDecoder::new(compressed); + let mut kernel_data: Vec = Vec::new(); + gz.read_to_end(&mut kernel_data) + .map_err(StartMicrovmError::PeGzDecoder)?; + kernel_data } else { + debug!("No GZIP header found on PE file; treating it as plain PE image"); + data + }; + + if kernel_data.len() < LOONGARCH_IMAGE_HEADER_SIZE { + return Err(StartMicrovmError::PeGzInvalid); + } + + let pe_magic = u32::from_le_bytes( + kernel_data[LOONGARCH_LINUX_PE_MAGIC_OFFSET..LOONGARCH_LINUX_PE_MAGIC_OFFSET + 4] + .try_into() + .unwrap(), + ); + if pe_magic != LOONGARCH_LINUX_PE_MAGIC { return Err(StartMicrovmError::PeGzInvalid); } + + let kernel_entry = u64::from_le_bytes( + kernel_data[LOONGARCH_KERNEL_ENTRY_OFFSET..LOONGARCH_KERNEL_ENTRY_OFFSET + 8] + .try_into() + .unwrap(), + ); + let load_offset = u64::from_le_bytes( + kernel_data[LOONGARCH_LOAD_OFFSET_OFFSET..LOONGARCH_LOAD_OFFSET_OFFSET + 8] + .try_into() + .unwrap(), + ); + + let image_load_addr = GuestAddress( + arch::loongarch64::layout::DRAM_MEM_START + .checked_add(load_offset) + .ok_or(StartMicrovmError::PeGzInvalid)?, + ); + + let entry_addr = if kernel_entry >= LOONGARCH_VMLINUX_LOAD_ADDRESS { + let entry_offset = kernel_entry + .checked_sub(LOONGARCH_VMLINUX_LOAD_ADDRESS) + .ok_or(StartMicrovmError::PeGzInvalid)?; + GuestAddress( + image_load_addr + .raw_value() + .checked_add(entry_offset) + .ok_or(StartMicrovmError::PeGzInvalid)?, + ) + } else if kernel_entry >= load_offset { + GuestAddress( + 
arch::loongarch64::layout::DRAM_MEM_START + .checked_add(kernel_entry) + .ok_or(StartMicrovmError::PeGzInvalid)?, + ) + } else { + return Err(StartMicrovmError::PeGzInvalid); + }; + + debug!( + "loongarch pegz image_load_addr=0x{:x}, entry_addr=0x{:x}", + image_load_addr.0, entry_addr.0 + ); + + guest_mem + .write(&kernel_data, image_load_addr) + .map_err(|_| { + StartMicrovmError::KernelDoesNotFit( + image_load_addr.raw_value(), + kernel_data.len(), + ) + })?; + entry_addr } + #[cfg(target_arch = "x86_64")] KernelFormat::ImageBz2 => { let data: Vec = std::fs::read(external_kernel.path.clone()) @@ -1272,6 +1441,12 @@ fn load_external_kernel( None }; + debug!( + "external kernel initramfs_path={:?}, initrd_config_present={}", + external_kernel.initramfs_path, + initrd_config.is_some(), + ); + Ok((entry_addr, initrd_config, external_kernel.cmdline.clone())) } @@ -1290,7 +1465,11 @@ fn load_payload( StartMicrovmError, > { match payload { - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] Payload::KernelCopy => { let (kernel_entry_addr, kernel_host_addr, kernel_guest_addr, kernel_size) = if let Some(kernel_bundle) = &_vm_resources.kernel_bundle { @@ -1477,7 +1656,11 @@ pub fn create_guest_memory( Payload::Empty => arch::arch_memory_regions(mem_size, None, 0, 0, None), Payload::Firmware => arch::arch_memory_regions(mem_size, None, 0, 0, firmware_size), }; - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] let (arch_mem_info, mut arch_mem_regions) = match payload { Payload::ExternalKernel(external_kernel) => { arch::arch_memory_regions(mem_size, external_kernel.initramfs_size, None) @@ -1657,7 +1840,11 @@ fn attach_legacy_devices( } #[cfg(all( - any(target_arch = "aarch64", target_arch = "riscv64"), + any( + target_arch = "aarch64", + 
target_arch = "riscv64", + target_arch = "loongarch64" + ), target_os = "linux" ))] fn attach_legacy_devices( @@ -1849,6 +2036,36 @@ fn create_vcpus_riscv64( Ok(vcpus) } +#[cfg(all(target_arch = "loongarch64", target_os = "linux"))] +fn create_vcpus_loongarch64( + vm: &Vm, + vcpu_config: &VcpuConfig, + entry_addr: GuestAddress, + cmdline_addr: GuestAddress, + efi_system_table_addr: GuestAddress, + exit_evt: &EventFd, +) -> super::Result> { + use arch::loongarch64::linux::iocsr::LoongArchIocsrState; + use std::sync::Arc; + let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize); + //let iocsr_misc_func = Arc::new(AtomicU64::new(0)); + let iocsr_state = Arc::new(LoongArchIocsrState::new(vcpu_config.vcpu_count as usize)); + for cpu_index in 0..vcpu_config.vcpu_count { + let mut vcpu = Vcpu::new_loongarch64( + cpu_index, + vm.fd(), + exit_evt.try_clone().map_err(Error::EventFd)?, + iocsr_state.clone(), + ) + .map_err(Error::Vcpu)?; + + vcpu.configure_loongarch64(vm.fd(), entry_addr, cmdline_addr, efi_system_table_addr) + .map_err(Error::Vcpu)?; + vcpus.push(vcpu); + } + Ok(vcpus) +} + /// Attaches an virtio mmio device to the device manager. 
fn attach_mmio_device( vmm: &mut Vmm, @@ -1888,7 +2105,6 @@ fn attach_fs_devices( #[cfg(target_os = "macos")] map_sender: Sender, ) -> std::result::Result<(), StartMicrovmError> { use self::StartMicrovmError::*; - for (i, config) in fs_devs.iter().enumerate() { let fs = Arc::new(Mutex::new( devices::virtio::Fs::new( diff --git a/src/vmm/src/device_manager/kvm/mmio.rs b/src/vmm/src/device_manager/kvm/mmio.rs index e739afb42..8f26fb1ff 100644 --- a/src/vmm/src/device_manager/kvm/mmio.rs +++ b/src/vmm/src/device_manager/kvm/mmio.rs @@ -9,9 +9,17 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::{fmt, io}; -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" +))] use devices::fdt::DeviceInfoForFDT; -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" +))] use devices::legacy::IrqChip; use devices::{BusDevice, DeviceType}; use kernel::cmdline as kernel_cmdline; @@ -89,7 +97,11 @@ pub struct MMIODeviceManager { impl MMIODeviceManager { /// Create a new DeviceManager handling mmio devices (virtio net, block). 
pub fn new(mmio_base: &mut u64, irq_interval: (u32, u32)) -> MMIODeviceManager { - if cfg!(any(target_arch = "aarch64", target_arch = "riscv64")) { + if cfg!(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + )) { *mmio_base += MMIO_LEN; } MMIODeviceManager { @@ -139,6 +151,7 @@ impl MMIODeviceManager { .map_err(Error::RegisterIoEvent)?; } + #[cfg(not(target_arch = "loongarch64"))] vm.register_irqfd(mmio_device.interrupt_evt(), self.irq) .map_err(Error::RegisterIrqFd)?; @@ -183,11 +196,15 @@ impl MMIODeviceManager { .map_err(Error::Cmdline) } - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] /// Register an early console at some MMIO address. pub fn register_mmio_serial( &mut self, - vm: &VmFd, + _vm: &VmFd, cmdline: &mut kernel_cmdline::Cmdline, intc: IrqChip, serial: Arc>, @@ -196,7 +213,8 @@ impl MMIODeviceManager { return Err(Error::IrqsExhausted); } - vm.register_irqfd(serial.lock().unwrap().interrupt_evt(), self.irq) + #[cfg(not(target_arch = "loongarch64"))] + _vm.register_irqfd(serial.lock().unwrap().interrupt_evt(), self.irq) .map_err(Error::RegisterIrqFd)?; { @@ -216,6 +234,8 @@ impl MMIODeviceManager { &format!("pl011,mmio32,0x{:08x}", self.mmio_base), #[cfg(target_arch = "riscv64")] &format!("uart,mmio,0x{:08x}", self.mmio_base), + #[cfg(target_arch = "loongarch64")] + &format!("uart8250,mmio,0x{:08x}", self.mmio_base), ) .map_err(Error::Cmdline)?; @@ -268,7 +288,11 @@ impl MMIODeviceManager { Ok(()) } - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] /// Gets the information of the devices registered up to some point in time. 
pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MMIODeviceInfo> { &self.id_to_dev_info @@ -300,7 +324,11 @@ pub struct MMIODeviceInfo { _len: u64, } -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" +))] impl DeviceInfoForFDT for MMIODeviceInfo { fn addr(&self) -> u64 { self.addr @@ -329,9 +357,16 @@ mod tests { use std::sync::Arc; use utils::errno; use vm_memory::{GuestAddress, GuestMemoryMmap}; + #[cfg(target_arch = "loongarch64")] + const TEST_GUEST_MEM_BASE: u64 = arch::loongarch64::layout::DRAM_MEM_START; + #[cfg(not(target_arch = "loongarch64"))] + const TEST_GUEST_MEM_BASE: u64 = 0; const QUEUE_CONFIG: &[QueueConfig] = &[QueueConfig::new(64)]; + fn test_page_size() -> u64 { + unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 } + } impl MMIODeviceManager { fn register_virtio_device( &mut self, @@ -413,10 +448,14 @@ mod tests { #[test] fn test_register_virtio_device() { - let start_addr1 = GuestAddress(0x0); - let start_addr2 = GuestAddress(0x1000); - let guest_mem = - GuestMemoryMmap::from_ranges(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]).unwrap(); + let page_size = test_page_size(); + let start_addr1 = GuestAddress(TEST_GUEST_MEM_BASE); + let start_addr2 = GuestAddress(TEST_GUEST_MEM_BASE + page_size); + let guest_mem = GuestMemoryMmap::from_ranges(&[ + (start_addr1, page_size as usize), + (start_addr2, page_size as usize), + ]) + .unwrap(); let vm = builder::setup_vm(&guest_mem, false).unwrap(); let mut device_manager = MMIODeviceManager::new(&mut 0xd000_0000, (arch::IRQ_BASE, arch::IRQ_MAX)); @@ -425,7 +464,7 @@ mod tests { #[cfg(target_arch = "aarch64")] let _gic = KvmGicV3::new(vm.fd(), 1).unwrap(); - let mut cmdline = kernel_cmdline::Cmdline::new(4096); + let mut cmdline = kernel_cmdline::Cmdline::new(page_size as usize); let dummy = Arc::new(Mutex::new(DummyDevice::new())); assert!(device_manager @@ -435,10 +474,14 @@ mod 
tests { #[test] fn test_register_too_many_devices() { - let start_addr1 = GuestAddress(0x0); - let start_addr2 = GuestAddress(0x1000); - let guest_mem = - GuestMemoryMmap::from_ranges(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]).unwrap(); + let page_size = test_page_size(); + let start_addr1 = GuestAddress(TEST_GUEST_MEM_BASE); + let start_addr2 = GuestAddress(TEST_GUEST_MEM_BASE + page_size); + let guest_mem = GuestMemoryMmap::from_ranges(&[ + (start_addr1, page_size as usize), + (start_addr2, page_size as usize), + ]) + .unwrap(); let vm = builder::setup_vm(&guest_mem, false).unwrap(); let mut device_manager = MMIODeviceManager::new(&mut 0xd000_0000, (arch::IRQ_BASE, arch::IRQ_MAX)); @@ -447,7 +490,7 @@ mod tests { #[cfg(target_arch = "aarch64")] let _gic = KvmGicV3::new(vm.fd(), 1).unwrap(); - let mut cmdline = kernel_cmdline::Cmdline::new(4096); + let mut cmdline = kernel_cmdline::Cmdline::new(page_size as usize); for _i in arch::IRQ_BASE..=arch::IRQ_MAX { device_manager @@ -488,9 +531,10 @@ mod tests { #[test] fn test_error_messages() { + let page_size = test_page_size(); let device_manager = MMIODeviceManager::new(&mut 0xd000_0000, (arch::IRQ_BASE, arch::IRQ_MAX)); - let mut cmdline = kernel_cmdline::Cmdline::new(4096); + let mut cmdline = kernel_cmdline::Cmdline::new(page_size as usize); let e = Error::Cmdline( cmdline .insert( @@ -538,14 +582,18 @@ mod tests { #[test] fn test_device_info() { - let start_addr1 = GuestAddress(0x0); - let start_addr2 = GuestAddress(0x1000); - let guest_mem = - GuestMemoryMmap::from_ranges(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]).unwrap(); + let page_size = test_page_size(); + let start_addr1 = GuestAddress(TEST_GUEST_MEM_BASE); + let start_addr2 = GuestAddress(TEST_GUEST_MEM_BASE + page_size); + let guest_mem = GuestMemoryMmap::from_ranges(&[ + (start_addr1, page_size as usize), + (start_addr2, page_size as usize), + ]) + .unwrap(); let vm = builder::setup_vm(&guest_mem, false).unwrap(); let mut device_manager = 
MMIODeviceManager::new(&mut 0xd000_0000, (arch::IRQ_BASE, arch::IRQ_MAX)); - let mut cmdline = kernel_cmdline::Cmdline::new(4096); + let mut cmdline = kernel_cmdline::Cmdline::new(page_size as usize); let dummy = Arc::new(Mutex::new(DummyDevice::new())); let type_id = 0; diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index d029ce718..ff9caccbe 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -54,7 +54,11 @@ use crate::vstate::{Vcpu, VcpuHandle, VcpuResponse, Vm}; use arch::{ArchMemoryInfo, InitrdConfig}; #[cfg(target_os = "macos")] use crossbeam_channel::Sender; -#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] +#[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" +))] use devices::fdt; use devices::legacy::IrqChip; use devices::virtio::VmmExitObserver; @@ -113,7 +117,11 @@ pub enum Error { RegisterMMIODevice(device_manager::mmio::Error), /// Write to the serial console failed. Serial(io::Error), - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] /// Cannot generate or write FDT SetupFDT(devices::fdt::Error), /// Cannot create Timer file descriptor. @@ -154,7 +162,11 @@ impl Display for Error { LoadCommandline(e) => write!(f, "Cannot load command line: {e}"), RegisterMMIODevice(e) => write!(f, "Cannot add a device to the MMIO Bus. 
{e}"), Serial(e) => write!(f, "Error writing to the serial console: {e:?}"), - #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv64", + target_arch = "loongarch64" + ))] SetupFDT(e) => write!(f, "Error generating or writing FDT: {e:?}"), TimerFd(e) => write!(f, "Error creating timer fd: {e}"), Vcpu(e) => write!(f, "Vcpu error: {e}"), @@ -334,6 +346,27 @@ impl Vmm { .map_err(Error::ConfigureSystem)?; } + #[cfg(target_arch = "loongarch64")] + { + fdt::create_fdt( + &self.guest_memory, + &self.arch_memory_info, + vcpus.len() as u32, + self.kernel_cmdline.as_str(), + self.mmio_device_manager.get_device_info(), + _intc, + initrd, + ) + .map_err(Error::SetupFDT)?; + + arch::loongarch64::configure_system( + &self.guest_memory, + &self.arch_memory_info, + _smbios_oem_strings, + ) + .map_err(Error::ConfigureSystem)?; + } + Ok(()) } diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 02282b668..a0202a5c2 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -5,15 +5,26 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
+#[cfg(target_arch = "loongarch64")] +use arch::loongarch64::linux::iocsr::{ + process_iocsr_read, process_iocsr_write, LoongArchIocsrState, +}; #[cfg(target_arch = "aarch64")] use arch::ArchMemoryInfo; use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; use libc::{c_int, c_void, siginfo_t}; use std::cell::Cell; use std::fmt::{Display, Formatter}; +#[cfg(target_arch = "loongarch64")] +use std::fs::File; use std::io; use std::ops::Range; +#[cfg(target_arch = "loongarch64")] +use std::os::fd::{AsRawFd, FromRawFd}; +#[cfg(target_arch = "loongarch64")] +use std::sync::Arc; + use std::os::unix::io::RawFd; #[cfg(target_arch = "x86_64")] @@ -42,6 +53,8 @@ use crate::resources::TeeConfig; use crate::vmm_config::machine_config::CpuFeaturesTemplate; #[cfg(target_arch = "x86_64")] use cpuid::{c3, filter_cpuid, t2, VmSpec}; +#[cfg(not(feature = "tee"))] +use kvm_bindings::kvm_userspace_memory_region; #[cfg(target_arch = "x86_64")] use kvm_bindings::{ kvm_clock_data, kvm_debugregs, kvm_irqchip, kvm_lapic_state, kvm_mp_state, kvm_pit_state2, @@ -49,14 +62,13 @@ use kvm_bindings::{ KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_MAX_CPUID_ENTRIES, }; -use kvm_bindings::{ - kvm_create_guest_memfd, kvm_userspace_memory_region, kvm_userspace_memory_region2, - KVM_API_VERSION, KVM_MEM_GUEST_MEMFD, KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN, -}; +#[cfg(all(feature = "tee", target_arch = "x86_64"))] +use kvm_bindings::{kvm_create_guest_memfd, kvm_userspace_memory_region2, KVM_MEM_GUEST_MEMFD}; #[cfg(feature = "tee")] use kvm_bindings::{kvm_enable_cap, KVM_CAP_EXIT_HYPERCALL, KVM_MEMORY_EXIT_FLAG_PRIVATE}; -#[cfg(not(target_arch = "riscv64"))] +#[cfg(all(feature = "tee", target_arch = "x86_64"))] use kvm_bindings::{kvm_memory_attributes, KVM_MEMORY_ATTRIBUTE_PRIVATE}; +use kvm_bindings::{KVM_API_VERSION, KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; use kvm_ioctls::{Cap::*, *}; use utils::eventfd::EventFd; use 
utils::signal::{register_signal_handler, sigrtmin, Killable}; @@ -121,6 +133,9 @@ pub enum Error { #[cfg(target_arch = "riscv64")] /// Error configuring the general purpose riscv64 registers. REGSConfiguration(arch::riscv64::regs::Error), + #[cfg(target_arch = "loongarch64")] + /// Error configuring the general purpose loongarch64 registers. + REGSConfiguration(arch::loongarch64::regs::Error), #[cfg(target_arch = "x86_64")] /// Error configuring the general purpose registers REGSConfiguration(arch::x86_64::regs::Error), @@ -163,6 +178,8 @@ pub enum Error { VcpuCountNotInitialized, /// Cannot open the VCPU file descriptor. VcpuFd(kvm_ioctls::Error), + /// Cannot clone the VCPU file descriptor. + VcpuFdClone(io::Error), #[cfg(target_arch = "x86_64")] /// Failed to get KVM vcpu debug regs. VcpuGetDebugRegs(kvm_ioctls::Error), @@ -284,6 +301,7 @@ impl Display for Error { VcpuCountNotInitialized => write!(f, "vCPU count is not initialized"), VmFd(e) => write!(f, "Cannot open the VM file descriptor: {e}"), VcpuFd(e) => write!(f, "Cannot open the VCPU file descriptor: {e}"), + VcpuFdClone(e) => write!(f, "Cannot clone the VCPU file descriptor: {e}"), VmSetup(e) => write!(f, "Cannot configure the microvm: {e}"), VmSplitIrqchip(e) => write!(f, "Failed to enable split IRQCHIP: {e}"), VmApicBusClockRate(e) => write!( @@ -343,6 +361,11 @@ impl Display for Error { f, "Error configuring the general purpose riscv64 registers: {e:?}" ), + #[cfg(target_arch = "loongarch64")] + REGSConfiguration(e) => write!( + f, + "Error configuring the general purpose loongarch64 registers: {e:?}" + ), #[cfg(target_arch = "x86_64")] REGSConfiguration(e) => { write!(f, "Error configuring the general purpose registers: {e:?}") @@ -461,6 +484,8 @@ impl KvmContext { #[cfg(target_arch = "riscv64")] let capabilities = [Irqchip, Ioeventfd, Irqfd, UserMemory]; + #[cfg(target_arch = "loongarch64")] + let capabilities = [Irqchip, Ioeventfd, Irqfd, UserMemory]; // Check that all desired capabilities are 
supported. match capabilities .iter() @@ -669,7 +694,6 @@ impl Vm { ) -> Result<()> { let host_addr = guest_mem.get_host_address(region.start_addr()).unwrap(); let start = region.start_addr().raw_value(); - let end = start + region.len(); // GuestMemfd is generally intended for either of two purposes: // * sharing the memory with out-of-process components, and conversely, @@ -681,7 +705,8 @@ impl Vm { // as of late 2025. Also, on other architectures like aarch64, guest_memfd in // general is unstable for now, so don't try to use it without a reason. - if cfg!(not(feature = "tee")) { + #[cfg(not(feature = "tee"))] + { let memory_region = kvm_userspace_memory_region { slot: self.next_mem_slot, guest_phys_addr: start, @@ -697,7 +722,12 @@ impl Vm { .set_user_memory_region(memory_region) .map_err(Error::SetUserMemoryRegion)?; }; - } else { + } + + #[cfg(all(feature = "tee", target_arch = "x86_64"))] + { + let end = start + region.len(); + if !self.fd.check_extension(GuestMemfd) { return Err(Error::KvmCap(GuestMemfd)); } @@ -746,6 +776,11 @@ impl Vm { self.guest_memfds.push((Range { start, end }, guest_memfd)); } + #[cfg(all(feature = "tee", not(target_arch = "x86_64")))] + { + return Err(Error::InvalidTee); + } + self.next_mem_slot += 1; Ok(()) @@ -949,6 +984,8 @@ pub struct Vcpu { #[cfg(feature = "tee")] pm_sender: Sender, + #[cfg(target_arch = "loongarch64")] + iocsr_state: Arc, } impl Vcpu { @@ -1137,6 +1174,30 @@ impl Vcpu { }) } + #[cfg(target_arch = "loongarch64")] + pub fn new_loongarch64( + id: u8, + vm_fd: &VmFd, + exit_evt: EventFd, + iocsr_state: Arc, + ) -> Result { + let kvm_vcpu = vm_fd.create_vcpu(id as u64).map_err(Error::VcpuFd)?; + let (event_sender, event_receiver) = unbounded(); + let (response_sender, response_receiver) = unbounded(); + + Ok(Vcpu { + fd: kvm_vcpu, + id, + mmio_bus: None, + exit_evt, + event_receiver, + event_sender: Some(event_sender), + response_receiver: Some(response_receiver), + response_sender, + iocsr_state, + }) + } + /// 
Returns the cpu index as seen by the guest OS. pub fn cpu_index(&self) -> u8 { self.id @@ -1153,6 +1214,15 @@ impl Vcpu { self.mmio_bus = Some(mmio_bus); } + #[cfg(target_arch = "loongarch64")] + pub fn try_clone_irq_vcpu_file(&self) -> Result { + let vcpufd = unsafe { libc::dup(self.fd.as_raw_fd()) }; + if vcpufd < 0 { + return Err(Error::VcpuFdClone(io::Error::last_os_error())); + } + Ok(unsafe { File::from_raw_fd(vcpufd) }) + } + #[cfg(target_arch = "x86_64")] #[allow(unused_variables)] /// Configures a x86_64 specific vcpu and should be called once per vcpu. @@ -1266,6 +1336,26 @@ impl Vcpu { Ok(()) } + #[cfg(target_arch = "loongarch64")] + pub fn configure_loongarch64( + &mut self, + _vm_fd: &VmFd, + kernel_load_addr: GuestAddress, + cmdline_addr: GuestAddress, + efi_system_table_addr: GuestAddress, + ) -> Result<()> { + arch::loongarch64::regs::setup_regs( + &self.fd, + self.id, + kernel_load_addr.raw_value(), + cmdline_addr.raw_value(), + true, + efi_system_table_addr.raw_value(), + ) + .map_err(Error::REGSConfiguration)?; + Ok(()) + } + /// Moves the vcpu to its own thread and constructs a VcpuHandle. /// The handle can be used to control the remote vcpu. 
pub fn start_threaded(mut self) -> Result { @@ -1430,54 +1520,24 @@ impl Vcpu { } match self.fd.run() { - Ok(run) => match run { - #[cfg(feature = "tee")] - VcpuExit::Hypercall(hypercall) => { - if hypercall.nr != 12 - /* KVM_HC_MAP_GPA_RANGE */ - { - return Err(Error::VcpuUnsupportedHypercall); - } - - let gpa = hypercall.args[0]; - let size = hypercall.args[1] * 0x1000; /* TARGET_PAGE_SIZE */ - let attributes = hypercall.args[2]; + Ok(run) => { + let emulation = match run { + #[cfg(feature = "tee")] + VcpuExit::Hypercall(hypercall) => { + if hypercall.nr != 12 + /* KVM_HC_MAP_GPA_RANGE */ + { + return Err(Error::VcpuUnsupportedHypercall); + } - let private = !matches!(attributes, 0); + let gpa = hypercall.args[0]; + let size = hypercall.args[1] * 0x1000; /* TARGET_PAGE_SIZE */ + let attributes = hypercall.args[2]; - let mem_properties = MemoryProperties { gpa, size, private }; + let private = !matches!(attributes, 0); - let (response_sender, response_receiver) = unbounded(); - self.pm_sender - .send(WorkerMessage::ConvertMemory( - response_sender.clone(), - mem_properties, - )) - .unwrap(); - if !response_receiver.recv().unwrap() { - error!("Unable to convert memory with properties: gpa: 0x{gpa:x} size: 0x{size:x} to_private: {private}"); - return Err(Error::VcpuUnhandledKvmExit); - } - Ok(VcpuEmulation::Handled) - } - #[cfg(target_arch = "x86_64")] - VcpuExit::IoIn(addr, data) => { - self.io_bus.read(0, u64::from(addr), data); - Ok(VcpuEmulation::Handled) - } - #[cfg(target_arch = "x86_64")] - VcpuExit::IoOut(addr, data) => { - self.io_bus.write(0, u64::from(addr), data); - Ok(VcpuEmulation::Handled) - } - #[cfg(feature = "tee")] - VcpuExit::MemoryFault { gpa, size, flags } => { - if flags & !kvm_bindings::KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 != 0 { - println!("KVM_EXIT_MEMORY_FAULT: Unknown flag {flags}"); - Err(Error::VcpuUnhandledKvmExit) - } else { - let private = (flags & (KVM_MEMORY_EXIT_FLAG_PRIVATE as u64)) != 0; let mem_properties = MemoryProperties { 
gpa, size, private }; + let (response_sender, response_receiver) = unbounded(); self.pm_sender .send(WorkerMessage::ConvertMemory( @@ -1491,52 +1551,120 @@ impl Vcpu { } Ok(VcpuEmulation::Handled) } - } - VcpuExit::MmioRead(addr, data) => { - if let Some(ref mmio_bus) = self.mmio_bus { - mmio_bus.read(0, addr, data); + #[cfg(target_arch = "x86_64")] + VcpuExit::IoIn(addr, data) => { + self.io_bus.read(0, u64::from(addr), data); + Ok(VcpuEmulation::Handled) } - Ok(VcpuEmulation::Handled) - } - VcpuExit::MmioWrite(addr, data) => { - if let Some(ref mmio_bus) = self.mmio_bus { - mmio_bus.write(0, addr, data); + #[cfg(target_arch = "x86_64")] + VcpuExit::IoOut(addr, data) => { + self.io_bus.write(0, u64::from(addr), data); + Ok(VcpuEmulation::Handled) } - Ok(VcpuEmulation::Handled) - } - VcpuExit::Hlt => { - info!("Received KVM_EXIT_HLT signal"); - Ok(VcpuEmulation::Stopped) - } - VcpuExit::Shutdown => { - info!("Received KVM_EXIT_SHUTDOWN signal"); - Ok(VcpuEmulation::Stopped) - } - // Documentation specifies that below kvm exits are considered - // errors. 
- VcpuExit::FailEntry(reason, vcpu) => { - error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}"); - Err(Error::VcpuUnhandledKvmExit) - } - VcpuExit::InternalError => { - error!("Received KVM_EXIT_INTERNAL_ERROR signal"); - Err(Error::VcpuUnhandledKvmExit) - } - VcpuExit::SystemEvent(event, _reason) => { - match event { - KVM_SYSTEM_EVENT_SHUTDOWN => info!("Received KVM_SYSTEM_EVENT_SHUTDOWN"), - KVM_SYSTEM_EVENT_RESET => info!("Received KVM_SYSTEM_EVENT_RESET"), - _ => error!("Received an unexpected System Event: {event}"), + #[cfg(feature = "tee")] + VcpuExit::MemoryFault { gpa, size, flags } => { + if flags & !kvm_bindings::KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 != 0 { + println!("KVM_EXIT_MEMORY_FAULT: Unknown flag {flags}"); + Err(Error::VcpuUnhandledKvmExit) + } else { + let private = (flags & (KVM_MEMORY_EXIT_FLAG_PRIVATE as u64)) != 0; + let mem_properties = MemoryProperties { gpa, size, private }; + let (response_sender, response_receiver) = unbounded(); + self.pm_sender + .send(WorkerMessage::ConvertMemory( + response_sender.clone(), + mem_properties, + )) + .unwrap(); + if !response_receiver.recv().unwrap() { + error!("Unable to convert memory with properties: gpa: 0x{gpa:x} size: 0x{size:x} to_private: {private}"); + return Err(Error::VcpuUnhandledKvmExit); + } + Ok(VcpuEmulation::Handled) + } } - Ok(VcpuEmulation::Stopped) - } - r => { - // TODO: Are we sure we want to finish running a vcpu upon - // receiving a vm exit that is not necessarily an error? 
- error!("Unexpected exit reason on vcpu run: {r:?}"); - Err(Error::VcpuUnhandledKvmExit) - } - }, + VcpuExit::MmioRead(addr, data) => { + if let Some(ref mmio_bus) = self.mmio_bus { + mmio_bus.read(0, addr, data); + } + Ok(VcpuEmulation::Handled) + } + VcpuExit::MmioWrite(addr, data) => { + if let Some(ref mmio_bus) = self.mmio_bus { + mmio_bus.write(0, addr, data); + } + Ok(VcpuEmulation::Handled) + } + #[cfg(target_arch = "loongarch64")] + VcpuExit::IocsrRead(addr, data) => { + match process_iocsr_read(addr, data, &self.iocsr_state, self.id) { + arch::loongarch64::linux::iocsr::IocsrReadResult::Value(value) => { + debug!("LoongArch IOCSR read: addr=0x{addr:x}, len={}, value=0x{value:x}", data.len()); + Ok(VcpuEmulation::Handled) + } + arch::loongarch64::linux::iocsr::IocsrReadResult::Unhandled => { + error!( + "Unhandled LoongArch IOCSR read: addr=0x{addr:x}, len={}", + data.len() + ); + Err(Error::VcpuUnhandledKvmExit) + } + } + } + #[cfg(target_arch = "loongarch64")] + VcpuExit::IocsrWrite(addr, data) => { + match process_iocsr_write(addr, data, &self.iocsr_state, self.id) { + arch::loongarch64::linux::iocsr::IocsrWriteResult::Handled => { + let value = u64::from_le_bytes(data.try_into().unwrap()); + debug!("LoongArch IOCSR write: addr=0x{addr:x}, value=0x{value:x}"); + Ok(VcpuEmulation::Handled) + } + arch::loongarch64::linux::iocsr::IocsrWriteResult::Unhandled => { + error!( + "Unhandled LoongArch IOCSR write: addr=0x{addr:x}, len={}", + data.len() + ); + Err(Error::VcpuUnhandledKvmExit) + } + } + } + VcpuExit::Hlt => { + info!("Received KVM_EXIT_HLT signal"); + Ok(VcpuEmulation::Stopped) + } + VcpuExit::Shutdown => { + info!("Received KVM_EXIT_SHUTDOWN signal"); + Ok(VcpuEmulation::Stopped) + } + // Documentation specifies that below kvm exits are considered + // errors. 
+ VcpuExit::FailEntry(reason, vcpu) => { + error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}"); + Err(Error::VcpuUnhandledKvmExit) + } + VcpuExit::InternalError => { + error!("Received KVM_EXIT_INTERNAL_ERROR signal"); + Err(Error::VcpuUnhandledKvmExit) + } + VcpuExit::SystemEvent(event, _reason) => { + match event { + KVM_SYSTEM_EVENT_SHUTDOWN => { + info!("Received KVM_SYSTEM_EVENT_SHUTDOWN") + } + KVM_SYSTEM_EVENT_RESET => info!("Received KVM_SYSTEM_EVENT_RESET"), + _ => error!("Received an unexpected System Event: {event}"), + } + Ok(VcpuEmulation::Stopped) + } + r => { + // TODO: Are we sure we want to finish running a vcpu upon + // receiving a vm exit that is not necessarily an error? + error!("Unexpected exit reason on vcpu run: {r:?}"); + Err(Error::VcpuUnhandledKvmExit) + } + }; + emulation + } // The unwrap on raw_os_error can only fail if we have a logic // error in our code in which case it is better to panic. Err(ref e) => { @@ -1786,15 +1914,23 @@ mod tests { use std::sync::{Arc, Barrier}; use super::*; - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "loongarch64"))] use crate::builder::create_guest_memory; - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "loongarch64"))] use crate::builder::Payload; - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "loongarch64"))] use crate::resources::VmResources; use devices; #[cfg(target_arch = "x86_64")] use devices::legacy::KvmIoapic; + #[cfg(target_arch = "loongarch64")] + const TEST_GUEST_MEM_BASE: u64 = arch::loongarch64::layout::DRAM_MEM_START; + #[cfg(not(target_arch = "loongarch64"))] + const TEST_GUEST_MEM_BASE: u64 = 0; + + fn test_page_size() -> u64 { + unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 } + } use utils::signal::validate_signal_num; @@ -1814,10 +1950,13 @@ mod tests { // Auxiliary function being used throughout the tests. 
fn setup_vcpu(mem_size: usize) -> (Vm, Vcpu, GuestMemoryMmap) { let kvm = KvmContext::new().unwrap(); - let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), mem_size)]).unwrap(); + let gm = + GuestMemoryMmap::from_ranges(&[(GuestAddress(TEST_GUEST_MEM_BASE), mem_size)]).unwrap(); let mut vm = Vm::new(kvm.fd()).expect("Cannot create new vm"); #[cfg(target_arch = "x86_64")] let _kvmioapic = KvmIoapic::new(&vm.fd()).unwrap(); + #[cfg(target_arch = "loongarch64")] + let loongarch_iocsr = Arc::new(LoongArchIocsrState::new(1)); assert!(vm.memory_init(&gm, kvm.max_memslots()).is_ok()); let exit_evt = EventFd::new(utils::eventfd::EFD_NONBLOCK).unwrap(); @@ -1839,13 +1978,18 @@ mod tests { { vcpu = Vcpu::new_aarch64(1, vm.fd(), exit_evt).unwrap(); } + #[cfg(target_arch = "loongarch64")] + { + vcpu = Vcpu::new_loongarch64(1, vm.fd(), exit_evt, loongarch_iocsr.clone()).unwrap(); + } (vm, vcpu, gm) } #[test] fn test_set_mmio_bus() { - let (_, mut vcpu, _) = setup_vcpu(0x1000); + let page_size = test_page_size(); + let (_, mut vcpu, _) = setup_vcpu(page_size as usize); assert!(vcpu.mmio_bus.is_none()); vcpu.set_mmio_bus(devices::Bus::new()); assert!(vcpu.mmio_bus.is_some()); @@ -1869,16 +2013,24 @@ mod tests { let mut kvm_context = KvmContext::new().unwrap(); let mut vm = Vm::new(kvm_context.fd()).expect("Cannot create new vm"); + let page_size = test_page_size(); // Create valid memory region and test that the initialization is successful. - let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap(); + let gm = GuestMemoryMmap::from_ranges(&[( + GuestAddress(TEST_GUEST_MEM_BASE), + page_size as usize, + )]) + .unwrap(); assert!(vm.memory_init(&gm, kvm_context.max_memslots()).is_ok()); // Set the maximum number of memory slots to 1 in KvmContext to check the error // path of memory_init. Create 2 non-overlapping memory slots. 
kvm_context.max_memslots = 1; let gm = GuestMemoryMmap::from_ranges(&[ - (GuestAddress(0x0), 0x1000), - (GuestAddress(0x1001), 0x2000), + (GuestAddress(TEST_GUEST_MEM_BASE), page_size as usize), + ( + GuestAddress(TEST_GUEST_MEM_BASE + page_size + 1), + (2 * page_size) as usize, + ), ]) .unwrap(); assert!(vm.memory_init(&gm, kvm_context.max_memslots()).is_err()); @@ -1896,19 +2048,34 @@ mod tests { }; assert!(vcpu - .configure_x86_64(&vm_mem, GuestAddress(0), &vcpu_config, true) + .configure_x86_64( + &vm_mem, + GuestAddress(TEST_GUEST_MEM_BASE), + &vcpu_config, + true + ) .is_ok()); // Test configure while using the T2 template. vcpu_config.cpu_template = Some(CpuFeaturesTemplate::T2); assert!(vcpu - .configure_x86_64(&vm_mem, GuestAddress(0), &vcpu_config, true) + .configure_x86_64( + &vm_mem, + GuestAddress(TEST_GUEST_MEM_BASE), + &vcpu_config, + true + ) .is_ok()); // Test configure while using the C3 template. vcpu_config.cpu_template = Some(CpuFeaturesTemplate::C3); assert!(vcpu - .configure_x86_64(&vm_mem, GuestAddress(0), &vcpu_config, true) + .configure_x86_64( + &vm_mem, + GuestAddress(TEST_GUEST_MEM_BASE), + &vcpu_config, + true + ) .is_ok()); } @@ -1931,7 +2098,11 @@ mod tests { .unwrap(); assert!(vcpu - .configure_aarch64(vm.fd(), &arch_memory_info, GuestAddress(0)) + .configure_aarch64( + vm.fd(), + &arch_memory_info, + GuestAddress(TEST_GUEST_MEM_BASE) + ) .is_ok()); // Try it for when vcpu id is NOT 0. 
@@ -1943,13 +2114,51 @@ mod tests { .unwrap(); assert!(vcpu - .configure_aarch64(vm.fd(), &arch_memory_info, GuestAddress(0)) + .configure_aarch64( + vm.fd(), + &arch_memory_info, + GuestAddress(TEST_GUEST_MEM_BASE) + ) + .is_ok()); + } + + #[cfg(target_arch = "loongarch64")] + #[test] + fn test_configure_vcpu() { + let kvm = KvmContext::new().unwrap(); + let vm_resources = VmResources::default(); + let (guest_memory, arch_memory_info, _shm_manager, _) = + create_guest_memory(128, &vm_resources, &Payload::Empty).unwrap(); + let cmdline_addr = GuestAddress( + arch_memory_info.efi_system_table_addr - arch::loongarch64::layout::CMDLINE_GUEST_SIZE, + ); + let efi_system_table_addr = GuestAddress(arch_memory_info.efi_system_table_addr); + let mut vm = Vm::new(kvm.fd()).expect("new vm failed"); + assert!(vm.memory_init(&guest_memory, kvm.max_memslots()).is_ok()); + + // Try it for when vcpu id is 0. + let mut vcpu = Vcpu::new_loongarch64( + 0, + vm.fd(), + EventFd::new(utils::eventfd::EFD_NONBLOCK).unwrap(), + Arc::new(LoongArchIocsrState::new(1)), + ) + .unwrap(); + + assert!(vcpu + .configure_loongarch64( + vm.fd(), + GuestAddress(arch::RESET_VECTOR), + cmdline_addr, + efi_system_table_addr + ) .is_ok()); } #[test] fn test_vcpu_tls() { - let (_, mut vcpu, _) = setup_vcpu(0x1000); + let page_size = test_page_size(); + let (_, mut vcpu, _) = setup_vcpu(page_size as usize); // Running on the TLS vcpu should fail before we actually initialize it. unsafe { @@ -1980,7 +2189,8 @@ mod tests { #[test] fn test_invalid_tls() { - let (_, mut vcpu, _) = setup_vcpu(0x1000); + let page_size = test_page_size(); + let (_, mut vcpu, _) = setup_vcpu(page_size as usize); // Initialize vcpu TLS. vcpu.init_thread_local_data().unwrap(); // Trying to initialize non-empty TLS should error. 
@@ -1990,7 +2200,8 @@ mod tests { #[test] fn test_vcpu_kick() { Vcpu::register_kick_signal_handler(); - let (vm, mut vcpu, _mem) = setup_vcpu(0x1000); + let page_size = test_page_size(); + let (vm, mut vcpu, _mem) = setup_vcpu(page_size as usize); let mut kvm_run = KvmRunWrapper::mmap_from_fd(&vcpu.fd, vm.fd.run_size()).expect("cannot mmap kvm-run"); diff --git a/tests/run.sh b/tests/run.sh index cd4a58606..f09327987 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -23,6 +23,9 @@ if [ -n "${LIBKRUN_LIB_PATH}" ]; then fi GUEST_TARGET="${ARCH}-unknown-linux-musl" +if [ "$ARCH" = "loongarch64" ]; then +    GUEST_TARGET="${ARCH}-unknown-linux-gnu" +fi # Run the unit tests first (this tests the testing framework itself not libkrun) cargo test -p test_cases --features guest diff --git a/tests/test_cases/src/common.rs b/tests/test_cases/src/common.rs index 6a3ee2483..f19af74e0 100644 --- a/tests/test_cases/src/common.rs +++ b/tests/test_cases/src/common.rs @@ -2,21 +2,96 @@ use anyhow::Context; use std::ffi::CString; -use std::fs; -use std::fs::create_dir; +use std::fs::{self, create_dir, create_dir_all}; use std::os::unix::ffi::OsStrExt; -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::process::Command; use std::ptr::null; use crate::{krun_call, TestSetup}; use krun_sys::*; -fn copy_guest_agent(dir: &Path) -> anyhow::Result<()> { +fn copy_guest_agent(dir: &Path) -> anyhow::Result<PathBuf> { let path = std::env::var_os("KRUN_TEST_GUEST_AGENT_PATH") .context("KRUN_TEST_GUEST_AGENT_PATH env variable not set")?; let output_path = dir.join("guest-agent"); - fs::copy(path, output_path).context("Failed to copy executable into vm")?; + fs::copy(path, &output_path).context("Failed to copy executable into vm")?; + Ok(output_path) +} + +fn guest_agent_runtime_deps(guest_agent: &Path) -> anyhow::Result<Vec<PathBuf>> { + let output = Command::new("ldd") + .arg(guest_agent) + .output() + .context("Failed to execute ldd for guest-agent")?; + if !output.status.success() { + let stderr = 
String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + if stderr.contains("not a dynamic executable") + || stdout.contains("not a dynamic executable") + || stderr.contains("statically linked") + || stdout.contains("statically linked") + { + return Ok(Vec::new()); + } + anyhow::bail!("ldd guest-agent failed: {}", stderr); + } + + let stdout = String::from_utf8(output.stdout).context("ldd output is not utf8")?; + let mut deps = Vec::new(); + + for line in stdout.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + if let Some((_, rhs)) = line.split_once("=>") { + let rhs = rhs.trim(); + if rhs.starts_with("not found") { + anyhow::bail!("Missing runtime dependency: {line}"); + } + if let Some(path) = rhs.split_whitespace().next() { + if path.starts_with('/') { + deps.push(PathBuf::from(path)); + } + } + continue; + } + if let Some(path) = line.split_whitespace().next() { + if path.starts_with('/') { + deps.push(PathBuf::from(path)); + } + } + } + + deps.sort(); + deps.dedup(); + Ok(deps) +} + +fn copy_host_path_into_root(root_dir: &Path, host_path: &Path) -> anyhow::Result<()> { + let rel = host_path + .strip_prefix("/") + .context("runtime dependency path is not absolute")?; + let dst = root_dir.join(rel); + if let Some(parent) = dst.parent() { + create_dir_all(parent).context("Failed to create parent directory in rootfs")?; + } + fs::copy(host_path, &dst).with_context(|| { + format!( + "Failed to copy runtime dependency {} into rootfs", + host_path.display() + ) + })?; + Ok(()) +} + +fn copy_guest_agent_runtime(root_dir: &Path, guest_agent: &Path) -> anyhow::Result<()> { + let deps = guest_agent_runtime_deps(guest_agent)?; + for dep in deps { + copy_host_path_into_root(root_dir, &dep)?; + } Ok(()) } @@ -32,13 +107,13 @@ pub fn setup_fs_and_enter(ctx: u32, test_setup: TestSetup) -> anyhow::Result<()> create_dir(&root_dir).context("Failed to create root directory")?; let path_str = 
CString::new(root_dir.as_os_str().as_bytes()).context("CString::new")?; - copy_guest_agent(&root_dir)?; + let guest_agent = copy_guest_agent(&root_dir)?; + copy_guest_agent_runtime(&root_dir, &guest_agent)?; unsafe { krun_call!(krun_set_root(ctx, path_str.as_ptr()))?; krun_call!(krun_set_workdir(ctx, c"/".as_ptr()))?; let test_case_cstr = CString::new(test_setup.test_case).context("CString::new")?; let argv = [test_case_cstr.as_ptr(), null()]; - //let envp = [c"RUST_BACKTRACE=1".as_ptr(), null()]; let envp = [null()]; krun_call!(krun_set_exec( ctx, diff --git a/tests/test_cases/src/lib.rs b/tests/test_cases/src/lib.rs index f164ed87e..760a7ec38 100644 --- a/tests/test_cases/src/lib.rs +++ b/tests/test_cases/src/lib.rs @@ -46,6 +46,13 @@ pub fn test_cases() -> Vec<TestCase> { ram_mib: 1024, }), ), + TestCase::new( + "configure-vm-1cpu-1GiB", + Box::new(TestVmConfig { + num_cpus: 1, + ram_mib: 1024, + }), + ), TestCase::new("vsock-guest-connect", Box::new(TestVsockGuestConnect)), TestCase::new( "tsi-tcp-guest-connect", diff --git a/tests/test_cases/src/test_vm_config.rs b/tests/test_cases/src/test_vm_config.rs index 9ccae5de1..afa5ed2f2 100644 --- a/tests/test_cases/src/test_vm_config.rs +++ b/tests/test_cases/src/test_vm_config.rs @@ -33,7 +33,7 @@ mod guest { use crate::Test; use std::fs; use std::fs::File; - use std::io::{BufRead, BufReader}; + use std::io::{BufRead, BufReader, Write}; use std::str::FromStr; fn detect_num_cpus() -> u32 { @@ -70,10 +70,17 @@ mod guest { assert_eq!(detect_num_cpus(), self.num_cpus as u32); let ram_available = detect_ram_size_mib(); - // Check if ram is within 15% of specified + // Check if ram is within the expected range for this architecture. 
assert!(self.ram_mib >= (ram_available as f64 * 0.85) as u32); + #[cfg(target_arch = "loongarch64")] + { + let upper = if self.ram_mib <= 256 { 2.0 } else { 1.5 }; + assert!(self.ram_mib <= (ram_available as f64 * upper) as u32); + } + #[cfg(not(target_arch = "loongarch64"))] assert!(self.ram_mib <= (ram_available as f64 * 1.15) as u32); println!("OK"); + std::io::stdout().flush().expect("flush stdout"); } } }