func SaveState(vm *platform.Vm, model *machine.Model) (State, error) { // Pause the vm. // NOTE: Our model will also be stopped automatically // with model.DeviceInfo() below, but we manually pause // the Vcpus here in order to ensure they are completely // stopped prior to saving all device state. err := vm.Pause(false) if err != nil { return State{}, err } defer vm.Unpause(false) // Grab our vcpu states. vcpus, err := vm.VcpuInfo() if err != nil { return State{}, err } // Grab our devices. // NOTE: This should block until devices have // actually quiesed (finished processing outstanding // requests generated by the VCPUs). devices, err := model.DeviceInfo(vm) if err != nil { return State{}, err } // Done. return State{Vcpus: vcpus, Devices: devices}, nil }
func Loop( vm *platform.Vm, vcpu *platform.Vcpu, model *machine.Model, tracer *loader.Tracer) error { // It's not really kosher to switch threads constantly when running a // KVM VCPU. So we simply lock this goroutine to a single system // thread. That way we know it won't be bouncing around. runtime.LockOSThread() defer runtime.UnlockOSThread() log.Printf("Vcpu[%d] running.", vcpu.Id) for { // Enter the guest. err := vcpu.Run() // Trace if requested. trace_err := tracer.Trace(vcpu, vcpu.IsStepping()) if trace_err != nil { return trace_err } // No reason for exit? if err == nil { return ExitWithoutReason } // Handle the error. switch err.(type) { case *platform.ExitPio: err = model.HandlePio(vm, err.(*platform.ExitPio)) case *platform.ExitMmio: err = model.HandleMmio(vm, err.(*platform.ExitMmio)) case *platform.ExitDebug: err = nil case *platform.ExitShutdown: // Vcpu shutdown. return nil } // Error handling the exit. if err != nil { return err } } // Unreachable. return nil }
// SetupLinux programs a Vcpu to enter a loaded Linux kernel following
// the Linux boot protocol: it installs a boot GDT and a null IDT,
// enables protected mode and PAE (plus long mode with an identity
// page table for 64-bit kernels), loads flat boot segments, writes
// the boot_params page, and points the registers at entry_point.
//
//	vcpu           -- the virtual CPU to initialize.
//	model          -- the machine model (allocator for guest memory).
//	orig_boot_data -- the original boot_params data from the image.
//	entry_point    -- the kernel entry address loaded into RIP.
//	is_64bit       -- whether the kernel is 64-bit.
//	initrd_addr    -- guest physical address of the loaded initrd.
//	initrd_len     -- length of the initrd in bytes.
//	cmdline_addr   -- guest physical address of the kernel cmdline.
//
// Returns the first error encountered while programming the Vcpu.
func SetupLinux(
	vcpu *platform.Vcpu,
	model *machine.Model,
	orig_boot_data []byte,
	entry_point uint64,
	is_64bit bool,
	initrd_addr platform.Paddr,
	initrd_len uint64,
	cmdline_addr platform.Paddr) error {

	// Copy in the GDT table.
	// These match the segments below.
	gdt_addr, gdt, err := model.Allocate(
		machine.MemoryTypeUser,
		0,                 // Start.
		model.Max(),       // End.
		platform.PageSize, // Size.
		false)             // From bottom.
	if err != nil {
		return err
	}
	// Fill the allocated page with boot GDT entries via the C helpers;
	// the 64-bit variant differs in the L/Db bits of the code segment.
	if is_64bit {
		C.build_64bit_gdt(unsafe.Pointer(&gdt[0]))
	} else {
		C.build_32bit_gdt(unsafe.Pointer(&gdt[0]))
	}
	BootGdt := platform.DescriptorValue{
		Base:  uint64(gdt_addr),
		Limit: uint16(platform.PageSize),
	}
	err = vcpu.SetDescriptor(platform.GDT, BootGdt, true)
	if err != nil {
		return err
	}

	// Set a null IDT.
	// NOTE(review): interrupts are masked below (RFLAGS), so the
	// guest should take no interrupts before installing its own IDT.
	BootIdt := platform.DescriptorValue{
		Base:  0,
		Limit: 0,
	}
	err = vcpu.SetDescriptor(platform.IDT, BootIdt, true)
	if err != nil {
		return err
	}

	// Enable protected-mode.
	// This does not set any flags (e.g. paging) beyond the
	// protected mode flag. This is according to Linux entry
	// protocol for 32-bit protected mode.
	cr0, err := vcpu.GetControlRegister(platform.CR0)
	if err != nil {
		return err
	}
	cr0 = cr0 | (1 << 0) // Protected mode (CR0.PE).
	err = vcpu.SetControlRegister(platform.CR0, cr0, true)
	if err != nil {
		return err
	}

	// Always have the PAE bit set.
	cr4, err := vcpu.GetControlRegister(platform.CR4)
	if err != nil {
		return err
	}
	cr4 = cr4 | (1 << 5) // PAE enabled (CR4.PAE).
	err = vcpu.SetControlRegister(platform.CR4, cr4, true)
	if err != nil {
		return err
	}

	// For 64-bit kernels, we need to enable long mode,
	// and load an identity page table. This will require
	// only a page of pages, as we use huge page sizes.
	if is_64bit {
		// Create our page tables.
		// One page each for the PDE, PGD and PML4 levels.
		pde_addr, pde, err := model.Allocate(
			machine.MemoryTypeUser,
			0,                 // Start.
			model.Max(),       // End.
			platform.PageSize, // Size.
			false)             // From bottom.
		if err != nil {
			return err
		}
		pgd_addr, pgd, err := model.Allocate(
			machine.MemoryTypeUser,
			0,                 // Start.
			model.Max(),       // End.
			platform.PageSize, // Size.
			false)             // From bottom.
		if err != nil {
			return err
		}
		pml4_addr, pml4, err := model.Allocate(
			machine.MemoryTypeUser,
			0,                 // Start.
			model.Max(),       // End.
			platform.PageSize, // Size.
			false)             // From bottom.
		if err != nil {
			return err
		}

		// Link the hierarchy: PML4 -> PGD -> PDE.
		// The C helpers fill each level in place.
		C.build_pde(unsafe.Pointer(&pde[0]), platform.PageSize)
		C.build_pgd(unsafe.Pointer(&pgd[0]), C.__u64(pde_addr), platform.PageSize)
		C.build_pml4(unsafe.Pointer(&pml4[0]), C.__u64(pgd_addr), platform.PageSize)

		log.Printf("loader: Created PDE @ %08x.", pde_addr)
		log.Printf("loader: Created PGD @ %08x.", pgd_addr)
		log.Printf("loader: Created PML4 @ %08x.", pml4_addr)

		// Set our newly build page table.
		// CR3 points at the top-level PML4.
		err = vcpu.SetControlRegister(
			platform.CR3,
			platform.ControlRegisterValue(pml4_addr),
			true)
		if err != nil {
			return err
		}

		// Enable long mode.
		efer, err := vcpu.GetControlRegister(platform.EFER)
		if err != nil {
			return err
		}
		efer = efer | (1 << 8) // Long-mode enable (EFER.LME).
		err = vcpu.SetControlRegister(platform.EFER, efer, true)
		if err != nil {
			return err
		}

		// Enable paging.
		// With LME set, setting CR0.PG activates long mode.
		cr0, err = vcpu.GetControlRegister(platform.CR0)
		if err != nil {
			return err
		}
		cr0 = cr0 | (1 << 31) // Paging enable (CR0.PG).
		err = vcpu.SetControlRegister(platform.CR0, cr0, true)
		if err != nil {
			return err
		}
	}

	// NOTE: For 64-bit kernels, we need to enable
	// real 64-bit mode. This means that the L bit in
	// the segments must be one, the Db bit must be
	// zero, and we set the LME bit in EFER (above).
	var lVal uint8
	var dVal uint8
	if is_64bit {
		lVal = 1
		dVal = 0
	} else {
		lVal = 0
		dVal = 1
	}

	// Load the VMCS segments.
	//
	// NOTE: These values are loaded into the VMCS
	// registers and are expected to match the descriptors
	// we've used above. Unfortunately the API format doesn't
	// match, so we need to duplicate some work here. Ah, well
	// at least the below serves as an explanation for what
	// the magic numbers in GDT_ENTRY() above mean.
	BootCs := platform.SegmentValue{
		Base:     0,
		Limit:    0xffffffff,
		Selector: uint16(C.BootCsSelector), // @ 0x10
		Dpl:      0,    // Privilege level (kernel).
		Db:       dVal, // 32-bit segment?
		G:        1,    // Granularity (page).
		S:        1,    // As per BOOT_CS (code/data).
		L:        lVal, // 64-bit extension.
		Type:     0xb,  // As per BOOT_CS (access must be set).
		Present:  1,
	}
	BootDs := platform.SegmentValue{
		Base:     0,
		Limit:    0xffffffff,
		Selector: uint16(C.BootDsSelector), // @ 0x18
		Dpl:      0,   // Privilege level (kernel).
		Db:       1,   // 32-bit segment?
		G:        1,   // Granularity (page).
		S:        1,   // As per BOOT_DS (code/data).
		L:        0,   // 64-bit extension.
		Type:     0x3, // As per BOOT_DS (access must be set).
		Present:  1,
	}
	BootTr := platform.SegmentValue{
		Base:     0,
		Limit:    0xffffffff,
		Selector: uint16(C.BootTrSelector), // @ 0x20
		Dpl:      0,   // Privilege level (kernel).
		Db:       1,   // 32-bit segment?
		G:        1,   // Granularity (page).
		S:        0,   // As per BOOT_TR (system).
		L:        0,   // 64-bit extension.
		Type:     0xb, // As per BOOT_TR.
		Present:  1,
	}

	// All data segments share the BOOT_DS value.
	err = vcpu.SetSegment(platform.CS, BootCs, true)
	if err != nil {
		return err
	}
	err = vcpu.SetSegment(platform.DS, BootDs, true)
	if err != nil {
		return err
	}
	err = vcpu.SetSegment(platform.ES, BootDs, true)
	if err != nil {
		return err
	}
	err = vcpu.SetSegment(platform.FS, BootDs, true)
	if err != nil {
		return err
	}
	err = vcpu.SetSegment(platform.GS, BootDs, true)
	if err != nil {
		return err
	}
	err = vcpu.SetSegment(platform.SS, BootDs, true)
	if err != nil {
		return err
	}
	err = vcpu.SetSegment(platform.TR, BootTr, true)
	if err != nil {
		return err
	}

	// Create our boot parameters.
	boot_addr, boot_data, err := model.Allocate(
		machine.MemoryTypeUser,
		0,                 // Start.
		model.Max(),       // End.
		platform.PageSize, // Size.
		false)             // From bottom.
	if err != nil {
		return err
	}
	err = SetupLinuxBootParams(
		model,
		boot_data,
		orig_boot_data,
		cmdline_addr,
		initrd_addr,
		initrd_len)
	if err != nil {
		return err
	}

	// Set our registers.
	// This is according to the Linux 32-bit boot protocol.
	// RSI carries the boot_params address; RBP/RDI/RBX are zeroed.
	log.Printf("loader: boot_params @ %08x.", boot_addr)
	err = vcpu.SetRegister(platform.RSI, platform.RegisterValue(boot_addr))
	if err != nil {
		return err
	}
	err = vcpu.SetRegister(platform.RBP, 0)
	if err != nil {
		return err
	}
	err = vcpu.SetRegister(platform.RDI, 0)
	if err != nil {
		return err
	}
	err = vcpu.SetRegister(platform.RBX, 0)
	if err != nil {
		return err
	}

	// Jump to our entry point.
	err = vcpu.SetRegister(platform.RIP, platform.RegisterValue(entry_point))
	if err != nil {
		return err
	}

	// Make sure interrupts are disabled.
	// This will actually clear out all other flags.
	rflags, err := vcpu.GetRegister(platform.RFLAGS)
	if err != nil {
		return err
	}
	rflags = rflags &^ (1 << 9) // Interrupts off (clear IF).
	rflags = rflags | (1 << 1)  // Reserved 1 (bit 1 must be set).
	err = vcpu.SetRegister(
		platform.RFLAGS,
		platform.RegisterValue(rflags))
	if err != nil {
		return err
	}

	// We're done.
	return nil
}
func LoadLinux( vcpu *platform.Vcpu, model *machine.Model, boot_params string, vmlinux string, initrd string, cmdline string, system_map string) (SystemMap, *Convention, error) { // Read the boot_params. log.Print("loader: Reading kernel image...") kernel_data, err := ioutil.ReadFile(boot_params) log.Printf("loader: Kernel is %d bytes.", len(kernel_data)) if err != nil { return nil, nil, err } // They may have passed the entire vmlinuz image as the // parameter here. That's okay, we do an efficient mmap // above. But we need to truncate the visible slice. boot_params_data := kernel_data[0:platform.PageSize] // Load the kernel. log.Print("loader: Reading kernel binary...") vmlinux_data, err := ioutil.ReadFile(vmlinux) log.Printf("loader: Kernel binary is %d bytes.", len(vmlinux_data)) if err != nil { return nil, nil, err } // Load the ramdisk. log.Print("loader: Reading ramdisk...") initrd_data, err := ioutil.ReadFile(initrd) log.Printf("loader: Ramdisk is %d bytes.", len(initrd_data)) if err != nil { return nil, nil, err } // Load the system map. log.Print("loader: Loading system map...") sysmap, err := LoadLinuxSystemMap(system_map) if err != nil { return nil, nil, err } // Load the kernel into memory. log.Print("loader: Loading kernel...") entry_point, is_64bit, err := ElfLoad(vmlinux_data, model) if err != nil { return nil, nil, err } if is_64bit { log.Print("loader: 64-bit kernel found.") } else { log.Print("loader: 32-bit kernel found.") } log.Printf("loader: Entry point is %08x.", entry_point) // Set our calling convention. var convention *Convention if is_64bit { convention = &Linux64Convention } else { convention = &Linux32Convention } // Load the cmdline. // NOTE: Here we create a full page with // trailing zeros. This is the expected form // for the command line. 
full_cmdline := make( []byte, platform.PageSize, platform.PageSize) copy(full_cmdline, []byte(cmdline)) cmdline_addr, err := model.MemoryMap.Load( platform.Paddr(0), model.Max(), full_cmdline, false) if err != nil { return nil, nil, err } log.Printf("loader: cmdline @ %08x: %s", cmdline_addr, cmdline) // Load the initrd. initrd_addr, err := model.MemoryMap.Load( platform.Paddr(0), model.Max(), initrd_data, true) if err != nil { return nil, nil, err } log.Printf("loader: initrd @ %08x.", initrd_addr) // Create our setup page, // and initialize the VCPU. err = SetupLinux( vcpu, model, boot_params_data, entry_point, is_64bit, initrd_addr, uint64(len(initrd_data)), cmdline_addr) // Everything is okay. return sysmap, convention, err }