qemu? kvm? libvirt? (𖦹.𖦹)

Jul 20, 2022 23:30 · 2254 words · 5 minute read Virtualization Linux

qemu

qemu 是 Fabrice Bellard 写的一款模拟器,我们通常利用它的 system emulation 模式在宿主机上运行 Guest OS(虚拟机)。OpenStack Nova、Kata Containers 还有 KubeVirt 这些虚拟化解决方案都使用 qemu 来承载 Guest OS:

  • OpenStack Nova:

    $ ps -ef | grep qemu
    42436    22132  3681  3 Jul08 ?        10:21:17 /usr/libexec/qemu-kvm -name guest=instance-0000000b,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-instance-0000000b/master-key.aes -machine pc-i440fx-rhel7.6.0,accel=kvm,usb=off,dump-guest-core=off -cpu Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,pku=on,md-clear=on,stibp=on,ssbd=on -m 512 -realtime mlock=off -smp 1,sockets=1,cores=1,threads=1 -uuid 7c437668-1e95-47e8-802b-5177d0c857d7 -smbios type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=20.1.2,serial=7c437668-1e95-47e8-802b-5177d0c857d7,uuid=7c437668-1e95-47e8-802b-5177d0c857d7,family=Virtual Machine -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=27,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -object secret,id=virtio-disk0-secret0,data=EzMF9E7z3N1F4V8N2FAJLPkEOG3sqeFkDXFdgeo1svs=,keyid=masterKey0,iv=nQT/VDfXTdlwriUo9NMLKw==,format=base64 -drive file=rbd:vms/7c437668-1e95-47e8-802b-5177d0c857d7_disk:id=nova:auth_supported=cephx\;none:mon_host=172.20.140.12\:6789\;172.20.140.50\:6789\;172.20.140.229\:6789,file.password-secret=virtio-disk0-secret0,format=raw,if=none,id=drive-virtio-disk0,cache=writeback,discard=unmap -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -object secret,id=ide0-0-0-secret0,data=FxAFify/HGH0lJj408zYPnrwrbcLh1aXkXnP+nmf/MQ=,keyid=masterKey0,iv=eYbMicqB528Biq2uGnn0QQ==,format=base64 -drive file=rbd:vms/7c437668-1e95-47e8-802b-5177d0c857d7_disk.config:id=nova:auth_supported=cephx\;none:mon_host=172.20.140.12\:6789\;172.20.140.50\:6789\;172.20.140.229\:6789,file.password-secret=ide0-0-0-secret0,format=raw,if=none,id=drive-ide0-0-0,readonly=on,cache=writeback,discard=unmap -device 
ide-cd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,write-cache=on -netdev tap,fd=28,id=hostnet0,vhost=on,vhostfd=29 -device virtio-net-pci,host_mtu=1450,netdev=hostnet0,id=net0,mac=fa:16:3e:ab:91:6b,bus=pci.0,addr=0x3 -chardev pty,id=charserial0,logfile=/var/lib/nova/instances/7c437668-1e95-47e8-802b-5177d0c857d7/console.log,logappend=off -device isa-serial,chardev=charserial0,id=serial0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 172.20.140.229:0 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
    
  • Kata Containers:

    $ ps -ef | grep qemu
    root     634414      1  0 Jul11 ?        00:13:41 /opt/kata/bin/qemu-system-x86_64 -name sandbox-ae14c3fdacfe80c482dd42def9fd758e86207b03e80018feb7ef0e3a19349bfe -uuid 8a19a2f6-905a-44bf-9ed9-8599d0148e8e -machine q35,accel=kvm,kernel_irqchip=on,nvdimm=on -cpu host,pmu=off -qmp unix:/run/vc/vm/ae14c3fdacfe80c482dd42def9fd758e86207b03e80018feb7ef0e3a19349bfe/qmp.sock,server=on,wait=off -m 2048M,slots=10,maxmem=129645M -device pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=off,addr=2,io-reserve=4k,mem-reserve=1m,pref64-reserve=1m -device virtio-serial-pci,disable-modern=false,id=serial0 -device virtconsole,chardev=charconsole0,id=console0 -chardev socket,id=charconsole0,path=/run/vc/vm/ae14c3fdacfe80c482dd42def9fd758e86207b03e80018feb7ef0e3a19349bfe/console.sock,server=on,wait=off -device nvdimm,id=nv0,memdev=mem0,unarmed=on -object memory-backend-file,id=mem0,mem-path=/opt/kata/share/kata-containers/kata-ubuntu.img,size=268435456,readonly=on -device virtio-scsi-pci,id=scsi0,disable-modern=false -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng-pci,rng=rng0 -device vhost-vsock-pci,disable-modern=false,vhostfd=3,id=vsock-431108636,guest-cid=431108636 -chardev socket,id=char-f1bf0b40070d501b,path=/run/vc/vm/ae14c3fdacfe80c482dd42def9fd758e86207b03e80018feb7ef0e3a19349bfe/vhost-fs.sock -device vhost-user-fs-pci,chardev=char-f1bf0b40070d501b,tag=kataShared -netdev tap,id=network-0,vhost=on,vhostfds=4,fds=5 -device driver=virtio-net-pci,netdev=network-0,mac=00:00:00:14:29:6d,disable-modern=false,mq=on,vectors=4 -rtc base=utc,driftfix=slew,clock=host -global kvm-pit.lost_tick_policy=discard -vga none -no-user-config -nodefaults -nographic --no-reboot -daemonize -object memory-backend-file,id=dimm1,size=2048M,mem-path=/dev/shm,share=on -numa node,memdev=dimm1 -kernel /opt/kata/share/kata-containers/vmlinux-5.15.26-90 -append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 
noreplace-smp reboot=k console=hvc0 console=hvc1 cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro ro rootfstype=ext4 quiet systemd.show_status=false panic=1 nr_cpus=10 systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket scsi_mod.scan=none agent.debug_console agent.debug_console_vport=1026 -pidfile /run/vc/vm/ae14c3fdacfe80c482dd42def9fd758e86207b03e80018feb7ef0e3a19349bfe/pid -smp 1,cores=1,threads=1,sockets=10,maxcpus=10
    
  • KubeVirt:

    $ ps -ef | grep qemu
    qemu        101      1  0 Jul08 ?        00:10:27 /usr/libexec/qemu-kvm -name guest=ns-demo_ecs-test3,debug-threads=on -S -object {"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-1-ns-demo_ecs-test3/master-key.aes"} -machine pc-q35-rhel8.5.0,accel=kvm,usb=off,dump-guest-core=off -cpu IvyBridge-IBRS,ss=on,vmx=on,pdcm=on,pcid=on,hypervisor=on,arat=on,tsc-adjust=on,umip=on,md-clear=on,stibp=on,arch-capabilities=on,ssbd=on,xsaveopt=on,pdpe1gb=on,ibpb=on,ibrs=on,amd-stibp=on,amd-ssbd=on,skip-l1dfl-vmentry=on,pschange-mc-no=on -m size=2000896k,slots=16,maxmem=134217728k -overcommit mem-lock=off -smp 1,maxcpus=24,sockets=2,dies=1,cores=12,threads=1 -object {"qom-type":"iothread","id":"iothread1"} -object {"qom-type":"memory-backend-ram","id":"ram-node0","size":2048917504} -numa node,nodeid=0,cpus=0-23,memdev=ram-node0 -uuid 0d0af5a0-2a22-55e1-8229-96892fdd6700 -smbios type=1,manufacturer=KubeVirt,product=None,uuid=0d0af5a0-2a22-55e1-8229-96892fdd6700,family=KubeVirt -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=19,server=on,wait=off -mon chardev=charmonitor,id=monitor,mode=control -rtc base=localtime -no-shutdown -boot strict=on -device pcie-root-port,port=0x18,chassis=8,id=pci.8,bus=pcie.0,multifunction=on,addr=0x3 -device pcie-root-port,port=0x19,chassis=9,id=pci.9,bus=pcie.0,addr=0x3.0x1 -device pcie-root-port,port=0x1a,chassis=10,id=pci.10,bus=pcie.0,addr=0x3.0x2 -device pcie-root-port,port=0x1b,chassis=11,id=pci.11,bus=pcie.0,addr=0x3.0x3 -device pcie-root-port,port=0x1c,chassis=12,id=pci.12,bus=pcie.0,addr=0x3.0x4 -device pcie-root-port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2 -device pcie-root-port,port=0x11,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-root-port,port=0x12,chassis=3,id=pci.3,bus=pcie.0,addr=0x2.0x2 -device pcie-root-port,port=0x13,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x3 -device pcie-root-port,port=0x14,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x4 
-device pcie-root-port,port=0x15,chassis=6,id=pci.6,bus=pcie.0,addr=0x2.0x5 -device pcie-root-port,port=0x16,chassis=7,id=pci.7,bus=pcie.0,addr=0x2.0x6 -device virtio-scsi-pci-non-transitional,id=scsi0,bus=pci.2,addr=0x0 -device virtio-serial-pci-non-transitional,id=virtio-serial0,bus=pci.3,addr=0x0 -blockdev {"driver":"host_device","filename":"/dev/bootdisk","aio":"native","node-name":"libvirt-2-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-2-format","read-only":false,"discard":"unmap","cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-2-storage"} -device virtio-blk-pci-non-transitional,bus=pci.4,addr=0x0,drive=libvirt-2-format,id=ua-bootdisk,bootindex=1,write-cache=on,werror=stop,rerror=stop -blockdev {"driver":"file","filename":"/var/run/kubevirt-ephemeral-disks/cloud-init-data/ns-demo/ecs-test3/noCloud.iso","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-1-format","read-only":false,"discard":"unmap","cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-1-storage"} -device virtio-blk-pci-non-transitional,bus=pci.5,addr=0x0,drive=libvirt-1-format,id=ua-cloudinitdisk,write-cache=on,werror=stop,rerror=stop -netdev tap,fd=21,id=hostua-attachnet1,vhost=on,vhostfd=22 -device virtio-net-pci-non-transitional,host_mtu=1400,netdev=hostua-attachnet1,id=ua-attachnet1,mac=00:00:00:bc:3e:5a,bus=pci.1,addr=0x0,romfile= -chardev socket,id=charserial0,fd=23,server=on,wait=off -device isa-serial,chardev=charserial0,id=serial0 -chardev socket,id=charchannel0,fd=24,server=on,wait=off -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 -audiodev id=audio1,driver=none -vnc vnc=unix:/var/run/kubevirt-private/bbc7cf6d-6fbf-4a0e-a0e5-ddb2bb3e58f7/virt-vnc,audiodev=audio1 -device 
VGA,id=video0,vgamem_mb=16,bus=pcie.0,addr=0x1 -device virtio-balloon-pci-non-transitional,id=balloon0,bus=pci.6,addr=0x0 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
    

虚拟机本质上就是一个宿主机上的 qemu 进程。

qemu-kvm 是比较古老的、将 qemu 与 KVM 结合在一起的二进制可执行文件,而更新的 qemu-system-x86_64 则通过 -enable-kvm 选项来开启 KVM 加速。Fabrice Bellard 是一个天才程序员,最顶级的那种,感兴趣的可以去了解一下这位传奇。

KVM

KVM(Kernel-based Virtual Machine)是 Linux 内核中的虚拟化模块,用于加速模拟器的指令,需要 CPU 支持硬件辅助虚拟化(VT-x 或 AMD-V)。KVM 使得 Linux 内核表现得就像 Type-1 hypervisor 一样。

KVM 在宿主机上通过 /dev/kvm 设备暴露接口;qemu 通过 ioctl 系统调用来与 KVM 交互。

$ ls -al /dev/kvm
crw-rw-rw- 1 qemu qemu 10, 232 Jul  8 02:09 /dev/kvm

KVM API:https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt

虚拟化

三种虚拟化方式:

  • Full Virtualization 完全虚拟化
  • Hardware-Assisted Virtualization 硬件辅助虚拟化
  • Paravirtualization 半虚拟化

完全虚拟化

qemu 模拟器转译指令。虽然 qemu 使用动态转译技术,但在完全虚拟化模式下的 Guest OS 还是非常慢。

$ qemu-system-x86_64 linux.img

硬件辅助虚拟化

为了区分内核态和用户态,x86 CPU 提供分层权限等级(Privilege Rings)供操作系统利用:

  • Ring 0 有着最高的权限,允许直接与物理硬件(CPU 还有主板上的其他芯片)交互,在操作系统(Linux/macOS/Windows)中用于内核态
  • Ring 3 在操作系统中用于用户态(用户态的代码必须执行系统调用才能进入内核态)

一共四级,从 0 直接干到 3,不够用了。需要额外的处理器标志位来表示虚拟机(Guest OS)权限等级:Intel 的 VT-x 和 AMD 的 AMD-V。

将处理器标志位设置成虚拟机,Guest OS 内核就可以执行 Ring 0 级别的操作——通过 KVM 直接在 CPU 上运行大部分指令。

qemu 使用 KVM 加速需要硬件和宿主机操作系统同时支持,缺一不可:

  1. 查看当前处理器是否支持硬件辅助虚拟化:

    $ grep -E '(vmx|svm)' /proc/cpuinfo
    flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq vmx ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 invpcid rdseed adx smap clflushopt xsaveopt xsavec dtherm arat pln pts
    flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq vmx ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 invpcid rdseed adx smap clflushopt xsaveopt xsavec dtherm arat pln pts
    flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq vmx ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 invpcid rdseed adx smap clflushopt xsaveopt xsavec dtherm arat pln pts
    flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx rdtscp lm constant_tsc nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq vmx ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 invpcid rdseed adx smap clflushopt xsaveopt xsavec dtherm arat pln pts
    
    $ lscpu | grep Virtualization
    Virtualization:      VT-x
    Virtualization type: full
    
  2. 还需要宿主机操作系统内核加载 KVM 模块:

    $ lsmod | grep kvm
    kvm_intel             188740  0
    kvm                   637515  1 kvm_intel
    irqbypass              13503  1 kvm
    

qemu 结合 KVM,无需转译,直接在宿主机 CPU 上执行 Guest OS 内核的指令,这就是加速。

$ qemu-system-x86_64 -enable-kvm linux.img

半虚拟化

硬件辅助虚拟化针对的是 CPU 计算资源,而半虚拟化指的是网络和磁盘方面让 Guest OS 内核加载特殊的驱动:

  • 网络 virtio_net
  • 存储 virtio_blk

半虚拟化使 Guest OS 性能接近宿主机操作系统。半虚拟化的 Guest OS 知道自己是虚拟机;而完全虚拟化的 Guest OS 不知道。

libvirt

libvirt 是 Red Hat 开发的用于管理虚拟化的一套工具组件。我们当然也可以通过 qemu-kvm 或 qemu-system-x86_64 命令去启动 Guest OS 实例,但参数实在多,很容易出错,而且 Guest OS 本质上就是宿主机上的一个进程;通过 libvirt,可以相对容易地启动虚拟机、配置虚拟机硬件参数和管理实例的生命周期。

libvirt 使用 XML 描述虚拟机(也叫做 Domain):

<domain type='kvm' id='2'>
  <name>instance-0000000b</name>
  <uuid>7c437668-1e95-47e8-802b-5177d0c857d7</uuid>
  <metadata>
    <nova:instance xmlns:nova="http://openstack.org/xmlns/libvirt/nova/1.0">
      <nova:package version="20.1.2"/>
      <nova:name>vm-1-A</nova:name>
      <nova:creationTime>2022-07-08 06:46:20</nova:creationTime>
      <nova:flavor name="1">
        <nova:memory>512</nova:memory>
        <nova:disk>1</nova:disk>
        <nova:swap>0</nova:swap>
        <nova:ephemeral>0</nova:ephemeral>
        <nova:vcpus>1</nova:vcpus>
      </nova:flavor>
      <nova:owner>
        <nova:user uuid="5822557ec9c14698a8e9bec98bdf99e4">admin</nova:user>
        <nova:project uuid="2f221cffcc764b87a1a4f68c8abe5351">admin</nova:project>
      </nova:owner>
      <nova:root type="image" uuid="fc223dc5-1c51-47f4-b338-c34d9a446a09"/>
    </nova:instance>
  </metadata>
  <memory unit='KiB'>524288</memory>
  <currentMemory unit='KiB'>524288</currentMemory>
  <vcpu placement='static'>1</vcpu>
  <cputune>
    <shares>1024</shares>
  </cputune>
  <resource>
    <partition>/machine</partition>
  </resource>
  <sysinfo type='smbios'>
    <system>
      <entry name='manufacturer'>OpenStack Foundation</entry>
      <entry name='product'>OpenStack Nova</entry>
      <entry name='version'>20.1.2</entry>
      <entry name='serial'>7c437668-1e95-47e8-802b-5177d0c857d7</entry>
      <entry name='uuid'>7c437668-1e95-47e8-802b-5177d0c857d7</entry>
      <entry name='family'>Virtual Machine</entry>
    </system>
  </sysinfo>
  <os>
    <type arch='x86_64' machine='pc-i440fx-rhel7.6.0'>hvm</type>
    <boot dev='hd'/>
    <smbios mode='sysinfo'/>
  </os>
  <features>
    <acpi/>
    <apic/>
  </features>
  <cpu mode='custom' match='exact' check='full'>
    <model fallback='forbid'>Skylake-Server-IBRS</model>
    <vendor>Intel</vendor>
    <topology sockets='1' cores='1' threads='1'/>
    <feature policy='require' name='ss'/>
    <feature policy='require' name='hypervisor'/>
    <feature policy='require' name='tsc_adjust'/>
    <feature policy='require' name='clflushopt'/>
    <feature policy='require' name='pku'/>
    <feature policy='require' name='md-clear'/>
    <feature policy='require' name='stibp'/>
    <feature policy='require' name='ssbd'/>
  </cpu>
  <clock offset='utc'>
    <timer name='pit' tickpolicy='delay'/>
    <timer name='rtc' tickpolicy='catchup'/>
    <timer name='hpet' present='no'/>
  </clock>
  <on_poweroff>destroy</on_poweroff>
  <on_reboot>restart</on_reboot>
  <on_crash>destroy</on_crash>
  <devices>
    <emulator>/usr/libexec/qemu-kvm</emulator>
    <disk type='network' device='disk'>
      <driver name='qemu' type='raw' cache='writeback' discard='unmap'/>
      <auth username='nova'>
        <secret type='ceph' uuid='bf6e259b-b788-45ca-990d-4da2fb66f9b7'/>
      </auth>
      <source protocol='rbd' name='vms/7c437668-1e95-47e8-802b-5177d0c857d7_disk'>
        <host name='172.20.140.12' port='6789'/>
        <host name='172.20.140.50' port='6789'/>
        <host name='172.20.140.229' port='6789'/>
      </source>
      <target dev='vda' bus='virtio'/>
      <alias name='virtio-disk0'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
    </disk>
    <disk type='network' device='cdrom'>
      <driver name='qemu' type='raw' cache='writeback' discard='unmap'/>
      <auth username='nova'>
        <secret type='ceph' uuid='bf6e259b-b788-45ca-990d-4da2fb66f9b7'/>
      </auth>
      <source protocol='rbd' name='vms/7c437668-1e95-47e8-802b-5177d0c857d7_disk.config'>
        <host name='172.20.140.12' port='6789'/>
        <host name='172.20.140.50' port='6789'/>
        <host name='172.20.140.229' port='6789'/>
      </source>
      <target dev='hda' bus='ide'/>
      <readonly/>
      <alias name='ide0-0-0'/>
      <address type='drive' controller='0' bus='0' target='0' unit='0'/>
    </disk>
    <controller type='usb' index='0' model='piix3-uhci'>
      <alias name='usb'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
    </controller>
    <controller type='pci' index='0' model='pci-root'>
      <alias name='pci.0'/>
    </controller>
    <controller type='ide' index='0'>
      <alias name='ide'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/>
    </controller>
    <interface type='bridge'>
      <mac address='fa:16:3e:ab:91:6b'/>
      <source bridge='br-int'/>
      <virtualport type='openvswitch'>
        <parameters interfaceid='d21515c5-1550-4f71-93c6-ef6f69a7a744'/>
      </virtualport>
      <target dev='tapd21515c5-15'/>
      <model type='virtio'/>
      <mtu size='1450'/>
      <alias name='net0'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
    </interface>
    <serial type='pty'>
      <source path='/dev/pts/2'/>
      <log file='/var/lib/nova/instances/7c437668-1e95-47e8-802b-5177d0c857d7/console.log' append='off'/>
      <target type='isa-serial' port='0'>
        <model name='isa-serial'/>
      </target>
      <alias name='serial0'/>
    </serial>
    <console type='pty' tty='/dev/pts/2'>
      <source path='/dev/pts/2'/>
      <log file='/var/lib/nova/instances/7c437668-1e95-47e8-802b-5177d0c857d7/console.log' append='off'/>
      <target type='serial' port='0'/>
      <alias name='serial0'/>
    </console>
    <input type='tablet' bus='usb'>
      <alias name='input0'/>
      <address type='usb' bus='0' port='1'/>
    </input>
    <input type='mouse' bus='ps2'>
      <alias name='input1'/>
    </input>
    <input type='keyboard' bus='ps2'>
      <alias name='input2'/>
    </input>
    <graphics type='vnc' port='5900' autoport='yes' listen='172.20.140.229'>
      <listen type='address' address='172.20.140.229'/>
    </graphics>
    <video>
      <model type='cirrus' vram='16384' heads='1' primary='yes'/>
      <alias name='video0'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
    </video>
    <memballoon model='virtio'>
      <stats period='10'/>
      <alias name='balloon0'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
    </memballoon>
  </devices>
  <seclabel type='dynamic' model='dac' relabel='yes'>
    <label>+42436:+42477</label>
    <imagelabel>+42436:+42477</imagelabel>
  </seclabel>
</domain>
$ virsh create domain.xml # launch a vm

$ virsh list # list vms
 Id    Name                           State
----------------------------------------------------
 2     instance-0000000b              running

$ virsh destroy 2 # stop a vm
Domain '2' destroyed

本质上就是通过 libvirtd 将 XML 中的描述信息转换为 qemu-kvm 的启动参数,拉起一个 qemu-kvm 进程。

OpenStack Nova 和 KubeVirt 都是通过 libvirt 来管理虚拟机,而非直接接触 qemu-kvm,在启动虚拟机时 libvirt 可以看作是 qemu 的上层封装。