aboutsummaryrefslogtreecommitdiff
path: root/kvm-manager
blob: 4887ffb1f96fdef6ccf1ca9b70baac3e482ebda5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/bin/bash
#
# Author: Daniel Kahn Gillmor <dkg@fifthhorseman.net>
# Date: 2011-01-26
# License: GPL v3+

## expect to pull these values from the environment:
# VMNAME=snapper
# OWNER=jrollins
# RAM=512
# SMP=2 # optional, specify number of CPUs
# HDA..HDZ=/path/to/disk # optional
#
# The following optional arguments control disk i/o on a per disk basis.
# All the variables start with the disk they are referencing (e.g.
# HDA or HDB). They all require qemu 2.6 or later. If a variable is not
# set, it is not passed to qemu, so qemu's own default applies. The qemu
# defaults are listed below (for the rate limits, 0 means unlimited).
# HD[A-Z]_IOPS_TOTAL=0
# HD[A-Z]_IOPS_TOTAL_MAX=0
# HD[A-Z]_IOPS_TOTAL_MAX_LENGTH=1
# HD[A-Z]_IOPS_READ=0
# HD[A-Z]_IOPS_READ_MAX=0
# HD[A-Z]_IOPS_READ_MAX_LENGTH=1
# HD[A-Z]_IOPS_WRITE=0
# HD[A-Z]_IOPS_WRITE_MAX=0
# HD[A-Z]_IOPS_WRITE_MAX_LENGTH=1
# HD[A-Z]_IOPS_SIZE=0
# HD[A-Z]_BPS_TOTAL=0
# HD[A-Z]_BPS_TOTAL_MAX=0
# HD[A-Z]_BPS_TOTAL_MAX_LENGTH=1
# HD[A-Z]_BPS_READ=0
# HD[A-Z]_BPS_READ_MAX=0
# HD[A-Z]_BPS_READ_MAX_LENGTH=1
# HD[A-Z]_BPS_WRITE=0
# HD[A-Z]_BPS_WRITE_MAX=0
# HD[A-Z]_BPS_WRITE_MAX_LENGTH=1

set -e

# VMNAME names the guest; the tap device, MAC address and per-VM directory
# are all derived from it, so nothing useful can happen without it.
if [ -z "$VMNAME" ] ; then
    echo "$0: VMNAME must be set in the environment" >&2
    exit 1
fi

TAP="${TAP:-${VMNAME}0}"
# MAC address is derived from a hash of the host's name and the guest's name:
MAC="${MAC:-$(printf "02:%s" "$(printf "%s\0%s" "$(hostname)" "${VMNAME}" | sha256sum | sed 's/\(..\)/\1:/g' | cut -f1-5 -d:)" )}"
BRIDGE="${BRIDGE:-br0}"

###################
# Primary group of the owner.  "groups USER" prints "USER : grp1 grp2 ...",
# so taking its first space-separated field yields the user name rather
# than a group; "id -gn" reports the primary group name directly.  A failed
# lookup leaves OWNERGROUP empty instead of aborting under "set -e".
OWNERGROUP=$(id -gn "$OWNER") || true
OWNERHOME=$(getent passwd "$OWNER" | cut -f6 -d: )

# Version numbers of the installed tools, taken from the first line of
# their --version output; used below to enable version-dependent options.
kvm_version=$(kvm --version | head -n1 | sed -E 's/^QEMU emulator version ([0-9.]+).*/\1/')
screen_version=$(screen --version | head -n1 | sed -E 's/^Screen version ([0-9.]+).*/\1/')

# Collect the i/o throttling settings for one disk.
#
# Disks are named HDA, HDB, HDC, ...  For the disk given as $1, look up
# every <DISK>_<OPTION> environment variable (e.g. HDA_IOPS_TOTAL,
# HDA_BPS_READ) and translate each non-empty one into a qemu
# "throttling.<option>=<value>" drive parameter.
#
# Globals:   kvm_version (read), disk_io_params (reset and written)
# Arguments: $1 - disk name, e.g. HDA
# Outputs:   none; the result is left in disk_io_params, either empty or
#            a string of ",key=value" items ready to append to -drive
build_disk_io_params() {
  local hd="$1"
  local knob envname setting qemu_key
  disk_io_params=

  # Throttling parameters are only emitted for version 2.6.0 or above.
  if dpkg --compare-versions "$kvm_version" lt 2.6.0; then
    return
  fi

  for knob in \
    IOPS_TOTAL IOPS_TOTAL_MAX IOPS_TOTAL_MAX_LENGTH \
    IOPS_READ IOPS_READ_MAX IOPS_READ_MAX_LENGTH \
    IOPS_WRITE IOPS_WRITE_MAX IOPS_WRITE_MAX_LENGTH \
    IOPS_SIZE \
    BPS_TOTAL BPS_TOTAL_MAX BPS_TOTAL_MAX_LENGTH \
    BPS_READ BPS_READ_MAX BPS_READ_MAX_LENGTH \
    BPS_WRITE BPS_WRITE_MAX BPS_WRITE_MAX_LENGTH; do
    envname="${hd}_${knob}"
    setting="${!envname}"
    # Unset or empty means "leave it out".
    [ -n "$setting" ] || continue

    # qemu spells the option in lower case with dashes instead of
    # underscores.
    qemu_key="${knob//_/-}"
    qemu_key="${qemu_key,,}"

    # Each item starts with a comma because it is appended to an existing
    # list of drive options.
    disk_io_params+=",throttling.${qemu_key}=${setting}"
  done
}

# Start the guest and stay in the foreground until kvm exits.
#
# Steps: create the tap device and attach it to the bridge, pick the boot
# source from marker files in $OWNERHOME/vms/$VMNAME, assemble the -drive
# arguments from HDA..HDZ, start kvm (job %1) and a screen session attached
# to the serial console socket (job %2), then install signal handlers that
# talk to the qemu monitor and wait for kvm.
#
# Globals read: TAP BRIDGE OWNER OWNERGROUP OWNERHOME VMNAME CMDLINE MACHINE
#               RAM SMP MAXCPUS MAC HDA..HDZ HD?_DRIVER screen_version
# Arguments:    none
up() {
    # bring up the network tap:
    modprobe -v tun
    ip tuntap add dev "$TAP" mode tap user "$OWNER"
    ip link set "$TAP" up
    brctl addif "$BRIDGE" "$TAP"

    chpst -u "$OWNER:$OWNERGROUP" mkdir -p "$OWNERHOME/vms/$VMNAME"

    CDISO="$OWNERHOME/vms/$VMNAME/cd.iso"
    NETBOOT="$OWNERHOME/vms/$VMNAME/netboot"
    DEBIAN_INSTALLER="$OWNERHOME/vms/$VMNAME/debian-installer"
    KERNEL="$OWNERHOME/vms/$VMNAME/kernel"
    INITRD="$OWNERHOME/vms/$VMNAME/initrd"
    # Extra kvm arguments are collected in an array so that paths are
    # passed through verbatim (no word splitting or globbing).
    KVMARGS=()
    unset KERNEL_CMDLINE

    # default: boot from the first hard disk
    BOOTCHOICE=c

    if [ -e "$KERNEL" ] && [ -e "$INITRD" ] ; then
        # direct kernel boot with the owner's kernel and initrd
        KVMARGS+=(-kernel "$KERNEL" -initrd "$INITRD")
        if [ "$CMDLINE" ]; then
            KERNEL_CMDLINE="$CMDLINE"
        fi
    elif [ -e "$DEBIAN_INSTALLER" ] ; then
        # FIXME: this should be generalized to pick the version and
        # the architecture -- perhaps an empty $DEBIAN_INSTALLER would
        # default to the latest sensible version, but its contents
        # could otherwise select the version (and the arch if
        # specified)
        KVMARGS+=(
            -kernel /usr/lib/debian-installer/images/9/amd64/text/debian-installer/amd64/linux
            -initrd /usr/lib/debian-installer/images/9/amd64/text/debian-installer/amd64/initrd.gz
            -no-reboot
        )
        KERNEL_CMDLINE="console=ttyS0,115200n8 -- console=ttyS0,115200n8"
        # debian-installer should always be a one-shot, so that it reboots into
        # normal mode:
        rm -f "$DEBIAN_INSTALLER"
    elif [ -e "$NETBOOT" ] ; then
        BOOTCHOICE=n
    elif [ -e "$CDISO" ] && [ -e "$(readlink -f "$CDISO")" ] ; then
        # The substitution is quoted: unquoted, an empty result would turn
        # the test into "[ -e ]", which is always true.
        KVMARGS+=(-cdrom "$CDISO")
        BOOTCHOICE=d
    fi

    # Re-trigger udev for every device-mapper backed disk and for the kvm
    # device before starting the guest.
    local diskpath
    for disk in HD{A..Z}; do
        diskpath="${!disk}"
        if [[ "$diskpath" == /dev/mapper/* ]]; then
            mappername="${diskpath#/dev/mapper/}"
            udevadm trigger --subsystem-match=block --attr-match=dm/name="$mappername"
        fi
    done
    udevadm trigger --sysname-match=kvm

    # older versions need to have at least the first disk marked as boot=on or they cannot boot.
    if kvm --version 2>/dev/null | sed 's/.*version \([0-9.]*\).*/\1/' | grep -q '^0' ; then
        first_disk_extra_args=",boot=on"
    else
        first_disk_extra_args=
    fi

    index=0
    # set up the disks, if needed:
    if [ -n "$HDA" ]; then
        build_disk_io_params HDA
        driver=${HDA_DRIVER:-virtio}
        KVMARGS+=(-drive "file=$HDA,if=$driver,cache=none,index=$index,format=raw${first_disk_extra_args}${disk_io_params}")
    fi
    # loop here on everything after HDA; the index advances for every
    # letter, so HDC is index 2 even when HDB is not configured:
    for disk in HD{B..Z}; do
        index=$(( index + 1 ))
        if [ -b "${!disk}" ]; then
            driver=virtio
            driver_var_name="${disk}_DRIVER"
            [ -n "${!driver_var_name}" ] && driver=${!driver_var_name}
            build_disk_io_params "${disk}"
            KVMARGS+=(-drive "file=${!disk},if=$driver,cache=none,index=$index,format=raw${disk_io_params}")
        fi
    done

    if [ -e /usr/share/qemu/sgabios.bin ]; then
        KVMARGS+=(-device sga)
    fi

    # NOTE(review): LOGNAME is also the conventional login-name environment
    # variable; if it is exported in the caller's environment this
    # assignment changes what child processes see -- confirm this is intended.
    LOGNAME="$OWNERHOME/vms/$VMNAME/console"
    ln -sfT "$LOGNAME" ./servicelog
    # keep the previous console log under a timestamped name
    if [ -e "$LOGNAME" ] ; then
        chpst -u "$OWNER" mv "$LOGNAME" "$LOGNAME.$(date +%F_%T%z | tr : .)"
    fi

    MONITORNAME="$OWNERHOME/vms/$VMNAME/monitor.socket"
    CONSOLENAME="$OWNERHOME/vms/$VMNAME/console.socket"

    # send one command line ($1) to the qemu monitor socket
    kvmsend() {
        socat STDIO "UNIX:$MONITORNAME" <<EOF
$1
EOF
    }

    # job %1: the guest itself
    chpst -u "$OWNER:$OWNERGROUP:kvm" \
        /usr/bin/kvm "${KVMARGS[@]}" \
        -M "${MACHINE:-pc}" \
        ${KERNEL_CMDLINE:+-append "$KERNEL_CMDLINE"} \
        -enable-kvm \
        -nodefaults \
        -nographic \
        -name "$VMNAME" \
        -m "$RAM" \
        -boot order="$BOOTCHOICE" \
        -chardev "socket,id=monitor,path=$MONITORNAME,server,nowait" -mon chardev=monitor,mode=readline \
        -rtc base=utc \
        -usb \
        -device virtio-balloon-pci,id=balloon0,bus=pci.0 \
        -chardev "socket,id=serial0,path=$CONSOLENAME,server" -device isa-serial,chardev=serial0 \
        -smp "${SMP:-1},maxcpus=${MAXCPUS:-8}" \
        -device "virtio-net-pci,vlan=0,id=net0,mac=$MAC,bus=pci.0" \
        -net "tap,ifname=$TAP,script=no,downscript=no,vlan=0,name=hostnet0" &

    # screen 4.05 and later get the log file name as an argument to -L
    local -a screen_log_args=(-L)
    if dpkg --compare-versions "$screen_version" ge 4.05; then
        screen_log_args=(-L ./servicelog)
    fi

    # job %2: a detached screen session attached to the serial console socket
    chpst -u "$OWNER:$OWNERGROUP" \
        /usr/bin/screen -D -m "${screen_log_args[@]}" -c /etc/screenrc.kvm-manager -S "$VMNAME" -t "$VMNAME" socat STDIO,raw,echo=0 "UNIX:${CONSOLENAME},retry=30" &

    set +e
    # handle regular signals
    trap 'kvmsend system_reset; wait %1' HUP
    trap 'kvmsend system_powerdown; wait %1' TERM
    trap 'kvmsend cont; wait %1' CONT
    trap 'kill -s TERM %1' QUIT
    # use SIGINT instead of SIGSTOP for freezing the guest because
    # trapping SIGSTOP is undefined:
    # http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_28
    trap 'kvmsend stop; wait %1' INT
    trap 'kill %1 ; kill %2' EXIT

    wait %1
}

# Tear down the guest's network plumbing: detach the tap from the bridge,
# take the link down and delete the tap device.  Every step is best-effort,
# so a failure in one does not prevent the next from running.  errexit is
# (re-)enabled on the way out.
#
# Globals read: BRIDGE TAP
down() {
    brctl delif "$BRIDGE" "$TAP" || :
    ip link set "$TAP" down || :
    ip tuntap del mode tap dev "$TAP" || :
    set -e
    # no need to lock up the block device as well, since the owner might
    # prefer to manipulate the disk directly.
}

# Run the log service: make sure the per-VM servicelog directory exists
# (created as $OWNER), then replace this shell with svlogd writing into it,
# also running as $OWNER.
#
# Globals read: OWNER OWNERHOME VMNAME; LOGDIR is set.
log() {
    local -a as_owner=(chpst -u "$OWNER")
    LOGDIR="$OWNERHOME/vms/$VMNAME/servicelog"
    "${as_owner[@]}" mkdir -p "$LOGDIR"
    exec "${as_owner[@]}" svlogd -tt "$LOGDIR"
}

# Dispatch on the first argument: each accepted subcommand is the name of
# the function that implements it.  Anything else prints usage and fails.
case "$1" in
    up|down|log)
	"$1"
	;;
    *)
	echo "Usage: $0 [up|down|log]" >&2
	exit 1
	;;
esac