OpenStack Nova instance 常见操作

1. 启动实例(start)

场景:启动处于 SHUTOFF 状态的实例
源码路径

  • API 层:nova/compute/api.py → start()
  • RPC 层:nova/compute/rpcapi.py → start_instance()
  • 执行层:nova/compute/manager.py → start_instance()
  • 驱动层:nova/virt/libvirt/driver.py → power_on()

关键代码

# nova/compute/manager.py
def start_instance(self, context, instance):
    """Power on a stopped instance and record it as ACTIVE.

    Simplified compute-manager step: validate the instance's vm_state, ask
    the virt driver to power the guest on, then persist the new state.

    Raises:
        exception.InstanceInvalidState: if ``instance.vm_state`` is not
            ``vm_states.STOPPED``.
    """
    # Only an instance whose vm_state is STOPPED may be started.
    if instance.vm_state != vm_states.STOPPED:
        raise exception.InstanceInvalidState(...)
    
    # Ask the virt driver to power the guest on.
    # NOTE(review): `network_info` is not defined in this excerpt; presumably
    # it is looked up earlier in the full method -- confirm.
    self.driver.power_on(context, instance, network_info)
    
    # Persist the new state: the instance is now ACTIVE.
    instance.vm_state = vm_states.ACTIVE
    instance.save()
# nova/virt/libvirt/driver.py
def power_on(self, context, instance, network_info):
    """Start the libvirt domain that backs ``instance``.

    ``context`` and ``network_info`` are accepted for interface
    compatibility with the caller but are not used in this step.
    """
    # Look up the libvirt domain object for this instance.
    domain = self._get_domain(instance)
    # Start with no flags. VIR_DOMAIN_START_AUTODESTROY must not be used
    # here: it ties the guest's lifetime to the caller's libvirt connection,
    # so the guest would be destroyed as soon as that connection is closed.
    domain.createWithFlags(0)

2. 停止实例(stop)

场景:正常关闭处于 ACTIVE 状态的实例
源码路径

  • API 层:nova/compute/api.py → stop()
  • RPC 层:nova/compute/rpcapi.py → stop_instance()
  • 执行层:nova/compute/manager.py → stop_instance()
  • 驱动层:nova/virt/libvirt/driver.py → power_off()

关键代码

# nova/compute/manager.py
def stop_instance(self, context, instance):
    """Power off a running instance and record it as STOPPED.

    Raises:
        exception.InstanceInvalidState: if the instance is not ACTIVE.
    """
    current_state = instance.vm_state
    if current_state != vm_states.ACTIVE:
        raise exception.InstanceInvalidState(...)

    # Shut the guest down through the virt driver, then persist the state.
    virt_driver = self.driver
    virt_driver.power_off(instance)

    instance.vm_state = vm_states.STOPPED
    instance.save()
# nova/virt/libvirt/driver.py
def power_off(self, instance):
    """Request a graceful shutdown of the instance's libvirt domain."""
    guest_domain = self._get_domain(instance)
    # Send the ACPI power-button event so the guest shuts down normally.
    acpi_power_button = libvirt.VIR_DOMAIN_SHUTDOWN_ACPI_POWER_BTN
    guest_domain.shutdownFlags(acpi_power_button)

3. 暂停实例(pause)

场景:将运行中的实例状态冻结到内存
源码路径

  • API 层:nova/compute/api.py → pause()
  • RPC 层:nova/compute/rpcapi.py → pause_instance()
  • 执行层:nova/compute/manager.py → pause_instance()
  • 驱动层:nova/virt/libvirt/driver.py → pause()

关键代码

# nova/compute/manager.py
def pause_instance(self, context, instance):
    """Pause an ACTIVE instance and record it as PAUSED.

    Raises:
        exception.InstanceInvalidState: if the instance is not ACTIVE.
    """
    if instance.vm_state == vm_states.ACTIVE:
        self.driver.pause(instance)
        # Persist the new state.
        instance.vm_state = vm_states.PAUSED
        instance.save()
        return

    raise exception.InstanceInvalidState(...)
# nova/virt/libvirt/driver.py
def pause(self, instance):
    """Pause the instance's libvirt domain."""
    # libvirt's "suspend" call is what implements a pause.
    self._get_domain(instance).suspend()

4. 恢复实例(unpause)

场景:从内存中恢复被暂停的实例
源码路径

  • API 层:nova/compute/api.py → unpause()
  • RPC 层:nova/compute/rpcapi.py → unpause_instance()
  • 执行层:nova/compute/manager.py → unpause_instance()
  • 驱动层:nova/virt/libvirt/driver.py → resume()

关键代码

# nova/compute/manager.py
def unpause_instance(self, context, instance):
    """Resume a PAUSED instance and record it as ACTIVE again.

    Raises:
        exception.InstanceInvalidState: if the instance is not PAUSED.
    """
    state_now = instance.vm_state
    if state_now != vm_states.PAUSED:
        raise exception.InstanceInvalidState(...)

    virt_driver = self.driver
    virt_driver.resume(instance)

    # Back to ACTIVE once the guest is running again.
    instance.vm_state = vm_states.ACTIVE
    instance.save()
# nova/virt/libvirt/driver.py
def resume(self, instance):
    """Resume the instance's paused libvirt domain."""
    guest_domain = self._get_domain(instance)
    # libvirt's resume call undoes a previous suspend.
    guest_domain.resume()

5. 重启实例(reboot)

场景:重启运行中的实例(分软重启和硬重启)
源码路径

  • API 层:nova/compute/api.py → reboot()
  • RPC 层:nova/compute/rpcapi.py → reboot_instance()
  • 执行层:nova/compute/manager.py → reboot_instance()
  • 驱动层:nova/virt/libvirt/driver.py → reboot()

关键代码

# nova/compute/manager.py
def reboot_instance(self, context, instance, reboot_type):
    """Reboot an ACTIVE instance through the virt driver.

    ``reboot_type`` is SOFT (operating-system level) or HARD (power level).
    The vm_state is left untouched, so the instance stays ACTIVE.

    Raises:
        exception.InstanceInvalidState: if the instance is not ACTIVE.
    """
    is_active = instance.vm_state == vm_states.ACTIVE
    if not is_active:
        raise exception.InstanceInvalidState(...)

    virt_driver = self.driver
    virt_driver.reboot(context, instance, reboot_type)
# nova/virt/libvirt/driver.py
def reboot(self, context, instance, reboot_type):
    """Reboot the instance's libvirt domain.

    A ``reboot_type`` equal to ``"SOFT"`` sends a reboot request to the
    guest; any other value resets the domain.
    """
    guest_domain = self._get_domain(instance)

    if reboot_type != "SOFT":
        # HARD: reset the domain, emulating a power reset.
        guest_domain.reset()
        return

    # SOFT: send the reboot signal to the guest operating system.
    guest_domain.reboot(flags=0)

6. 规格调整 (Resize)

6.1 API入口层

nova/api/openstack/compute/servers.py

def _action_resize(self, req, id, body):
    """Handle the server ``resize`` action at the REST API layer.

    Validates the instance state, resolves the requested flavor from
    ``body['resize']['flavorRef']`` and hands off to the compute API.

    Raises:
        exception.InstanceInvalidState: if the instance is not ACTIVE.
    """
    # State check: only an ACTIVE instance passes.
    # NOTE(review): `instance` is not defined in this excerpt; presumably it
    # is loaded from `id` before this point -- confirm.
    if instance.vm_state != vm_states.ACTIVE:
        raise exception.InstanceInvalidState(...)
    
    # Resolve the target flavor.
    flavor_id = body['resize']['flavorRef']
    new_flavor = self._get_flavor(flavor_id)
    
    # Delegate to the compute API.
    self.compute_api.resize(req.environ['nova.context'], instance, new_flavor)

6.2 Compute API层

nova/compute/api.py

def resize(self, context, instance, flavor, clean_shutdown=True):
    """Start a resize: check quota, record a migration, call the conductor.

    Args:
        context: request context.
        instance: the instance being resized.
        flavor: the target flavor; its ``vcpus`` and ``memory_mb`` are
            checked against quota.
        clean_shutdown: forwarded to the conductor as its fifth argument.
            Defaults to True, so existing three-argument callers keep
            working.
    """
    # Quota check against the new flavor's resources.
    self._check_quota(context, cores=flavor.vcpus, ram=flavor.memory_mb)

    # Create the migration record.
    migration = objects.Migration(
        context,
        instance_uuid=instance.uuid,
        source_compute=instance.host,
        status='migrating')
    migration.create()

    # Kick off the resize through the conductor. The fifth positional
    # argument used to be a hard-coded empty list, which is falsy; pass the
    # real flag instead.
    # NOTE(review): assumes the fifth parameter of
    # conductor_api.resize_instance is the clean_shutdown flag -- confirm.
    self.conductor_api.resize_instance(
        context, instance, migration, flavor, clean_shutdown)

6.3 Conductor层

nova/conductor/manager.py

def resize_instance(self, context, instance, migration, flavor, clean_shutdown):
    """Pick a destination host and ask it to prepare for the resize.

    ``clean_shutdown`` is accepted but not used in this step.
    """
    # Build the request spec describing what has to be scheduled.
    spec = self._build_request_spec(context, instance, flavor)

    # Schedule, then take the first candidate host.
    candidates = self.scheduler_client.select_destinations(context, spec)
    target_host = candidates[0]

    # Tell the destination compute node to prepare.
    self.compute_rpcapi.prep_resize(
        context, instance, migration, target_host, flavor)

6.4 目标节点准备

nova/compute/manager.py

def prep_resize(self, context, instance, migration, host, flavor):
    """Prepare the destination node for a resize.

    Calls the virt driver's ``spawn`` with the new flavor and then marks
    the instance with the RESIZE_PREP task state.
    """
    # Create the instance with the new flavor (per the original note, it is
    # not started here).
    # NOTE(review): `image_meta` and `block_device_info` are not defined in
    # this excerpt; presumably they are built earlier -- confirm.
    self.driver.spawn(context, instance, image_meta, 
                      [], block_device_info, flavor)
    
    # Record the task state on the instance.
    instance.task_state = task_states.RESIZE_PREP
    instance.save()

6.5 源节点迁移

nova/compute/manager.py

def resize_instance(self, context, instance, migration):
    """Run the source-node side of a resize.

    Migrates the disk when storage is not shared, records the old and new
    flavors on the instance, then sets the RESIZE_MIGRATED task state.
    """
    # Migrate the disk (and power off) unless storage is shared.
    # NOTE(review): `shared_storage` is not defined in this excerpt -- confirm
    # where it comes from.
    if not shared_storage:
        self._migrate_disk_and_power_off(context, instance, migration)
    
    # Record the flavor change on the instance.
    # NOTE(review): `new_flavor` is not defined in this excerpt -- confirm.
    instance.old_flavor = instance.flavor
    instance.new_flavor = new_flavor
    instance.save()
    
    # Per the original note, the resize now waits for user confirmation.
    instance.task_state = task_states.RESIZE_MIGRATED
    instance.save()

7. 卷管理 (Attach/Detach)

7.1 卷挂载 (Attach)

nova/compute/api.py

def attach_volume(self, context, instance, volume_id, device):
    """Validate and start a volume attach from the compute API layer.

    Raises:
        exception.InstanceInvalidState: if the instance is neither ACTIVE
            nor PAUSED.
    """
    # State check: attaching is allowed only for ACTIVE or PAUSED instances.
    if instance.vm_state not in (vm_states.ACTIVE, vm_states.PAUSED):
        raise exception.InstanceInvalidState(...)
    
    # Talk to the volume service (Cinder): fetch the volume, then reserve it.
    # The fetched `volume` is not used again in this excerpt.
    volume = self.volume_api.get(context, volume_id)
    self.volume_api.reserve_volume(context, volume_id)
    
    # RPC to the compute node to do the actual attach.
    # NOTE(review): `disk_bus` is not defined in this excerpt -- confirm.
    self.compute_rpcapi.attach_volume(context, instance, volume_id, 
                                      device, disk_bus)

nova/compute/manager.py

def attach_volume(self, context, instance, volume_id, device):
    """Attach a volume on the compute node and record the mapping.

    Initializes the volume connection, has the virt driver attach it to the
    guest, then creates a BlockDeviceMapping row describing the attachment.
    """
    # Fetch the connection details for the volume.
    host_connector = self.driver.get_volume_connector(instance)
    conn_info = self.volume_api.initialize_connection(
        context, volume_id, host_connector)

    # Attach the volume at the hypervisor level.
    self.driver.attach_volume(context, conn_info, instance, device)

    # Persist the attachment in the database.
    mapping_fields = dict(
        volume_id=volume_id,
        instance_uuid=instance.uuid,
        device_name=device,
        connection_info=conn_info,
    )
    objects.BlockDeviceMapping(context, **mapping_fields).create()

7.2 卷卸载 (Detach)

nova/compute/manager.py

def detach_volume(self, context, volume_id, instance):
    """Detach a volume from the guest and remove its mapping record."""
    # The stored mapping carries the connection details used at attach time.
    mapping = objects.BlockDeviceMapping.get_by_volume_id(context, volume_id)
    conn_info = mapping.connection_info

    # Detach at the hypervisor level first.
    self.driver.detach_volume(context, conn_info, instance, volume_id)

    # Then tell the volume service (Cinder) the connection is gone.
    cinder = self.volume_api
    cinder.terminate_connection(context, volume_id, conn_info)
    cinder.detach(context, volume_id)

    # Finally drop the database record.
    mapping.destroy()

8. 添加网卡 (Add NIC)

8.1 API入口

nova/api/openstack/compute/attach_networks.py

def add(self, req, server_id, body):
    """Handle an interface-attach request at the REST API layer.

    Reads ``net_id``, ``port_id`` and ``fixed_ip`` from
    ``body['interfaceAttachment']`` and forwards them to the compute API.
    All three are optional here; a missing key is passed on as ``None``.
    """
    attachment = body['interfaceAttachment']
    # net_id must not be mandatory: a request may name an existing port via
    # port_id instead, and indexing with ['net_id'] would raise KeyError for
    # such a request before the compute API is ever reached.
    network_id = attachment.get('net_id')
    port_id = attachment.get('port_id')
    fixed_ip = attachment.get('fixed_ip')

    # Delegate to the compute API.
    # NOTE(review): `instance` is not defined in this excerpt; presumably it
    # is loaded from `server_id` before this point -- confirm.
    self.compute_api.attach_interface(
        req.environ['nova.context'], instance, network_id, port_id, fixed_ip)

8.2 Compute API层

nova/compute/api.py

def attach_interface(self, context, instance, network_id, port_id, fixed_ip):
    """Validate an interface attach and forward it to the compute node.

    Uses the existing port when ``port_id`` is given; otherwise creates a
    new port on ``network_id``.

    Raises:
        exception.InstanceInvalidState: if the instance is neither ACTIVE
            nor PAUSED.
    """
    allowed_states = (vm_states.ACTIVE, vm_states.PAUSED)
    if instance.vm_state not in allowed_states:
        raise exception.InstanceInvalidState(...)

    net_api = self.network_api
    if not port_id:
        # No port supplied: create one on the requested network.
        port = net_api.create_port(
            context, instance.project_id, network_id, instance.uuid,
            fixed_ip=fixed_ip)
    else:
        # Reuse the caller's port.
        port = net_api.show_port(context, port_id)

    # RPC to the compute node with the resolved port ID.
    self.compute_rpcapi.attach_interface(context, instance, port['id'])

8.3 计算节点执行

nova/compute/manager.py

def attach_interface(self, context, instance, port_id):
    """Plug a port into the guest and refresh the instance's network cache."""
    net_api = self.network_api

    # Current network info plus the port being added.
    nw_info = net_api.get_instance_nw_info(context, instance)
    port = net_api.show_port(context, port_id)
    nw_info.append(port)

    # Attach the NIC at the hypervisor level.
    self.driver.attach_interface(context, instance, nw_info, port)

    # Store the updated network info in the instance's info cache.
    instance.info_cache.network_info = nw_info
    instance.info_cache.save()

9. 冷迁移(Migrate)

冷迁移在实例关机状态下完成:若实例处于运行(ACTIVE)状态,迁移过程中会先将其关机,然后进行磁盘迁移和计算节点切换。

9.1 API入口层

nova/api/openstack/compute/migrate_server.py

def _migrate(self, req, id, body):
    """Handle the cold-migrate action at the REST API layer.

    Raises:
        exception.InstanceInvalidState: if the instance is neither ACTIVE
            nor STOPPED.
    """
    # State check: the instance must be ACTIVE or STOPPED.
    # NOTE(review): `instance` is not defined in this excerpt; presumably it
    # is loaded from `id` before this point -- confirm.
    if instance.vm_state not in (vm_states.ACTIVE, vm_states.STOPPED):
        raise exception.InstanceInvalidState(...)
    
    # Delegate to the compute API.
    self.compute_api.migrate(req.environ['nova.context'], instance)

9.2 Compute API层

nova/compute/api.py

def migrate(self, context, instance):
    """Start a cold migration: check volumes, record it, call the conductor.

    Raises:
        exception.MigrationError: if the volume API reports attachments for
            this instance.
    """
    # Refuse to continue when the instance has volumes attached.
    attachments = self.volume_api.get_volume_attachments(
        context, instance.uuid)
    if attachments:
        raise exception.MigrationError(...)

    # Create the migration record.
    record = objects.Migration(
        context,
        instance_uuid=instance.uuid,
        migration_type='migration',
        status='migrating',
    )
    record.create()

    # Hand the migration to the conductor.
    self.conductor_api.migrate_server(context, instance, record)

9.3 Conductor调度层

nova/conductor/manager.py

def migrate_server(self, context, instance, migration):
    """Schedule a destination host and start the migration on the source."""
    # Ask the scheduler for candidate hosts and take the first one.
    spec = self._build_request_spec(context, instance)
    candidates = self.scheduler_client.select_destinations(context, spec)
    destination = candidates[0]

    # Tell the source compute node to carry out the migration.
    self.compute_rpcapi.migrate_instance(
        context, instance, migration, destination)

9.4 源节点执行

nova/compute/manager.py

def migrate_instance(self, context, instance, migration, host):
    """Run the source-node side of a cold migration.

    Powers the instance off if it is ACTIVE, migrates its disk to ``host``,
    asks the destination to finish the migration, then destroys the source
    copy. ``migration`` is not used in this excerpt.
    """
    # Power the instance off first if it is running.
    if instance.vm_state == vm_states.ACTIVE:
        self.driver.power_off(instance)
    
    # Move the disk to the destination host.
    self._migrate_disk(context, instance, host)
    
    # Ask the destination node to bring the instance up.
    self.compute_rpcapi.finish_migrate_instance(context, instance, host)
    
    # Clean up the instance on the source node.
    # NOTE(review): `block_device_info` is not defined in this excerpt --
    # confirm where it comes from.
    self.driver.destroy(context, instance, [], block_device_info)

10. 热迁移(Live-Migrate)

热迁移在实例运行状态下进行,需要迁移内存状态和磁盘访问。

10.1 API入口

nova/api/openstack/compute/admin_actions.py

def _migrate_live(self, req, id, body):
    """Handle the live-migrate action at the REST API layer.

    Reads the destination host from ``body['os-migrateLive']['host']``.

    Raises:
        exception.InstanceInvalidState: if the instance is not ACTIVE.
    """
    # State check: the instance must be ACTIVE.
    # NOTE(review): `instance` is not defined in this excerpt; presumably it
    # is loaded from `id` before this point -- confirm.
    if instance.vm_state != vm_states.ACTIVE:
        raise exception.InstanceInvalidState(...)
    
    # Destination host requested by the caller.
    host = body['os-migrateLive']['host']
    
    # Delegate to the compute API.
    self.compute_api.live_migrate(req.environ['nova.context'], instance, host)

10.2 Compute API层

nova/compute/api.py

def live_migrate(self, context, instance, host):
    """Validate the destination and start a live migration.

    Raises:
        exception.ComputeHostNotFound: if no nova-compute service is found
            for ``host``.
    """
    # Make sure the destination host has a compute service.
    compute_service = objects.Service.get_by_host_and_binary(
        context, host, 'nova-compute')
    if not compute_service:
        raise exception.ComputeHostNotFound(host=host)

    # Create the migration record.
    record = objects.Migration(
        context,
        instance_uuid=instance.uuid,
        migration_type='live-migration',
        status='migrating',
    )
    record.create()

    # Start the live migration through the conductor.
    self.conductor_api.live_migrate_instance(context, instance, host)

10.3 源节点执行

nova/compute/manager.py

def live_migrate_instance(self, context, instance, host):
    """Live-migrate the instance to ``host`` and record its new host.

    Raises:
        exception.LiveMigrationNotSupported: if the driver's capabilities
            do not include live migration.
    """
    virt_driver = self.driver

    # The hypervisor driver must advertise live-migration support.
    can_live_migrate = virt_driver.capabilities['supports_live_migration']
    if not can_live_migrate:
        raise exception.LiveMigrationNotSupported(...)

    # Perform the migration.
    virt_driver.live_migrate(context, instance, host)

    # Point the instance at its new host.
    instance.host = host
    instance.save()

10.4 Libvirt驱动实现

nova/virt/libvirt/driver.py

def live_migrate(self, context, instance, dest):
    """Live-migrate the instance's libvirt domain to ``dest``."""
    # Source domain and destination connection URI.
    guest_domain = self._get_domain(instance)
    target_uri = self._get_dest_uri(dest)

    # Live, peer-to-peer migration.
    migrate_flags = libvirt.VIR_MIGRATE_LIVE | libvirt.VIR_MIGRATE_PEER2PEER
    guest_domain.migrateToURI(target_uri, flags=migrate_flags)

11. 故障迁移(Evacuate)

故障迁移用于在计算节点宕机时将实例迁移到其他节点。

11.1 API入口

nova/api/openstack/compute/evacuate.py

def _evacuate(self, req, id, body):
    """Handle the evacuate action at the REST API layer.

    The destination host is optional and read from
    ``body['evacuate'].get('host')``.

    Raises:
        exception.InstanceInvalidState: if the instance is not in the ERROR
            vm_state.
    """
    # State check: the instance must be in ERROR.
    # NOTE(review): `instance` is not defined in this excerpt; presumably it
    # is loaded from `id` before this point -- confirm.
    if instance.vm_state != vm_states.ERROR:
        raise exception.InstanceInvalidState(...)
    
    # Destination host (optional; None when the caller did not give one).
    host = body['evacuate'].get('host')
    
    # Delegate to the compute API.
    self.compute_api.evacuate(req.environ['nova.context'], instance, host)

11.2 Compute API层

nova/compute/api.py

def evacuate(self, context, instance, host):
    """Start an evacuation of ``instance`` off its failed source host.

    Raises:
        exception.ComputeHostNotDown: if the instance's current host is
            still in service.
    """
    # Evacuation only makes sense when the source host is down, so reject
    # the request while that host is still in service. The check used to be
    # negated, which raised "not down" precisely when the host WAS down.
    # NOTE(review): assumes host_in_service() returns True while the host is
    # up -- confirm against its implementation.
    source_host = instance.host
    if self.service_api.host_in_service(context, source_host):
        raise exception.ComputeHostNotDown(host=source_host)

    # Create the migration record.
    migration = objects.Migration(
        context,
        instance_uuid=instance.uuid,
        migration_type='evacuation',
        status='migrating')
    migration.create()

    # Start the evacuation through the conductor.
    self.conductor_api.evacuate_instance(context, instance, host)

11.3 目标节点执行

nova/compute/manager.py

def rebuild_instance(self, context, instance, migration):
    """Rebuild an evacuated instance on this (destination) node.

    ``migration`` is not used in this excerpt.

    Raises:
        exception.EvacuateNotSupported: if the driver does not report
            shared storage.
    """
    # Shared storage is required.
    if not self.driver.shared_storage:
        raise exception.EvacuateNotSupported(...)
    
    # Rebuild the instance through the virt driver.
    # NOTE(review): `image_meta`, `network_info` and `block_device_info` are
    # not defined in this excerpt -- confirm where they come from.
    self.driver.spawn(context, instance, image_meta, 
                      network_info, block_device_info)
    
    # The instance now lives on this host and is ACTIVE.
    instance.host = self.host
    instance.vm_state = vm_states.ACTIVE
    instance.save()

12. 实例快照(Create Snapshot)

12.1 API入口层

nova/api/openstack/compute/servers.py

def _action_create_image(self, req, id, body):
    """Handle the ``createImage`` (snapshot) action at the REST API layer.

    Returns:
        A ``webob.Response`` whose location is derived from the new image ID.

    Raises:
        exception.InstanceInvalidState: if the instance is not ACTIVE,
            STOPPED, PAUSED or SUSPENDED.
    """
    # Extract the request parameters; metadata defaults to an empty dict.
    name = body['createImage']['name']
    metadata = body['createImage'].get('metadata', {})
    
    # State check.
    # NOTE(review): `context` is not defined in this excerpt; presumably it
    # is taken from the request before this point -- confirm.
    instance = self._get_server(context, req, id)
    if instance.vm_state not in (vm_states.ACTIVE, vm_states.STOPPED, 
                                 vm_states.PAUSED, vm_states.SUSPENDED):
        raise exception.InstanceInvalidState(...)
    
    # Delegate to the compute API; the metadata becomes image properties.
    image_id = self.compute_api.snapshot(context, instance, name, 
                                         extra_properties=metadata)
    
    # Build the response pointing at the new image.
    return webob.Response(location=self._get_image_location(req, image_id))

12.2 Compute API层

nova/compute/api.py

def snapshot(self, context, instance, name, extra_properties=None):
    """Create the image record for a snapshot and start the snapshot.

    Returns:
        The ID of the newly created image record.
    """
    # Quota check for one additional image.
    self._check_image_quota(context, 1)

    # Create the Glance image record.
    image_spec = {
        'name': name,
        'status': 'creating',
        'container_format': 'bare',
        'disk_format': 'raw',
        'properties': extra_properties or {},
    }
    created = self.image_api.create(context, image_spec)
    new_image_id = created['id']

    # Hand the actual snapshot work to the conductor.
    self.conductor_api.snapshot_instance(context, instance, new_image_id)

    return new_image_id

12.3 Conductor层

nova/conductor/manager.py

def snapshot_instance(self, context, instance, image_id):
    """Mark the instance as snapshotting and dispatch to the compute node."""
    # Record the task state on the instance.
    snapshotting = task_states.IMAGE_SNAPSHOT
    instance.task_state = snapshotting
    instance.save()

    # RPC to the compute node that will take the snapshot.
    compute_rpc = self.compute_rpcapi
    compute_rpc.snapshot_instance(context, instance, image_id)

12.4 Compute Manager层

nova/compute/manager.py

def snapshot_instance(self, context, instance, image_id):
    """Take the snapshot on the compute node and update the image status.

    On success the image is marked 'active'. On any exception it is marked
    'killed' and the exception is re-raised. In both cases the instance's
    task_state is cleared afterwards.
    """
    try:
        # Have the virt driver create the snapshot.
        # NOTE(review): `network_info` is not defined in this excerpt, and
        # the fourth argument here does not obviously match the driver's
        # `update_task_state` parameter -- confirm.
        self.driver.snapshot(context, instance, image_id, 
                             self._legacy_nw_info(network_info))
        
        # Mark the image as usable.
        self.image_api.update(context, image_id, {'status': 'active'})
        
    except Exception:
        # Failure: mark the image as killed, then propagate the error.
        self.image_api.update(context, image_id, {'status': 'killed'})
        raise
    finally:
        # Always clear the instance's task state.
        instance.task_state = None
        instance.save()

12.5 Hypervisor驱动层 (Libvirt)

nova/virt/libvirt/driver.py

def snapshot(self, context, instance, image_id, update_task_state):
    """Convert the instance disk into a snapshot file and upload it.

    ``update_task_state`` is accepted but not used in this excerpt.
    """
    # Path of the instance's disk.
    source_disk = self._get_disk_path(instance)

    # Work inside a temporary directory.
    with utils.tempdir() as workdir:
        target = os.path.join(workdir, 'snapshot')

        # Write the snapshot metadata, then convert the disk.
        self._create_snapshot_metadata(instance, target)
        self._qemu_img_convert(source_disk, target)

        # Upload the result to Glance.
        with open(target, 'rb') as payload:
            self._glance_client.call(
                context, 'upload', image_id, {}, payload)

13. 快照恢复(Restore from Snapshot)

13.1 API入口层

nova/api/openstack/compute/servers.py

def create(self, req, body):
    """Create a server, looking up the snapshot first when one is referenced.

    Returns:
        Whatever ``self._create_instance(req, body)`` returns.
    """
    # Parse the request.
    server_dict = body['server']
    image_ref = server_dict.get('imageRef')
    
    # Is the image reference a snapshot ID (prefixed with 'snapshot-')?
    if image_ref and image_ref.startswith('snapshot-'):
        snapshot_id = image_ref.replace('snapshot-', '')
        # Look the snapshot up to verify it exists; the result is discarded.
        # NOTE(review): `context` is not defined in this excerpt, and this
        # presumably relies on get() raising for a missing image -- confirm.
        self.image_api.get(context, snapshot_id)
    
    # Create the instance (same flow as a regular instance create).
    return self._create_instance(req, body)

13.2 Compute API层

nova/compute/api.py

def create(self, context, instance_type, image_href, **kwargs):
    """Resolve the boot image (snapshot or regular) and provision the instance.

    Args:
        context: request context.
        instance_type: the flavor to boot with.
        image_href: image reference; a leading ``'snapshot-'`` marks a
            snapshot and is stripped before the lookup. May be empty/None.
        **kwargs: accepted for interface compatibility; unused here.
    """
    snapshot_prefix = 'snapshot-'

    # Always bind image_meta: it used to be assigned only on the snapshot
    # path, so any other image_href hit an unbound local further down.
    image_meta = {}
    if image_href:
        if image_href.startswith(snapshot_prefix):
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence inside the ID.
            image_href = image_href[len(snapshot_prefix):]
        image_meta = self.image_api.get(context, image_href)
        image_href = image_meta['id']  # normalize to the Glance image ID

    # The rest of the flow is the same as a regular instance create.
    self._provision_instance(context, instance_type, image_meta, ...)

13.3 Compute Manager层

nova/compute/manager.py

def _build_and_run_instance(self, context, instance, *args, **kwargs):
    """Fetch the image metadata and spawn the instance.

    When the image's properties mark it as a snapshot
    (``image_type == 'snapshot'``), the snapshot-specific handling runs
    before the normal spawn.

    ``*args`` / ``**kwargs`` replace the ``...`` placeholder in the parameter
    list, which is not valid Python; they are accepted and ignored here.
    """
    # Fetch the image metadata for the instance's image reference.
    image_meta = self._get_image_metadata(context, instance.image_ref)

    # Extra handling when booting from a snapshot.
    image_type = image_meta.get('properties', {}).get('image_type')
    if image_type == 'snapshot':
        self._handle_snapshot_boot(context, instance, image_meta)

    # Normal boot flow (remaining spawn arguments are elided in this excerpt).
    self.driver.spawn(context, instance, image_meta, ...)

13.4 快照启动处理

nova/compute/manager.py

def _handle_snapshot_boot(self, context, instance, image_meta):
    """Copy settings from the snapshot's original instance, if recorded.

    Does nothing when the image properties carry no ``instance_uuid``.
    """
    # Inspect the snapshot's metadata for the originating instance.
    source_uuid = image_meta.get('properties', {}).get('instance_uuid')
    if not source_uuid:
        return

    # Restore instance-specific configuration from the original instance.
    source = objects.Instance.get_by_uuid(context, source_uuid)
    instance.key_name = source.key_name
    instance.security_groups = source.security_groups
    instance.save()

14. 删除操作(Delete)

14.1 API入口层

nova/api/openstack/compute/servers.py

def _delete(self, req, id):
    """Handle a server delete request at the REST API layer.

    Raises:
        exception.InstanceNotFound: if the instance's vm_state is already
            DELETED.
    """
    # Load the instance.
    # NOTE(review): `context` is not defined in this excerpt; presumably it
    # is taken from the request before this point -- confirm.
    instance = self._get_server(context, req, id)
    
    # An already-deleted instance is reported as not found.
    if instance.vm_state == vm_states.DELETED:
        raise exception.InstanceNotFound(...)
    
    # Delegate to the compute API.
    self.compute_api.delete(req.environ['nova.context'], instance)

14.2 Compute API层

nova/compute/api.py

def delete(self, context, instance):
    """Delete an instance, softly when a reclaim interval is configured."""
    # Policy check for the delete action.
    self._check_policy(context, 'delete', instance)

    # A positive reclaim interval selects soft delete; otherwise hard delete.
    use_soft_delete = CONF.reclaim_instance_interval > 0
    remover = self.soft_delete if use_soft_delete else self._delete
    remover(context, instance)

14.3 软删除实现

nova/compute/api.py

def soft_delete(self, context, instance):
    """Mark the instance SOFT_DELETED and notify the compute node."""
    # Update the state fields in order, then persist them together.
    updates = (
        ('vm_state', vm_states.SOFT_DELETED),
        ('task_state', task_states.DELETING),
        ('deleted_at', timeutils.utcnow()),
    )
    for field, value in updates:
        setattr(instance, field, value)
    instance.save()

    # RPC to the compute node.
    self.compute_rpcapi.soft_delete_instance(context, instance)

14.4 硬删除实现

nova/compute/api.py

def _delete(self, context, instance):
    """Mark the instance DELETED and ask the compute node to terminate it."""
    # Update the state fields in order, then persist them together.
    updates = (
        ('vm_state', vm_states.DELETED),
        ('task_state', task_states.DELETING),
        ('deleted_at', timeutils.utcnow()),
    )
    for field, value in updates:
        setattr(instance, field, value)
    instance.save()

    # RPC to the compute node.
    self.compute_rpcapi.terminate_instance(context, instance)

14.5 计算节点执行

nova/compute/manager.py

def terminate_instance(self, context, instance, bdms):
    """Destroy the guest and release its network, volume and DB resources.

    Any exception is logged and the instance is put into the ERROR vm_state;
    the exception is not re-raised by this excerpt.
    """
    try:
        # Destroy the virtual machine.
        # NOTE(review): `network_info` is not defined in this excerpt --
        # confirm where it comes from.
        self.driver.destroy(context, instance, network_info, bdms)
        
        # Release the instance's network resources.
        self.network_api.deallocate_for_instance(context, instance)
        
        # Detach every volume-backed block device mapping.
        for bdm in bdms:
            if bdm.is_volume:
                self.volume_api.terminate_connection(context, bdm.volume_id, 
                                                     self._get_volume_connector())
                self.volume_api.detach(context, bdm.volume_id)
        
        # Remove the instance from the database.
        instance.destroy()
        
    except Exception:
        # Log the failure and leave the instance in ERROR.
        LOG.exception("终止实例失败")
        instance.vm_state = vm_states.ERROR
        instance.save()
posted @ 2025-09-15 18:03  xclic  阅读(25)  评论(0)    收藏  举报