Analysis of the OpenStack VM Migration Workflow
In OpenStack, virtual machine migration roughly falls into three types: cold migration, live migration, and evacuation (failure migration).
1. Cold Migration
How it works: a new virtual machine is created on the destination node using the same resources the original one required.
Cold migration flowchart:
A more detailed process diagram:
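Before walking through the code, here is a minimal client-side sketch of how a cold migration is triggered, using python-novaclient; the endpoint, credentials and UUID below are placeholders, not values from any real deployment:

from keystoneauth1 import loading, session
from novaclient import client

# Placeholder credentials, for illustration only.
loader = loading.get_plugin_loader('password')
auth = loader.load_from_options(auth_url='http://controller:5000/v3',
                                username='admin', password='secret',
                                project_name='admin',
                                user_domain_name='Default',
                                project_domain_name='Default')
nova = client.Client('2.1', session=session.Session(auth=auth))

server = nova.servers.get('SERVER_UUID')  # placeholder instance UUID
server.migrate()  # admin-only action; enters the cold-migration path below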
After a cold migration of an instance is initiated, the first function reached is _migrate in nova/api/openstack/compute/migrate_server.py:
@wsgi.response(202)
@extensions.expected_errors((400, 403, 404, 409))
@wsgi.action('migrate')
def _migrate(self, req, id, body):
    """Permit admins to migrate a server to a new host."""
    context = req.environ['nova.context']
    context.can(ms_policies.POLICY_ROOT % 'migrate')
    host = body["migrate"]["host"]
    instance = common.get_instance(self.compute_api, context, id)
    try:
        self.compute_api.resize(req.environ['nova.context'], instance, host=host)
    ........
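For reference, the handler above is bound to the server 'migrate' action, so the request that reaches it is a POST to the instance's action URL. A hedged illustration with the requests library (URL, token and host are placeholders; depending on the nova version, passing a destination host in the body may additionally require a recent API microversion header):

import requests

# Placeholder endpoint, instance UUID and token.
url = 'http://controller:8774/v2.1/servers/SERVER_UUID/action'
headers = {'X-Auth-Token': 'ADMIN_TOKEN',
           'Content-Type': 'application/json'}

# The handler reads body["migrate"]["host"], so the action body can
# carry an optional destination host.
resp = requests.post(url, json={'migrate': {'host': 'dest-host'}},
                     headers=headers)
print(resp.status_code)  # 202 on success, per @wsgi.response(202)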
The key line here is the call to resize. OpenStack also has a resize feature of its own, which upgrades an instance's flavor (it can only scale up); cold migration follows exactly the same workflow as resize, except that the flavor does not change. The implementation is in nova/compute/api.py:
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
           host=None,
           **extra_instance_updates):
    # Check whether a new flavor was requested; for a real resize this
    # reserves quota, updates the instance state, records the migration
    # and builds the RequestSpec the destination host must satisfy
    ........
    self.compute_task_api.resize_instance(context, instance,
        extra_instance_updates, scheduler_hint=scheduler_hint,
        flavor=new_instance_type,
        reservations=quotas.reservations or [],
        clean_shutdown=clean_shutdown,
        request_spec=request_spec,
        host=host)
The key call here is resize_instance, implemented in nova/conductor/api.py, which in turn calls migrate_server in nova/conductor/rpcapi.py:
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
                   flavor, block_migration, disk_over_commit,
                   reservations=None, clean_shutdown=True, request_spec=None, host=None):
    # Build the kw arguments according to the negotiated RPC version
    return cctxt.call(context, 'migrate_server', **kw)
This makes a synchronous RPC call (cctxt.call) into the conductor process's migrate_server function, which lands in nova/conductor/manager.py:
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
                   flavor, block_migration, disk_over_commit, reservations=None,
                   clean_shutdown=True, request_spec=None, host=None):
    # A few checks that decide between cold and live migration
    if not live and not rebuild and flavor:
        # Cold migration takes this branch
        instance_uuid = instance.uuid
        with compute_utils.EventReporter(context, 'cold_migrate',
                                         instance_uuid):
            self._cold_migrate(context, instance, flavor,
                               scheduler_hint['filter_properties'],
                               reservations, clean_shutdown, request_spec,
                               host=host)
    else:
        raise NotImplementedError()
The core is the call to _cold_migrate:
@wrap_instance_event(prefix='conductor')
def _cold_migrate(self, context, instance, flavor, filter_properties,
                  reservations, clean_shutdown, request_spec, host=None):
    image = utils.get_image_from_system_metadata(
        instance.system_metadata)
    task = self._build_cold_migrate_task(context, instance, flavor,
                                         request_spec,
                                         reservations, clean_shutdown,
                                         host=host)
It builds the migration task and then executes it:
def _build_cold_migrate_task(self, context, instance, flavor,
                             request_spec, reservations,
                             clean_shutdown, host=None):
    # nova/conductor/tasks/migrate.py
    return migrate.MigrationTask(context, instance, flavor,
                                 request_spec,
                                 reservations, clean_shutdown,
                                 self.compute_rpcapi,
                                 self.scheduler_client,
                                 host=host)
This returns a MigrationTask instance. The execute method of the TaskBase base class it inherits calls _execute, so we can look directly at MigrationTask's _execute implementation:
def _execute(self):
    # Ask the scheduler to pick a destination host
    ........
    self.compute_rpcapi.prep_resize(
        self.context, self.instance, legacy_spec['image'],
        self.flavor, host, self.reservations,
        request_spec=legacy_spec, filter_properties=legacy_props,
        node=node, clean_shutdown=self.clean_shutdown)
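As an aside, the execute/_execute split is the template method pattern: the base class fixes the skeleton and each task only fills in _execute. A simplified sketch of the idea (not nova's verbatim nova/conductor/tasks/base.py):

class TaskBase(object):
    # Skeleton: run the task body, roll back if it raises.
    def execute(self):
        try:
            return self._execute()
        except Exception:
            self.rollback()
            raise

    def _execute(self):
        # Each concrete task (e.g. MigrationTask) overrides this.
        raise NotImplementedError()

    def rollback(self):
        # Default: nothing to undo.
        pass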
This calls prep_resize in nova/compute/rpcapi.py:
def prep_resize(self, ctxt, instance, image, instance_type, host,
                reservations=None, request_spec=None,
                filter_properties=None, node=None,
                clean_shutdown=True):
    image_p = jsonutils.to_primitive(image)
    msg_args = {'instance': instance,
                'instance_type': instance_type,
                'image': image_p,
                'reservations': reservations,
                'request_spec': request_spec,
                'filter_properties': filter_properties,
                'node': node,
                'clean_shutdown': clean_shutdown}
    version = '4.1'
    client = self.router.by_host(ctxt, host)
    if not client.can_send_version(version):
        version = '4.0'
        msg_args['instance_type'] = objects_base.obj_to_primitive(
            instance_type)
    cctxt = client.prepare(server=host, version=version)
    # Asynchronously cast to the destination host so it prepares the
    # resources for the instance about to be migrated in
    cctxt.cast(ctxt, 'prep_resize', **msg_args)
This cast is picked up by prep_resize in nova/compute/manager.py, whose core is a call to _prep_resize:
def _prep_resize(self, context, image, instance, instance_type,
                 quotas, request_spec, filter_properties, node,
                 clean_shutdown=True):
    .........
    rt = self._get_resource_tracker()
    # Check and reserve the needed resources here, and persist the
    # host's updated resource usage to the database
    with rt.resize_claim(context, instance, instance_type, node,
                         image_meta=image, limits=limits) as claim:
        LOG.info(_LI('Migrating'), instance=instance)
        self.compute_rpcapi.resize_instance(
            context, instance, claim.migration, image,
            instance_type, quotas.reservations,
            clean_shutdown)
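The resize_claim call above returns a claim object used as a context manager: resources are reserved on entry and dropped again if the guarded block fails. A minimal sketch of that pattern (illustrative only, with invented tracker methods, not nova's actual ResourceTracker code):

class Claim(object):
    """Reserve resources on entry; release them if the block raises."""
    def __init__(self, tracker, amount):
        self.tracker = tracker
        self.amount = amount

    def __enter__(self):
        self.tracker.reserve(self.amount)  # raises if resources are short
        return self

    def __exit__(self, exc_type, exc, tb):
        if exc_type is not None:
            self.tracker.release(self.amount)  # undo the reservation
        return False  # never swallow the exception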
With the resources claimed, resize_instance in nova/compute/rpcapi.py is called:
def resize_instance(self, ctxt, instance, migration, image, instance_type,
                    reservations=None, clean_shutdown=True):
    msg_args = {'instance': instance, 'migration': migration,
                'image': image, 'reservations': reservations,
                'instance_type': instance_type,
                'clean_shutdown': clean_shutdown,
                }
    version = '4.1'
    client = self.router.by_instance(ctxt, instance)
    if not client.can_send_version(version):
        msg_args['instance_type'] = objects_base.obj_to_primitive(
            instance_type)
        version = '4.0'
    cctxt = client.prepare(server=_compute_host(None, instance),
                           version=version)
    # Cast to resize_instance in nova/compute/manager.py on the source host
    cctxt.cast(ctxt, 'resize_instance', **msg_args)
Inside resize_instance on the source host (nova/compute/manager.py):
def resize_instance(self, context, instance, image,
                    reservations, migration, instance_type,
                    clean_shutdown):
    # Fetch the network interfaces of the instance to be migrated
    # Update the instance state in the database
    # Send the migration notifications
    # Power the instance off and migrate its disks
    disk_info = self.driver.migrate_disk_and_power_off(
        context, instance, migration.dest_host,
        instance_type, network_info,
        block_device_info,
        timeout, retry_interval)
    # Start migrating the instance's network
    self.network_api.migrate_instance_start(context,
                                            instance,
                                            migration_p)
    self.compute_rpcapi.finish_resize(context, instance,
                                      migration, image, disk_info,
                                      migration.dest_compute, quotas.reservations)
This remotely calls finish_resize on the destination host:
def finish_resize(self, context, disk_info, image, instance,
                  reservations, migration):
    # Commit the quota reservations
    .....
    self._finish_resize(context, instance, migration,
                        disk_info, image_meta)

def _finish_resize(self, context, instance, migration, disk_info,
                   image_meta):
    # Set up networking on the destination host
    self.network_api.setup_networks_on_host(context, instance,
                                            migration['dest_compute'])
    migration_p = obj_base.obj_to_primitive(migration)
    self.network_api.migrate_instance_finish(context,
                                             instance,
                                             migration_p)
    # Fetch the instance's current network info
    network_info = self.network_api.get_instance_nw_info(context, instance)
    # Update the instance state in the database
    instance.task_state = task_states.RESIZE_FINISH
    instance.save(expected_task_state=task_states.RESIZE_MIGRATED)
    # nova/virt/libvirt/driver.py
    self.driver.finish_migration(context, migration, instance,
                                 disk_info,
                                 network_info,
                                 image_meta, resize_instance,
                                 block_device_info, power_on)
Finally, a confirm resize still has to be triggered to complete the whole cold migration; this step confirms the deletion of the instance's data, network, etc. on the source host. The API function lives in nova/api/openstack/compute/servers.py:
@wsgi.action('confirmResize')
def _action_confirm_resize(self, req, id, body):
    self.compute_api.confirm_resize(context, instance)

def confirm_resize(self, context, instance, migration=None):
    """Confirms a migration/resize and deletes the 'old' instance."""
    # Update the migration status and the quota
    ......
    self.compute_rpcapi.confirm_resize(context,
                                       instance,
                                       migration,
                                       migration.source_compute,
The RPC lands in confirm_resize on the source host; the core of that function is the call to _confirm_resize:
def _confirm_resize(self, context, instance, quotas,
                    migration=None):
    """Destroys the source instance."""
    self._notify_about_instance_usage(context, instance,
                                      "resize.confirm.start")
    # NOTE(tr3buchet): tear down networks on source host
    # Tear down the network
    self.network_api.setup_networks_on_host(context, instance,
                                            migration.source_compute, teardown=True)
    network_info = self.network_api.get_instance_nw_info(context,
                                                         instance)
    # TODO(mriedem): Get BDMs here and pass them to the driver.
    # Destroy the source VM
    self.driver.confirm_migration(context, migration, instance,
                                  network_info)
    # Update the migration status
    migration.status = 'confirmed'
    with migration.obj_as_admin():
        migration.save()
    # Update the resource usage
    rt = self._get_resource_tracker()
    rt.drop_move_claim(context, instance, migration.source_node,
                       old_instance_type, prefix='old_')
    instance.drop_migration_context()
A summary of the cold migration process (a client-side sketch of the whole flow follows the list):
(1) nova-api receives the cold-migration request, checks permissions and quota, fetches the instance information, and sends the migration request to nova-conductor over the message queue.
(2) nova-conductor asks nova-scheduler, via the message queue, to select a destination host for the migration.
(3) Once a destination host is chosen, nova-conductor asks the nova-compute service on that host, via the message queue, to prepare resources.
(4) The destination host prepares resources, e.g. running the claim mechanism to check and pre-allocate them, and when done asks the source host, via the message queue, to prepare the instance for migration.
(5) The source host powers the instance off and detaches its network devices, disks and other resources, then asks the destination host, via the message queue, to initialize everything the instance needs, e.g. creating the virtual network devices and attaching the disks, and sets the instance state to awaiting confirmation.
(6) Finally, a confirm_resize command must be issued to delete the instance's leftover resources on the source host, e.g. the backed-up instance directory, and set the instance state back to normal.
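Putting the steps together from the client's point of view, here is a hedged end-to-end sketch with python-novaclient, reusing the placeholder nova client from the earlier example; the instance sits in VERIFY_RESIZE until the confirmation in step (6):

import time

server = nova.servers.get('SERVER_UUID')  # placeholder UUID
server.migrate()                          # kicks off steps (1)-(5)

# Poll until the instance waits to be confirmed.
while server.status != 'VERIFY_RESIZE':
    time.sleep(5)
    server = nova.servers.get(server.id)

server.confirm_resize()                   # step (6): clean up the source host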
2. Live Migration
How it works: live migration follows much the same workflow as cold migration, but because the instance is migrated while running, many more compatibility checks are performed, e.g. CPU compatibility between the two hosts. Live migration is not entirely free of interruption: at the very end, the VM is briefly suspended so the last round of memory copying can finish quickly.
Two key factors determine whether a live migration converges:
(1) the rate at which the VM dirties memory pages, since the iterative copy works page by page;
(2) the network bandwidth: if pages are dirtied far faster than memory pages can be copied over, the migration will not finish within any bounded time.
libvirtd's data migration logic (see the simulation sketch after the exit-condition list below):
(1) mark all memory as dirty;
(2) transfer all the dirty memory, then recompute the newly dirtied pages, and iterate until some exit condition is met;
(3) pause the VM and transfer the remaining data.
The exit condition in step (2) can be, for example:
(1) 50% or less of the memory still needs to be migrated;
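A minimal simulation of the loop above (all numbers are invented, and real libvirt/QEMU logic also honors downtime targets, bandwidth caps and iteration limits). It makes both key factors concrete: the loop only converges while the dirty-page rate stays below the available bandwidth:

# Simplified pre-copy loop: iterate until the dirty set is small enough,
# then pause the guest and flush the remainder.
def live_migrate(ram_mb, bandwidth_mbps, dirty_rate_mbps,
                 stop_threshold_mb=100, max_iters=30):
    dirty = ram_mb                        # step (1): all memory starts dirty
    for _ in range(max_iters):            # step (2): iterative copy rounds
        if dirty <= stop_threshold_mb:    # an exit condition is met
            break
        copy_time = dirty / bandwidth_mbps
        dirty = dirty_rate_mbps * copy_time  # pages dirtied during the copy
    return dirty / bandwidth_mbps         # step (3): pause and flush the rest

print(live_migrate(8192, 1000, 300))    # converges: short final pause
print(live_migrate(8192, 1000, 1500))   # diverges: forced long pause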