nova scheduler unfairly favoring host - filters and weights

I have a 3-node OpenStack cluster (Mitaka on Ubuntu 16.04) where my controller node also serves as Compute Host #1; my other two hosts are Compute #2 and Compute #3. I am using Ceph (Jewel) distributed cluster pools to back Nova ephemeral storage, Cinder, Glance, and Manila, and all of that is working well. I have launched a number of large and small VMs, and about 80% of them (18) were provisioned on Host #1, while Hosts #2 and #3 have 4 VMs each. I originally suspected Ceph was the cause: in the hypervisor summary under System, Host #1's local storage total shows the combined storage of all three hosts (42 TB), while Hosts #2 and #3 only show their local (real physical) storage (14 TB).
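
For reference, the per-host resource view the scheduler works from can be inspected from the controller with the hypervisor API; a rough sketch of what I run to compare the hosts (hypervisor hostnames are mine, output trimmed to the fields I care about):

nova hypervisor-list
# free_disk_gb / disk_available_least show what the scheduler believes each host has free
nova hypervisor-show arccloud01 | grep -E 'free_disk_gb|disk_available_least|free_ram_mb|running_vms'
nova hypervisor-show arccloud02 | grep -E 'free_disk_gb|disk_available_least|free_ram_mb|running_vms'
nova hypervisor-show arccloud03 | grep -E 'free_disk_gb|disk_available_least|free_ram_mb|running_vms'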

To remedy this, I added the following to /etc/nova/nova.conf, which is the default filter list with 'DiskFilter' removed: scheduler_default_filters = RetryFilter, AvailabilityZoneFilter, RamFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter. I then restarted all of the Nova services on the controller. However, newly provisioned VMs are still being placed on Host #1. Does anyone have guidance on what I need to do to balance the allocation better?
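
Since removing DiskFilter alone did not change the placement, I am also looking at the scheduler's weighing step. Below is a minimal sketch of the [DEFAULT] options I understand (possibly incorrectly) could spread instances across the top-weighted hosts instead of always picking the single best one; the values are only illustrative, not something I have tested:

# /etc/nova/nova.conf on the controller, [DEFAULT] section
# choose randomly among the N best-weighted hosts rather than always the single best
scheduler_host_subset_size = 3
# RAM weigher: a positive multiplier spreads instances by free RAM, a negative one stacks them
ram_weight_multiplier = 1.0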

Note that my three hosts are identical: 32 CPUs, 256 GB RAM, and a 14 TB RAID array each. Thank you.

Update: Host 1:


ceph -s
    cluster 6e647506-631a-457e-a52a-f21a3866a023
     health HEALTH_OK
     monmap e1: 3 mons at {arccloud01=10.155.92.128:6789/0,arccloud02=10.155.92.129:6789/0,arccloud03=10.155.92.130:6789/0}
            election epoch 5152, quorum 0,1,2 arccloud01,arccloud02,arccloud03
      fsmap e1858: 1/1/1 up {0=arccloud01=up:active}
     osdmap e1532: 3 osds: 3 up, 3 in
            flags sortbitwise
      pgmap v1982739: 384 pgs, 6 pools, 661 GB data, 2135 kobjects
            2529 GB used, 39654 GB / 42184 GB avail
                 384 active+clean
  client io 807 kB/s wr, 0 op/s rd, 301 op/s wr


cat /etc/ceph/ceph.conf
[global]
  fsid = 6e647506-631a-457e-a52a-f21a3866a023
  mon_initial_members = arccloud01, arccloud02, arccloud03
  mon_host = 10.155.92.128,10.155.92.129,10.155.92.130
  mon_pg_warn_max_per_osd = 400
  mon_lease = 50
  mon_lease_renew_interval = 30
  mon_lease_ack_timeout = 100
  auth_cluster_required = cephx
  auth_service_required = cephx
  auth_client_required = cephx
  public_network = 10.155.92.0/22
  cluster_network = 192.168.92.0/22
[client.glanceimages]
  keyring = /etc/ceph/ceph.client.glanceimages.keyring
[client.novapool]
  keyring = /etc/ceph/ceph.client.novapool.keyring
[client.cindervolumes]
  keyring = /etc/ceph/ceph.client.cindervolumes.keyring
[client.manila]
  client_mount_uid = 0
  client_mount_gid = 0
  log_file = /opt/stack/logs/ceph-client.manila.log
  admin_socket = /opt/stack/status/stack/ceph-$name.$pid.asok
  keyring = /etc/ceph/ceph.client.manila.keyring
[mon.arccloud01]
  host = arccloud01
  mon addr = 10.155.92.128:6789
[mon.arccloud02]
  host = arccloud02
  mon addr = 10.155.92.129:6789
[mon.arccloud03]
  host = arccloud03
  mon addr = 10.155.92.130:6789
[osd.2]
  host = arccloud01
  public addr = 10.155.92.128
  cluster addr = 192.168.92.128
[osd.1]
  host = arccloud02
  public addr = 10.155.92.129
  cluster addr = 192.168.92.129
[osd.0]
  host = arccloud03
  public addr = 10.155.92.130
  cluster addr = 192.168.92.130
[mds]
  mds cache size = 500000


cat /etc/nova/nova.conf
[DEFAULT]
dhcpbridge_flagfile=/etc/nova/nova.conf
dhcpbridge=/usr/bin/nova-dhcpbridge
state_path=/var/lib/nova
lock_path=/var/lock/nova
force_dhcp_release=True
libvirt_use_virtio_for_bridges=True
verbose=True
ec2_private_dns_show_ip=True
api_paste_config=/etc/nova/api-paste.ini
enabled_apis=osapi_compute,metadata
rpc_backend = rabbit
auth_strategy = keystone
my_ip = 10.155.92.128
use_neutron = True
firewall_driver = nova.virt.firewall.NoopFirewallDriver
linuxnet_interface_driver = nova.network.linux_net.NeutronLinuxBridgeInterfaceDriver
instance_usage_audit = True
instance_usage_audit_period = hour
notify_on_state_change = vm_and_task_state
notification_driver = messagingv2
scheduler_default_filters = RetryFilter, AvailabilityZoneFilter, RamFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter
[api_database]
connection = mysql+pymysql://nova:xxxx@controller/nova_api
[database]
connection = mysql+pymysql://nova:xxxx@controller/nova
[oslo_messaging_rabbit]
rabbit_host = controller
rabbit_userid = openstack
rabbit_password = xxxx
[keystone_authtoken]
auth_uri = http://controller:5000
auth_url = http://controller:35357
memcached_servers = controller:11211
auth_type = password
project_domain_name = default
user_domain_name = default
project_name = service
username = nova
password = xxxx
[vnc]
enabled = True
vncserver_listen = 0.0.0.0
vncserver_proxyclient_address = $my_ip
novncproxy_base_url = http://10.155.92.128:6080/vnc_auto.html
[glance]
api_servers = http://192.168.92.128:9292
[oslo_concurrency]
lock_path = /var/lib/nova/tmp
[libvirt]
images_rbd_pool=novapool
images_type=rbd
rbd_secret_uuid=xxxxxxxxx
rbd_user=novapool
[neutron]
url = http://controller:9696
auth_url = http://controller:35357
auth_type = password
project_domain_name = default
user_domain_name = default
region_name = RegionOne
project_name = service
username = neutron
password = xxxx
service_metadata_proxy = True
metadata_proxy_shared_secret = xxxxxxx
[cinder]
os_region_name = RegionOne

From Host 2 and Host 3 (they are identical):


cat /etc/nova/nova.conf
[DEFAULT]
dhcpbridge_flagfile=/etc/nova/nova.conf
dhcpbridge=/usr/bin/nova-dhcpbridge
state_path=/var/lib/nova
lock_path=/var/lock/nova
force_dhcp_release=True
libvirt_use_virtio_for_bridges=True
verbose=True
ec2_private_dns_show_ip=True
api_paste_config=/etc/nova/api-paste.ini
enabled_apis=osapi_compute,metadata
rpc_backend = rabbit
auth_strategy = keystone
my_ip = 10.155.92.129
use_neutron = True
firewall_driver = nova.virt.firewall.NoopFirewallDriver
linuxnet_interface_driver = nova.network.linux_net.NeutronLinuxBridgeInterfaceDriver
instance_usage_audit = True
instance_usage_audit_period = hour
notify_on_state_change = vm_and_task_state
notification_driver = messagingv2
[oslo_messaging_rabbit]
rabbit_host = controller
rabbit_userid = openstack
rabbit_password = xxxx
[keystone_authtoken]
auth_uri = http://controller:5000
auth_url = http://controller:35357
memcached_servers = controller:11211
auth_type = password
project_domain_name = default
user_domain_name = default
project_name = service
username = nova
password = xxxx
[vnc]
enabled = True
vncserver_listen = 0.0.0.0
vncserver_proxyclient_address = $my_ip
novncproxy_base_url = http://10.155.92.128:6080/vnc_auto.html
[glance]
api_servers = http://192.168.92.128:9292
[oslo_concurrency]
lock_path = /var/lib/nova/tmp
[neutron]
url = http://controller:9696
auth_url = http://controller:35357
auth_type = password
project_domain_name = default
user_domain_name = default
region_name = RegionOne
project_name = service
username = neutron
password = xxxx