summaryrefslogtreecommitdiff
path: root/scheduler
diff options
context:
space:
mode:
author showard <showard@592f7852-d20e-0410-864c-8624ca9c26a4> 2009-05-20 00:32:58 +0000
committer showard <showard@592f7852-d20e-0410-864c-8624ca9c26a4> 2009-05-20 00:32:58 +0000
commit 834f122d42cacebc1f5d61ed8f84b32e083dc285 (patch)
tree 13786be0c1e34d1cd83f5ff2903f2bc1d570405f /scheduler
parent 2f8a7c7422e862a8aee6a26991eb352ad144b7de (diff)
Sort hosts when choosing them for use in an atomic group and when
actually assigning pending ones to run a job.

Adds a Host.cmp_for_sort classmethod usable as a sort comparison function to sort Host objects by hostname in a sane manner.

Signed-off-by: Gregory Smith <gps@google.com>

git-svn-id: svn://test.kernel.org/autotest/trunk@3149 592f7852-d20e-0410-864c-8624ca9c26a4
Diffstat (limited to 'scheduler')
-rwxr-xr-x scheduler/monitor_db.py 71
-rw-r--r-- scheduler/monitor_db_unittest.py 38
2 files changed, 97 insertions, 12 deletions
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 6b4ea511..d5295864 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -444,7 +444,7 @@ class HostScheduler(object):
and not label.invalid)
- def _get_eligible_hosts_in_group(self, group_hosts, queue_entry):
+ def _get_eligible_host_ids_in_group(self, group_hosts, queue_entry):
"""
@param group_hosts - A sequence of Host ids to test for usability
and eligibility against the Job associated with queue_entry.
@@ -565,7 +565,7 @@ class HostScheduler(object):
# If we have a metahost label, only allow its hosts.
group_hosts.intersection_update(hosts_in_label)
group_hosts -= ineligible_host_ids
- eligible_hosts_in_group = self._get_eligible_hosts_in_group(
+ eligible_host_ids_in_group = self._get_eligible_host_ids_in_group(
group_hosts, queue_entry)
# Job.synch_count is treated as "minimum synch count" when
@@ -575,12 +575,14 @@ class HostScheduler(object):
min_hosts = job.synch_count
max_hosts = atomic_group.max_number_of_machines
- if len(eligible_hosts_in_group) < min_hosts:
+ if len(eligible_host_ids_in_group) < min_hosts:
# Not enough eligible hosts in this atomic group label.
continue
+ eligible_hosts_in_group = [self._hosts_available[id]
+ for id in eligible_host_ids_in_group]
# So that they show up in a sane order when viewing the job.
- eligible_hosts_in_group = sorted(eligible_hosts_in_group)
+ eligible_hosts_in_group.sort(cmp=Host.cmp_for_sort)
# Limit ourselves to scheduling the atomic group size.
if len(eligible_hosts_in_group) > max_hosts:
@@ -589,9 +591,10 @@ class HostScheduler(object):
# Remove the selected hosts from our cached internal state
# of available hosts in order to return the Host objects.
host_list = []
- for host_id in eligible_hosts_in_group:
- hosts_in_label.discard(host_id)
- host_list.append(self._hosts_available.pop(host_id))
+ for host in eligible_hosts_in_group:
+ hosts_in_label.discard(host.id)
+ self._hosts_available.pop(host.id)
+ host_list.append(host)
return host_list
return []
@@ -2238,6 +2241,46 @@ class Host(DBObject):
return [cleanup_task, verify_task]
+ _ALPHANUM_HOST_RE = re.compile(r'^([a-z-]+)(\d+)$', re.IGNORECASE)
+
+
+    @classmethod
+    def cmp_for_sort(cls, a, b):
+        """
+        A comparison function for sorting Host objects by hostname.
+
+        Hostnames made of a name followed by trailing digits are compared
+        by the lower-cased leading name and then by the trailing digits as
+        a number, so leading 0s do not affect the numeric order.  If either
+        hostname does not match this pattern, the two are simply compared
+        as lower case strings.
+
+        Example of how hostnames will be sorted:
+
+          alice, host1, host2, host09, host010, host10, host11, yolkfolk
+
+        This will hopefully satisfy most people's hostname sorting needs
+        regardless of their exact naming schemes.  Nobody sane should have
+        both a host10 and host010 (but the algorithm works regardless).
+
+        @param a - An object with a hostname string attribute.
+        @param b - An object with a hostname string attribute.
+
+        @returns A negative number, zero or a positive number, following
+                the cmp() convention, so this can be passed directly as
+                the cmp argument of list.sort().
+        """
+        lower_a = a.hostname.lower()
+        lower_b = b.hostname.lower()
+        match_a = cls._ALPHANUM_HOST_RE.match(lower_a)
+        match_b = cls._ALPHANUM_HOST_RE.match(lower_b)
+        if match_a and match_b:
+            name_a, number_a_str = match_a.groups()
+            name_b, number_b_str = match_b.groups()
+            # int() already ignores leading 0s in a base 10 string.  Do not
+            # strip them by hand: an all-zero suffix such as "host0" would
+            # be reduced to '' and int('') raises ValueError.
+            number_a = int(number_a_str)
+            number_b = int(number_b_str)
+            result = cmp((name_a, number_a), (name_b, number_b))
+            if result == 0 and lower_a != lower_b:
+                # If they compared equal above but the lower case names are
+                # indeed different, don't report equality. abc012 != abc12.
+                return cmp(lower_a, lower_b)
+            return result
+        else:
+            return cmp(lower_a, lower_b)
+
+
class HostQueueEntry(DBObject):
_table_name = 'host_queue_entries'
_fields = ('id', 'job_id', 'host_id', 'status', 'meta_host',
@@ -2715,7 +2758,7 @@ class Job(DBObject):
"""
@returns A tuple containing a list of HostQueueEntry instances to be
used to run this Job, a string group name to suggest giving
- to this job a results database.
+ to this job in the results database.
"""
if include_queue_entry.atomic_group_id:
atomic_group = AtomicGroup(include_queue_entry.atomic_group_id,
@@ -2732,11 +2775,15 @@ class Job(DBObject):
if num_entries_wanted > 0:
where_clause = 'job_id = %s AND status = "Pending" AND id != %s'
- pending_entries = HostQueueEntry.fetch(
+ pending_entries = list(HostQueueEntry.fetch(
where=where_clause,
- params=(self.id, include_queue_entry.id))
- # TODO(gps): sort these by hostname before slicing.
- chosen_entries += list(pending_entries)[:num_entries_wanted]
+ params=(self.id, include_queue_entry.id)))
+
+ # Sort the chosen hosts by hostname before slicing.
+ def cmp_queue_entries_by_hostname(entry_a, entry_b):
+ return Host.cmp_for_sort(entry_a.host, entry_b.host)
+ pending_entries.sort(cmp=cmp_queue_entries_by_hostname)
+ chosen_entries += pending_entries[:num_entries_wanted]
# Sanity check. We'll only ever be called if this can be met.
assert len(chosen_entries) >= self.synch_count
diff --git a/scheduler/monitor_db_unittest.py b/scheduler/monitor_db_unittest.py
index 761b5566..240eb5aa 100644
--- a/scheduler/monitor_db_unittest.py
+++ b/scheduler/monitor_db_unittest.py
@@ -1773,6 +1773,44 @@ class AgentTasksTest(unittest.TestCase):
self._test_cleanup_task_helper(False, True)
+class HostTest(BaseSchedulerTest):
+    def test_cmp_for_sort(self):
+        """Exercise Host.cmp_for_sort pairwise and through list.sort()."""
+        expected_order = [
+                'alice', 'Host1', 'host2', 'host3', 'host09', 'HOST010',
+                'host10', 'host11', 'yolkfolk']
+        field_names = list(monitor_db.Host._fields)
+        hostname_idx = field_names.index('hostname')
+        row = [None] * len(field_names)
+        hosts = []
+        for hostname in expected_order:
+            # Only the hostname column is filled in; every other field
+            # of the row stays None.
+            row[hostname_idx] = hostname
+            hosts.append(monitor_db.Host(row=row, new_record=True))
+
+        # The hostnames are unique, so each name maps to exactly one Host.
+        host_for = dict(zip(expected_order, hosts))
+        compare = monitor_db.Host.cmp_for_sort
+
+        # (expected result, left hostname, right hostname)
+        pairwise_cases = [
+                (0, 'host10', 'host10'),
+                (1, 'host10', 'HOST010'),
+                (-1, 'HOST010', 'host10'),
+                (-1, 'Host1', 'host10'),
+                (-1, 'Host1', 'HOST010'),
+                (-1, 'host3', 'host10'),
+                (-1, 'host3', 'HOST010'),
+                (1, 'host3', 'Host1'),
+                (-1, 'Host1', 'host3'),
+                (-1, 'alice', 'host3'),
+                (1, 'host3', 'alice'),
+                (0, 'alice', 'alice'),
+        ]
+        for expected, left, right in pairwise_cases:
+            self.assertEqual(expected,
+                             compare(host_for[left], host_for[right]))
+
+        # Sorting the list that is already in the expected order must
+        # leave it in that order.
+        hosts.sort(cmp=compare)
+        self.assertEqual(expected_order, [h.hostname for h in hosts])
+
+        # Sorting the reversed list must restore the expected order.
+        hosts.reverse()
+        hosts.sort(cmp=compare)
+        self.assertEqual(expected_order, [h.hostname for h in hosts])
+
+
+
+
class HostQueueEntryTest(BaseSchedulerTest):
def _create_hqe(self, dependency_labels=(), **create_job_kwargs):
job = self._create_job(**create_job_kwargs)