Verified Commit 81148dba authored by Löscher Mario's avatar Löscher Mario
Browse files

implemented kind of a cheapest solution.

parent 6c7a7a27
#!/usr/bin/env python3
from typing import List
from parser import parse_tasks, parse_machine_prices
from parser import parse_tasks, parse_machine_prices, parse_durations_on_machines
from preprocessing import parallel_width_assuming_mapreduce, best_performance_under_limit
......@@ -8,12 +8,13 @@ def main(limit = 6):
# parse
tasks = parse_tasks()
prices = parse_machine_prices()
durations = parse_durations_on_machines()
# preprocess
parallel_width_estimate = parallel_width_assuming_mapreduce(tasks)
# print(parallel_width_estimate)
cost, machine_count = best_performance_under_limit(parallel_width_estimate, prices, limit)
print(cost, machine_count)
cost, machine_count = best_performance_under_limit(parallel_width_estimate, prices, durations, limit)
print(str(cost)+"$", "Machines: " + str(machine_count))
if __name__ == "__main__":
main()
......@@ -14,7 +14,7 @@ def parse_machine_prices(fname: str = "price.csv") -> List[float]:
return [float(line.split(',')[1]) for line in lines]
def parse_durations_on_machines(fname: str) -> List[List[float]]:
def parse_durations_on_machines(fname: str = "task_time_instance.csv") -> List[List[float]]:
result = []
with open(fname, "r") as f:
f.readline() # ignore header
......
#!/usr/bin/env python3
from typing import List, Tuple
from taskgraph import *
import math
def parallel_width_assuming_mapreduce(taskgraph: List[Task]) -> int:
......@@ -23,14 +24,63 @@ def parallel_width_assuming_mapreduce(taskgraph: List[Task]) -> int:
return max_width
def cost_per_process_and_machine(
        durations: List[List[float]],
        costs: List[float]) -> List[List[float]]:
    """Return the cost of running every process on every machine type.

    Args:
        durations: One row per process; ``durations[p][m]`` is the runtime
            of process ``p`` on machine type ``m``. Presumably seconds,
            given the division by 3600 below -- TODO confirm with the parser.
        costs: Price of each machine type, presumably per hour (same
            assumption as above).

    Returns:
        One row per process, with one entry per machine type in ``costs``;
        entry ``[p][m]`` equals ``costs[m] * durations[p][m] / 3600``.

    Raises:
        IndexError: If a row of ``durations`` has fewer entries than
            ``costs``.
    """
    # Index the row explicitly (instead of zip) so that a row that is too
    # short still raises rather than being silently truncated.
    return [
        [price * process[machine] / 3600
         for machine, price in enumerate(costs)]
        for process in durations
    ]
def get_n_best_price_per_process(process: List[float], n=1):
    """Return the index of the ``n``-th smallest value in ``process``.

    ``n=1`` selects the smallest value, ``n=2`` the second smallest, and so
    on. The input list is left untouched, so the returned index always
    refers to the list exactly as the caller passed it.

    Args:
        process: Values for a single process, one entry per machine.
        n: 1-based rank of the value to locate.

    Returns:
        The position in ``process`` of the ``n``-th smallest value. When
        several entries are equal, the one with the higher index ranks
        first.

    Raises:
        ValueError: If ``n`` is smaller than 1.
        IndexError: If ``process`` holds fewer than ``n`` entries.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    if n > len(process):
        raise IndexError("process has fewer than n entries")
    # Rank indices by value. Ties prefer the higher index so that input whose
    # best value sits at the end of the list keeps yielding the last index.
    ranked = sorted(range(len(process)), key=lambda i: (process[i], -i))
    return ranked[n - 1]
def calculate_batch_duration(processes: List[int], durations):
    """Sum the durations of several processes, machine by machine.

    Args:
        processes: Row indices into ``durations`` that form one batch.
        durations: Matrix with one row per process; the number of columns
            is taken from its first row.

    Returns:
        A list with one entry per column of ``durations[0]``; each entry is
        the sum of that column over the rows named in ``processes``.
    """
    def column_total(column):
        # Plain left-to-right accumulation starting at 0.
        total = 0
        for task in processes:
            total += durations[task][column]
        return total

    return [column_total(column) for column in range(len(durations[0]))]
def best_performance_under_limit(
max_width: int,
prices: List[float],
durations: List[float],
limit: float,
strategy: str = "Biggest") -> Tuple[float, List[int]]:
strategy: str = "Cheapest") -> Tuple[float, List[int]]:
current_solution = [0 for machine in prices]
current_price = 0.0
operational_costs = cost_per_process_and_machine(durations, prices)
# TODO: this assumes more expensive is better and simply fills up with the largest possible
# TODO: it also completely ignores that we may need the machines for MORE than one hour
if strategy == "Biggest":
......@@ -40,4 +90,82 @@ def best_performance_under_limit(
current_price += price
current_solution[-machine - 1] += 1
#
if strategy == "Cheapest":
# information extrapolated by drawing a dot graph, keep strongly tied functions together
batch_durations = []
batch_durations.append(calculate_batch_duration([0, 7], durations))
batch_durations.append(calculate_batch_duration([1, 8], durations))
batch_durations.append(calculate_batch_duration([2, 9], durations))
batch_durations.append(calculate_batch_duration([3, 10], durations))
batch_durations.append(calculate_batch_duration([4, 11], durations))
batch_durations.append(calculate_batch_duration([5, 12], durations))
batch_durations.append(calculate_batch_duration([6, 13], durations))
batch_durations.append(calculate_batch_duration([14], durations))
batch_durations.append(calculate_batch_duration([15, 22], durations))
batch_durations.append(calculate_batch_duration([16, 23], durations))
batch_durations.append(calculate_batch_duration([17, 24], durations))
batch_durations.append(calculate_batch_duration([18, 25], durations))
batch_durations.append(calculate_batch_duration([19, 26], durations))
batch_durations.append(calculate_batch_duration([20, 27], durations))
batch_durations.append(calculate_batch_duration([21, 28], durations))
batch_durations.append(calculate_batch_duration([29], durations))
batch_costs = cost_per_process_and_machine(batch_durations, prices)
solutions_hours = []
solutions_machine = []
#calculate some solutions
for i in range(1,5):
runtime = 0 # in seconds
machines = []
index = 0
for process in batch_durations:
machines.append(get_n_best_price_per_process(process, i))
#for process in batch_durations:
bestprice = machines[index]
index += 1
runtime += process[bestprice]
solutions_hours.append(runtime)
solutions_machine.append(machines[0])
machineHours = math.ceil(runtime / 3600)
# print(str(runtime) + " seconds")
# print(machines)
# print("Machine hours: " + str(machineHours))
# print("Costs: " + str(machineHours * prices[machines[0]]))
# test if we can reduce costs or runtime (without network traffic)
index = 0
max_price = -1
max_price_position = -1
for solution in solutions_hours:
if (max_price < math.ceil(solution / 3600) * prices[solutions_machine[index]]):
max_price = math.ceil(solution / 3600) * prices[solutions_machine[index]]
max_price_position = index
index+=1
#write solution
current_price = max_price
current_solution[solutions_machine[max_price_position]] = math.ceil(solutions_hours[max_price_position]/3600 * prices[solutions_machine[max_price_position]])
index = 0
for solution in solutions_hours:
if (index == max_price_position):
index+=1
continue
local_price = math.ceil(solution / 3600) * prices[solutions_machine[index]]
factor = math.floor(max_price / local_price)
new_runtime = solution / factor
new_price = math.ceil(solution / 3600 /factor ) * prices[solutions_machine[index]] * factor
if (new_runtime < solutions_hours[max_price_position] * 0.9): # 0.9 to simulate the traffic time needed to transfer
current_price = new_price
current_solution = [0 for machine in prices]
max_price_position = index
current_solution[solutions_machine[index]] = factor * math.ceil(solutions_hours[index] / 3600)
index += 1
print ("final duration[s]: " + str(solutions_hours[max_price_position]))
# The calculation only uses homogeneous VM distribution currently
return current_price, current_solution
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment