Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
User expired
2018s-advanced-distributed-systems
Commits
81148dba
Verified
Commit
81148dba
authored
Dec 09, 2018
by
Löscher Mario
Browse files
implemented kind of a cheapest solution.
parent
6c7a7a27
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
134 additions
and
5 deletions
+134
-5
scheduler/main.py
scheduler/main.py
+4
-3
scheduler/parser.py
scheduler/parser.py
+1
-1
scheduler/preprocessing.py
scheduler/preprocessing.py
+129
-1
No files found.
scheduler/main.py
View file @
81148dba
#!/usr/bin/env python3
from
typing
import
List
from
parser
import
parse_tasks
,
parse_machine_prices
from
parser
import
parse_tasks
,
parse_machine_prices
,
parse_durations_on_machines
from
preprocessing
import
parallel_width_assuming_mapreduce
,
best_performance_under_limit
...
...
@@ -8,12 +8,13 @@ def main(limit = 6):
# parse
tasks
=
parse_tasks
()
prices
=
parse_machine_prices
()
durations
=
parse_durations_on_machines
()
# preprocess
parallel_width_estimate
=
parallel_width_assuming_mapreduce
(
tasks
)
# print(parallel_width_estimate)
cost
,
machine_count
=
best_performance_under_limit
(
parallel_width_estimate
,
prices
,
limit
)
print
(
cost
,
machine_count
)
cost
,
machine_count
=
best_performance_under_limit
(
parallel_width_estimate
,
prices
,
durations
,
limit
)
print
(
str
(
cost
)
+
"$"
,
"Machines: "
+
str
(
machine_count
)
)
if
__name__
==
"__main__"
:
main
()
scheduler/parser.py
View file @
81148dba
...
...
@@ -14,7 +14,7 @@ def parse_machine_prices(fname: str = "price.csv") -> List[float]:
return
[
float
(
line
.
split
(
','
)[
1
])
for
line
in
lines
]
def
parse_durations_on_machines
(
fname
:
str
)
->
List
[
List
[
float
]]:
def
parse_durations_on_machines
(
fname
:
str
=
"task_time_instance.csv"
)
->
List
[
List
[
float
]]:
result
=
[]
with
open
(
fname
,
"r"
)
as
f
:
f
.
readline
()
# ignore header
...
...
scheduler/preprocessing.py
View file @
81148dba
#!/usr/bin/env python3
from
typing
import
List
,
Tuple
from
taskgraph
import
*
import
math
def
parallel_width_assuming_mapreduce
(
taskgraph
:
List
[
Task
])
->
int
:
...
...
@@ -23,14 +24,63 @@ def parallel_width_assuming_mapreduce(taskgraph: List[Task]) -> int:
return
max_width
def cost_per_process_and_machine(durations: List[float], costs: List[float]):
    """Build a cost table with one row per process and one column per machine.

    Each entry is ``costs[m] * durations[p][m] / 3600``.

    Args:
        durations: One sequence per process. Despite the annotation, every
            item is itself indexed by machine position, so it must hold at
            least ``len(costs)`` values (extra values are ignored).
        costs: Price of each machine, in the same order as the columns of
            ``durations``.  Presumably a price per hour with durations in
            seconds, given the division by 3600 -- confirm against the
            input files.

    Returns:
        A list of lists: ``result[p][m]`` is the cost of running process
        ``p`` on machine ``m``.

    Raises:
        IndexError: If a process row has fewer entries than ``costs``.
    """
    return [
        [rate * row[machine] / 3600 for machine, rate in enumerate(costs)]
        for row in durations
    ]
def get_n_best_price_per_process(process: List[float], n = 1):
    """Return the index of the n-th smallest value in ``process``.

    The previous implementation never advanced its position counter, so it
    always reported the last index, and for ``n > 1`` it deleted entries from
    the caller's list and returned an index into the shortened list.  This
    version ranks the indices instead, never mutates ``process``, and always
    returns an index that is valid for the list as passed in.

    Args:
        process: Values for a single process, one per machine.
        n: 1-based rank of the value to locate (1 = smallest).  Values below
            1 are treated as 1, as before.

    Returns:
        Index into ``process`` of the n-th smallest value.  Equal values are
        ranked with the higher index first, matching the old back-to-front
        scan.  For an empty list with ``n <= 1`` the result is ``-1``, as
        before.

    Raises:
        IndexError: If ``n`` is larger than ``len(process)``.
    """
    rank = max(n, 1)
    if not process and rank == 1:
        # Historical result for an empty list; kept for compatibility.
        return -1
    if rank > len(process):
        raise IndexError(
            "cannot select entry %d of %d values" % (rank, len(process))
        )
    # Sort positions by value; on ties prefer the later position, which is
    # what the original reversed scan with a strict '<' would have picked.
    ranked = sorted(range(len(process)), key=lambda pos: (process[pos], -pos))
    return ranked[rank - 1]
def calculate_batch_duration(processes: List[int], durations):
    """Add up the durations of several processes, machine by machine.

    Args:
        processes: Row indices into ``durations`` that form one batch.
        durations: Table indexed as ``durations[process][machine]``.  The
            number of machines is taken from the first row.

    Returns:
        A list with one entry per machine: the sum of that machine's
        durations over all processes in the batch (``0`` for every machine
        when ``processes`` is empty).
    """
    machine_count = len(durations[0])

    def column_total(machine):
        # Accumulate in the order given by ``processes`` starting from 0,
        # so floating-point results are the same as a plain running sum.
        total = 0
        for task in processes:
            total += durations[task][machine]
        return total

    return [column_total(machine) for machine in range(machine_count)]
def
best_performance_under_limit
(
max_width
:
int
,
prices
:
List
[
float
],
durations
:
List
[
float
],
limit
:
float
,
strategy
:
str
=
"
Bigg
est"
)
->
Tuple
[
float
,
List
[
int
]]:
strategy
:
str
=
"
Cheap
est"
)
->
Tuple
[
float
,
List
[
int
]]:
current_solution
=
[
0
for
machine
in
prices
]
current_price
=
0.0
operational_costs
=
cost_per_process_and_machine
(
durations
,
prices
)
# TODO: this assumes more expensive is better and simply fills up with the largest possible
# TODO: it also completely ignores that we may need the machines for MORE than one hour
if
strategy
==
"Biggest"
:
...
...
@@ -40,4 +90,82 @@ def best_performance_under_limit(
current_price
+=
price
current_solution
[
-
machine
-
1
]
+=
1
#
if
strategy
==
"Cheapest"
:
# information extrapolated by drawing a dot graph, keep strongly tied functions together
batch_durations
=
[]
batch_durations
.
append
(
calculate_batch_duration
([
0
,
7
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
1
,
8
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
2
,
9
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
3
,
10
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
4
,
11
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
5
,
12
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
6
,
13
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
14
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
15
,
22
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
16
,
23
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
17
,
24
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
18
,
25
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
19
,
26
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
20
,
27
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
21
,
28
],
durations
))
batch_durations
.
append
(
calculate_batch_duration
([
29
],
durations
))
batch_costs
=
cost_per_process_and_machine
(
batch_durations
,
prices
)
solutions_hours
=
[]
solutions_machine
=
[]
#calculate some solutions
for
i
in
range
(
1
,
5
):
runtime
=
0
# in seconds
machines
=
[]
index
=
0
for
process
in
batch_durations
:
machines
.
append
(
get_n_best_price_per_process
(
process
,
i
))
#for process in batch_durations:
bestprice
=
machines
[
index
]
index
+=
1
runtime
+=
process
[
bestprice
]
solutions_hours
.
append
(
runtime
)
solutions_machine
.
append
(
machines
[
0
])
machineHours
=
math
.
ceil
(
runtime
/
3600
)
# print(str(runtime) + " seconds")
# print(machines)
# print("Machine hours: " + str(machineHours))
# print("Costs: " + str(machineHours * prices[machines[0]]))
#test if we can reduce costs or runtime (without network traffic
index
=
0
max_price
=
-
1
max_price_position
=
-
1
for
solution
in
solutions_hours
:
if
(
max_price
<
math
.
ceil
(
solution
/
3600
)
*
prices
[
solutions_machine
[
index
]]):
max_price
=
math
.
ceil
(
solution
/
3600
)
*
prices
[
solutions_machine
[
index
]]
max_price_position
=
index
index
+=
1
#write solution
current_price
=
max_price
current_solution
[
solutions_machine
[
max_price_position
]]
=
math
.
ceil
(
solutions_hours
[
max_price_position
]
/
3600
*
prices
[
solutions_machine
[
max_price_position
]])
index
=
0
for
solution
in
solutions_hours
:
if
(
index
==
max_price_position
):
index
+=
1
continue
local_price
=
math
.
ceil
(
solution
/
3600
)
*
prices
[
solutions_machine
[
index
]]
factor
=
math
.
floor
(
max_price
/
local_price
)
new_runtime
=
solution
/
factor
new_price
=
math
.
ceil
(
solution
/
3600
/
factor
)
*
prices
[
solutions_machine
[
index
]]
*
factor
if
(
new_runtime
<
solutions_hours
[
max_price_position
]
*
0.9
):
# 0.9 to simulate the traffic time needed to transfer
current_price
=
new_price
current_solution
=
[
0
for
machine
in
prices
]
max_price_position
=
index
current_solution
[
solutions_machine
[
index
]]
=
factor
*
math
.
ceil
(
solutions_hours
[
index
]
/
3600
)
index
+=
1
print
(
"final duration[s]: "
+
str
(
solutions_hours
[
max_price_position
]))
# The calculation only uses homogeneous VM distribution currently
return
current_price
,
current_solution
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment