ray分布式
import os
os.environ["RAY_DEDUP_LOGS"] = "0"
import time
import ray
# Small sample dataset that every task reads from.
database = [
    "Learning",
    "ray",
    "a",
    "b",
    "c",
]
# Store the list with ray.put once; tasks are handed the returned reference.
db_obeject_ref = ray.put(database)
@ray.remote
def retrieve_task(item, db):
    """Look up one entry of ``db`` after an index-proportional delay.

    Args:
        item: Integer index into ``db``; the task sleeps ``item / 10``
            seconds before reading.
        db: Indexable collection to read from. The driver in this script
            passes the reference returned by ``ray.put``.

    Returns:
        The tuple ``(item, db[item])``.
    """
    # Print the PID so the log shows which process executed the task.
    print(f"Task {item} 当前进程ID(PID): {os.getpid()}", flush=True)
    time.sleep(item / 10.)
    return item, db[item]
def print_runtime(input_data, start_time):
    """Print the elapsed wall-clock time followed by each result on its own line.

    Args:
        input_data: Iterable of results to print, one per line.
        start_time: A ``time.time()`` timestamp taken when the work started.
    """
    print(f'runtime:{time.time() - start_time:.2f} seconds, data:')
    print(*input_data, sep="\n")
# Launch one remote lookup per index, then block until all have finished.
start = time.time()
object_references = [
    retrieve_task.remote(index, db_obeject_ref)
    for index in range(5)
]
data = ray.get(object_references)
print_runtime(data, start)
使用ray.wait获取已经完成的任务结果:
import os
os.environ["RAY_DEDUP_LOGS"] = "0"
import time
import ray
# Small sample dataset that every task reads from.
database = [
    "Learning",
    "ray",
    "a",
    "b",
    "c",
]
# Store the list with ray.put once; tasks are handed the returned reference.
db_obeject_ref = ray.put(database)
@ray.remote
def retrieve_task(item, db):
    """Look up one entry of ``db`` after an index-proportional delay.

    Args:
        item: Integer index into ``db``; the task sleeps ``item / 10``
            seconds before reading.
        db: Indexable collection to read from. The driver in this script
            passes the reference returned by ``ray.put``.

    Returns:
        The tuple ``(item, db[item])``.
    """
    # Print the PID so the log shows which process executed the task.
    print(f"Task {item} 当前进程ID(PID): {os.getpid()}", flush=True)
    time.sleep(item / 10.)
    return item, db[item]
def print_runtime(input_data, start_time):
    """Print the elapsed wall-clock time followed by each result on its own line.

    Args:
        input_data: Iterable of results to print, one per line.
        start_time: A ``time.time()`` timestamp taken when the work started.
    """
    print(f'runtime:{time.time() - start_time:.2f} seconds, data:')
    print(*input_data, sep="\n")
# Launch one remote lookup per index and consume results as they finish.
start = time.time()
object_references = [
    retrieve_task.remote(item, db_obeject_ref) for item in range(5)
]
all_data = []
while object_references:
    # ray.wait splits the references into finished and still-pending ones.
    # Ask for at most two finished results per round and stop waiting after
    # 7 seconds; the pending references are carried into the next iteration.
    finished, object_references = ray.wait(
        object_references,
        num_returns=min(2, len(object_references)),
        timeout=7.0,
    )
    data = ray.get(finished)
    print_runtime(data, start)
    all_data.extend(data)
处理任务的依赖
import os
os.environ["RAY_DEDUP_LOGS"] = "0"
import time
import ray
# Small sample dataset that every task reads from.
database = [
    "Learning",
    "ray",
    "a",
    "b",
    "c",
    "d",
]
# Store the list with ray.put once; tasks are handed the returned reference.
db_obeject_ref = ray.put(database)
def retrieve(item, db_obeject_ref):
    """Return entry ``item`` of the database after an index-proportional delay.

    This is a plain function (no ``@ray.remote``), so it runs in the
    caller's process.

    Args:
        item: Integer index to read; the call sleeps ``item / 10`` seconds
            first.
        db_obeject_ref: Indexable collection to read from.

    Returns:
        ``db_obeject_ref[item]``.
    """
    # Print the PID so the log shows which process performed the lookup.
    print(f"retrieve {item} 当前进程ID(PID): {os.getpid()}", flush=True)
    time.sleep(item / 10.)
    return db_obeject_ref[item]
@ray.remote
def follow_up_task(retrieve_result, db_obeject_ref):
    """Fetch the entry that follows the one an earlier retrieval returned.

    Args:
        retrieve_result: ``(index, value)`` pair; only the index is used.
        db_obeject_ref: Indexable database handed on to ``retrieve``.

    Returns:
        The tuple ``(retrieve_result, follow_up_result)``, where
        ``follow_up_result`` is the database entry at ``index + 1``.
    """
    original_item, _ = retrieve_result
    follow_up_result = retrieve(original_item + 1, db_obeject_ref)
    return retrieve_result, follow_up_result
@ray.remote
def retrieve_task(item, db):
    """Look up one entry of ``db`` after an index-proportional delay.

    Args:
        item: Integer index into ``db``; the task sleeps ``item / 10``
            seconds before reading.
        db: Indexable collection to read from. The driver in this script
            passes the reference returned by ``ray.put``.

    Returns:
        The tuple ``(item, db[item])``.
    """
    # Print the PID so the log shows which process executed the task.
    print(f"Task {item} 当前进程ID(PID): {os.getpid()}", flush=True)
    time.sleep(item / 10.)
    return item, db[item]
retrieve_refs = [
    retrieve_task.remote(item, db_obeject_ref) for item in [0, 2, 4]
]
# Express the dependency between tasks by passing the futures themselves:
# each follow-up task receives the reference of the retrieval it depends on.
follow_up_refs = [
    follow_up_task.remote(ref, db_obeject_ref) for ref in retrieve_refs
]
# Plain loop: printing is a side effect, so no list of None values is built.
for data in ray.get(follow_up_refs):
    print(data)
上面的代码在脚本启动后,可以通过以下命令查看 Ray 的 worker 进程:
ps -ef|grep ray
xzc 3550114 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550115 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550116 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550117 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550118 3550014 5 23:12 pts/14 00:00:00 ray::follow_up_task
xzc 3550119 3550014 6 23:12 pts/14 00:00:00 ray::follow_up_task
xzc 3550120 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550121 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550122 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550123 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550124 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550125 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550126 3550014 5 23:12 pts/14 00:00:00 ray::follow_up_task
xzc 3550127 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550128 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550129 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550130 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550131 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550132 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550143 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550156 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550162 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550164 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550167 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550168 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550171 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550172 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550173 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550174 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550175 3550014 6 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550176 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
xzc 3550177 3550014 5 23:12 pts/14 00:00:00 ray::IDLE
如果没有显式调用 ray.init,Ray 的默认初始化如下:
# Illustrative call: the arguments the article presents as Ray's implicit
# initialization when ray.init is not called explicitly.
# NOTE(review): Ray's documented default for ignore_reinit_error appears to be
# False — confirm against the ray.init reference before relying on this.
ray.init(
num_cpus=os.cpu_count(), # defaults to using all CPU cores
ignore_reinit_error=True
)
follow_up_task 的 PID 与对应的 retrieve_task 相同
这是因为 Ray 会尽量将有依赖关系的任务调度到同一个 worker
DataTracker 执行器
当看到"执行器"时,可以自动脑补成:
"这是一个分布式服务实例,就像微服务中的一个服务节点,有自己的状态和专属资源"
import os
os.environ["RAY_DEDUP_LOGS"] = "0"
import time
import ray
@ray.remote
class DataTracker:
    """Actor that keeps a running count of how often ``increment`` was called."""

    def __init__(self):
        # Number of completed increment calls.
        self._counts = 0

    def increment(self):
        """Add one to the counter after a deliberate 10-second pause."""
        # Print the PID so the log shows which process hosts the actor.
        print(f"increment当前进程ID(PID): {os.getpid()}", flush=True)
        time.sleep(10)
        self._counts += 1

    def counts(self):
        """Return the current counter value."""
        print(f"counts当前进程ID(PID): {os.getpid()}", flush=True)
        return self._counts
# Small sample dataset that every task reads from.
database = [
    "Learning",
    "ray",
    "a",
    "b",
    "c",
    "d",
]
# Store the list with ray.put once; tasks are handed the returned reference.
db_obeject_ref = ray.put(database)
@ray.remote
def retrieve_tracker_task(item, tracker, db):
    """Look up one entry of ``db`` and record the access on ``tracker``.

    Args:
        item: Integer index into ``db``; the task sleeps ``item / 10``
            seconds before reading.
        tracker: Actor handle exposing an ``increment`` method.
        db: Indexable collection to read from.

    Returns:
        The tuple ``(item, db[item])``.
    """
    # Print the PID so the log shows which process executed the task.
    print(f"Task {item} 当前进程ID(PID): {os.getpid()}", flush=True)
    time.sleep(item / 10.)
    # The returned reference is discarded on purpose: the task does not wait
    # for the increment to finish before returning its own result.
    tracker.increment.remote()
    return item, db[item]
# Create the tracker actor and share its handle with every lookup task.
tracker = DataTracker.remote()
retrieve_refs = [
    retrieve_tracker_task.remote(index, tracker, db_obeject_ref)
    for index in range(6)
]
data = ray.get(retrieve_refs)
print(data)
# Ask the actor for its counter and print the resolved value.
counts_ref = tracker.counts.remote()
print(ray.get(counts_ref))
输出结果:
2025-05-22 23:40:01,734 INFO worker.py:1852 -- Started a local Ray instance.
(retrieve_tracker_task pid=3583820) Task 5 当前进程ID(PID): 3583820
(DataTracker pid=3583819) increment当前进程ID(PID): 3583819
(retrieve_tracker_task pid=3583823) Task 1 当前进程ID(PID): 3583823
(retrieve_tracker_task pid=3583826) Task 2 当前进程ID(PID): 3583826
(retrieve_tracker_task pid=3583822) Task 4 当前进程ID(PID): 3583822
(retrieve_tracker_task pid=3583833) Task 0 当前进程ID(PID): 3583833
(retrieve_tracker_task pid=3583830) Task 3 当前进程ID(PID): 3583830
[(0, 'Learning'), (1, 'ray'), (2, 'a'), (3, 'b'), (4, 'c'), (5, 'd')]
(DataTracker pid=3583819) increment当前进程ID(PID): 3583819
(DataTracker pid=3583819) increment当前进程ID(PID): 3583819
(DataTracker pid=3583819) increment当前进程ID(PID): 3583819
(DataTracker pid=3583819) increment当前进程ID(PID): 3583819
(DataTracker pid=3583819) increment当前进程ID(PID): 3583819
6
(DataTracker pid=3583819) counts当前进程ID(PID): 3583819
知识是我们已知的
也是我们未知的
基于已有的知识之上
我们去发现未知的
由此,知识得到扩充
我们获得的知识越多
未知的知识就会更多
因而,知识扩充永无止境

浙公网安备 33010602011771号