爱穿豹纹的阿姨 2025-01-07 17:13 采纳率: 0%
浏览 9

python多进程只能一个一个进程运行

使用pool多进程无法正确的执行多进程。而是一个任务一个任务的运行。

import os
import time
import random
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Pool

import numpy as np
import pandas as pd
from collections import defaultdict
import itertools

# 读取病毒与GOterm关系
def read_virus_go(file_path):
    virus_to_go = {}
    df = pd.read_csv(file_path, sep='\t', index_col=0)
    for idx, row in df.iterrows():
        cols_with_1 = row[row == 1].index.tolist()
        virus_to_go[idx] = cols_with_1
    return virus_to_go

# 读取GOterm间的相似性
def read_go_similarity(file_path):
    go_similarity = defaultdict(np.float32)
    with open(file_path, 'r') as f:
        for line in f:
            go1, go2, _, sim = line.strip().split()
            if go1 > go2:
                go1, go2 = go2, go1
            go_similarity[(go1, go2)] = np.float32(sim)
    return go_similarity

# 计算Sim(go, G1)
def calc_sim_go_to_g(go, g_terms, go_similarity):
    # 如果go在g_terms中,返回1
    if go in g_terms:
        return 1
    return max(go_similarity[(min(go, g), max(go, g))] if (min(go, g), max(go, g)) in go_similarity else 0 for g in g_terms)



# 计算Sim(G1, G2)
def calc_sim_g1_g2(g1_terms, g2_terms, go_similarity):
    m, n = len(g1_terms), len(g2_terms)
    if m == 0 or n == 0:
        return 0
    sim_g1_g2 = sum(calc_sim_go_to_g(go, g2_terms, go_similarity) for go in g1_terms)
    sim_g2_g1 = sum(calc_sim_go_to_g(go, g1_terms, go_similarity) for go in g2_terms)
    return (sim_g1_g2 + sim_g2_g1) / (m + n)

# 计算相似度的任务函数
def compute_similarity_for_pair(pair, virus_to_go, disease_to_go, go_similarity):
    v1, v2 = pair
    sim = calc_sim_g1_g2(virus_to_go[v1], disease_to_go[v2], go_similarity)
    return (v1, v2, sim)

# 主函数
def main(virus_go_file, disease_go_file, go_similarity_file, output_file):
    # 读取数据
    virus_to_go = read_virus_go(virus_go_file)
    disease_to_go = read_virus_go(disease_go_file)
    go_similarity = read_go_similarity(go_similarity_file)

    # 病毒和疾病列表
    viruses = list(virus_to_go.keys())
    diseases = list(disease_to_go.keys())

    # 生成病毒和疾病对
    viruses_pairs = list(itertools.product(viruses, diseases))[:10]

    # 随机分成10份
    random.shuffle(viruses_pairs)
    num_splits = 5
    split_pairs = [viruses_pairs[i::num_splits] for i in range(num_splits)]

    # 使用多进程进行计算
    with Pool(processes=num_splits) as pool:
        # 每个进程处理一部分任务
        results = pool.starmap(compute_similarity_for_pair,
                               [(pair, virus_to_go, disease_to_go, go_similarity) for pair in itertools.chain(*split_pairs)])

    # 保存结果
    df = pd.DataFrame(results, columns=['Virus1', 'Virus2', 'Similarity'])
    df.to_csv(output_file, index=False, sep='\t')
    print(f"结果已保存到 {output_file}")




if __name__ == '__main__':
    # 文件路径(需要根据您的文件路径修改)
    virus_go_file = "../allGO/allGO_Virus"
    disease_go_file = "../allGO/allGO_Disease"
    go_similarity_file = "./id_combinations/out/allGoTermSmi.txt"
    output_file = "./result/virusAndDisease_similarity_results.txt"
    main(virus_go_file,disease_go_file, go_similarity_file, output_file)



  • 写回答

3条回答 默认 最新

  • 道友老李 JWE233286一种基于机器视觉的水表指针读数识别及修正的方法 专利发明者 2025-01-07 17:14
    关注
    让【道友老李】来帮你解答,本回答参考gpt编写,并整理提供,如果还有疑问可以点击头像关注私信或评论。
    如果答案让您满意,请采纳、关注,非常感谢!
    在这段代码中,我们可以看到主要是使用了多进程的Pool来进行计算。但是由于使用了Pool,导致无法正确执行多进程,任务是一个一个地按顺序运行的,没有实现并发执行的效果。 为了正确运行多进程,可以使用concurrent.futures库中的ThreadPoolExecutor来代替multiprocessing.Pool。这样可以实现多线程的并发执行效果。 下面是修改后的代码示例:
    from concurrent.futures import ThreadPoolExecutor
    import numpy as np
    import pandas as pd
    from collections import defaultdict
    import itertools
    # 其他函数省略...
    # 主函数
    def main(virus_go_file, disease_go_file, go_similarity_file, output_file):
        # 读取数据
        virus_to_go = read_virus_go(virus_go_file)
        disease_to_go = read_virus_go(disease_go_file)
        go_similarity = read_go_similarity(go_similarity_file)
        
        # 病毒和疾病列表
        viruses = list(virus_to_go.keys())
        diseases = list(disease_to_go.keys())
        
        # 生成病毒和疾病对
        viruses_pairs = list(itertools.product(viruses, diseases))[:10]
        
        # 随机分成10份
        random.shuffle(viruses_pairs)
        num_splits = 5
        split_pairs = [viruses_pairs[i::num_splits] for i in range(num_splits)]
        
        # 使用ThreadPoolExecutor进行计算
        with ThreadPoolExecutor() as executor:
            # 并发执行每个任务
            results = list(executor.map(compute_similarity_for_pair, 
                                        [(pair, virus_to_go, disease_to_go, go_similarity) for pair in itertools.chain(*split_pairs)]))
        
        # 保存结果
        df = pd.DataFrame(results, columns=['Virus1', 'Virus2', 'Similarity'])
        df.to_csv(output_file, index=False, sep='\t')
        print(f"结果已保存到 {output_file}")
    if __name__ == '__main__':
        # 其他代码省略...
    

    通过使用ThreadPoolExecutor来实现多线程并发执行任务,可以提高程序的效率并正确地执行多进程任务。

    评论

报告相同问题?

问题事件

  • 创建了问题 1月7日