Source code for af_analysis.format.afpulldown

#!/usr/bin/env python3
# coding: utf-8

import os
import logging
import json
from tqdm.auto import tqdm
import pandas as pd
import numpy as np

# Module-wide logger. Calling getLogger without a name returns the *root*
# logger, so the level set here applies to the root logger of the process.
# NOTE(review): a library normally uses logging.getLogger(__name__) and leaves
# level configuration to the application - kept as is to preserve behaviour.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)

# Weight-set names accepted by read_dir when parsing a model name.
# The identifier spelling ("weigths") is kept because read_dir refers to it.
weigths = [f"multimer_v{version}" for version in (1, 2, 3)]


def read_dir(directory, query=None):
    """Extract pdb list from a directory.

    The directory must contain a ``ranking_debug.json`` file and, for every
    model listed under its ``"iptm+ptm"`` key, the three files
    ``unrelaxed_<model>.pdb``, ``pae_<model>.json`` and
    ``confidence_<model>.json``.

    Parameters
    ----------
    directory : str
        Path to the directory containing the pdb files.
    query : str
        Name of the query, default is None. When None, the last component of
        ``directory`` is used as the query name.

    Returns
    -------
    log_pd : pandas.DataFrame
        Dataframe containing the information extracted from the directory,
        one row per model, with the columns ``pdb``, ``query``, ``rank``,
        ``state``, ``model``, ``weight``, ``data_file``, ``ipTM``,
        ``ranking_confidence``, ``pTM`` and ``pLDDT``. The dataframe is empty
        when no model is listed.

    Raises
    ------
    FileNotFoundError
        If ``ranking_debug.json`` or one of the per-model files is missing.
    ValueError
        If the weight name parsed from a model name is not in ``weigths``.
    """
    logger.info(f"Reading {directory}")

    ranking_path = os.path.join(directory, "ranking_debug.json")
    with open(ranking_path, "r") as f_in:
        json_dict = json.load(f_in)

    # The query name does not depend on the model, so resolve it only once.
    if query is None:
        query_local = os.path.basename(os.path.normpath(directory))
    else:
        query_local = query

    order = json_dict["order"]
    log_dict_list = []

    for model in json_dict["iptm+ptm"]:
        # Explicit exceptions instead of ``assert``: assertions are removed
        # when Python runs with -O, which would let bad input through.
        pdb_name = os.path.join(directory, "unrelaxed_" + model + ".pdb")
        if not os.path.isfile(pdb_name):
            raise FileNotFoundError(f"PDB file not found: {pdb_name}")

        # The model name is split on "_": field 1 is the model number and
        # fields 2-3 joined give the weight name.
        # NOTE(review): presumably names look like
        # "model_<n>_multimer_v<k>_..." - confirm against real outputs.
        token = model.split("_")
        weight = "_".join(token[2:4])
        if weight not in weigths:
            raise ValueError(
                f"Unknown weight {weight!r} in model name {model!r}; "
                f"expected one of {weigths}"
            )

        json_file = os.path.join(directory, "pae_" + model + ".json")
        if not os.path.isfile(json_file):
            raise FileNotFoundError(f"PAE file not found: {json_file}")

        iptm = float(json_dict["iptm"][model])
        conf = float(json_dict["iptm+ptm"][model])
        # Solves conf = 0.8 * ipTM + 0.2 * pTM for pTM.
        ptm = (conf - 0.8 * iptm) / 0.2

        local_json_score = os.path.join(
            directory, "confidence_" + model + ".json"
        )
        with open(local_json_score, "r") as f_in:
            local_json_dict = json.load(f_in)
        pLDDT = np.mean(local_json_dict["confidenceScore"])

        log_dict_list.append(
            {
                "pdb": pdb_name,
                "query": query_local,
                # 1-based position of the model in the "order" list.
                "rank": int(order.index(model) + 1),
                "state": "unrelaxed_",
                "model": int(token[1]),
                "weight": weight,
                "data_file": json_file,
                "ipTM": iptm,
                "ranking_confidence": conf,
                "pTM": ptm,
                "pLDDT": pLDDT,
            }
        )

    log_pd = pd.DataFrame(log_dict_list)
    return log_pd


def read_full_dir(directory):
    """Read every sub-directory of a directory and merge the results.

    Each immediate sub-directory of ``directory`` is parsed with
    ``read_dir`` (using the sub-directory name as query) and the resulting
    dataframes are concatenated.

    Parameters
    ----------
    directory : str
        Path to the directory whose sub-directories are read.

    Returns
    -------
    log_pd : pandas.DataFrame
        Concatenation, with a fresh integer index, of the dataframes
        extracted from each sub-directory, visited in sorted path order.
        An empty dataframe is returned when ``directory`` contains no
        sub-directory.
    """
    logger.info(f"Reading full AlphaPulldown {directory}")

    # scandir yields entries in arbitrary order: sort for reproducible rows,
    # and use the context manager so the iterator is always closed.
    with os.scandir(directory) as entries:
        subfolders = sorted(entry.path for entry in entries if entry.is_dir())

    # pd.concat raises ValueError on an empty list, so handle it explicitly.
    if not subfolders:
        return pd.DataFrame()

    log_pd_list = [read_dir(folder) for folder in subfolders]
    return pd.concat(log_pd_list, ignore_index=True)