Source code for af_analysis.format.af3_webserver

#!/usr/bin/env python3
# coding: utf-8

import os
import logging
import json
from tqdm.auto import tqdm
import pandas as pd

logger = logging.getLogger()
logger.setLevel(logging.INFO)


[docs] def read_dir(directory): """Extract pdb list from a directory. Parameters ---------- directory : str Path to the directory containing the pdb files. Returns ------- log_pd : pandas.DataFrame Dataframe containing the information extracted from the directory. """ logger.info(f"Reading {directory}") log_dict_list = [] for file in os.listdir(directory): # We only consider .cif files that do not start with a dot (hidden files) if file.endswith(".cif") and not file.startswith("."): token = file[:-4].split("_") model = int(token[-1]) query = "_".join(token[1:-2]) # print(token, query, model) json_score = os.path.join( directory, f"fold_{query}_summary_confidences_{model}.json" ) with open(json_score, "r") as f_in: json_dict = json.load(f_in) info_dict = { "pdb": os.path.join(directory, file), "query": query, "model": model, "data_file": os.path.join( directory, f"fold_{query}_full_data_{model}.json" ), } info_dict.update(json_dict) log_dict_list.append(info_dict) log_pd = pd.DataFrame(log_dict_list) # Update column names log_pd = log_pd.rename( columns={ "ranking_score": "ranking_confidence", "ptm": "pTM", "iptm": "ipTM", } ) # To ensure that tests are consistent across different systems # we sort the dataframe by pdb log_pd = log_pd.sort_values(by=["pdb"]).reset_index(drop=True) return log_pd