import { ParserOptionsSpec } from "./ParserOptionsSpec";
import { base64ToArray, inflateString } from "../utilities";
import { MMTFobj } from "./MMTF"
//import { Matrix4 } from "../WebGL";
import { computeSecondaryStructure } from "./utils/computeSecondaryStructure";
import { processSymmetries } from "./utils/processSymmetries";
import { Category } from "./cifutils/category";
import { AtomSpec } from "specs";
import { assignPDBBonds } from "./utils/assignPDBBonds";
import { Matrix4 } from "../WebGL";
import { isEmpty } from "./utils/isEmpty";
declare var MMTF: MMTFobj;
class Connectivity {
C: Record<string, Record<string, Record<string, number>>> = {};
constructor(comp_bond) {
if (comp_bond) {
let ids = comp_bond.getField('comp_id');
let a1s = comp_bond.getField('atom_id_1');
let a2s = comp_bond.getField('atom_id_2');
let orders = comp_bond.getField('value_order');
for (let i = 0; i < ids.length; i++) {
let resn = ids[i];
let a1 = a1s[i];
let a2 = a2s[i];
let oname = orders[i];
let o = 1;
if (oname == 'doub') o = 2;
else if (oname == 'trip') o = 3;
if (this.C[resn] == undefined) {
this.C[resn] = {};
}
if (this.C[resn][a1] == undefined) {
this.C[resn][a1] = {};
}
if (this.C[resn][a2] == undefined) {
this.C[resn][a2] = {};
}
this.C[resn][a1][a2] = o;
this.C[resn][a2][a1] = o;
}
}
}
//returns bond order, zero if not connected
order(resn: string, atom1: string, atom2: string): number {
if (this.C[resn] !== undefined) {
if (this.C[resn][atom1] !== undefined) {
if (this.C[resn][atom1][atom2] !== undefined) {
return this.C[resn][atom1][atom2];
}
}
}
return 0;
}
}
/* Class for recording inter-component connectivity */
class StructConn {
public C: [[string,number,string,string],[string,number,string,string],number][] = []; //chain,resi,resn,atomn
constructor(struct_conn) {
if(struct_conn) {
//have no idea what the deal with with ptnr3..
let types = struct_conn.getField('conn_type_id');
let chain1 = struct_conn.getField('ptnr1_label_asym_id');
let resi1 = struct_conn.getField('ptnr1_label_seq_id');
let resn1 = struct_conn.getField('ptnr1_label_comp_id');
let a1 = struct_conn.getField('ptnr1_label_atom_id');
let chain2 = struct_conn.getField('ptnr2_label_asym_id');
let resi2 = struct_conn.getField('ptnr2_label_seq_id');
let resn2 = struct_conn.getField('ptnr2_label_comp_id');
let a2 = struct_conn.getField('ptnr2_label_atom_id');
let bo = struct_conn.getField('pdbx_value_order');
for(let i = 0; i < types.length; i++) {
if(types[i] == 'disulf' || types[i] == 'covale') { //metal too?
let o = bo ? (bo[i] == "" ? 1: parseInt(bo[i])) : 1;
this.C.push([[chain1[i],resi1[i],resn1[i],a1[i]],[chain2[i],resi2[i],resn2[i],a2[i]],o]);
}
}
}
}
}
/* group atoms by chain/resid */
class Residues {
R: Record<string, Record<number, Record<string, Array<AtomSpec>>>> = {}; //chain, resid, resn (redundant), atom list
constructor() {
}
add(a: AtomSpec) {
if (this.R[a.lchain] == undefined) this.R[a.lchain] = {};
if (this.R[a.lchain][a.lresi] == undefined) this.R[a.lchain][a.lresi] = {};
if (this.R[a.lchain][a.lresi][a.lresn] == undefined) this.R[a.lchain][a.lresi][a.lresn] = [];
this.R[a.lchain][a.lresi][a.lresn].push(a);
this.R[a.lchain][a.lresi][a.lresn][a.atom] = a; //look up by atom name
}
private geta([ch,resi,resn,aname]: [string,number,string,string]) {
if(this.R[ch] !== undefined &&
this.R[ch][resi] !== undefined &&
this.R[ch][resi][resn] !== undefined) {
return this.R[ch][resi][resn][aname];
}
return undefined;
}
setBonds(C: Connectivity, SC: StructConn) {
for(let ch in this.R) {
for(let resi in this.R[ch]) {
for(let resn in this.R[ch][resi]) {
let atoms = this.R[ch][resi][resn];
for(let i = 0; i < atoms.length; i++) {
for(let j = i+1; j < atoms.length; j++) {
let a1 = atoms[i];
let a2 = atoms[j];
let bo = C.order(resn,a1.atom,a2.atom);
if(bo > 0) {
a1.bonds.push(a2.index);
a2.bonds.push(a1.index);
a1.bondOrder.push(bo);
a2.bondOrder.push(bo);
}
}
}
}
}
}
for(let conn of SC.C) {
let a1 = conn[0];
let a2 = conn[1];
let bo = conn[2];
let atom1 = this.geta(a1);
let atom2 = this.geta(a2);
if(atom1 != undefined && atom2 != undefined) {
atom1.bonds.push(atom2.index);
atom2.bonds.push(atom1.index);
atom1.bondOrder.push(bo);
atom2.bondOrder.push(bo);
}
}
}
}
/**
* @param bindata - binary UInt8Array buffer or a base64 encoded string
* @param ParserOptionsSpec
* @category Parsers
*/
export function BCIF(bindata: any, options: ParserOptionsSpec) {
var noH = !options.keepH; // suppress hydrogens by default
var selAltLoc = options.altLoc ? options.altLoc : 'A'; //default alternate location to select if present
var computeStruct = !options.noComputeSecondaryStructure;
//var assemblyIndex = options.assemblyIndex ? options.assemblyIndex : 0;
const noAssembly = !options.doAssembly; // don't assemble by default
const assignbonds =
options.assignBonds === undefined ? true : options.assignBonds;
if (typeof (bindata) == "string") {
//assume base64 encoded
try {
bindata = base64ToArray(bindata);
} catch (error) {
//not base64
const encoder = new TextEncoder();
bindata = encoder.encode(bindata);
}
} else {
bindata = new Uint8Array(bindata);
}
var bcifData = MMTF.decodeMsgpack(bindata);
if (bcifData == 31) {
//was gziped
bindata = inflateString(bindata, false);
bcifData = MMTF.decodeMsgpack(bindata);
}
var atoms: any[][] & Record<string, any> = [];
var modelData: any[] = atoms.modelData = [];
var numModels = bcifData.dataBlocks.length;
if (numModels == 0) return atoms;
if (!options.multimodel) numModels = 1; //first only
//loop over models
for (let m = 0; m < numModels; m++) {
let startm = atoms.length;
const serialToIndex: [number, number][] = []; // map from pdb serial to index in atoms
modelData.push({ symmetries: [] });
atoms.push([]);
const block = bcifData.dataBlocks[m];
const cats = Object.create(null);
for (const cat of block.categories) {
cats[cat.name.substr(1)] = Category(cat);
}
//extract secondary structure information
//helices
let sslookup = {}; //chain to residue range
let sshelix = cats.struct_conf;
if (sshelix) {
let htypes = sshelix.getField('conf_type_id');
let hchain = sshelix.getField('beg_label_asym_id');
let hstart = sshelix.getField('beg_label_seq_id');
let hend = sshelix.getField('end_label_seq_id');
for (let i = 0; i < htypes.length; i++) {
if (htypes[i].startsWith('H')) {
let ch = hchain[i];
let startResi = hstart[i];
let endResi = hend[i];
if (!(ch in sslookup)) {
sslookup[ch] = {};
}
sslookup[ch][startResi] = "h1";
for (let res = startResi + 1; res < endResi; res++) {
sslookup[ch][res] = "h";
}
sslookup[ch][endResi] = "h2";
}
}
}
//sheets
let sssheet = cats.struct_sheet_range;
if (sssheet) {
let sids = sssheet.getField('id');
let schain = sssheet.getField('beg_label_asym_id');
let sstart = sssheet.getField('beg_label_seq_id');
let send = sssheet.getField('end_label_seq_id');
for (let i = 0; i < sids.length; i++) {
let ch = schain[i];
let startResi = sstart[i];
let endResi = send[i];
if (!(ch in sslookup)) {
sslookup[ch] = {};
}
sslookup[ch][startResi] = "s1";
for (let res = startResi + 1; res < endResi; res++) {
sslookup[ch][res] = "s";
}
sslookup[ch][endResi] = "s2";
}
}
//symmetry operations
let structops = cats.pdbx_struct_oper_list;
let opids = structops.getField('id');
if (opids && !noAssembly) {
let matrix11 = structops.getField('matrix[1][1]');
let matrix12 = structops.getField('matrix[1][2]');
let matrix13 = structops.getField('matrix[1][3]');
let matrix21 = structops.getField('matrix[2][1]');
let matrix22 = structops.getField('matrix[2][2]');
let matrix23 = structops.getField('matrix[2][3]');
let matrix31 = structops.getField('matrix[3][1]');
let matrix32 = structops.getField('matrix[3][2]');
let matrix33 = structops.getField('matrix[3][3]');
let vector1 = structops.getField('vector[1]');
let vector2 = structops.getField('vector[2]');
let vector3 = structops.getField('vector[3]');
for (let i = 0; i < opids.length; i++) {
const matrix = new Matrix4(
matrix11[i],
matrix12[i],
matrix13[i],
vector1[i],
matrix21[i],
matrix22[i],
matrix23[i],
vector2[i],
matrix31[i],
matrix32[i],
matrix33[i],
vector3[i]
);
modelData[modelData.length - 1].symmetries.push(matrix);
}
}
//extract connectivity information
let connect = new Connectivity(cats.chem_comp_bond);
let residues = new Residues();
let sconnect = new StructConn(cats.struct_conn);
//atom info
let asites = cats.atom_site;
let atomCount = asites.rowCount;
let group_pdb = asites.getField('group_PDB')
let cartn_x = asites.getField('Cartn_x');
let cartn_y = asites.getField('Cartn_y');
let cartn_z = asites.getField('Cartn_z');
let auth_asym_id = asites.getField('auth_asym_id');
let label_asym_id = asites.getField('label_asym_id');
let auth_seq_id = asites.getField('auth_seq_id');
let label_seq_id = asites.getField('label_seq_id');
let auth_comp_id = asites.getField('auth_comp_id');
let label_comp_id = asites.getField('label_comp_id');
let auth_atom_id = asites.getField('auth_atom_id');
let label_atom_id = asites.getField('label_atom_id');
let type_symbol = asites.getField('type_symbol');
let bfactors = asites.getField("B_iso_or_equiv");
let serials = asites.getField('id');
let icodes = asites.getField('label_alt_id');
let modelnums = asites.getField('pdbx_PDB_model_num');
let curmodel = modelnums ? modelnums[0] : 0;
for (let i = 0; i < atomCount; i++) {
if (group_pdb !== undefined &&
group_pdb[i] === "TER"
)
continue;
if (modelnums && modelnums[i] != curmodel) {
curmodel = modelnums[i];
if (options.multimodel) {
if (!options.onemol) {
atoms.push([]);
modelData.push(modelData[modelData.length - 1]);
curmodel = modelnums[i];
residues.setBonds(connect, sconnect);
residues = new Residues();
}
} else {
break; //first model only
}
}
const atom: AtomSpec = {};
atom.x = cartn_x[i];
atom.y = cartn_y[i];
atom.z = cartn_z[i];
atom.chain = auth_asym_id
? auth_asym_id[i]
: label_asym_id
? label_asym_id[i]
: undefined;
atom.lchain = label_asym_id
? label_asym_id[i]
: undefined;
atom.resi = auth_seq_id
? auth_seq_id[i]
: label_seq_id
? label_seq_id[i]
: undefined;
atom.lresi = label_seq_id
? label_seq_id[i]
: undefined;
atom.resn = auth_comp_id
? auth_comp_id[i].trim()
: label_comp_id
? label_comp_id[i].trim()
: undefined;
atom.lresn = label_comp_id ? label_comp_id[i].trim() : undefined;
atom.atom = auth_atom_id
? auth_atom_id[i].replace(/"/gm, "")
: label_atom_id
? label_atom_id[i].replace(/"/gm, "")
: undefined; //"primed" names are in quotes
atom.icode = icodes ? icodes[i] : undefined;
atom.altLoc = atom.icode;
atom.hetflag =
!group_pdb ||
group_pdb[i] === "HETA" ||
group_pdb[i] === "HETATM";
let elem = "X";
if (type_symbol) {
elem = type_symbol[i].replace(/\(?\+?\d+.*/, "");
}
atom.elem = elem[0].toUpperCase() + elem.substring(1, 2).toLowerCase();
if (bfactors) atom.b = bfactors[i];
if (noH && atom.elem == 'H') {
continue;
}
if (atom.altLoc != '' && atom.altLoc != selAltLoc && selAltLoc != '*') {
continue;
}
atom.bonds = [];
atom.ss = "c";
atom.serial = serials[i];
atom.model = curmodel;
atom.bondOrder = [];
atom.properties = {};
atom.index = atoms[atoms.length - 1].length;
serialToIndex[atom.serial] = [atoms.length, atom.index];
atoms[atoms.length - 1].push(atom);
residues.add(atom);
}
residues.setBonds(connect, sconnect);
// Assign secondary structures from pdb file
if (!isEmpty(sslookup)) {
for (let mi = startm; mi < atoms.length; mi++) {
let matoms = atoms[mi];
for (let i = 0; i < matoms.length; i++) {
const atom = matoms[i];
if (atom === undefined) continue;
if (atom.lchain in sslookup && atom.lresi in sslookup[atom.lchain]) {
const code = sslookup[atom.lchain][atom.lresi];
atom.ss = code[0];
if (code.length > 1) {
if (code[1] == "1") atom.ssbegin = true;
else if (code[1] == "2") atom.ssend = true;
}
}
}
}
}
if (options.multimodel && m < numModels - 1) {
if (!options.onemol) {
atoms.push([]);
modelData.push({ symmetries: [] });
}
}
}
for (let i = 0; i < atoms.length; i++) {
if (
assignbonds &&
!(options.duplicateAssemblyAtoms && !options.dontConnectDuplicatedAtoms)
) {
assignPDBBonds(atoms[i], options);
}
if (computeStruct) {
computeSecondaryStructure(atoms[i], options.hbondCutoff);
}
processSymmetries(
modelData[i].symmetries,
atoms[i],
options,
modelData[i].cryst
);
if (
options.duplicateAssemblyAtoms &&
!options.dontConnectDuplicatedAtoms &&
assignbonds
)
assignPDBBonds(atoms[i], options);
}
return atoms;
};