parsers_MMTF.ts

import { base64ToArray } from "../utilities";
import { Matrix4 } from "../WebGL";
import { ParserOptionsSpec } from "./ParserOptionsSpec";
import { computeSecondaryStructure } from "./utils/computeSecondaryStructure";
import { processSymmetries } from "./utils/processSymmetries";

export interface MMTFobj {
    decode(data: Uint8Array | ArrayBuffer): any;
    decodeMsgpack(data: Uint8Array | ArrayBuffer): any;
}
declare var MMTF: MMTFobj;

var fromCharCode = function (charCodeArray: any) {
    return String.fromCharCode.apply(null, charCodeArray).replace(/\0/g, '');
};

var convertSS = function (val: number | boolean) {
    // Convert mmtf code to 3dmol code
    /*    
      0:  pi helix
      1:  bend
      2:  alpha helix
      3:  sheet extended
      4:  3-10 helix
      5:  bridge
      6:  turn
      7:  coil
     */
    if (val == 0 || val == 2 || val == 4) return 'h';
    if (val == 3) return 's';
    return 'c';
};

let mmtfHETATMtypes = new Set([
    "D-SACCHARIDE",
    "D-SACCHARIDE 1,4 AND 1,4 LINKING",
    "D-SACCHARIDE 1,4 AND 1,6 LINKING",
    "L-SACCHARIDE",
    "L-SACCHARIDE 1,4 AND 1,4 LINKING",
    "L-SACCHARIDE 1,4 AND 1,6 LINKING",
    "NON-POLYMER",
    "OTHER",
    "PEPTIDE-LIKE",
    "SACCHARIDE"]);

/** 
 * @param bindata - binary UInt8Array buffer or a base64 encoded string
 * @param ParserOptionsSpec
 * @category Parsers
*/
export function MMTFparser(bindata: any, options: ParserOptionsSpec) {

    var noH = !options.keepH; // suppress hydrogens by default
    var selAltLoc = options.altLoc ? options.altLoc : 'A'; //default alternate location to select if present
    var ignoreStruct = !!options.noSecondaryStructure;
    var computeStruct = !options.noComputeSecondaryStructure;
    //extract symmetries - only take first assembly, apply to all models (ignoring changes for now)
    var noAssembly = !options.doAssembly; // don't assemble by default
    var assemblyIndex = options.assemblyIndex ? options.assemblyIndex : 0;

    if (typeof (bindata) == "string") {
        //assume base64 encoded
        bindata = base64ToArray(bindata);
    } else {
        bindata = new Uint8Array(bindata);
    }

    var mmtfData = MMTF.decode(bindata);

    var atoms: any[][] & Record<string, any> = [[]];
    var modelData: any[] = atoms.modelData = [];

    // setup index counters
    var chainIndex = 0;
    var groupIndex = 0;
    var atomIndex = 0;

    // setup optional fields
    var secStructList = mmtfData.secStructList;
    var bFactorList = mmtfData.bFactorList;
    var altLocList = mmtfData.altLocList;
    var occupancyList = mmtfData.occupancyList;
    var bondAtomList = mmtfData.bondAtomList;
    var bondOrderList = mmtfData.bondOrderList;

    var numModels = mmtfData.numModels;
    if (numModels == 0) return atoms;
    if (!options.multimodel) numModels = 1; //first only
    // hoisted loop variables
    var i: number, j: number, k: number, kl: number, m: number, n: number;



    var symmetries: Matrix4[] = [];
    if (!noAssembly && mmtfData.bioAssemblyList && mmtfData.bioAssemblyList.length > 0) {
        var transforms = mmtfData.bioAssemblyList[assemblyIndex].transformList;
        for (i = 0, n = transforms.length; i < n; i++) {
            var matrix = new Matrix4(transforms[i].matrix);
            matrix.transpose();
            symmetries.push(matrix);
        }
    }
    var unitCell = null as Record<string, number> | null;
    //unit cell info
    if (mmtfData.unitCell) {
        var u = mmtfData.unitCell;
        unitCell = { 'a': u[0], 'b': u[1], 'c': u[2], 'alpha': u[3], 'beta': u[4], 'gamma': u[5] };
    }

    let chainIsPolymer: boolean[] = [];
    mmtfData.entityList.forEach((entity: { chainIndexList: any[]; type: string; }) => {
        entity.chainIndexList.forEach(ch => {
            chainIsPolymer[ch] = entity.type == "polymer";
        });
    });
    var bondAtomListStart = 0; //for current model
    //loop over models, 
    for (m = 0; m < numModels; m++) {
        var modelChainCount = mmtfData.chainsPerModel[m];
        var matoms = atoms[atoms.length - 1];
        var serialToIndex: number[] = []; // map to matoms index, needed for noh

        modelData.push({ symmetries: symmetries, cryst: unitCell });
        for (i = 0; i < modelChainCount; ++i) {

            var chainGroupCount = mmtfData.groupsPerChain[chainIndex];
            var chainId = fromCharCode(
                mmtfData.chainIdList.subarray(chainIndex * 4, chainIndex * 4 + 4)
            );
            if (mmtfData.chainNameList) {
                chainId = fromCharCode(
                    mmtfData.chainNameList.subarray(chainIndex * 4, chainIndex * 4 + 4)
                );
            }

            var startGroup = groupIndex;
            var prevSS = '';
            for (j = 0; j < chainGroupCount; ++j) { //over residues (groups)

                var groupData = mmtfData.groupList[mmtfData.groupTypeList[groupIndex]];
                var groupAtomCount = groupData.atomNameList.length;
                var secStruct = 0;
                var secStructBegin = false;
                var secStructEnd = false;

                if (secStructList) {
                    secStruct = secStructList[groupIndex];
                    var sscode = convertSS(secStruct);
                    if (groupIndex == 0 || sscode != prevSS) {
                        secStructBegin = true;
                    }
                    prevSS = sscode;
                    var nextgroup = groupIndex + 1;
                    if (nextgroup >= secStructList.length || convertSS(secStructList[nextgroup] != sscode)) {
                        secStructEnd = true;
                    }
                }
                var groupId = mmtfData.groupIdList[groupIndex];
                var groupName = groupData.groupName;
                let groupType = groupData.chemCompType;
                var startAtom = atomIndex;
                // Note the following is not identical to respecting HETATM records
                // this information isn't available in MMTF.  
                let isHETATM = mmtfHETATMtypes.has(groupType) || !chainIsPolymer[chainIndex];

                for (k = 0; k < groupAtomCount; ++k) {

                    var element = groupData.elementList[k];
                    if (noH && element == 'H') {
                        atomIndex += 1;
                        continue;
                    }

                    var bFactor = '';
                    if (bFactorList) {
                        bFactor = bFactorList[atomIndex];
                    }
                    var altLoc = '';
                    if (altLocList && altLocList[atomIndex]) { //not zero
                        altLoc = String.fromCharCode(altLocList[atomIndex]);
                    }
                    var occupancy = '';
                    if (occupancyList) {
                        occupancy = occupancyList[atomIndex];
                    }

                    if (altLoc != '' && altLoc != selAltLoc && selAltLoc != '*') {
                        atomIndex += 1;
                        continue;
                    }

                    var atomId = mmtfData.atomIdList[atomIndex];
                    var atomName = groupData.atomNameList[k];
                    var atomCharge = 0;
                    if (groupData.atomChargeList) atomCharge = groupData.atomChargeList[k];
                    var xCoord = mmtfData.xCoordList[atomIndex];
                    var yCoord = mmtfData.yCoordList[atomIndex];
                    var zCoord = mmtfData.zCoordList[atomIndex];

                    serialToIndex[atomIndex] = matoms.length;
                    matoms.push({
                        'resn': groupName,
                        'x': xCoord,
                        'y': yCoord,
                        'z': zCoord,
                        'elem': element,
                        'hetflag': isHETATM,
                        'chain': chainId,
                        'resi': groupId,
                        'icode': altLoc,
                        'rescode': groupId + (altLoc != ' ' ? "^" + altLoc : ""), // combo
                        // resi
                        // and
                        // icode
                        'serial': atomId,
                        'altLoc': altLoc,
                        'index': atomIndex,
                        'atom': atomName,
                        'bonds': [],
                        'ss': convertSS(secStruct),
                        'ssbegin': secStructBegin,
                        'ssend': secStructEnd,
                        'bondOrder': [],
                        'properties': { charge: atomCharge, occupancy: occupancy },
                        'b': bFactor,
                    });

                    atomIndex += 1;
                }

                // intra group bonds
                var groupBondAtomList = groupData.bondAtomList;
                for (k = 0, kl = groupData.bondOrderList.length; k < kl; ++k) {
                    var atomIndex1 = startAtom + groupBondAtomList[k * 2];
                    var atomIndex2 = startAtom + groupBondAtomList[k * 2 + 1];
                    var bondOrder = groupData.bondOrderList[k];

                    //I assume bonds are only recorded once
                    var i1 = serialToIndex[atomIndex1];
                    var i2 = serialToIndex[atomIndex2];
                    var a1 = matoms[i1];
                    var a2 = matoms[i2];
                    if (a1 && a2) {
                        a1.bonds.push(i2);
                        a1.bondOrder.push(bondOrder);
                        a2.bonds.push(i1);
                        a2.bondOrder.push(bondOrder);
                    }
                }

                groupIndex += 1;
            }

            //reset for bonds
            groupIndex = startGroup;
            for (j = 0; j < chainGroupCount; ++j) { //over residues (groups)

                groupIndex += 1;

            }

            chainIndex += 1;
        }


        // inter group bonds
        if (bondAtomList) {
            for (let k = bondAtomListStart, kl = bondAtomList.length; k < kl; k += 2) {
                let atomIndex1 = bondAtomList[k];
                let atomIndex2 = bondAtomList[k + 1];
                let bondOrder = bondOrderList ? bondOrderList[k / 2] : 1;

                if (atomIndex1 >= atomIndex) {
                    bondAtomListStart = k;
                    break; //on next model
                }
                //I assume bonds are only recorded once
                let i1 = serialToIndex[atomIndex1];
                let i2 = serialToIndex[atomIndex2];
                let a1 = matoms[i1];
                let a2 = matoms[i2];
                if (a1 && a2) {
                    a1.bonds.push(i2);
                    a1.bondOrder.push(bondOrder);
                    a2.bonds.push(i1);
                    a2.bondOrder.push(bondOrder);
                }
            }
        }

        if (options.multimodel) {
            if (!options.onemol) atoms.push([]);
        }
    }

    if (!noAssembly) {
        for (let n = 0; n < atoms.length; n++) {
            processSymmetries(modelData[n].symmetries, atoms[n], options, modelData[n].cryst);
        }
    }

    if (computeStruct && !ignoreStruct) {
        computeSecondaryStructure(atoms as any, options.hbondCutoff);
    }

    return atoms;
};