@prefix : <https://biohub.ai/esm/protein/about#> .
@prefix schema: <http://schema.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix prov: <http://www.w3.org/ns/prov#> .

# Landing page for the ESM release, modelled as a schema.org TechArticle.
<https://biohub.ai/esm/protein/about>
    rdf:type schema:TechArticle ;
    schema:name "A World Model of Protein Biology"@en ;
    schema:datePublished "2026-05-28"^^xsd:date ;
    schema:publisher :biohubOrganization ;
    schema:about :esmResearchProgram ;
    schema:description "Biohub releases ESM (Evolutionary Scale Models) - a world model of protein biology comprising ESMFold2 (structure prediction), ESM Atlas (map of 6.8B sequences/1.1B structures), and ESMC (protein language model trained on ~2.8B sequences)."@en ;
    # Top-level sections of the page, one node per section.
    schema:hasPart
        :faqSection ,
        :glossarySection ,
        :howtoSection ,
        :technicalSection ,
        :therapeuticSection ,
        :latentSpaceSection ,
        :atlasSection ,
        :possibilitiesSection ;
    # Provenance pointer to the kg-generator IRI.
    prov:wasGeneratedBy <https://github.com/OpenLinkSoftware/ai-agent-skills/tree/main/kg-generator#this> .

# Local class vocabulary for the kinds of ESM artifacts described in this graph.

# Root of the model hierarchy.
:ProteinModel
    rdf:type rdfs:Class ;
    rdfs:comment "Base class for protein models developed at Biohub"@en ;
    rdfs:label "Protein Model"@en .

# Specialisation of :ProteinModel for sequence-to-structure predictors.
:StructurePredictionModel
    rdf:type rdfs:Class ;
    rdfs:label "Structure Prediction Model"@en ;
    rdfs:comment "Protein structure prediction models that predict 3D protein structures from amino acid sequences"@en ;
    rdfs:subClassOf :ProteinModel .

# Specialisation of :ProteinModel for language models over protein sequences.
:ProteinLanguageModel
    rdf:type rdfs:Class ;
    rdfs:label "Protein Language Model"@en ;
    rdfs:comment "Language models trained on protein sequences that learn evolutionary patterns and biological properties"@en ;
    rdfs:subClassOf :ProteinModel .

# Stand-alone class (not declared as a subclass of :ProteinModel).
:SequenceAtlas
    rdf:type rdfs:Class ;
    rdfs:comment "Comprehensive maps of protein sequences and predicted structures across life"@en ;
    rdfs:label "Sequence Atlas"@en .

# Local descriptive properties attached to models and atlases.
#
# Range note: every value given for these properties in this file is a
# language-tagged literal ("..."@en). In RDF 1.1 such literals have datatype
# rdf:langString, not xsd:string, so a declared range of xsd:string is
# contradicted by the data. The ranges are therefore rdf:langString.

# Free-text statement of the data scale associated with a resource.
:hasTrainingScale a rdf:Property ;
    rdfs:label "has training scale"@en ;
    rdfs:range rdf:langString .

# Free-text summary of reported accuracy figures.
:hasAccuracyMetric a rdf:Property ;
    rdfs:label "has accuracy metric"@en ;
    rdfs:range rdf:langString .

# Free-text name of the model architecture.
:usesArchitecture a rdf:Property ;
    rdfs:label "uses architecture"@en ;
    rdfs:range rdf:langString .

# Declared but not used on any subject in this file; range kept in line with
# the properties above.
:binderDesignAlgorithm a rdf:Property ;
    rdfs:label "binder design algorithm"@en ;
    rdfs:range rdf:langString .

# Declared but not used on any subject in this file; range kept in line with
# the properties above.
:hasValidationMethod a rdf:Property ;
    rdfs:label "has validation method"@en ;
    rdfs:range rdf:langString .

# Publisher of the article.
# NOTE(review): confirm that the DBpedia resource below exists and denotes this organization.
:biohubOrganization
    rdf:type schema:Organization ;
    schema:url <https://biohub.ai> ;
    schema:name "Biohub"@en ;
    schema:description "A scientific research organization developing world models of protein biology"@en ;
    owl:sameAs <http://dbpedia.org/resource/Biohub> .

# The ESM program as a whole; this is the node the article is schema:about.
:esmResearchProgram a schema:SoftwareApplication ;
    schema:name "ESM (Evolutionary Scale Models)"@en ;
    schema:description "A world model of protein biology comprising ESMFold2, ESM Atlas, and ESMC for protein structure prediction, sequence mapping, and language modeling"@en ;
    schema:applicationCategory "Scientific Research Software"@en ;
    schema:operatingSystem "Cloud Platform"@en ;
    # Link the program to the three artifacts its description says it comprises,
    # so they are reachable from the article instead of being disconnected nodes.
    schema:hasPart :esmfold2Model, :esmAtlas, :esmCModel ;
    # Same IRI as before, written with the file's default prefix.
    schema:offers :biohubPlatformAccess .

# Zero-price offer referenced from :esmResearchProgram via schema:offers.
:biohubPlatformAccess a schema:Offer ;
    schema:price "0"^^xsd:float ;
    # Currency codes are language-neutral identifiers, so no language tag.
    schema:priceCurrency "USD" ;
    # schema.org's ItemAvailability enumeration has no "Available" member;
    # InStock is the member that expresses plain availability.
    schema:availability schema:InStock .

# ESMFold2: the structure prediction model.
:esmfold2Model
    rdf:type :StructurePredictionModel ,
             schema:SoftwareApplication ;
    schema:name "ESMFold2"@en ;
    schema:applicationCategory "Protein Structure Prediction"@en ;
    schema:description "State-of-the-art protein structure prediction model using looped transformer architecture that predicts protein structure and powers the design of new proteins"@en ;
    :usesArchitecture "Looped Transformer"@en ;
    :hasAccuracyMetric "55% antibody-antigen complexes; 71% protein-protein interactions (single sequence); 77% with alignment data"@en ;
    :hasTrainingScale "Not applicable (inference-time compute scaling via loop iterations)"@en ;
    # Same node as the :preprint resource described in this file.
    schema:citation :preprint .

# Inference latency is not a training scale. The value "9.4 seconds per
# 1024-length protein" was previously attached with :hasTrainingScale; it is
# now carried by a dedicated property declared here.
:hasInferenceSpeed a rdf:Property ;
    rdfs:label "has inference speed"@en ;
    rdfs:range rdf:langString .

# ESMFold2-Fast: the speed-optimised variant of the structure predictor.
# Typed as schema:SoftwareApplication as well, matching the other model nodes.
:esmfold2Fast a :StructurePredictionModel, schema:SoftwareApplication ;
    schema:name "ESMFold2-Fast"@en ;
    schema:description "Fast protein structure prediction variant that predicts a 1024-length protein structure in 9.4 seconds while outperforming existing models on antibody-antigen folding"@en ;
    :hasInferenceSpeed "9.4 seconds per 1024-length protein"@en ;
    :usesArchitecture "Looped Transformer (Optimized)"@en .

# ESMC (ESM Cambrian): the protein language model.
:esmCModel
    rdf:type :ProteinLanguageModel ,
             schema:SoftwareApplication ;
    schema:name "ESMC (ESM Cambrian)"@en ;
    schema:applicationCategory "Protein Language Modeling"@en ;
    schema:description "State-of-the-art protein language model trained on approximately 2.8 billion sequences drawn from across all of life, providing a foundation for modeling sequence, structure, and function of proteins"@en ;
    :usesArchitecture "Transformer"@en ;
    :hasTrainingScale "~2.8 billion sequences"@en ;
    # Same node as the :preprint resource described in this file.
    schema:citation :preprint .

# ESM Atlas: the sequence/structure map.
:esmAtlas
    rdf:type :SequenceAtlas ,
             schema:SoftwareApplication ;
    schema:name "ESM Atlas"@en ;
    schema:applicationCategory "Protein Sequence Atlas"@en ;
    schema:description "A map of 6.8 billion sequences and 1.1 billion predicted structures, enabling the sequences and structures of proteins across all of life to be studied as a complete picture"@en ;
    # Here the property records the size of the atlas contents.
    :hasTrainingScale "6.8 billion sequences; 1.1 billion predicted structures"@en .

# Kernel/tooling partner named in the description below.
:nvidiaCorporation
    rdf:type schema:Organization ;
    schema:name "NVIDIA"@en ;
    schema:url <https://www.nvidia.com> ;
    owl:sameAs <http://dbpedia.org/resource/NVIDIA> ;
    schema:description "Partner providing TransformerEngine and cuEquivariance kernels for training ESMC and ESMFold2"@en .

# Partner platforms through which ESM is offered. Each node carries the same
# three facts: display name, description, and home page URL.

:awsBioDiscovery
    rdf:type schema:Organization ;
    schema:name "AWS Bio Discovery"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://aws.amazon.com/> .

:benchlingAI
    rdf:type schema:Organization ;
    schema:name "Benchling AI"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://www.benchling.com> .

:modalPlatform
    rdf:type schema:Organization ;
    schema:name "Modal"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://modal.com> .

:phyloplatform
    rdf:type schema:Organization ;
    schema:name "Phylo"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://phyl.io> .

:sandboxaqPlatform
    rdf:type schema:Organization ;
    schema:name "SandboxAQ"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://sandboxaq.com> .

:tamarindBio
    rdf:type schema:Organization ;
    schema:name "Tamarind Bio"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://tamarind.bio> .

:toolUniverse
    rdf:type schema:Organization ;
    schema:name "Tool Universe"@en ;
    schema:description "Partner platform for ESM access"@en ;
    schema:url <https://tooluniverse.com> .

# The five therapeutic targets named in the article.
#
# Wikidata links: the concept IRI for a Wikidata item has the form
# http://www.wikidata.org/entity/Q… ; the /wiki/Q… form is the URL of the
# human-readable page, which is a different resource and must not be the
# object of owl:sameAs. The links below use the /entity/ form.
# NOTE(review): the Q-identifiers and DBpedia resource names are carried over
# unchanged and have not been verified against Wikidata/DBpedia — confirm that
# each one denotes the intended protein before relying on these sameAs links.

:egfrTarget a schema:Product, schema:Thing ;
    schema:name "EGFR"@en ;
    schema:description "Epidermal Growth Factor Receptor - a receptor tyrosine kinase implicated in tumor growth, one of five clinically relevant therapeutic targets validated with ESMFold2"@en ;
    owl:sameAs <http://dbpedia.org/resource/Epidermal_growth_factor_receptor> ;
    owl:sameAs <http://www.wikidata.org/entity/Q173624> .

:pdgfrbTarget a schema:Product, schema:Thing ;
    schema:name "PDGFRβ"@en ;
    schema:description "Platelet-Derived Growth Factor Receptor beta - a receptor tyrosine kinase implicated in tumor growth, one of five clinically relevant therapeutic targets validated with ESMFold2"@en ;
    owl:sameAs <http://www.wikidata.org/entity/Q24760> .

:pdL1Target a schema:Product, schema:Thing ;
    schema:name "PD-L1"@en ;
    schema:description "Programmed Death-Ligand 1 - an immune checkpoint that cancer cells exploit to evade immune surveillance, one of five clinically relevant therapeutic targets validated with ESMFold2. An ESMFold2-designed scFv bound PD-L1 with 4.3 nM affinity."@en ;
    owl:sameAs <http://dbpedia.org/resource/PD-L1> ;
    owl:sameAs <http://www.wikidata.org/entity/Q3024054> .

:ctla4Target a schema:Product, schema:Thing ;
    schema:name "CTLA-4"@en ;
    schema:description "Cytotoxic T-Lymphocyte Antigen 4 - an immune checkpoint that cancer cells exploit to evade immune surveillance, one of five clinically relevant therapeutic targets validated with ESMFold2"@en ;
    owl:sameAs <http://dbpedia.org/resource/CTLA-4> ;
    owl:sameAs <http://www.wikidata.org/entity/Q211304> .

:cd45Target a schema:Product, schema:Thing ;
    schema:name "CD45"@en ;
    schema:description "Cluster of Differentiation 45 - a regulator of immune cell signaling, one of five clinically relevant therapeutic targets validated with ESMFold2"@en ;
    owl:sameAs <http://www.wikidata.org/entity/Q232015> .

# The two binder formats discussed in the article.

:minibinderFormat
    rdf:type schema:Thing ;
    schema:description "Compact, de novo protein scaffolds with no predetermined structure used for binder design. Success rates averaged 54% to 70% at higher compute across five targets."@en ;
    schema:name "Minibinder"@en .

:scFvFormat
    rdf:type schema:Thing ;
    schema:description "Single-chain variable fragment antibodies - antibody-derived molecules using unstructured loops to bind targets. Success rates nearly doubled from 12% to 21% across five targets."@en ;
    schema:name "scFv (Single-chain Variable Fragment)"@en .

# Interpretability technique and the biological system it surfaced.
#
# Wikidata links use the concept IRI form (http://www.wikidata.org/entity/Q…);
# the /wiki/Q… form is the page URL and is not the entity itself.
# NOTE(review): the Q-identifiers are carried over unchanged and have not been
# verified — confirm each one denotes the intended concept.

:saeTechnique a schema:Thing ;
    schema:name "Sparse Autoencoders (SAE)"@en ;
    schema:description "Technique developed to identify interpretable structure in large language models, used to decompose ESMC's internal representations into more than 16,000 distinct features"@en ;
    owl:sameAs <http://www.wikidata.org/entity/Q107435> .

:crisprCasSystem a schema:Thing ;
    schema:name "CRISPR-Cas Systems"@en ;
    schema:description "Naturally occurring gene-editing systems that bacteria evolved to defend themselves against viruses. Featured in ESM Atlas SAE feature clusters, bringing together Fanzor and TnpB proteins despite high evolutionary divergence."@en ;
    owl:sameAs <http://dbpedia.org/resource/CRISPR> ;
    owl:sameAs <http://www.wikidata.org/entity/Q417987> .

# Protein families from the atlas clustering example, plus the experimental
# method used for structural verification.

:fanzorProtein
    rdf:type schema:Thing ;
    schema:description "Eukaryotic RNA-guided DNA endonuclease proteins that cluster with prokaryotic TnpB in ESM Atlas SAE features, despite high evolutionary divergence and low sequence similarity"@en ;
    schema:name "Fanzor"@en .

:tnpBProtein
    rdf:type schema:Thing ;
    schema:description "Prokaryotic evolutionary ancestor of eukaryotic Fanzor proteins, appearing in the same SAE feature cluster in ESM Atlas despite high evolutionary divergence and low sequence similarity"@en ;
    schema:name "TnpB"@en .

:cryoEMValidation
    rdf:type schema:Thing ;
    owl:sameAs <http://dbpedia.org/resource/Cryo-electron_microscopy> ;
    schema:description "Cryo-electron microscopy used to verify the structure of ESMFold2-designed minibinders bound to targets. High concordance between computational and observed models (1.204 Å RMSD) demonstrated."@en ;
    schema:name "Cryo-EM"@en .

# Concepts referenced by the latent-space and architecture sections.

:nucleophilicElbow
    rdf:type schema:Thing ;
    schema:description "A catalytic motif that has evolved independently many times, appearing in enzymes that share no common ancestor and adopt entirely different folds. Represented as an abstract feature in ESMC's latent space."@en ;
    schema:name "Nucleophilic Elbow"@en .

:alphaHelixFeature
    rdf:type schema:Thing ;
    schema:description "Primary secondary structure arrangement that forms when a protein backbone folds into stable conformations. Features representing alpha helices were discovered in ESMC's latent space."@en ;
    schema:name "Alpha Helix"@en .

:betaSheetFeature
    rdf:type schema:Thing ;
    schema:description "Primary secondary structure arrangement that forms when a protein backbone folds into stable conformations. Features representing beta sheets were discovered in ESMC's latent space."@en ;
    schema:name "Beta Sheet"@en .

:loopTransformerArch
    rdf:type schema:Thing ;
    schema:description "ESMFold2 architecture that passes representations through the same parameters multiple times, optimized through this loop during training. Enables compute scaling at inference time by running more loops."@en ;
    schema:name "Looped Transformer Architecture"@en .

:foldbenchBenchmark
    rdf:type schema:Thing ;
    schema:description "A suite of structure prediction benchmarks used to evaluate ESMFold2's state-of-the-art accuracy"@en ;
    schema:name "Foldbench"@en .

# Preprint cited by the model nodes via schema:citation.
# NOTE(review): the description of :mitLicense says the *models* are MIT
# licensed; confirm that the preprint itself is meant to carry this license.
:preprint
    rdf:type schema:Article ;
    schema:license <https://opensource.org/licenses/MIT> ;
    schema:name "ESM Research Preprint"@en ;
    schema:description "Preprint describing full results of ESM research program, to be posted on bioRxiv"@en .

# Node recording the licensing statement for the ESM models.
# NOTE(review): no model node in this file points at this resource — confirm
# whether the models should reference it (or the license URL) directly.
:mitLicense
    rdf:type schema:CreativeWork ;
    schema:license <https://opensource.org/licenses/MIT> ;
    schema:name "MIT License"@en ;
    schema:description "All ESM models released under MIT license"@en .

# FAQ section of the article; its main entities are the ten question nodes.
:faqSection
    rdf:type schema:FAQPage ,
             schema:ArticleSection ;
    schema:name "Frequently Asked Questions about ESM"@en ;
    schema:mainEntity
        :faq1 ,
        :faq2 ,
        :faq3 ,
        :faq4 ,
        :faq5 ,
        :faq6 ,
        :faq7 ,
        :faq8 ,
        :faq9 ,
        :faq10 .

# FAQ entries: each schema:Question node is paired with one schema:Answer node
# through schema:acceptedAnswer.
#
# The question text is carried by schema:name. schema.org defines no
# "question" property, so the previous schema:question triples used a term
# outside the vocabulary and were ignored by schema.org-aware consumers.

:faq1 a schema:Question ;
    schema:name "What is ESM (Evolutionary Scale Models)?"@en ;
    schema:acceptedAnswer :faq1Answer .
:faq1Answer a schema:Answer ;
    schema:text "ESM is a world model of protein biology comprising three artifacts: ESMFold2 (structure prediction), ESM Atlas (map of 6.8B sequences/1.1B structures), and ESMC (protein language model trained on ~2.8B sequences). It learns from protein sequences produced by evolution to represent, map, predict, and design proteins."@en .

:faq2 a schema:Question ;
    schema:name "How does ESMFold2 differ from other structure prediction models?"@en ;
    schema:acceptedAnswer :faq2Answer .
:faq2Answer a schema:Answer ;
    schema:text "ESMFold2 uses a looped transformer architecture rather than searching for evolutionarily related sequences (MSAs). It operates directly from ESMC's learned protein representations, capturing evolutionary information encoded during language model pretraining. It achieves 55% on antibody-antigen complexes and can predict a 1024-length protein structure in 9.4 seconds."@en .

:faq3 a schema:Question ;
    schema:name "What therapeutic targets has ESMFold2 been validated against?"@en ;
    schema:acceptedAnswer :faq3Answer .
:faq3Answer a schema:Answer ;
    schema:text "ESMFold2 has been validated against five clinically relevant targets: EGFR and PDGFRβ (receptor tyrosine kinases in tumor growth), PD-L1 and CTLA-4 (immune checkpoints cancer cells exploit), and CD45 (immune cell signaling regulator). An ESMFold2-designed scFv bound PD-L1 with 4.3 nM affinity."@en .

:faq4 a schema:Question ;
    schema:name "What is the scale of ESM Atlas?"@en ;
    schema:acceptedAnswer :faq4Answer .
:faq4Answer a schema:Answer ;
    schema:text "ESM Atlas contains 6.8 billion sequences and 1.1 billion predicted structures, enabling the sequences and structures of proteins across all of life to be studied as a complete picture."@en .

:faq5 a schema:Question ;
    schema:name "How was ESMC trained and what scaling laws were discovered?"@en ;
    schema:acceptedAnswer :faq5Answer .
:faq5Answer a schema:Answer ;
    schema:text "ESMC was trained on approximately 2.8 billion sequences drawn from across all of life. Researchers identified a scaling law linking compute power used in training to how accurately representations capture biology. In ESMC, this powers linear returns with scale, leading to state-of-the-art protein representations."@en .

:faq6 a schema:Question ;
    schema:name "What are minibinders and scFvs in protein design?"@en ;
    schema:acceptedAnswer :faq6Answer .
:faq6Answer a schema:Answer ;
    schema:text "Minibinders are compact, de novo protein scaffolds with no predetermined structure used for binder design. scFvs (single-chain variable fragment antibodies) are antibody-derived molecules that use unstructured loops to bind targets. ESMFold2 can design both formats computationally with therapeutic-relevant affinities."@en .

:faq7 a schema:Question ;
    schema:name "What did sparse autoencoders reveal about ESMC's latent space?"@en ;
    schema:acceptedAnswer :faq7Answer .
:faq7Answer a schema:Answer ;
    schema:text "Sparse autoencoders decomposed ESMC's internal representations into more than 16,000 distinct features. The model independently recovered basic organizing principles of biology: amino acid chemistry, local structural interactions, abstract functional concepts recurring across unrelated proteins, and evolutionary themes connecting all of life."@en .

:faq8 a schema:Question ;
    schema:name "How does ESMFold2 achieve compute-time scaling?"@en ;
    schema:acceptedAnswer :faq8Answer .
:faq8Answer a schema:Answer ;
    schema:text "ESMFold2 uses a looped transformer where representations pass through the same parameters multiple times. The model learns to use each pass to refine its structural representation based on previous computations. This allows compute scaling at inference by running more loops without retraining, allocating more capacity to harder predictions."@en .

:faq9 a schema:Question ;
    schema:name "What partner platforms provide access to ESM?"@en ;
    schema:acceptedAnswer :faq9Answer .
:faq9Answer a schema:Answer ;
    schema:text "ESM is available through partner platforms including AWS Bio Discovery, Benchling AI, Modal, Phylo, SandboxAQ, Tamarind Bio, and Tool Universe. ESMFold2, ESMC, and ESM Atlas are available at the Biohub Platform."@en .

:faq10 a schema:Question ;
    schema:name "What is the significance of ESM for medicine?"@en ;
    schema:acceptedAnswer :faq10Answer .
:faq10Answer a schema:Answer ;
    schema:text "ESM enables computational protein binder design validated against five clinically relevant targets in oncology and immunology. When digital representations of biology become accurate enough, protein designs can be tested computationally before they reach the bench. This is particularly promising for cancer and rare diseases where much of disease is individual."@en .

# Glossary of the article: a term set listing the ten term nodes.
:glossarySection
    rdf:type schema:DefinedTermSet ;
    schema:name "ESM Protein Biology Glossary"@en ;
    schema:hasDefinedTerm
        :term1 ,
        :term2 ,
        :term3 ,
        :term4 ,
        :term5 ,
        :term6 ,
        :term7 ,
        :term8 ,
        :term9 ,
        :term10 .

# Glossary entries listed by :glossarySection via schema:hasDefinedTerm.
#
# The term label is carried by schema:name. schema.org defines no "termName"
# property (DefinedTerm has name, description, termCode and inDefinedTermSet),
# so the previous schema:termName triples used a term outside the vocabulary.
# Each entry also points back to its set with schema:inDefinedTermSet.

:term1 a schema:DefinedTerm ;
    schema:name "ESM (Evolutionary Scale Models)"@en ;
    schema:description "Evolutionary Scale Models - A world model of protein biology comprising ESMFold2, ESM Atlas, and ESMC that learns from protein sequences produced by evolution to represent, map, predict, and design proteins."@en ;
    schema:inDefinedTermSet :glossarySection .

:term2 a schema:DefinedTerm ;
    schema:name "ESMFold2"@en ;
    schema:description "State-of-the-art protein structure prediction model using looped transformer architecture that predicts protein structure from amino acid sequences without requiring multiple sequence alignments."@en ;
    schema:inDefinedTermSet :glossarySection .

:term3 a schema:DefinedTerm ;
    schema:name "ESMC (ESM Cambrian)"@en ;
    schema:description "The protein language model component of ESM, trained on approximately 2.8 billion sequences from across all of life, providing representations that capture fundamental principles of protein structure and function."@en ;
    schema:inDefinedTermSet :glossarySection .

:term4 a schema:DefinedTerm ;
    schema:name "ESM Atlas"@en ;
    schema:description "A map of 6.8 billion protein sequences and 1.1 billion predicted structures, enabling comprehensive study of proteins across all of life."@en ;
    schema:inDefinedTermSet :glossarySection .

:term5 a schema:DefinedTerm ;
    schema:name "Protein Sequence"@en ;
    schema:description "Chains of 20 chemical building blocks (amino acids) whose order determines how the chain folds into a three-dimensional structure, and folding determines function."@en ;
    schema:inDefinedTermSet :glossarySection .

:term6 a schema:DefinedTerm ;
    schema:name "Protein Structure"@en ;
    schema:description "The three-dimensional arrangement of atoms in a protein, determined by the amino acid sequence, which determines the protein's biological function."@en ;
    schema:inDefinedTermSet :glossarySection .

:term7 a schema:DefinedTerm ;
    schema:name "Minibinder"@en ;
    schema:description "Compact, de novo protein scaffolds with no predetermined structure used for designing protein binders computationally."@en ;
    schema:inDefinedTermSet :glossarySection .

:term8 a schema:DefinedTerm ;
    schema:name "scFv (Single-chain Variable Fragment)"@en ;
    schema:description "Single-chain variable fragment antibodies - antibody-derived molecules that use unstructured loops to bind their targets, particularly demanding examples of the binder design problem."@en ;
    schema:inDefinedTermSet :glossarySection .

:term9 a schema:DefinedTerm ;
    schema:name "Sparse Autoencoders (SAE)"@en ;
    schema:description "A technique developed to identify interpretable structure in large language models by decomposing internal representations into distinct features."@en ;
    schema:inDefinedTermSet :glossarySection .

:term10 a schema:DefinedTerm ;
    schema:name "Alpha Helix and Beta Sheet"@en ;
    schema:description "The two primary secondary structure arrangements that form when a protein backbone folds into its most stable conformations."@en ;
    schema:inDefinedTermSet :glossarySection .

# Binder-design walkthrough: a HowTo made of seven step nodes.
:howtoSection
    rdf:type schema:HowTo ;
    schema:name "How to Design Protein Binders with ESMFold2"@en ;
    schema:description "Step-by-step guide to computationally designing protein binders using ESMFold2 for clinically relevant molecular targets"@en ;
    schema:step
        :step1 ,
        :step2 ,
        :step3 ,
        :step4 ,
        :step5 ,
        :step6 ,
        :step7 .

# Steps of :howtoSection. schema:position gives the 1-based order; the bare
# numerals below are Turtle shorthand for xsd:integer literals.

:step1
    rdf:type schema:HowToStep ;
    schema:position 1 ;
    schema:name "Select Target Molecule"@en ;
    schema:text "Choose a clinically relevant molecular target such as EGFR, PDGFRβ, PD-L1, CTLA-4, or CD45. Define the binding site and desired affinity characteristics for therapeutic activity."@en .

:step2
    rdf:type schema:HowToStep ;
    schema:position 2 ;
    schema:name "Choose Binder Format"@en ;
    schema:text "Select between minibinders (compact de novo scaffolds with no predetermined structure) or scFvs (single-chain variable fragment antibodies) depending on therapeutic requirements and target characteristics."@en .

:step3
    rdf:type schema:HowToStep ;
    schema:position 3 ;
    schema:name "Define Design Constraints"@en ;
    schema:text "Establish parameters for the binder design including required affinity (nanomolar potency), specificity requirements, and structural constraints for the target binding interface."@en .

:step4
    rdf:type schema:HowToStep ;
    schema:position 4 ;
    schema:name "Run ESMFold2 Design Algorithm"@en ;
    schema:text "Use ESMFold2's design algorithm that searches through the joint model of sequence and structure for proteins predicted to bind the target. Allocate compute resources based on desired success rates (higher compute yields up to 70% minibinder success)."@en .

:step5
    rdf:type schema:HowToStep ;
    schema:position 5 ;
    schema:name "Evaluate Predicted Binders"@en ;
    schema:text "Review computational predictions for binding affinity and selectivity. For scFvs, expect success rates around 21% with higher compute. Select top candidates for experimental validation."@en .

:step6
    rdf:type schema:HowToStep ;
    schema:position 6 ;
    schema:name "Validate in Laboratory"@en ;
    schema:text "Test designed binders experimentally using cell-based assays to measure affinity and functional activity. Verify structure using cryo-electron microscopy if crystal structure confirmation is needed."@en .

:step7
    rdf:type schema:HowToStep ;
    schema:position 7 ;
    schema:name "Iterate and Optimize"@en ;
    schema:text "Use experimental feedback to refine design parameters. ESM enables rapid iteration - designs can be computationally validated before bench experiments, accelerating the design cycle for therapeutic protein development."@en .

# Technical section of the article and its three sub-sections.
# The sub-section nodes were previously not referenced by any triple, so they
# could not be reached from the article; schema:hasPart now links them to
# their parent section.
:technicalSection a schema:ArticleSection ;
    schema:name "Technical Details of ESM Architecture"@en ;
    schema:description "Deep dive into the technical architecture of ESMC and ESMFold2 including transformer language models, looped transformer architecture, and scaling laws"@en ;
    schema:hasPart :technicalDetail1, :technicalDetail2, :technicalDetail3 .

:technicalDetail1 a schema:ArticleSection ;
    schema:name "Transformer Language Model Evolution"@en ;
    schema:text "ESM program developed the first transformer language model of protein sequences in 2019. Core scientific hypothesis: training a language model across sequences of all life causes it to internalize fundamental properties governing protein biology - rules underlying protein folding, interaction, and function."@en .

:technicalDetail2 a schema:ArticleSection ;
    schema:name "Scaling Laws and Representation Quality"@en ;
    schema:text "ESMC identified scaling law linking compute power used in training to degree representations accurately capture biology. Linear returns with scale lead to state-of-the-art protein representations."@en .

:technicalDetail3 a schema:ArticleSection ;
    schema:name "Looped Transformer Architecture"@en ;
    schema:text "ESMFold2 uses looped transformer where representations pass through same parameters multiple times, optimized through this loop during training. Each pass refines structural representation based on previous computation."@en .

# Therapeutic-results section of the article and its three sub-sections.
# The sub-section nodes were previously not referenced by any triple, so they
# could not be reached from the article; schema:hasPart now links them to
# their parent section.
:therapeuticSection a schema:ArticleSection ;
    schema:name "Therapeutic Protein Design Results"@en ;
    schema:description "Results of ESMFold2 validation against five clinically relevant targets in oncology and immunology"@en ;
    schema:hasPart :therapeuticDetail1, :therapeuticDetail2, :therapeuticDetail3 .

:therapeuticDetail1 a schema:ArticleSection ;
    schema:name "Five Validated Targets"@en ;
    schema:text "Five targets central to oncology and immunology: EGFR and PDGFRβ (receptor tyrosine kinases implicated in tumor growth); PD-L1 and CTLA-4 (immune checkpoints that cancer cells exploit to evade immune surveillance); CD45 (regulator of immune cell signaling)."@en .

:therapeuticDetail2 a schema:ArticleSection ;
    schema:name "Design Success Rates"@en ;
    schema:text "Averaged across targets, minibinder success rates rose from 54% to 70% at higher compute. For scFvs, success rates nearly doubled from 12% to 21%. ESMFold2-designed de novo scFv bound PD-L1 with measured affinity of 4.3 nM."@en .

:therapeuticDetail3 a schema:ArticleSection ;
    schema:name "Cryo-EM Verification"@en ;
    schema:text "Using cryo-electron microscopy, researchers verified structure of ESMFold2-designed minibinder bound to its target. High concordance between computational and observed models demonstrated 1.204 Å RMSD, exemplifying ESMFold2's design capabilities."@en .

# Latent-space section of the article and its three sub-sections.
# The sub-section nodes were previously not referenced by any triple, so they
# could not be reached from the article; schema:hasPart now links them to
# their parent section.
:latentSpaceSection a schema:ArticleSection ;
    schema:name "Organization of Concepts in ESMC's Latent Space"@en ;
    schema:description "How sparse autoencoders revealed the structure of concepts learned by ESMC without any prior biological knowledge"@en ;
    schema:hasPart :latentDetail1, :latentDetail2, :latentDetail3 .

:latentDetail1 a schema:ArticleSection ;
    schema:name "Feature Decomposition with SAEs"@en ;
    schema:text "Sparse autoencoders decomposed ESMC's internal representations into more than 16,000 distinct features. The model, trained only to predict amino acid sequences with no prior biological knowledge, independently recovered many basic organizing principles of biology."@en .

:latentDetail2 a schema:ArticleSection ;
    schema:name "Feature Categories Discovered"@en ;
    schema:text "Features discovered: specific amino acids; classes of amino acids (aromatics, small hydrophobics); context-dependent configurations; alpha helices and beta sheets; abstract concepts like cellular localization signals, post-translational modifications, local biochemical and electrostatic environments, functional classes independent of fold."@en .

:latentDetail3 a schema:ArticleSection ;
    schema:name "Nucleophilic Elbow Example"@en ;
    schema:text "Example: nucleophilic elbow, a catalytic motif that evolved independently many times, appearing in enzymes sharing no common ancestor and adopting entirely different folds. Features representing fundamental concepts in proteins combine to create complex functions."@en .

# Atlas section of the article and its two sub-sections.
# The sub-section nodes were previously not referenced by any triple, so they
# could not be reached from the article; schema:hasPart now links them to
# their parent section.
:atlasSection a schema:ArticleSection ;
    schema:name "ESM Atlas: Map of Known and Unknown Biology"@en ;
    schema:description "How ESM Atlas reveals evolutionary relationships and functional connections across all of life"@en ;
    schema:hasPart :atlasDetail1, :atlasDetail2 .

:atlasDetail1 a schema:ArticleSection ;
    schema:name "Scale and Coverage"@en ;
    schema:text "ESM Atlas is a map of 6.8 billion sequences and 1.1 billion predicted structures. The atlas enables sequences and structures of proteins across all of life to be studied as a complete picture."@en .

:atlasDetail2 a schema:ArticleSection ;
    schema:name "CRISPR-Cas Discovery"@en ;
    schema:text "In SAE feature clusters derived from ESM Atlas, a cluster of RNA-guided DNA endonucleases - functional class including CRISPR-Cas systems - brings together eukaryotic Fanzor proteins and their evolutionary ancestor prokaryotic TnpB, despite high evolutionary divergence and low sequence similarity."@en .

# Outlook section of the article and its three sub-sections.
# The sub-section nodes were previously not referenced by any triple, so they
# could not be reached from the article; schema:hasPart now links them to
# their parent section.
:possibilitiesSection a schema:ArticleSection ;
    schema:name "What This Makes Possible"@en ;
    schema:description "Future implications of ESM for medicine, drug discovery, and understanding biology"@en ;
    schema:hasPart :possibilityDetail1, :possibilityDetail2, :possibilityDetail3 .

:possibilityDetail1 a schema:ArticleSection ;
    schema:name "Computational Scale Advantages"@en ;
    schema:text "Computing structures for 1.1 billion proteins in a week, or screening millions of candidates in hours, isn't just faster science. Ability to run experiments virtually at scale not possible in laboratory starts to unlock new possibilities in biology."@en .

:possibilityDetail2 a schema:ArticleSection ;
    schema:name "Personalized Medicine Potential"@en ;
    schema:text "While disease follows common patterns, much of disease is individual. For cancer and rare diseases, potential is most immediate. ESM can design lab-validated protein binders for five clinically relevant targets in days."@en .

:possibilityDetail3 a schema:ArticleSection ;
    schema:name "Future of Biology"@en ;
    schema:text "Future of biology will depend on systems combining scaled data, predictive models, and experimental feedback. This world model is an important step toward that future: an open foundation for understanding proteins, discovering new biology, and designing molecular tools that can help prevent, treat, or cure disease."@en .

# Description of the generator tool. Name and description carry the @en tag
# used on every other schema:name / schema:description literal in this file.
# NOTE(review): the article's prov:wasGeneratedBy points at
# <…/kg-generator#this>, not at this node — confirm whether the two are meant
# to be the same resource.
:kgSkill a schema:SoftwareApplication ;
    schema:name "kg-generator skill"@en ;
    schema:url <https://github.com/OpenLinkSoftware/ai-agent-skills/tree/main/kg-generator> ;
    schema:description "AI agent skill for generating knowledge graphs from web content"@en .
